remdb 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +103 -5
- rem/agentic/context_builder.py +36 -9
- rem/agentic/mcp/tool_wrapper.py +161 -18
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +172 -30
- rem/agentic/schema.py +8 -4
- rem/api/deps.py +3 -5
- rem/api/main.py +26 -4
- rem/api/mcp_router/resources.py +15 -10
- rem/api/mcp_router/server.py +11 -3
- rem/api/mcp_router/tools.py +418 -4
- rem/api/middleware/tracking.py +5 -5
- rem/api/routers/admin.py +218 -1
- rem/api/routers/auth.py +349 -6
- rem/api/routers/chat/completions.py +255 -7
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +126 -19
- rem/api/routers/feedback.py +134 -14
- rem/api/routers/messages.py +24 -15
- rem/api/routers/query.py +6 -3
- rem/auth/__init__.py +13 -3
- rem/auth/jwt.py +352 -0
- rem/auth/middleware.py +115 -10
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +42 -0
- rem/cli/commands/cluster.py +617 -168
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +66 -22
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/schema.py +6 -5
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +513 -0
- rem/services/email/templates.py +360 -0
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +127 -6
- rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
- rem/services/postgres/repository.py +5 -4
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/session/compression.py +120 -50
- rem/services/session/reload.py +14 -7
- rem/services/user_service.py +41 -9
- rem/settings.py +442 -23
- rem/sql/migrations/001_install.sql +156 -0
- rem/sql/migrations/002_install_models.sql +1951 -88
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +139 -10
- rem/utils/sql_paths.py +146 -0
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/METADATA +218 -180
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/RECORD +83 -68
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/WHEEL +0 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/entry_points.txt +0 -0
rem/cli/commands/schema.py
CHANGED
@@ -16,6 +16,7 @@ from loguru import logger
 
 from ...settings import settings
 from ...services.postgres.schema_generator import SchemaGenerator
+from ...utils.sql_paths import get_package_sql_dir, get_package_migrations_dir
 
 
 def _import_model_modules() -> list[str]:
@@ -55,7 +56,7 @@ def _import_model_modules() -> list[str]:
     "--output-dir",
     type=click.Path(path_type=Path),
     default=None,
-    help=
+    help="Base output directory (default: package sql/migrations)",
 )
 def generate(output: Path, output_dir: Path | None):
     """
@@ -110,8 +111,8 @@ def generate(output: Path, output_dir: Path | None):
     models = registry.get_models(include_core=True)
     click.echo(f"Generating schema from {len(models)} registered models")
 
-    # Default to migrations directory
-    actual_output_dir = output_dir or
+    # Default to package migrations directory
+    actual_output_dir = output_dir or get_package_migrations_dir()
     generator = SchemaGenerator(output_dir=actual_output_dir)
 
     # Generate schema from registry
@@ -124,7 +125,7 @@ def generate(output: Path, output_dir: Path | None):
     # Generate background indexes in parent sql dir
     background_indexes = generator.generate_background_indexes()
     if background_indexes:
-        bg_file =
+        bg_file = get_package_sql_dir() / "background_indexes.sql"
         bg_file.write_text(background_indexes)
         click.echo(f"✓ Background indexes: {bg_file}")
 
@@ -204,7 +205,7 @@ def validate():
     "-o",
     type=click.Path(path_type=Path),
     default=None,
-    help=
+    help="Output file for background indexes (default: package sql/background_indexes.sql)",
 )
 def indexes(output: Path):
     """
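Note: the `get_package_sql_dir` and `get_package_migrations_dir` helpers referenced above come from the new `rem/utils/sql_paths.py` (+146 lines, not shown in this diff). A minimal sketch of what such helpers might look like, assuming they resolve directories bundled inside the installed `rem` package via `importlib.resources`; the real implementation may differ.

```python
# Hypothetical sketch only -- rem/utils/sql_paths.py itself is not shown in this diff.
from importlib.resources import files
from pathlib import Path


def get_package_sql_dir() -> Path:
    """Return the sql/ directory shipped inside the installed rem package (assumed layout)."""
    return Path(str(files("rem") / "sql"))


def get_package_migrations_dir() -> Path:
    """Return the sql/migrations/ directory inside the package (assumed layout)."""
    return get_package_sql_dir() / "migrations"
```

Resolving these paths from the package rather than the current working directory is what lets `rem schema generate` write migrations and `background_indexes.sql` into the installed package by default.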
rem/cli/commands/session.py
ADDED
@@ -0,0 +1,336 @@
+"""
+CLI command for viewing and simulating session conversations.
+
+Usage:
+    rem session show <user_id> [--session-id] [--role user|assistant|system]
+    rem session show <user_id> --simulate-next [--save] [--custom-sim-prompt "..."]
+
+Examples:
+    # Show all messages for a user
+    rem session show 11111111-1111-1111-1111-111111111001
+
+    # Show only user messages
+    rem session show 11111111-1111-1111-1111-111111111001 --role user
+
+    # Simulate next user message
+    rem session show 11111111-1111-1111-1111-111111111001 --simulate-next
+
+    # Simulate with custom prompt and save
+    rem session show 11111111-1111-1111-1111-111111111001 --simulate-next --save \
+        --custom-sim-prompt "Respond as an anxious patient"
+"""
+
+import asyncio
+from pathlib import Path
+from typing import Literal
+
+import click
+import yaml
+from loguru import logger
+
+from ...models.entities.user import User
+from ...models.entities.message import Message
+from ...services.postgres import get_postgres_service
+from ...services.postgres.repository import Repository
+from ...settings import settings
+
+
+SIMULATOR_PROMPT = """You are simulating a patient in a mental health conversation.
+
+## Context
+You are continuing a conversation with a clinical evaluation agent. Based on the
+user profile and conversation history below, generate the next realistic patient message.
+
+## User Profile
+{user_profile}
+
+## Conversation History
+{conversation_history}
+
+## Instructions
+- Stay in character as the patient described in the profile
+- Your response should be natural, conversational, and consistent with the patient's presentation
+- Consider the patient's risk level, symptoms, and communication style
+- Do NOT include any metadata or role labels - just the raw message content
+- Keep responses concise (1-3 sentences typical for conversation)
+
+Generate the next patient message:"""
+
+
+async def _load_user_and_messages(
+    user_id: str,
+    session_id: str | None = None,
+    role_filter: str | None = None,
+    limit: int = 100,
+) -> tuple[User | None, list[Message]]:
+    """Load user profile and messages from database."""
+    pg = get_postgres_service()
+    if not pg:
+        logger.error("PostgreSQL not available")
+        return None, []
+
+    await pg.connect()
+
+    try:
+        # Load user
+        user_repo = Repository(User, "users", db=pg)
+        user = await user_repo.get_by_id(user_id, tenant_id="default")
+
+        # Load messages
+        message_repo = Repository(Message, "messages", db=pg)
+        filters = {"user_id": user_id}
+        if session_id:
+            filters["session_id"] = session_id
+
+        messages = await message_repo.find(
+            filters=filters,
+            order_by="created_at ASC",
+            limit=limit,
+        )
+
+        # Filter by role if specified
+        if role_filter:
+            messages = [m for m in messages if m.message_type == role_filter]
+
+        return user, messages
+
+    finally:
+        await pg.disconnect()
+
+
+def _format_user_yaml(user: User | None) -> str:
+    """Format user profile as YAML."""
+    if not user:
+        return "# No user found"
+
+    data = {
+        "id": str(user.id),
+        "name": user.name,
+        "summary": user.summary,
+        "interests": user.interests,
+        "preferred_topics": user.preferred_topics,
+        "metadata": user.metadata,
+    }
+    return yaml.dump(data, default_flow_style=False, allow_unicode=True)
+
+
+def _format_messages_yaml(messages: list[Message]) -> str:
+    """Format messages as YAML."""
+    if not messages:
+        return "# No messages found"
+
+    data = []
+    for msg in messages:
+        data.append({
+            "role": msg.message_type or "unknown",
+            "content": msg.content,
+            "session_id": msg.session_id,
+            "created_at": msg.created_at.isoformat() if msg.created_at else None,
+        })
+    return yaml.dump(data, default_flow_style=False, allow_unicode=True)
+
+
+def _format_conversation_for_llm(messages: list[Message]) -> str:
+    """Format conversation history for LLM context."""
+    lines = []
+    for msg in messages:
+        role = msg.message_type or "unknown"
+        lines.append(f"[{role.upper()}]: {msg.content}")
+    return "\n\n".join(lines) if lines else "(No previous messages)"
+
+
+async def _simulate_next_message(
+    user: User | None,
+    messages: list[Message],
+    custom_prompt: str | None = None,
+) -> str:
+    """Use LLM to simulate the next patient message."""
+    from pydantic_ai import Agent
+
+    # Build context
+    user_profile = _format_user_yaml(user) if user else "Unknown patient"
+    conversation_history = _format_conversation_for_llm(messages)
+
+    # Use custom prompt or default
+    if custom_prompt:
+        # Check if it's a file path
+        if Path(custom_prompt).exists():
+            prompt_template = Path(custom_prompt).read_text()
+        else:
+            prompt_template = custom_prompt
+        # Simple variable substitution
+        prompt = prompt_template.replace("{user_profile}", user_profile)
+        prompt = prompt.replace("{conversation_history}", conversation_history)
+    else:
+        prompt = SIMULATOR_PROMPT.format(
+            user_profile=user_profile,
+            conversation_history=conversation_history,
+        )
+
+    # Create simple agent for simulation
+    agent = Agent(
+        model=settings.llm.default_model,
+        system_prompt="You are a patient simulator. Generate realistic patient responses.",
+    )
+
+    result = await agent.run(prompt)
+    return result.output
+
+
+async def _save_message(
+    user_id: str,
+    session_id: str | None,
+    content: str,
+    role: str = "user",
+) -> Message:
+    """Save a simulated message to the database."""
+    from uuid import uuid4
+
+    pg = get_postgres_service()
+    if not pg:
+        raise RuntimeError("PostgreSQL not available")
+
+    await pg.connect()
+
+    try:
+        message_repo = Repository(Message, "messages", db=pg)
+
+        message = Message(
+            id=uuid4(),
+            user_id=user_id,
+            tenant_id="default",
+            session_id=session_id or str(uuid4()),
+            content=content,
+            message_type=role,
+        )
+
+        await message_repo.upsert(message)
+        return message
+
+    finally:
+        await pg.disconnect()
+
+
+@click.group()
+def session():
+    """Session viewing and simulation commands."""
+    pass
+
+
+@session.command("show")
+@click.argument("user_id")
+@click.option("--session-id", "-s", help="Filter by session ID")
+@click.option(
+    "--role", "-r",
+    type=click.Choice(["user", "assistant", "system", "tool"]),
+    help="Filter messages by role",
+)
+@click.option("--limit", "-l", default=100, help="Max messages to load")
+@click.option("--simulate-next", is_flag=True, help="Simulate the next patient message")
+@click.option("--save", is_flag=True, help="Save simulated message to database")
+@click.option(
+    "--custom-sim-prompt", "-p",
+    help="Custom simulation prompt (text or file path)",
+)
+def show(
+    user_id: str,
+    session_id: str | None,
+    role: str | None,
+    limit: int,
+    simulate_next: bool,
+    save: bool,
+    custom_sim_prompt: str | None,
+):
+    """
+    Show user profile and session messages.
+
+    USER_ID: The user identifier to load.
+
+    Examples:
+
+        # Show user and all messages
+        rem session show 11111111-1111-1111-1111-111111111001
+
+        # Show only assistant responses
+        rem session show 11111111-1111-1111-1111-111111111001 --role assistant
+
+        # Simulate next patient message
+        rem session show 11111111-1111-1111-1111-111111111001 --simulate-next
+
+        # Simulate and save to database
+        rem session show 11111111-1111-1111-1111-111111111001 --simulate-next --save
+    """
+    asyncio.run(_show_async(
+        user_id=user_id,
+        session_id=session_id,
+        role_filter=role,
+        limit=limit,
+        simulate_next=simulate_next,
+        save=save,
+        custom_sim_prompt=custom_sim_prompt,
+    ))
+
+
+async def _show_async(
+    user_id: str,
+    session_id: str | None,
+    role_filter: str | None,
+    limit: int,
+    simulate_next: bool,
+    save: bool,
+    custom_sim_prompt: str | None,
+):
+    """Async implementation of show command."""
+    # Load data
+    user, messages = await _load_user_and_messages(
+        user_id=user_id,
+        session_id=session_id,
+        role_filter=role_filter if not simulate_next else None,  # Need all messages for simulation
+        limit=limit,
+    )
+
+    # Display user profile
+    click.echo("\n# User Profile")
+    click.echo("---")
+    click.echo(_format_user_yaml(user))
+
+    # Display messages (apply filter for display if simulating)
+    display_messages = messages
+    if simulate_next and role_filter:
+        display_messages = [m for m in messages if m.message_type == role_filter]
+
+    click.echo("\n# Messages")
+    click.echo("---")
+    click.echo(_format_messages_yaml(display_messages))
+
+    # Simulate next message if requested
+    if simulate_next:
+        click.echo("\n# Simulated Next Message")
+        click.echo("---")
+
+        try:
+            simulated = await _simulate_next_message(
+                user=user,
+                messages=messages,
+                custom_prompt=custom_sim_prompt,
+            )
+            click.echo(f"role: user")
+            click.echo(f"content: |\n  {simulated}")
+
+            if save:
+                saved_msg = await _save_message(
+                    user_id=user_id,
+                    session_id=session_id,
+                    content=simulated,
+                    role="user",
+                )
+                logger.success(f"Saved message: {saved_msg.id}")
+
+        except Exception as e:
+            logger.error(f"Simulation failed: {e}")
+            raise
+
+
+def register_command(cli_group):
+    """Register the session command group."""
+    cli_group.add_command(session)
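The new `session` group is wired into the CLI in `rem/cli/main.py` below. As a quick smoke test, the command can be exercised with Click's test runner; this sketch assumes a reachable PostgreSQL instance and uses the example user ID from the module docstring above.

```python
# Sketch: invoking the new command via click.testing (requires a reachable database).
from click.testing import CliRunner

from rem.cli.commands.session import session

runner = CliRunner()
result = runner.invoke(
    session,
    ["show", "11111111-1111-1111-1111-111111111001", "--role", "user", "--limit", "10"],
)
print(result.output)
```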
rem/cli/dreaming.py
CHANGED
@@ -43,7 +43,7 @@ rem-dreaming full --user-id=user-123 --rem-api-url=http://localhost:8000
 Environment Variables:
 - REM_API_URL: REM API endpoint (default: http://rem-api:8000)
 - REM_EMBEDDING_PROVIDER: Embedding provider (default: text-embedding-3-small)
-- REM_DEFAULT_MODEL: LLM model (default: gpt-
+- REM_DEFAULT_MODEL: LLM model (default: gpt-4.1)
 - REM_LOOKBACK_HOURS: Default lookback window (default: 24)
 - OPENAI_API_KEY: OpenAI API key
 
@@ -83,7 +83,7 @@ def get_worker() -> DreamingWorker:
         embedding_provider=os.getenv(
             "REM_EMBEDDING_PROVIDER", "text-embedding-3-small"
         ),
-        default_model=os.getenv("REM_DEFAULT_MODEL", "gpt-
+        default_model=os.getenv("REM_DEFAULT_MODEL", "gpt-4.1"),
         lookback_hours=int(os.getenv("REM_LOOKBACK_HOURS", "24")),
     )
 
rem/cli/main.py
CHANGED
@@ -96,6 +96,7 @@ from .commands.serve import register_command as register_serve_command
 from .commands.mcp import register_command as register_mcp_command
 from .commands.scaffold import scaffold as scaffold_command
 from .commands.cluster import register_commands as register_cluster_commands
+from .commands.session import register_command as register_session_command
 
 register_schema_commands(schema)
 register_db_commands(db)
@@ -108,6 +109,7 @@ register_serve_command(cli)
 register_mcp_command(cli)
 cli.add_command(experiments_group)
 cli.add_command(scaffold_command)
+register_session_command(cli)
 
 
 def main():
rem/config.py
CHANGED
@@ -95,9 +95,16 @@ def load_config() -> dict[str, Any]:
     """
     Load configuration from ~/.rem/config.yaml.
 
+    Set REM_SKIP_CONFIG=1 to skip loading the config file (useful when using .env files).
+
     Returns:
-        Configuration dictionary (empty if file doesn't exist)
+        Configuration dictionary (empty if file doesn't exist or skipped)
     """
+    # Allow skipping config file via environment variable
+    if os.environ.get("REM_SKIP_CONFIG", "").lower() in ("1", "true", "yes"):
+        logger.debug("Skipping config file (REM_SKIP_CONFIG is set)")
+        return {}
+
     config_path = get_config_path()
 
     if not config_path.exists():
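The `REM_SKIP_CONFIG` short-circuit can be checked directly; a minimal sketch, assuming `load_config` is importable as `rem.config.load_config` as the path above suggests:

```python
# Sketch: with REM_SKIP_CONFIG set, load_config() returns {} without reading ~/.rem/config.yaml.
import os

os.environ["REM_SKIP_CONFIG"] = "1"

from rem.config import load_config  # assumed import path, based on the file shown above

assert load_config() == {}
```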
rem/models/core/experiment.py
CHANGED
@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
 
     path: str = Field(
         description=(
-            "Path to dataset
+            "Path to dataset. Format is inferred from file extension.\n"
+            "Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
             "- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
-            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/
+            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
             "- Hybrid: S3 URI for data, Git path for schema"
         )
     )
 
-    format: Literal["csv", "jsonl", "parquet", "json"] = Field(
-        default="csv",
-        description="Dataset file format"
-    )
-
     schema_path: str | None = Field(
         default=None,
         description=(
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
     datasets:
       ground_truth:
         location: git
-        path: datasets/ground_truth.csv
-        format: csv
+        path: datasets/ground_truth.csv  # format inferred from extension
       results:
         location: git
         base_path: results/
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
       ground_truth:
         location: s3
        path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
-        format: parquet
        schema_path: datasets/schema.yaml  # Schema in Git for documentation
      test_cases:
        location: s3
        path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
-        format: jsonl
      results:
        location: hybrid
        base_path: s3://rem-prod/experiments/cv-parser-production/results/
@@ -318,6 +311,15 @@ class ExperimentConfig(BaseModel):
         )
     )
 
+    task: str = Field(
+        default="general",
+        description=(
+            "Task name for organizing experiments by purpose.\n"
+            "Used with agent name to form directory: {agent}/{task}/\n"
+            "Examples: 'risk-assessment', 'classification', 'general'"
+        )
+    )
+
     description: str = Field(
         description="Human-readable description of experiment purpose and goals"
     )
@@ -410,6 +412,24 @@ class ExperimentConfig(BaseModel):
 
         return v
 
+    @field_validator("task")
+    @classmethod
+    def validate_task(cls, v: str) -> str:
+        """Validate task name follows conventions."""
+        if not v:
+            return "general"  # Default value
+
+        if not v.islower():
+            raise ValueError("Task name must be lowercase")
+
+        if " " in v:
+            raise ValueError("Task name cannot contain spaces (use hyphens)")
+
+        if not all(c.isalnum() or c == "-" for c in v):
+            raise ValueError("Task name can only contain lowercase letters, numbers, and hyphens")
+
+        return v
+
     @field_validator("tags")
     @classmethod
     def validate_tags(cls, v: list[str]) -> list[str]:
@@ -420,6 +440,15 @@ class ExperimentConfig(BaseModel):
         """Get the experiment directory path."""
         return Path(base_path) / self.name
 
+    def get_agent_task_dir(self, base_path: str = ".experiments") -> Path:
+        """
+        Get the experiment directory path organized by agent/task.
+
+        Returns: Path like .experiments/{agent}/{task}/
+        This is the recommended structure for S3 export compatibility.
+        """
+        return Path(base_path) / self.agent_schema_ref.name / self.task
+
     def get_config_path(self, base_path: str = ".experiments") -> Path:
         """Get the path to experiment.yaml file."""
         return self.get_experiment_dir(base_path) / "experiment.yaml"
@@ -428,6 +457,22 @@ class ExperimentConfig(BaseModel):
         """Get the path to README.md file."""
         return self.get_experiment_dir(base_path) / "README.md"
 
+    def get_evaluator_filename(self) -> str:
+        """
+        Get the evaluator filename with task prefix.
+
+        Returns: {agent_name}-{task}.yaml (e.g., siggy-risk-assessment.yaml)
+        """
+        return f"{self.agent_schema_ref.name}-{self.task}.yaml"
+
+    def get_s3_export_path(self, bucket: str, version: str = "v0") -> str:
+        """
+        Get the S3 path for exporting this experiment.
+
+        Returns: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
+        """
+        return f"s3://{bucket}/{version}/datasets/calibration/experiments/{self.agent_schema_ref.name}/{self.task}"
+
     def to_yaml(self) -> str:
         """Export configuration as YAML string."""
         import yaml
@@ -483,6 +528,7 @@ class ExperimentConfig(BaseModel):
 ## Configuration
 
 **Status**: `{self.status.value}`
+**Task**: `{self.task}`
 **Tags**: {', '.join(f'`{tag}`' for tag in self.tags) if self.tags else 'None'}
 
 ## Agent Schema
@@ -494,6 +540,7 @@ class ExperimentConfig(BaseModel):
 ## Evaluator Schema
 
 - **Name**: `{self.evaluator_schema_ref.name}`
+- **File**: `{self.get_evaluator_filename()}`
 - **Type**: `{self.evaluator_schema_ref.type}`
 
 ## Datasets
@@ -504,7 +551,6 @@ class ExperimentConfig(BaseModel):
 
 - **Location**: `{dataset.location.value}`
 - **Path**: `{dataset.path}`
-- **Format**: `{dataset.format}`
 """
         if dataset.description:
             readme += f"- **Description**: {dataset.description}\n"
@@ -575,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.GIT,
             path="datasets/ground_truth.csv",
-            format="csv",
             description="10 manually curated test cases"
         )
     },
@@ -605,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.S3,
             path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
-            format="parquet",
             schema_path="datasets/schema.yaml",
             description="10,000 CV/resume pairs with ground truth extractions"
         )
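The new `task` field and the path helpers above compose the on-disk and S3 layouts from the agent name and task. A small illustration of the resulting values; the agent name `siggy`, task `risk-assessment`, and bucket `rem-prod` are taken from the docstring examples, not from a real deployment.

```python
# Illustration of the path conventions added above (example placeholder values).
agent, task = "siggy", "risk-assessment"
bucket, version = "rem-prod", "v0"

agent_task_dir = f".experiments/{agent}/{task}"   # mirrors ExperimentConfig.get_agent_task_dir()
evaluator_file = f"{agent}-{task}.yaml"           # mirrors ExperimentConfig.get_evaluator_filename()
s3_export_path = (                                # mirrors ExperimentConfig.get_s3_export_path(bucket)
    f"s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}"
)

print(agent_task_dir)   # .experiments/siggy/risk-assessment
print(evaluator_file)   # siggy-risk-assessment.yaml
print(s3_export_path)   # s3://rem-prod/v0/datasets/calibration/experiments/siggy/risk-assessment
```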
rem/models/entities/__init__.py
CHANGED
@@ -39,6 +39,7 @@ from .shared_session import (
     SharedWithMeResponse,
     SharedWithMeSummary,
 )
+from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
 from .user import User, UserTier
 
 __all__ = [
@@ -56,6 +57,9 @@ __all__ = [
     "FeedbackCategory",
     "User",
     "UserTier",
+    "Subscriber",
+    "SubscriberStatus",
+    "SubscriberOrigin",
     "File",
     "Moment",
     "Schema",
rem/models/entities/ontology.py
CHANGED
@@ -129,7 +129,7 @@ class Ontology(CoreModel):
         file_id="file-uuid-456",
         agent_schema_id="contract-parser-v2",
         provider_name="openai",
-        model_name="gpt-
+        model_name="gpt-4.1",
         extracted_data={
             "contract_type": "supplier_agreement",
             "parties": [
rem/models/entities/ontology_config.py
CHANGED
@@ -74,7 +74,7 @@ class OntologyConfig(CoreModel):
         priority=200,  # Higher priority = runs first
         enabled=True,
         provider_name="openai",  # Override default provider
-        model_name="gpt-
+        model_name="gpt-4.1",
         tenant_id="acme-corp",
         tags=["legal", "procurement"]
     )