remdb 0.3.133__py3-none-any.whl → 0.3.157__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +310 -0
- rem/agentic/context_builder.py +5 -3
- rem/agentic/mcp/tool_wrapper.py +48 -6
- rem/agentic/providers/phoenix.py +91 -21
- rem/agentic/providers/pydantic_ai.py +77 -43
- rem/api/deps.py +2 -2
- rem/api/main.py +1 -1
- rem/api/mcp_router/server.py +2 -0
- rem/api/mcp_router/tools.py +90 -0
- rem/api/routers/auth.py +208 -4
- rem/api/routers/chat/streaming.py +77 -22
- rem/auth/__init__.py +13 -3
- rem/auth/middleware.py +66 -1
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/configure.py +3 -4
- rem/cli/commands/experiments.py +50 -49
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +4 -14
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +134 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +459 -0
- rem/services/email/templates.py +360 -0
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +19 -3
- rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
- rem/services/session/compression.py +113 -50
- rem/services/session/reload.py +14 -7
- rem/settings.py +191 -4
- rem/sql/migrations/002_install_models.sql +91 -91
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/files.py +157 -1
- rem/utils/vision.py +1 -1
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/METADATA +7 -5
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/RECORD +51 -42
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/WHEEL +0 -0
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI command for viewing and simulating session conversations.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
rem session show <user_id> [--session-id] [--role user|assistant|system]
|
|
6
|
+
rem session show <user_id> --simulate-next [--save] [--custom-sim-prompt "..."]
|
|
7
|
+
|
|
8
|
+
Examples:
|
|
9
|
+
# Show all messages for a user
|
|
10
|
+
rem session show 11111111-1111-1111-1111-111111111001
|
|
11
|
+
|
|
12
|
+
# Show only user messages
|
|
13
|
+
rem session show 11111111-1111-1111-1111-111111111001 --role user
|
|
14
|
+
|
|
15
|
+
# Simulate next user message
|
|
16
|
+
rem session show 11111111-1111-1111-1111-111111111001 --simulate-next
|
|
17
|
+
|
|
18
|
+
# Simulate with custom prompt and save
|
|
19
|
+
rem session show 11111111-1111-1111-1111-111111111001 --simulate-next --save \
|
|
20
|
+
--custom-sim-prompt "Respond as an anxious patient"
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Literal
|
|
26
|
+
|
|
27
|
+
import click
|
|
28
|
+
import yaml
|
|
29
|
+
from loguru import logger
|
|
30
|
+
|
|
31
|
+
from ...models.entities.user import User
|
|
32
|
+
from ...models.entities.message import Message
|
|
33
|
+
from ...services.postgres import get_postgres_service
|
|
34
|
+
from ...services.postgres.repository import Repository
|
|
35
|
+
from ...settings import settings
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
SIMULATOR_PROMPT = """You are simulating a patient in a mental health conversation.
|
|
39
|
+
|
|
40
|
+
## Context
|
|
41
|
+
You are continuing a conversation with a clinical evaluation agent. Based on the
|
|
42
|
+
user profile and conversation history below, generate the next realistic patient message.
|
|
43
|
+
|
|
44
|
+
## User Profile
|
|
45
|
+
{user_profile}
|
|
46
|
+
|
|
47
|
+
## Conversation History
|
|
48
|
+
{conversation_history}
|
|
49
|
+
|
|
50
|
+
## Instructions
|
|
51
|
+
- Stay in character as the patient described in the profile
|
|
52
|
+
- Your response should be natural, conversational, and consistent with the patient's presentation
|
|
53
|
+
- Consider the patient's risk level, symptoms, and communication style
|
|
54
|
+
- Do NOT include any metadata or role labels - just the raw message content
|
|
55
|
+
- Keep responses concise (1-3 sentences typical for conversation)
|
|
56
|
+
|
|
57
|
+
Generate the next patient message:"""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def _load_user_and_messages(
    user_id: str,
    session_id: str | None = None,
    role_filter: str | None = None,
    limit: int = 100,
) -> tuple[User | None, list[Message]]:
    """Fetch a user's profile together with their stored messages.

    Args:
        user_id: Identifier used both for the user lookup and the message filter.
        session_id: When given, only messages of this session are queried.
        role_filter: When given, only messages whose ``message_type`` equals
            this value are returned. The filter is applied after the query,
            i.e. after ``limit`` has already been applied.
        limit: Maximum number of messages requested from the repository.

    Returns:
        ``(user, messages)``; ``(None, [])`` when no PostgreSQL service is
        available (an error is logged in that case).
    """
    db = get_postgres_service()
    if not db:
        logger.error("PostgreSQL not available")
        return None, []

    await db.connect()
    try:
        profile = await Repository(User, "users", db=db).get_by_id(
            user_id, tenant_id="default"
        )

        criteria = {"user_id": user_id}
        if session_id:
            criteria["session_id"] = session_id
        rows = await Repository(Message, "messages", db=db).find(
            filters=criteria,
            order_by="created_at ASC",
            limit=limit,
        )

        if not role_filter:
            return profile, rows
        return profile, [row for row in rows if row.message_type == role_filter]
    finally:
        # Always release the connection, including on query errors.
        await db.disconnect()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _format_user_yaml(user: User | None) -> str:
    """Render selected user profile fields as a block-style YAML document.

    Returns the placeholder comment ``# No user found`` when ``user`` is falsy.
    """
    if not user:
        return "# No user found"

    profile = {"id": str(user.id)}
    # The remaining fields are copied from the model as-is.
    for field_name in ("name", "summary", "interests", "preferred_topics", "metadata"):
        profile[field_name] = getattr(user, field_name)
    return yaml.dump(profile, default_flow_style=False, allow_unicode=True)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _format_messages_yaml(messages: list[Message]) -> str:
    """Render messages as a YAML list of role/content/session/timestamp entries.

    Returns the placeholder comment ``# No messages found`` for an empty list.
    A missing ``message_type`` is shown as ``unknown``; a missing
    ``created_at`` is emitted as null.
    """
    if not messages:
        return "# No messages found"

    entries = [
        {
            "role": message.message_type or "unknown",
            "content": message.content,
            "session_id": message.session_id,
            "created_at": message.created_at.isoformat() if message.created_at else None,
        }
        for message in messages
    ]
    return yaml.dump(entries, default_flow_style=False, allow_unicode=True)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _format_conversation_for_llm(messages: list[Message]) -> str:
    """Flatten the conversation into ``[ROLE]: content`` paragraphs.

    Entries are separated by a blank line. A missing ``message_type`` is
    labelled ``UNKNOWN``. An empty history yields ``(No previous messages)``.
    """
    if not messages:
        return "(No previous messages)"
    return "\n\n".join(
        f"[{(entry.message_type or 'unknown').upper()}]: {entry.content}"
        for entry in messages
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def _simulate_next_message(
    user: User | None,
    messages: list[Message],
    custom_prompt: str | None = None,
) -> str:
    """Use an LLM to simulate the next patient message.

    Args:
        user: Profile rendered into the prompt; ``"Unknown patient"`` is used
            when it is missing.
        messages: Conversation so far, rendered as the history section.
        custom_prompt: Optional template, given either inline or as the path
            of a file containing it. The placeholders ``{user_profile}`` and
            ``{conversation_history}`` are substituted. When omitted,
            ``SIMULATOR_PROMPT`` is used.

    Returns:
        The text produced by the model.
    """
    from pydantic_ai import Agent

    user_profile = _format_user_yaml(user) if user else "Unknown patient"
    conversation_history = _format_conversation_for_llm(messages)

    if custom_prompt:
        # Plain substitution: unlike str.format, stray braces in a
        # user-supplied template are left untouched instead of raising.
        prompt = (
            _resolve_prompt_template(custom_prompt)
            .replace("{user_profile}", user_profile)
            .replace("{conversation_history}", conversation_history)
        )
    else:
        prompt = SIMULATOR_PROMPT.format(
            user_profile=user_profile,
            conversation_history=conversation_history,
        )

    agent = Agent(
        model=settings.llm.default_model,
        system_prompt="You are a patient simulator. Generate realistic patient responses.",
    )
    result = await agent.run(prompt)
    return result.output


def _resolve_prompt_template(custom_prompt: str) -> str:
    """Return the template text for ``custom_prompt`` (inline text or file path).

    The value is treated as a path only when it names an existing regular
    file. Probing the filesystem with arbitrary prompt text can fail (for
    example "File name too long" for a long inline prompt, or an embedded
    NUL byte); such failures mean "not a file", so the text is used as-is.
    Errors while reading an existing file still propagate.
    """
    candidate = Path(custom_prompt)
    try:
        is_file = candidate.is_file()
    except (OSError, ValueError):
        is_file = False
    if is_file:
        # Explicit encoding so prompt files decode the same on every platform.
        return candidate.read_text(encoding="utf-8")
    return custom_prompt
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
async def _save_message(
    user_id: str,
    session_id: str | None,
    content: str,
    role: str = "user",
) -> Message:
    """Persist a simulated message and return the stored model.

    Args:
        user_id: Owner of the message.
        session_id: Session to attach the message to; a fresh random UUID
            string is used when this is empty.
        content: Message text.
        role: Stored as the message's ``message_type``.

    Raises:
        RuntimeError: If no PostgreSQL service is available.
    """
    from uuid import uuid4

    db = get_postgres_service()
    if not db:
        raise RuntimeError("PostgreSQL not available")

    await db.connect()
    try:
        repo = Repository(Message, "messages", db=db)
        record = Message(
            id=uuid4(),
            user_id=user_id,
            tenant_id="default",
            session_id=session_id or str(uuid4()),
            content=content,
            message_type=role,
        )
        await repo.upsert(record)
        return record
    finally:
        # Always release the connection, including on upsert errors.
        await db.disconnect()
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@click.group()
def session():
    """Session viewing and simulation commands."""
    # Container group only; subcommands attach themselves via @session.command.
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@session.command("show")
@click.argument("user_id")
@click.option("--session-id", "-s", help="Filter by session ID")
@click.option(
    "--role", "-r",
    type=click.Choice(["user", "assistant", "system", "tool"]),
    help="Filter messages by role",
)
@click.option("--limit", "-l", default=100, help="Max messages to load")
@click.option("--simulate-next", is_flag=True, help="Simulate the next patient message")
@click.option("--save", is_flag=True, help="Save simulated message to database")
@click.option(
    "--custom-sim-prompt", "-p",
    help="Custom simulation prompt (text or file path)",
)
def show(
    user_id: str,
    session_id: str | None,
    role: str | None,
    limit: int,
    simulate_next: bool,
    save: bool,
    custom_sim_prompt: str | None,
):
    """
    Show user profile and session messages.

    USER_ID: The user identifier to load.

    Examples:

    \b
      # Show user and all messages
      rem session show 11111111-1111-1111-1111-111111111001

    \b
      # Show only assistant responses
      rem session show 11111111-1111-1111-1111-111111111001 --role assistant

    \b
      # Simulate next patient message
      rem session show 11111111-1111-1111-1111-111111111001 --simulate-next

    \b
      # Simulate and save to database
      rem session show 11111111-1111-1111-1111-111111111001 --simulate-next --save
    """
    # NOTE: the docstring above is the --help text. Each "\b" line tells
    # Click not to rewrap the following paragraph, so every example keeps its
    # comment and its command on separate lines.
    asyncio.run(
        _show_async(
            user_id=user_id,
            session_id=session_id,
            role_filter=role,
            limit=limit,
            simulate_next=simulate_next,
            save=save,
            custom_sim_prompt=custom_sim_prompt,
        )
    )
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
async def _show_async(
    user_id: str,
    session_id: str | None,
    role_filter: str | None,
    limit: int,
    simulate_next: bool,
    save: bool,
    custom_sim_prompt: str | None,
):
    """Async implementation of the ``show`` command.

    Prints the user profile and the messages as YAML. With ``simulate_next``
    it additionally prints a simulated next user message and, when ``save``
    is set, stores that message.

    Args:
        user_id: User whose profile and messages are loaded.
        session_id: Optional session restriction, also used when saving.
        role_filter: Optional role restriction for the displayed messages.
        limit: Maximum number of messages to load.
        simulate_next: Whether to generate a simulated next message.
        save: Whether to persist the simulated message.
        custom_sim_prompt: Optional custom prompt passed to the simulator.

    Raises:
        Exception: Any error from simulation or saving is logged and re-raised.
    """
    user, messages = await _load_user_and_messages(
        user_id=user_id,
        session_id=session_id,
        # Simulation needs the full conversation, so when simulating the role
        # filter is applied only to the displayed list further down.
        role_filter=None if simulate_next else role_filter,
        limit=limit,
    )

    click.echo("\n# User Profile")
    click.echo("---")
    click.echo(_format_user_yaml(user))

    display_messages = messages
    if simulate_next and role_filter:
        display_messages = [m for m in messages if m.message_type == role_filter]

    click.echo("\n# Messages")
    click.echo("---")
    click.echo(_format_messages_yaml(display_messages))

    if not simulate_next:
        return

    click.echo("\n# Simulated Next Message")
    click.echo("---")

    try:
        simulated = await _simulate_next_message(
            user=user,
            messages=messages,
            custom_prompt=custom_sim_prompt,
        )
        click.echo("role: user")
        # Indent every line so a multi-line reply stays inside the YAML block
        # scalar instead of only its first line being indented.
        indented = "\n".join(f"  {line}" for line in simulated.splitlines())
        click.echo(f"content: |\n{indented}")

        if save:
            saved_msg = await _save_message(
                user_id=user_id,
                session_id=session_id,
                content=simulated,
                role="user",
            )
            logger.success(f"Saved message: {saved_msg.id}")

    except Exception as e:
        logger.error(f"Simulation failed: {e}")
        raise
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def register_command(cli_group) -> None:
    """Attach the ``session`` command group to ``cli_group``."""
    cli_group.add_command(session)
|
rem/cli/dreaming.py
CHANGED
|
@@ -43,7 +43,7 @@ rem-dreaming full --user-id=user-123 --rem-api-url=http://localhost:8000
|
|
|
43
43
|
Environment Variables:
|
|
44
44
|
- REM_API_URL: REM API endpoint (default: http://rem-api:8000)
|
|
45
45
|
- REM_EMBEDDING_PROVIDER: Embedding provider (default: text-embedding-3-small)
|
|
46
|
-
- REM_DEFAULT_MODEL: LLM model (default: gpt-
|
|
46
|
+
- REM_DEFAULT_MODEL: LLM model (default: gpt-4.1)
|
|
47
47
|
- REM_LOOKBACK_HOURS: Default lookback window (default: 24)
|
|
48
48
|
- OPENAI_API_KEY: OpenAI API key
|
|
49
49
|
|
|
@@ -83,7 +83,7 @@ def get_worker() -> DreamingWorker:
|
|
|
83
83
|
embedding_provider=os.getenv(
|
|
84
84
|
"REM_EMBEDDING_PROVIDER", "text-embedding-3-small"
|
|
85
85
|
),
|
|
86
|
-
default_model=os.getenv("REM_DEFAULT_MODEL", "gpt-
|
|
86
|
+
default_model=os.getenv("REM_DEFAULT_MODEL", "gpt-4.1"),
|
|
87
87
|
lookback_hours=int(os.getenv("REM_LOOKBACK_HOURS", "24")),
|
|
88
88
|
)
|
|
89
89
|
|
rem/cli/main.py
CHANGED
|
@@ -96,6 +96,7 @@ from .commands.serve import register_command as register_serve_command
|
|
|
96
96
|
from .commands.mcp import register_command as register_mcp_command
|
|
97
97
|
from .commands.scaffold import scaffold as scaffold_command
|
|
98
98
|
from .commands.cluster import register_commands as register_cluster_commands
|
|
99
|
+
from .commands.session import register_command as register_session_command
|
|
99
100
|
|
|
100
101
|
register_schema_commands(schema)
|
|
101
102
|
register_db_commands(db)
|
|
@@ -108,6 +109,7 @@ register_serve_command(cli)
|
|
|
108
109
|
register_mcp_command(cli)
|
|
109
110
|
cli.add_command(experiments_group)
|
|
110
111
|
cli.add_command(scaffold_command)
|
|
112
|
+
register_session_command(cli)
|
|
111
113
|
|
|
112
114
|
|
|
113
115
|
def main():
|
rem/models/core/experiment.py
CHANGED
|
@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
|
|
|
138
138
|
|
|
139
139
|
path: str = Field(
|
|
140
140
|
description=(
|
|
141
|
-
"Path to dataset
|
|
141
|
+
"Path to dataset. Format is inferred from file extension.\n"
|
|
142
|
+
"Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
|
|
142
143
|
"- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
|
|
143
|
-
"- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/
|
|
144
|
+
"- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
|
|
144
145
|
"- Hybrid: S3 URI for data, Git path for schema"
|
|
145
146
|
)
|
|
146
147
|
)
|
|
147
148
|
|
|
148
|
-
format: Literal["csv", "jsonl", "parquet", "json"] = Field(
|
|
149
|
-
default="csv",
|
|
150
|
-
description="Dataset file format"
|
|
151
|
-
)
|
|
152
|
-
|
|
153
149
|
schema_path: str | None = Field(
|
|
154
150
|
default=None,
|
|
155
151
|
description=(
|
|
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
|
|
|
262
258
|
datasets:
|
|
263
259
|
ground_truth:
|
|
264
260
|
location: git
|
|
265
|
-
path: datasets/ground_truth.csv
|
|
266
|
-
format: csv
|
|
261
|
+
path: datasets/ground_truth.csv # format inferred from extension
|
|
267
262
|
results:
|
|
268
263
|
location: git
|
|
269
264
|
base_path: results/
|
|
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
|
|
|
288
283
|
ground_truth:
|
|
289
284
|
location: s3
|
|
290
285
|
path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
|
|
291
|
-
format: parquet
|
|
292
286
|
schema_path: datasets/schema.yaml # Schema in Git for documentation
|
|
293
287
|
test_cases:
|
|
294
288
|
location: s3
|
|
295
289
|
path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
|
|
296
|
-
format: jsonl
|
|
297
290
|
results:
|
|
298
291
|
location: hybrid
|
|
299
292
|
base_path: s3://rem-prod/experiments/cv-parser-production/results/
|
|
@@ -558,7 +551,6 @@ class ExperimentConfig(BaseModel):
|
|
|
558
551
|
|
|
559
552
|
- **Location**: `{dataset.location.value}`
|
|
560
553
|
- **Path**: `{dataset.path}`
|
|
561
|
-
- **Format**: `{dataset.format}`
|
|
562
554
|
"""
|
|
563
555
|
if dataset.description:
|
|
564
556
|
readme += f"- **Description**: {dataset.description}\n"
|
|
@@ -629,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
|
|
|
629
621
|
"ground_truth": DatasetReference(
|
|
630
622
|
location=DatasetLocation.GIT,
|
|
631
623
|
path="datasets/ground_truth.csv",
|
|
632
|
-
format="csv",
|
|
633
624
|
description="10 manually curated test cases"
|
|
634
625
|
)
|
|
635
626
|
},
|
|
@@ -659,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
|
|
|
659
650
|
"ground_truth": DatasetReference(
|
|
660
651
|
location=DatasetLocation.S3,
|
|
661
652
|
path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
|
|
662
|
-
format="parquet",
|
|
663
653
|
schema_path="datasets/schema.yaml",
|
|
664
654
|
description="10,000 CV/resume pairs with ground truth extractions"
|
|
665
655
|
)
|
rem/models/entities/__init__.py
CHANGED
|
@@ -39,6 +39,7 @@ from .shared_session import (
|
|
|
39
39
|
SharedWithMeResponse,
|
|
40
40
|
SharedWithMeSummary,
|
|
41
41
|
)
|
|
42
|
+
from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
|
|
42
43
|
from .user import User, UserTier
|
|
43
44
|
|
|
44
45
|
__all__ = [
|
|
@@ -56,6 +57,9 @@ __all__ = [
|
|
|
56
57
|
"FeedbackCategory",
|
|
57
58
|
"User",
|
|
58
59
|
"UserTier",
|
|
60
|
+
"Subscriber",
|
|
61
|
+
"SubscriberStatus",
|
|
62
|
+
"SubscriberOrigin",
|
|
59
63
|
"File",
|
|
60
64
|
"Moment",
|
|
61
65
|
"Schema",
|
rem/models/entities/ontology.py
CHANGED
|
@@ -129,7 +129,7 @@ class Ontology(CoreModel):
|
|
|
129
129
|
file_id="file-uuid-456",
|
|
130
130
|
agent_schema_id="contract-parser-v2",
|
|
131
131
|
provider_name="openai",
|
|
132
|
-
model_name="gpt-
|
|
132
|
+
model_name="gpt-4.1",
|
|
133
133
|
extracted_data={
|
|
134
134
|
"contract_type": "supplier_agreement",
|
|
135
135
|
"parties": [
|
|
@@ -74,7 +74,7 @@ class OntologyConfig(CoreModel):
|
|
|
74
74
|
priority=200, # Higher priority = runs first
|
|
75
75
|
enabled=True,
|
|
76
76
|
provider_name="openai", # Override default provider
|
|
77
|
-
model_name="gpt-
|
|
77
|
+
model_name="gpt-4.1",
|
|
78
78
|
tenant_id="acme-corp",
|
|
79
79
|
tags=["legal", "procurement"]
|
|
80
80
|
)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Subscriber - Email subscription management.
|
|
3
|
+
|
|
4
|
+
This model stores subscribers who sign up via websites/apps.
|
|
5
|
+
Subscribers can be collected before user registration for newsletters,
|
|
6
|
+
updates, and approval-based access control.
|
|
7
|
+
|
|
8
|
+
Key features:
|
|
9
|
+
- Deterministic UUID from email (same email = same ID)
|
|
10
|
+
- Approval workflow for access control
|
|
11
|
+
- Tags for segmentation
|
|
12
|
+
- Origin tracking for analytics
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import uuid
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
from pydantic import Field, EmailStr, model_validator
|
|
21
|
+
|
|
22
|
+
from ..core import CoreModel
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SubscriberStatus(str, Enum):
    """Subscription status."""

    # Actively subscribed
    ACTIVE = "active"
    # User unsubscribed
    UNSUBSCRIBED = "unsubscribed"
    # Email bounced
    BOUNCED = "bounced"
    # Pending confirmation (if double opt-in)
    PENDING = "pending"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class SubscriberOrigin(str, Enum):
    """Where the subscription originated from."""

    # Main website subscribe form
    WEBSITE = "website"
    # Campaign landing page
    LANDING_PAGE = "landing_page"
    # In-app subscription
    APP = "app"
    # Bulk import
    IMPORT = "import"
    # Referred by another user
    REFERRAL = "referral"
    OTHER = "other"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Subscriber(CoreModel):
    """
    Email subscriber for newsletters and access control.

    This model captures subscribers who sign up via the website, landing pages,
    or in-app prompts. Uses deterministic UUID from email for natural upserts.

    Access control via `approved` field:
    - When email auth checks subscriber status, only approved subscribers
      can complete login (if approval is enabled in settings).
    - Subscribers can be pre-approved, or approved manually/automatically.

    Usage:
        from rem.services.postgres import Repository
        from rem.models.entities import Subscriber, SubscriberStatus

        repo = Repository(Subscriber, db=db)

        # Create subscriber (ID auto-generated from email)
        subscriber = Subscriber(
            email="user@example.com",
            name="John Doe",
            origin=SubscriberOrigin.WEBSITE,
        )
        await repo.upsert(subscriber)

        # Check if approved for login
        subscriber = await repo.get_by_id(subscriber.id, tenant_id="default")
        if subscriber and subscriber.approved:
            # Allow login
            pass
    """

    # --- Identity (required) ---
    email: EmailStr = Field(description="Subscriber's email address (unique identifier)")

    # --- Optional details ---
    name: Optional[str] = Field(default=None, description="Subscriber's name (optional)")
    comment: Optional[str] = Field(
        default=None,
        max_length=500,
        description="Optional comment or message from subscriber",
    )
    status: SubscriberStatus = Field(
        default=SubscriberStatus.ACTIVE,
        description="Current subscription status",
    )

    # --- Access control ---
    approved: bool = Field(
        default=False,
        description="Whether subscriber is approved for login (for approval workflows)",
    )
    approved_at: Optional[datetime] = Field(
        default=None,
        description="When the subscriber was approved",
    )
    approved_by: Optional[str] = Field(
        default=None,
        description="Who approved the subscriber (user ID or 'system')",
    )

    # --- Origin tracking ---
    origin: SubscriberOrigin = Field(
        default=SubscriberOrigin.WEBSITE,
        description="Where the subscription originated",
    )
    origin_detail: Optional[str] = Field(
        default=None,
        description="Additional origin context (e.g., campaign name, page URL)",
    )

    # --- Timestamps ---
    subscribed_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="When the subscription was created",
    )
    unsubscribed_at: Optional[datetime] = Field(
        default=None,
        description="When the user unsubscribed (if applicable)",
    )

    # --- Compliance ---
    ip_address: Optional[str] = Field(
        default=None,
        description="IP address at subscription time (for compliance)",
    )
    user_agent: Optional[str] = Field(
        default=None,
        description="Browser user agent at subscription time",
    )

    # --- Segmentation ---
    tags: list[str] = Field(
        default_factory=list,
        description="Tags for segmentation (e.g., ['early-access', 'beta'])",
    )

    @staticmethod
    def email_to_uuid(email: str) -> uuid.UUID:
        """Derive a stable UUID from an email address.

        The address is lower-cased and stripped of surrounding whitespace,
        then hashed with UUID v5 in the DNS namespace, so the same address
        always maps to the same ID. The original author notes this is meant
        to stay consistent with EmailService.generate_user_id_from_email()
        (not visible here).

        Args:
            email: Email address.

        Returns:
            Deterministic UUID for that address.
        """
        normalized = email.lower().strip()
        return uuid.uuid5(uuid.NAMESPACE_DNS, normalized)

    @model_validator(mode="after")
    def set_id_from_email(self) -> "Subscriber":
        """Overwrite ``id`` with the email-derived UUID so upserts key on email."""
        if not self.email:
            return self
        self.id = self.email_to_uuid(self.email)
        return self
|
rem/models/entities/user.py
CHANGED