remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +32 -2
- rem/agentic/agents/agent_manager.py +310 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/context_builder.py +5 -3
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +155 -18
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +280 -57
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +28 -5
- rem/api/mcp_router/tools.py +555 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +278 -4
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +697 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/__init__.py +13 -3
- rem/auth/middleware.py +186 -22
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +58 -14
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +25 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/registry.py +10 -4
- rem/schemas/agents/core/agent-builder.yaml +134 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +92 -19
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +459 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +197 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +547 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +137 -51
- rem/services/session/reload.py +15 -8
- rem/settings.py +515 -27
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2304 -377
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/files.py +157 -1
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/cli/main.py
CHANGED
|
@@ -22,17 +22,30 @@ except Exception:
|
|
|
22
22
|
__version__ = "unknown"
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
def _configure_logger(level: str):
|
|
26
|
+
"""Configure loguru with custom level icons."""
|
|
27
|
+
logger.remove()
|
|
28
|
+
|
|
29
|
+
# Configure level icons - only warnings and errors get visual indicators
|
|
30
|
+
logger.level("DEBUG", icon=" ")
|
|
31
|
+
logger.level("INFO", icon=" ")
|
|
32
|
+
logger.level("WARNING", icon="🟠")
|
|
33
|
+
logger.level("ERROR", icon="🔴")
|
|
34
|
+
logger.level("CRITICAL", icon="🔴")
|
|
35
|
+
|
|
36
|
+
logger.add(
|
|
37
|
+
sys.stderr,
|
|
38
|
+
level=level,
|
|
39
|
+
format="<green>{time:HH:mm:ss}</green> | {level.icon} <level>{level: <8}</level> | <level>{message}</level>",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
25
43
|
@click.group()
|
|
26
44
|
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
|
|
27
45
|
@click.version_option(version=__version__, prog_name="rem")
|
|
28
46
|
def cli(verbose: bool):
|
|
29
47
|
"""REM - Resources Entities Moments system CLI."""
|
|
30
|
-
if verbose:
|
|
31
|
-
logger.remove()
|
|
32
|
-
logger.add(sys.stderr, level="DEBUG")
|
|
33
|
-
else:
|
|
34
|
-
logger.remove()
|
|
35
|
-
logger.add(sys.stderr, level="INFO")
|
|
48
|
+
_configure_logger("DEBUG" if verbose else "INFO")
|
|
36
49
|
|
|
37
50
|
|
|
38
51
|
@cli.group()
|
|
@@ -65,6 +78,12 @@ def dreaming():
|
|
|
65
78
|
pass
|
|
66
79
|
|
|
67
80
|
|
|
81
|
+
@cli.group()
|
|
82
|
+
def cluster():
|
|
83
|
+
"""Kubernetes cluster deployment and management."""
|
|
84
|
+
pass
|
|
85
|
+
|
|
86
|
+
|
|
68
87
|
# Register commands
|
|
69
88
|
from .commands.schema import register_commands as register_schema_commands
|
|
70
89
|
from .commands.db import register_commands as register_db_commands
|
|
@@ -76,17 +95,21 @@ from .commands.configure import register_command as register_configure_command
|
|
|
76
95
|
from .commands.serve import register_command as register_serve_command
|
|
77
96
|
from .commands.mcp import register_command as register_mcp_command
|
|
78
97
|
from .commands.scaffold import scaffold as scaffold_command
|
|
98
|
+
from .commands.cluster import register_commands as register_cluster_commands
|
|
99
|
+
from .commands.session import register_command as register_session_command
|
|
79
100
|
|
|
80
101
|
register_schema_commands(schema)
|
|
81
102
|
register_db_commands(db)
|
|
82
103
|
register_process_commands(process)
|
|
83
104
|
register_dreaming_commands(dreaming)
|
|
105
|
+
register_cluster_commands(cluster)
|
|
84
106
|
register_ask_command(cli)
|
|
85
107
|
register_configure_command(cli)
|
|
86
108
|
register_serve_command(cli)
|
|
87
109
|
register_mcp_command(cli)
|
|
88
110
|
cli.add_command(experiments_group)
|
|
89
111
|
cli.add_command(scaffold_command)
|
|
112
|
+
register_session_command(cli)
|
|
90
113
|
|
|
91
114
|
|
|
92
115
|
def main():
|
rem/config.py
CHANGED
|
@@ -15,7 +15,7 @@ File Format (~/.rem/config.yaml):
|
|
|
15
15
|
pool_max_size: 20
|
|
16
16
|
|
|
17
17
|
llm:
|
|
18
|
-
default_model:
|
|
18
|
+
default_model: openai:gpt-4.1
|
|
19
19
|
openai_api_key: sk-...
|
|
20
20
|
anthropic_api_key: sk-ant-...
|
|
21
21
|
|
|
@@ -95,9 +95,16 @@ def load_config() -> dict[str, Any]:
|
|
|
95
95
|
"""
|
|
96
96
|
Load configuration from ~/.rem/config.yaml.
|
|
97
97
|
|
|
98
|
+
Set REM_SKIP_CONFIG=1 to skip loading the config file (useful when using .env files).
|
|
99
|
+
|
|
98
100
|
Returns:
|
|
99
|
-
Configuration dictionary (empty if file doesn't exist)
|
|
101
|
+
Configuration dictionary (empty if file doesn't exist or skipped)
|
|
100
102
|
"""
|
|
103
|
+
# Allow skipping config file via environment variable
|
|
104
|
+
if os.environ.get("REM_SKIP_CONFIG", "").lower() in ("1", "true", "yes"):
|
|
105
|
+
logger.debug("Skipping config file (REM_SKIP_CONFIG is set)")
|
|
106
|
+
return {}
|
|
107
|
+
|
|
101
108
|
config_path = get_config_path()
|
|
102
109
|
|
|
103
110
|
if not config_path.exists():
|
|
@@ -216,7 +223,7 @@ def get_default_config() -> dict[str, Any]:
|
|
|
216
223
|
"pool_max_size": 20,
|
|
217
224
|
},
|
|
218
225
|
"llm": {
|
|
219
|
-
"default_model": "
|
|
226
|
+
"default_model": "openai:gpt-4.1",
|
|
220
227
|
"default_temperature": 0.5,
|
|
221
228
|
# API keys will be prompted for in wizard
|
|
222
229
|
# "openai_api_key": "",
|
rem/models/core/core_model.py
CHANGED
|
@@ -52,7 +52,13 @@ class CoreModel(BaseModel):
|
|
|
52
52
|
default=None, description="Tenant identifier for multi-tenancy isolation"
|
|
53
53
|
)
|
|
54
54
|
user_id: Optional[str] = Field(
|
|
55
|
-
default=None,
|
|
55
|
+
default=None,
|
|
56
|
+
description=(
|
|
57
|
+
"Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, "
|
|
58
|
+
"to allow flexibility for external identity providers. Typically generated as "
|
|
59
|
+
"a hash of the user's email address. In future, other strong unique claims "
|
|
60
|
+
"(e.g., OAuth sub, verified phone) could also be used for generation."
|
|
61
|
+
),
|
|
56
62
|
)
|
|
57
63
|
graph_edges: list[dict] = Field(
|
|
58
64
|
default_factory=list,
|
rem/models/core/experiment.py
CHANGED
|
@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
|
|
|
138
138
|
|
|
139
139
|
path: str = Field(
|
|
140
140
|
description=(
|
|
141
|
-
"Path to dataset
|
|
141
|
+
"Path to dataset. Format is inferred from file extension.\n"
|
|
142
|
+
"Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
|
|
142
143
|
"- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
|
|
143
|
-
"- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/
|
|
144
|
+
"- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
|
|
144
145
|
"- Hybrid: S3 URI for data, Git path for schema"
|
|
145
146
|
)
|
|
146
147
|
)
|
|
147
148
|
|
|
148
|
-
format: Literal["csv", "jsonl", "parquet", "json"] = Field(
|
|
149
|
-
default="csv",
|
|
150
|
-
description="Dataset file format"
|
|
151
|
-
)
|
|
152
|
-
|
|
153
149
|
schema_path: str | None = Field(
|
|
154
150
|
default=None,
|
|
155
151
|
description=(
|
|
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
|
|
|
262
258
|
datasets:
|
|
263
259
|
ground_truth:
|
|
264
260
|
location: git
|
|
265
|
-
path: datasets/ground_truth.csv
|
|
266
|
-
format: csv
|
|
261
|
+
path: datasets/ground_truth.csv # format inferred from extension
|
|
267
262
|
results:
|
|
268
263
|
location: git
|
|
269
264
|
base_path: results/
|
|
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
|
|
|
288
283
|
ground_truth:
|
|
289
284
|
location: s3
|
|
290
285
|
path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
|
|
291
|
-
format: parquet
|
|
292
286
|
schema_path: datasets/schema.yaml # Schema in Git for documentation
|
|
293
287
|
test_cases:
|
|
294
288
|
location: s3
|
|
295
289
|
path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
|
|
296
|
-
format: jsonl
|
|
297
290
|
results:
|
|
298
291
|
location: hybrid
|
|
299
292
|
base_path: s3://rem-prod/experiments/cv-parser-production/results/
|
|
@@ -318,6 +311,15 @@ class ExperimentConfig(BaseModel):
|
|
|
318
311
|
)
|
|
319
312
|
)
|
|
320
313
|
|
|
314
|
+
task: str = Field(
|
|
315
|
+
default="general",
|
|
316
|
+
description=(
|
|
317
|
+
"Task name for organizing experiments by purpose.\n"
|
|
318
|
+
"Used with agent name to form directory: {agent}/{task}/\n"
|
|
319
|
+
"Examples: 'risk-assessment', 'classification', 'general'"
|
|
320
|
+
)
|
|
321
|
+
)
|
|
322
|
+
|
|
321
323
|
description: str = Field(
|
|
322
324
|
description="Human-readable description of experiment purpose and goals"
|
|
323
325
|
)
|
|
@@ -410,6 +412,24 @@ class ExperimentConfig(BaseModel):
|
|
|
410
412
|
|
|
411
413
|
return v
|
|
412
414
|
|
|
415
|
+
@field_validator("task")
|
|
416
|
+
@classmethod
|
|
417
|
+
def validate_task(cls, v: str) -> str:
|
|
418
|
+
"""Validate task name follows conventions."""
|
|
419
|
+
if not v:
|
|
420
|
+
return "general" # Default value
|
|
421
|
+
|
|
422
|
+
if not v.islower():
|
|
423
|
+
raise ValueError("Task name must be lowercase")
|
|
424
|
+
|
|
425
|
+
if " " in v:
|
|
426
|
+
raise ValueError("Task name cannot contain spaces (use hyphens)")
|
|
427
|
+
|
|
428
|
+
if not all(c.isalnum() or c == "-" for c in v):
|
|
429
|
+
raise ValueError("Task name can only contain lowercase letters, numbers, and hyphens")
|
|
430
|
+
|
|
431
|
+
return v
|
|
432
|
+
|
|
413
433
|
@field_validator("tags")
|
|
414
434
|
@classmethod
|
|
415
435
|
def validate_tags(cls, v: list[str]) -> list[str]:
|
|
@@ -420,6 +440,15 @@ class ExperimentConfig(BaseModel):
|
|
|
420
440
|
"""Get the experiment directory path."""
|
|
421
441
|
return Path(base_path) / self.name
|
|
422
442
|
|
|
443
|
+
def get_agent_task_dir(self, base_path: str = ".experiments") -> Path:
|
|
444
|
+
"""
|
|
445
|
+
Get the experiment directory path organized by agent/task.
|
|
446
|
+
|
|
447
|
+
Returns: Path like .experiments/{agent}/{task}/
|
|
448
|
+
This is the recommended structure for S3 export compatibility.
|
|
449
|
+
"""
|
|
450
|
+
return Path(base_path) / self.agent_schema_ref.name / self.task
|
|
451
|
+
|
|
423
452
|
def get_config_path(self, base_path: str = ".experiments") -> Path:
|
|
424
453
|
"""Get the path to experiment.yaml file."""
|
|
425
454
|
return self.get_experiment_dir(base_path) / "experiment.yaml"
|
|
@@ -428,6 +457,22 @@ class ExperimentConfig(BaseModel):
|
|
|
428
457
|
"""Get the path to README.md file."""
|
|
429
458
|
return self.get_experiment_dir(base_path) / "README.md"
|
|
430
459
|
|
|
460
|
+
def get_evaluator_filename(self) -> str:
|
|
461
|
+
"""
|
|
462
|
+
Get the evaluator filename with task prefix.
|
|
463
|
+
|
|
464
|
+
Returns: {agent_name}-{task}.yaml (e.g., siggy-risk-assessment.yaml)
|
|
465
|
+
"""
|
|
466
|
+
return f"{self.agent_schema_ref.name}-{self.task}.yaml"
|
|
467
|
+
|
|
468
|
+
def get_s3_export_path(self, bucket: str, version: str = "v0") -> str:
|
|
469
|
+
"""
|
|
470
|
+
Get the S3 path for exporting this experiment.
|
|
471
|
+
|
|
472
|
+
Returns: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
|
|
473
|
+
"""
|
|
474
|
+
return f"s3://{bucket}/{version}/datasets/calibration/experiments/{self.agent_schema_ref.name}/{self.task}"
|
|
475
|
+
|
|
431
476
|
def to_yaml(self) -> str:
|
|
432
477
|
"""Export configuration as YAML string."""
|
|
433
478
|
import yaml
|
|
@@ -483,6 +528,7 @@ class ExperimentConfig(BaseModel):
|
|
|
483
528
|
## Configuration
|
|
484
529
|
|
|
485
530
|
**Status**: `{self.status.value}`
|
|
531
|
+
**Task**: `{self.task}`
|
|
486
532
|
**Tags**: {', '.join(f'`{tag}`' for tag in self.tags) if self.tags else 'None'}
|
|
487
533
|
|
|
488
534
|
## Agent Schema
|
|
@@ -494,6 +540,7 @@ class ExperimentConfig(BaseModel):
|
|
|
494
540
|
## Evaluator Schema
|
|
495
541
|
|
|
496
542
|
- **Name**: `{self.evaluator_schema_ref.name}`
|
|
543
|
+
- **File**: `{self.get_evaluator_filename()}`
|
|
497
544
|
- **Type**: `{self.evaluator_schema_ref.type}`
|
|
498
545
|
|
|
499
546
|
## Datasets
|
|
@@ -504,7 +551,6 @@ class ExperimentConfig(BaseModel):
|
|
|
504
551
|
|
|
505
552
|
- **Location**: `{dataset.location.value}`
|
|
506
553
|
- **Path**: `{dataset.path}`
|
|
507
|
-
- **Format**: `{dataset.format}`
|
|
508
554
|
"""
|
|
509
555
|
if dataset.description:
|
|
510
556
|
readme += f"- **Description**: {dataset.description}\n"
|
|
@@ -575,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
|
|
|
575
621
|
"ground_truth": DatasetReference(
|
|
576
622
|
location=DatasetLocation.GIT,
|
|
577
623
|
path="datasets/ground_truth.csv",
|
|
578
|
-
format="csv",
|
|
579
624
|
description="10 manually curated test cases"
|
|
580
625
|
)
|
|
581
626
|
},
|
|
@@ -605,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
|
|
|
605
650
|
"ground_truth": DatasetReference(
|
|
606
651
|
location=DatasetLocation.S3,
|
|
607
652
|
path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
|
|
608
|
-
format="parquet",
|
|
609
653
|
schema_path="datasets/schema.yaml",
|
|
610
654
|
description="10,000 CV/resume pairs with ground truth extractions"
|
|
611
655
|
)
|
rem/models/core/rem_query.py
CHANGED
|
@@ -112,7 +112,7 @@ class SearchParameters(BaseModel):
|
|
|
112
112
|
table_name: str = Field(..., description="Table to search (resources, moments, etc.)")
|
|
113
113
|
limit: int = Field(default=10, gt=0, description="Maximum results")
|
|
114
114
|
min_similarity: float = Field(
|
|
115
|
-
default=0.
|
|
115
|
+
default=0.3, ge=0.0, le=1.0, description="Minimum similarity score (0.3 recommended for general queries)"
|
|
116
116
|
)
|
|
117
117
|
|
|
118
118
|
|
|
@@ -198,7 +198,10 @@ class RemQuery(BaseModel):
|
|
|
198
198
|
| SQLParameters
|
|
199
199
|
| TraverseParameters
|
|
200
200
|
) = Field(..., description="Query parameters")
|
|
201
|
-
user_id: str = Field(
|
|
201
|
+
user_id: Optional[str] = Field(
|
|
202
|
+
default=None,
|
|
203
|
+
description="User identifier (UUID5 hash of email). None = anonymous (shared/public data only)"
|
|
204
|
+
)
|
|
202
205
|
|
|
203
206
|
|
|
204
207
|
class TraverseStage(BaseModel):
|
rem/models/entities/__init__.py
CHANGED
|
@@ -5,6 +5,9 @@ Core entity types for the REM system:
|
|
|
5
5
|
- Resources: Base content units (documents, conversations, artifacts)
|
|
6
6
|
- ImageResources: Image-specific resources with CLIP embeddings
|
|
7
7
|
- Messages: Communication content
|
|
8
|
+
- Sessions: Conversation sessions (normal or evaluation mode)
|
|
9
|
+
- SharedSessions: Session sharing between users for collaboration
|
|
10
|
+
- Feedback: User feedback on messages/sessions with trace integration
|
|
8
11
|
- Users: User entities
|
|
9
12
|
- Files: File metadata and tracking
|
|
10
13
|
- Moments: Temporal narratives (meetings, coding sessions, conversations)
|
|
@@ -19,6 +22,8 @@ All entities inherit from CoreModel and support:
|
|
|
19
22
|
- Natural language labels for conversational queries
|
|
20
23
|
"""
|
|
21
24
|
|
|
25
|
+
from .domain_resource import DomainResource
|
|
26
|
+
from .feedback import Feedback, FeedbackCategory
|
|
22
27
|
from .file import File
|
|
23
28
|
from .image_resource import ImageResource
|
|
24
29
|
from .message import Message
|
|
@@ -27,14 +32,34 @@ from .ontology import Ontology
|
|
|
27
32
|
from .ontology_config import OntologyConfig
|
|
28
33
|
from .resource import Resource
|
|
29
34
|
from .schema import Schema
|
|
35
|
+
from .session import Session, SessionMode
|
|
36
|
+
from .shared_session import (
|
|
37
|
+
SharedSession,
|
|
38
|
+
SharedSessionCreate,
|
|
39
|
+
SharedWithMeResponse,
|
|
40
|
+
SharedWithMeSummary,
|
|
41
|
+
)
|
|
42
|
+
from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
|
|
30
43
|
from .user import User, UserTier
|
|
31
44
|
|
|
32
45
|
__all__ = [
|
|
33
46
|
"Resource",
|
|
47
|
+
"DomainResource",
|
|
34
48
|
"ImageResource",
|
|
35
49
|
"Message",
|
|
50
|
+
"Session",
|
|
51
|
+
"SessionMode",
|
|
52
|
+
"SharedSession",
|
|
53
|
+
"SharedSessionCreate",
|
|
54
|
+
"SharedWithMeResponse",
|
|
55
|
+
"SharedWithMeSummary",
|
|
56
|
+
"Feedback",
|
|
57
|
+
"FeedbackCategory",
|
|
36
58
|
"User",
|
|
37
59
|
"UserTier",
|
|
60
|
+
"Subscriber",
|
|
61
|
+
"SubscriberStatus",
|
|
62
|
+
"SubscriberOrigin",
|
|
38
63
|
"File",
|
|
39
64
|
"Moment",
|
|
40
65
|
"Schema",
|
rem/models/entities/domain_resource.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DomainResource - Curated internal knowledge in REM.
|
|
3
|
+
|
|
4
|
+
DomainResources are a specialized subclass of Resource for storing curated,
|
|
5
|
+
domain-specific internal knowledge that is not part of general knowledge.
|
|
6
|
+
This includes proprietary information, internal documentation, institutional
|
|
7
|
+
knowledge, and other content that requires more careful curation.
|
|
8
|
+
|
|
9
|
+
Key Differences from Resource:
|
|
10
|
+
- Intended for curated, internal knowledge (not raw ingested content)
|
|
11
|
+
- Higher quality bar - content is reviewed/vetted before ingestion
|
|
12
|
+
- May contain proprietary or sensitive information
|
|
13
|
+
- Subject to different retention/governance policies
|
|
14
|
+
|
|
15
|
+
Use Cases:
|
|
16
|
+
- Internal documentation and procedures
|
|
17
|
+
- Proprietary research and analysis
|
|
18
|
+
- Institutional knowledge bases
|
|
19
|
+
- Domain-specific ontologies and taxonomies
|
|
20
|
+
- Curated best practices and guidelines
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from .resource import Resource
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DomainResource(Resource):
|
|
27
|
+
"""
|
|
28
|
+
Curated domain-specific knowledge resource.
|
|
29
|
+
|
|
30
|
+
Inherits all fields from Resource but stored in a separate table
|
|
31
|
+
(domain_resources) to distinguish curated internal knowledge from
|
|
32
|
+
general ingested content.
|
|
33
|
+
|
|
34
|
+
The schema is identical to Resource, allowing seamless migration
|
|
35
|
+
of content between tables as curation status changes.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
pass
|
rem/models/entities/feedback.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Feedback - User feedback on chat messages and sessions.
|
|
3
|
+
|
|
4
|
+
Feedback allows users to rate and categorize responses, providing
|
|
5
|
+
data for evaluation and model improvement. Feedback can be attached
|
|
6
|
+
to specific messages or entire sessions.
|
|
7
|
+
|
|
8
|
+
Trace Integration:
|
|
9
|
+
- Feedback references trace_id/span_id for OTEL/Phoenix integration
|
|
10
|
+
- Can attach annotations to Phoenix spans for unified observability
|
|
11
|
+
|
|
12
|
+
Predefined Categories (system-defined, extensible):
|
|
13
|
+
- INCOMPLETE: Response lacks expected information
|
|
14
|
+
- INACCURATE: Response contains factual errors
|
|
15
|
+
- POOR_TONE: Inappropriate or unprofessional tone
|
|
16
|
+
- OFF_TOPIC: Response doesn't address the question
|
|
17
|
+
- TOO_VERBOSE: Unnecessarily long response
|
|
18
|
+
- TOO_BRIEF: Insufficiently detailed response
|
|
19
|
+
- HELPFUL: Positive feedback marker
|
|
20
|
+
- EXCELLENT: Exceptionally good response
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from enum import Enum
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from pydantic import Field
|
|
27
|
+
|
|
28
|
+
from ..core import CoreModel
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FeedbackCategory(str, Enum):
|
|
32
|
+
"""Predefined feedback categories (system-defined)."""
|
|
33
|
+
|
|
34
|
+
# Negative categories
|
|
35
|
+
INCOMPLETE = "incomplete"
|
|
36
|
+
INACCURATE = "inaccurate"
|
|
37
|
+
POOR_TONE = "poor_tone"
|
|
38
|
+
OFF_TOPIC = "off_topic"
|
|
39
|
+
TOO_VERBOSE = "too_verbose"
|
|
40
|
+
TOO_BRIEF = "too_brief"
|
|
41
|
+
CONFUSING = "confusing"
|
|
42
|
+
UNSAFE = "unsafe"
|
|
43
|
+
|
|
44
|
+
# Positive categories
|
|
45
|
+
HELPFUL = "helpful"
|
|
46
|
+
EXCELLENT = "excellent"
|
|
47
|
+
ACCURATE = "accurate"
|
|
48
|
+
WELL_WRITTEN = "well_written"
|
|
49
|
+
|
|
50
|
+
# Neutral/Other
|
|
51
|
+
OTHER = "other"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Feedback(CoreModel):
|
|
55
|
+
"""
|
|
56
|
+
User feedback on a message or session.
|
|
57
|
+
|
|
58
|
+
Captures structured feedback including:
|
|
59
|
+
- Rating (1-5 scale or thumbs up/down)
|
|
60
|
+
- Categories (predefined or custom)
|
|
61
|
+
- Free-text comment
|
|
62
|
+
- Trace reference for OTEL/Phoenix integration
|
|
63
|
+
|
|
64
|
+
The feedback can be attached to:
|
|
65
|
+
- A specific message (message_id set)
|
|
66
|
+
- An entire session (session_id set, message_id null)
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
# Target reference (at least one required)
|
|
70
|
+
session_id: str = Field(
|
|
71
|
+
...,
|
|
72
|
+
description="Session ID this feedback relates to",
|
|
73
|
+
)
|
|
74
|
+
message_id: str | None = Field(
|
|
75
|
+
default=None,
|
|
76
|
+
description="Specific message ID (null for session-level feedback)",
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
# Rating (flexible: 1-5, or -1/1 for thumbs)
|
|
80
|
+
rating: int | None = Field(
|
|
81
|
+
default=None,
|
|
82
|
+
ge=-1,
|
|
83
|
+
le=5,
|
|
84
|
+
description="Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale",
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Categories (can select multiple)
|
|
88
|
+
categories: list[str] = Field(
|
|
89
|
+
default_factory=list,
|
|
90
|
+
description="Selected feedback categories (from FeedbackCategory or custom)",
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Free-text comment
|
|
94
|
+
comment: str | None = Field(
|
|
95
|
+
default=None,
|
|
96
|
+
description="Optional free-text feedback comment",
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Trace reference for OTEL/Phoenix integration
|
|
100
|
+
trace_id: str | None = Field(
|
|
101
|
+
default=None,
|
|
102
|
+
description="OTEL trace ID for linking to observability",
|
|
103
|
+
)
|
|
104
|
+
span_id: str | None = Field(
|
|
105
|
+
default=None,
|
|
106
|
+
description="OTEL span ID for specific span feedback",
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Phoenix annotation status
|
|
110
|
+
phoenix_synced: bool = Field(
|
|
111
|
+
default=False,
|
|
112
|
+
description="Whether feedback has been synced to Phoenix as annotation",
|
|
113
|
+
)
|
|
114
|
+
phoenix_annotation_id: str | None = Field(
|
|
115
|
+
default=None,
|
|
116
|
+
description="Phoenix annotation ID after sync",
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Annotator info
|
|
120
|
+
annotator_kind: str = Field(
|
|
121
|
+
default="HUMAN",
|
|
122
|
+
description="Annotator type: HUMAN, LLM, CODE",
|
|
123
|
+
)
|
rem/models/entities/message.py
CHANGED
|
@@ -6,6 +6,11 @@ that can be grouped into conversations or moments.
|
|
|
6
6
|
|
|
7
7
|
Messages are simpler than Resources but share the same graph connectivity
|
|
8
8
|
through CoreModel inheritance.
|
|
9
|
+
|
|
10
|
+
Trace Integration:
|
|
11
|
+
- trace_id: OTEL trace ID for linking to observability
|
|
12
|
+
- span_id: OTEL span ID for specific span reference
|
|
13
|
+
- These enable feedback to be attached to Phoenix annotations
|
|
9
14
|
"""
|
|
10
15
|
|
|
11
16
|
from pydantic import Field
|
|
@@ -19,6 +24,9 @@ class Message(CoreModel):
|
|
|
19
24
|
|
|
20
25
|
Represents individual messages in conversations, chats, or other
|
|
21
26
|
communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
|
|
27
|
+
|
|
28
|
+
Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
|
|
29
|
+
for observability and feedback annotation.
|
|
22
30
|
"""
|
|
23
31
|
|
|
24
32
|
content: str = Field(
|
|
@@ -27,9 +35,30 @@ class Message(CoreModel):
|
|
|
27
35
|
)
|
|
28
36
|
message_type: str | None = Field(
|
|
29
37
|
default=None,
|
|
30
|
-
description="Message type e.g role",
|
|
38
|
+
description="Message type e.g. role: 'user', 'assistant', 'system', 'tool'",
|
|
31
39
|
)
|
|
32
40
|
session_id: str | None = Field(
|
|
33
41
|
default=None,
|
|
34
42
|
description="Session identifier for tracking message context",
|
|
35
43
|
)
|
|
44
|
+
prompt: str | None = Field(
|
|
45
|
+
default=None,
|
|
46
|
+
description="Custom prompt used for this message (if overridden from default)",
|
|
47
|
+
)
|
|
48
|
+
model: str | None = Field(
|
|
49
|
+
default=None,
|
|
50
|
+
description="Model used for generating this message (provider:model format)",
|
|
51
|
+
)
|
|
52
|
+
token_count: int | None = Field(
|
|
53
|
+
default=None,
|
|
54
|
+
description="Token count for this message",
|
|
55
|
+
)
|
|
56
|
+
# OTEL/Phoenix trace integration
|
|
57
|
+
trace_id: str | None = Field(
|
|
58
|
+
default=None,
|
|
59
|
+
description="OTEL trace ID for observability integration",
|
|
60
|
+
)
|
|
61
|
+
span_id: str | None = Field(
|
|
62
|
+
default=None,
|
|
63
|
+
description="OTEL span ID for specific span reference",
|
|
64
|
+
)
|
rem/models/entities/ontology.py
CHANGED
|
@@ -129,7 +129,7 @@ class Ontology(CoreModel):
|
|
|
129
129
|
file_id="file-uuid-456",
|
|
130
130
|
agent_schema_id="contract-parser-v2",
|
|
131
131
|
provider_name="openai",
|
|
132
|
-
model_name="gpt-
|
|
132
|
+
model_name="gpt-4.1",
|
|
133
133
|
extracted_data={
|
|
134
134
|
"contract_type": "supplier_agreement",
|
|
135
135
|
"parties": [
|
rem/models/entities/ontology_config.py
CHANGED
|
@@ -74,7 +74,7 @@ class OntologyConfig(CoreModel):
|
|
|
74
74
|
priority=200, # Higher priority = runs first
|
|
75
75
|
enabled=True,
|
|
76
76
|
provider_name="openai", # Override default provider
|
|
77
|
-
model_name="gpt-
|
|
77
|
+
model_name="gpt-4.1",
|
|
78
78
|
tenant_id="acme-corp",
|
|
79
79
|
tags=["legal", "procurement"]
|
|
80
80
|
)
|
rem/models/entities/session.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session - Conversation sessions in REM.
|
|
3
|
+
|
|
4
|
+
Sessions group related messages together and can have different modes:
|
|
5
|
+
- normal: Standard conversation session
|
|
6
|
+
- evaluation: For LLM evaluation, stores original trace and overridden settings
|
|
7
|
+
|
|
8
|
+
Sessions allow overriding settings like model, temperature, and custom prompts
|
|
9
|
+
for evaluation and experimentation purposes.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from enum import Enum
|
|
13
|
+
|
|
14
|
+
from pydantic import Field
|
|
15
|
+
|
|
16
|
+
from ..core import CoreModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SessionMode(str, Enum):
|
|
20
|
+
"""Session mode types."""
|
|
21
|
+
|
|
22
|
+
NORMAL = "normal"
|
|
23
|
+
EVALUATION = "evaluation"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Session(CoreModel):
|
|
27
|
+
"""
|
|
28
|
+
Conversation session container.
|
|
29
|
+
|
|
30
|
+
Groups messages together and supports different modes for normal conversations
|
|
31
|
+
and evaluation/experimentation scenarios.
|
|
32
|
+
|
|
33
|
+
For evaluation sessions, stores:
|
|
34
|
+
- original_trace_id: Reference to the original session being evaluated
|
|
35
|
+
- settings_overrides: Model, temperature, prompt overrides
|
|
36
|
+
- prompt: Custom prompt being tested
|
|
37
|
+
|
|
38
|
+
Default sessions are lightweight - just a session_id on messages.
|
|
39
|
+
Special sessions store additional metadata for experiments.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
name: str = Field(
|
|
43
|
+
...,
|
|
44
|
+
description="Session name/identifier",
|
|
45
|
+
json_schema_extra={"entity_key": True},
|
|
46
|
+
)
|
|
47
|
+
mode: SessionMode = Field(
|
|
48
|
+
default=SessionMode.NORMAL,
|
|
49
|
+
description="Session mode: 'normal' or 'evaluation'",
|
|
50
|
+
)
|
|
51
|
+
description: str | None = Field(
|
|
52
|
+
default=None,
|
|
53
|
+
description="Optional session description",
|
|
54
|
+
)
|
|
55
|
+
# Evaluation-specific fields
|
|
56
|
+
original_trace_id: str | None = Field(
|
|
57
|
+
default=None,
|
|
58
|
+
description="For evaluation mode: ID of the original session/trace being evaluated",
|
|
59
|
+
)
|
|
60
|
+
settings_overrides: dict | None = Field(
|
|
61
|
+
default=None,
|
|
62
|
+
description="Settings overrides (model, temperature, max_tokens, system_prompt)",
|
|
63
|
+
)
|
|
64
|
+
prompt: str | None = Field(
|
|
65
|
+
default=None,
|
|
66
|
+
description="Custom prompt for this session (can override agent prompt)",
|
|
67
|
+
)
|
|
68
|
+
# Agent context
|
|
69
|
+
agent_schema_uri: str | None = Field(
|
|
70
|
+
default=None,
|
|
71
|
+
description="Agent schema used for this session",
|
|
72
|
+
)
|
|
73
|
+
# Summary stats (updated as session progresses)
|
|
74
|
+
message_count: int = Field(
|
|
75
|
+
default=0,
|
|
76
|
+
description="Number of messages in this session",
|
|
77
|
+
)
|
|
78
|
+
total_tokens: int | None = Field(
|
|
79
|
+
default=None,
|
|
80
|
+
description="Total tokens used in this session",
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
model_config = {"use_enum_values": True}
|