remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +32 -2
  4. rem/agentic/agents/agent_manager.py +310 -0
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -27
  7. rem/agentic/context_builder.py +5 -3
  8. rem/agentic/llm_provider_models.py +301 -0
  9. rem/agentic/mcp/tool_wrapper.py +155 -18
  10. rem/agentic/otel/setup.py +93 -4
  11. rem/agentic/providers/phoenix.py +371 -108
  12. rem/agentic/providers/pydantic_ai.py +280 -57
  13. rem/agentic/schema.py +361 -21
  14. rem/agentic/tools/rem_tools.py +3 -3
  15. rem/api/README.md +215 -1
  16. rem/api/deps.py +255 -0
  17. rem/api/main.py +132 -40
  18. rem/api/mcp_router/resources.py +1 -1
  19. rem/api/mcp_router/server.py +28 -5
  20. rem/api/mcp_router/tools.py +555 -7
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +278 -4
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +697 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/__init__.py +13 -3
  35. rem/auth/middleware.py +186 -22
  36. rem/auth/providers/__init__.py +4 -1
  37. rem/auth/providers/email.py +215 -0
  38. rem/cli/commands/README.md +237 -64
  39. rem/cli/commands/cluster.py +1808 -0
  40. rem/cli/commands/configure.py +4 -7
  41. rem/cli/commands/db.py +386 -143
  42. rem/cli/commands/experiments.py +468 -76
  43. rem/cli/commands/process.py +14 -8
  44. rem/cli/commands/schema.py +97 -50
  45. rem/cli/commands/session.py +336 -0
  46. rem/cli/dreaming.py +2 -2
  47. rem/cli/main.py +29 -6
  48. rem/config.py +10 -3
  49. rem/models/core/core_model.py +7 -1
  50. rem/models/core/experiment.py +58 -14
  51. rem/models/core/rem_query.py +5 -2
  52. rem/models/entities/__init__.py +25 -0
  53. rem/models/entities/domain_resource.py +38 -0
  54. rem/models/entities/feedback.py +123 -0
  55. rem/models/entities/message.py +30 -1
  56. rem/models/entities/ontology.py +1 -1
  57. rem/models/entities/ontology_config.py +1 -1
  58. rem/models/entities/session.py +83 -0
  59. rem/models/entities/shared_session.py +180 -0
  60. rem/models/entities/subscriber.py +175 -0
  61. rem/models/entities/user.py +1 -0
  62. rem/registry.py +10 -4
  63. rem/schemas/agents/core/agent-builder.yaml +134 -0
  64. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  65. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  66. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  67. rem/schemas/agents/rem.yaml +7 -3
  68. rem/services/__init__.py +3 -1
  69. rem/services/content/service.py +92 -19
  70. rem/services/email/__init__.py +10 -0
  71. rem/services/email/service.py +459 -0
  72. rem/services/email/templates.py +360 -0
  73. rem/services/embeddings/api.py +4 -4
  74. rem/services/embeddings/worker.py +16 -16
  75. rem/services/phoenix/client.py +154 -14
  76. rem/services/postgres/README.md +197 -15
  77. rem/services/postgres/__init__.py +2 -1
  78. rem/services/postgres/diff_service.py +547 -0
  79. rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
  80. rem/services/postgres/repository.py +132 -0
  81. rem/services/postgres/schema_generator.py +205 -4
  82. rem/services/postgres/service.py +6 -6
  83. rem/services/rem/parser.py +44 -9
  84. rem/services/rem/service.py +36 -2
  85. rem/services/session/compression.py +137 -51
  86. rem/services/session/reload.py +15 -8
  87. rem/settings.py +515 -27
  88. rem/sql/background_indexes.sql +21 -16
  89. rem/sql/migrations/001_install.sql +387 -54
  90. rem/sql/migrations/002_install_models.sql +2304 -377
  91. rem/sql/migrations/003_optional_extensions.sql +326 -0
  92. rem/sql/migrations/004_cache_system.sql +548 -0
  93. rem/sql/migrations/005_schema_update.sql +145 -0
  94. rem/utils/README.md +45 -0
  95. rem/utils/__init__.py +18 -0
  96. rem/utils/date_utils.py +2 -2
  97. rem/utils/files.py +157 -1
  98. rem/utils/model_helpers.py +156 -1
  99. rem/utils/schema_loader.py +220 -22
  100. rem/utils/sql_paths.py +146 -0
  101. rem/utils/sql_types.py +3 -1
  102. rem/utils/vision.py +1 -1
  103. rem/workers/__init__.py +3 -1
  104. rem/workers/db_listener.py +579 -0
  105. rem/workers/unlogged_maintainer.py +463 -0
  106. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
  107. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
  108. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
  109. rem/sql/002_install_models.sql +0 -1068
  110. rem/sql/install_models.sql +0 -1051
  111. rem/sql/migrations/003_seed_default_user.sql +0 -48
  112. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/cli/main.py CHANGED
@@ -22,17 +22,30 @@ except Exception:
22
22
  __version__ = "unknown"
23
23
 
24
24
 
25
+ def _configure_logger(level: str):
26
+ """Configure loguru with custom level icons."""
27
+ logger.remove()
28
+
29
+ # Configure level icons - only warnings and errors get visual indicators
30
+ logger.level("DEBUG", icon=" ")
31
+ logger.level("INFO", icon=" ")
32
+ logger.level("WARNING", icon="🟠")
33
+ logger.level("ERROR", icon="🔴")
34
+ logger.level("CRITICAL", icon="🔴")
35
+
36
+ logger.add(
37
+ sys.stderr,
38
+ level=level,
39
+ format="<green>{time:HH:mm:ss}</green> | {level.icon} <level>{level: <8}</level> | <level>{message}</level>",
40
+ )
41
+
42
+
25
43
  @click.group()
26
44
  @click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
27
45
  @click.version_option(version=__version__, prog_name="rem")
28
46
  def cli(verbose: bool):
29
47
  """REM - Resources Entities Moments system CLI."""
30
- if verbose:
31
- logger.remove()
32
- logger.add(sys.stderr, level="DEBUG")
33
- else:
34
- logger.remove()
35
- logger.add(sys.stderr, level="INFO")
48
+ _configure_logger("DEBUG" if verbose else "INFO")
36
49
 
37
50
 
38
51
  @cli.group()
@@ -65,6 +78,12 @@ def dreaming():
65
78
  pass
66
79
 
67
80
 
81
+ @cli.group()
82
+ def cluster():
83
+ """Kubernetes cluster deployment and management."""
84
+ pass
85
+
86
+
68
87
  # Register commands
69
88
  from .commands.schema import register_commands as register_schema_commands
70
89
  from .commands.db import register_commands as register_db_commands
@@ -76,17 +95,21 @@ from .commands.configure import register_command as register_configure_command
76
95
  from .commands.serve import register_command as register_serve_command
77
96
  from .commands.mcp import register_command as register_mcp_command
78
97
  from .commands.scaffold import scaffold as scaffold_command
98
+ from .commands.cluster import register_commands as register_cluster_commands
99
+ from .commands.session import register_command as register_session_command
79
100
 
80
101
  register_schema_commands(schema)
81
102
  register_db_commands(db)
82
103
  register_process_commands(process)
83
104
  register_dreaming_commands(dreaming)
105
+ register_cluster_commands(cluster)
84
106
  register_ask_command(cli)
85
107
  register_configure_command(cli)
86
108
  register_serve_command(cli)
87
109
  register_mcp_command(cli)
88
110
  cli.add_command(experiments_group)
89
111
  cli.add_command(scaffold_command)
112
+ register_session_command(cli)
90
113
 
91
114
 
92
115
  def main():
rem/config.py CHANGED
@@ -15,7 +15,7 @@ File Format (~/.rem/config.yaml):
15
15
  pool_max_size: 20
16
16
 
17
17
  llm:
18
- default_model: anthropic:claude-sonnet-4-5-20250929
18
+ default_model: openai:gpt-4.1
19
19
  openai_api_key: sk-...
20
20
  anthropic_api_key: sk-ant-...
21
21
 
@@ -95,9 +95,16 @@ def load_config() -> dict[str, Any]:
95
95
  """
96
96
  Load configuration from ~/.rem/config.yaml.
97
97
 
98
+ Set REM_SKIP_CONFIG=1 to skip loading the config file (useful when using .env files).
99
+
98
100
  Returns:
99
- Configuration dictionary (empty if file doesn't exist)
101
+ Configuration dictionary (empty if file doesn't exist or skipped)
100
102
  """
103
+ # Allow skipping config file via environment variable
104
+ if os.environ.get("REM_SKIP_CONFIG", "").lower() in ("1", "true", "yes"):
105
+ logger.debug("Skipping config file (REM_SKIP_CONFIG is set)")
106
+ return {}
107
+
101
108
  config_path = get_config_path()
102
109
 
103
110
  if not config_path.exists():
@@ -216,7 +223,7 @@ def get_default_config() -> dict[str, Any]:
216
223
  "pool_max_size": 20,
217
224
  },
218
225
  "llm": {
219
- "default_model": "anthropic:claude-sonnet-4-5-20250929",
226
+ "default_model": "openai:gpt-4.1",
220
227
  "default_temperature": 0.5,
221
228
  # API keys will be prompted for in wizard
222
229
  # "openai_api_key": "",
rem/models/core/core_model.py CHANGED
@@ -52,7 +52,13 @@ class CoreModel(BaseModel):
52
52
  default=None, description="Tenant identifier for multi-tenancy isolation"
53
53
  )
54
54
  user_id: Optional[str] = Field(
55
- default=None, description="Owner user identifier (tenant-scoped)"
55
+ default=None,
56
+ description=(
57
+ "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, "
58
+ "to allow flexibility for external identity providers. Typically generated as "
59
+ "a hash of the user's email address. In future, other strong unique claims "
60
+ "(e.g., OAuth sub, verified phone) could also be used for generation."
61
+ ),
56
62
  )
57
63
  graph_edges: list[dict] = Field(
58
64
  default_factory=list,
rem/models/core/experiment.py CHANGED
@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
138
138
 
139
139
  path: str = Field(
140
140
  description=(
141
- "Path to dataset:\n"
141
+ "Path to dataset. Format is inferred from file extension.\n"
142
+ "Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
142
143
  "- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
143
- "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/ground_truth.csv')\n"
144
+ "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
144
145
  "- Hybrid: S3 URI for data, Git path for schema"
145
146
  )
146
147
  )
147
148
 
148
- format: Literal["csv", "jsonl", "parquet", "json"] = Field(
149
- default="csv",
150
- description="Dataset file format"
151
- )
152
-
153
149
  schema_path: str | None = Field(
154
150
  default=None,
155
151
  description=(
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
262
258
  datasets:
263
259
  ground_truth:
264
260
  location: git
265
- path: datasets/ground_truth.csv
266
- format: csv
261
+ path: datasets/ground_truth.csv # format inferred from extension
267
262
  results:
268
263
  location: git
269
264
  base_path: results/
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
288
283
  ground_truth:
289
284
  location: s3
290
285
  path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
291
- format: parquet
292
286
  schema_path: datasets/schema.yaml # Schema in Git for documentation
293
287
  test_cases:
294
288
  location: s3
295
289
  path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
296
- format: jsonl
297
290
  results:
298
291
  location: hybrid
299
292
  base_path: s3://rem-prod/experiments/cv-parser-production/results/
@@ -318,6 +311,15 @@ class ExperimentConfig(BaseModel):
318
311
  )
319
312
  )
320
313
 
314
+ task: str = Field(
315
+ default="general",
316
+ description=(
317
+ "Task name for organizing experiments by purpose.\n"
318
+ "Used with agent name to form directory: {agent}/{task}/\n"
319
+ "Examples: 'risk-assessment', 'classification', 'general'"
320
+ )
321
+ )
322
+
321
323
  description: str = Field(
322
324
  description="Human-readable description of experiment purpose and goals"
323
325
  )
@@ -410,6 +412,24 @@ class ExperimentConfig(BaseModel):
410
412
 
411
413
  return v
412
414
 
415
+ @field_validator("task")
416
+ @classmethod
417
+ def validate_task(cls, v: str) -> str:
418
+ """Validate task name follows conventions."""
419
+ if not v:
420
+ return "general" # Default value
421
+
422
+ if not v.islower():
423
+ raise ValueError("Task name must be lowercase")
424
+
425
+ if " " in v:
426
+ raise ValueError("Task name cannot contain spaces (use hyphens)")
427
+
428
+ if not all(c.isalnum() or c == "-" for c in v):
429
+ raise ValueError("Task name can only contain lowercase letters, numbers, and hyphens")
430
+
431
+ return v
432
+
413
433
  @field_validator("tags")
414
434
  @classmethod
415
435
  def validate_tags(cls, v: list[str]) -> list[str]:
@@ -420,6 +440,15 @@ class ExperimentConfig(BaseModel):
420
440
  """Get the experiment directory path."""
421
441
  return Path(base_path) / self.name
422
442
 
443
+ def get_agent_task_dir(self, base_path: str = ".experiments") -> Path:
444
+ """
445
+ Get the experiment directory path organized by agent/task.
446
+
447
+ Returns: Path like .experiments/{agent}/{task}/
448
+ This is the recommended structure for S3 export compatibility.
449
+ """
450
+ return Path(base_path) / self.agent_schema_ref.name / self.task
451
+
423
452
  def get_config_path(self, base_path: str = ".experiments") -> Path:
424
453
  """Get the path to experiment.yaml file."""
425
454
  return self.get_experiment_dir(base_path) / "experiment.yaml"
@@ -428,6 +457,22 @@ class ExperimentConfig(BaseModel):
428
457
  """Get the path to README.md file."""
429
458
  return self.get_experiment_dir(base_path) / "README.md"
430
459
 
460
+ def get_evaluator_filename(self) -> str:
461
+ """
462
+ Get the evaluator filename with task prefix.
463
+
464
+ Returns: {agent_name}-{task}.yaml (e.g., siggy-risk-assessment.yaml)
465
+ """
466
+ return f"{self.agent_schema_ref.name}-{self.task}.yaml"
467
+
468
+ def get_s3_export_path(self, bucket: str, version: str = "v0") -> str:
469
+ """
470
+ Get the S3 path for exporting this experiment.
471
+
472
+ Returns: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
473
+ """
474
+ return f"s3://{bucket}/{version}/datasets/calibration/experiments/{self.agent_schema_ref.name}/{self.task}"
475
+
431
476
  def to_yaml(self) -> str:
432
477
  """Export configuration as YAML string."""
433
478
  import yaml
@@ -483,6 +528,7 @@ class ExperimentConfig(BaseModel):
483
528
  ## Configuration
484
529
 
485
530
  **Status**: `{self.status.value}`
531
+ **Task**: `{self.task}`
486
532
  **Tags**: {', '.join(f'`{tag}`' for tag in self.tags) if self.tags else 'None'}
487
533
 
488
534
  ## Agent Schema
@@ -494,6 +540,7 @@ class ExperimentConfig(BaseModel):
494
540
  ## Evaluator Schema
495
541
 
496
542
  - **Name**: `{self.evaluator_schema_ref.name}`
543
+ - **File**: `{self.get_evaluator_filename()}`
497
544
  - **Type**: `{self.evaluator_schema_ref.type}`
498
545
 
499
546
  ## Datasets
@@ -504,7 +551,6 @@ class ExperimentConfig(BaseModel):
504
551
 
505
552
  - **Location**: `{dataset.location.value}`
506
553
  - **Path**: `{dataset.path}`
507
- - **Format**: `{dataset.format}`
508
554
  """
509
555
  if dataset.description:
510
556
  readme += f"- **Description**: {dataset.description}\n"
@@ -575,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
575
621
  "ground_truth": DatasetReference(
576
622
  location=DatasetLocation.GIT,
577
623
  path="datasets/ground_truth.csv",
578
- format="csv",
579
624
  description="10 manually curated test cases"
580
625
  )
581
626
  },
@@ -605,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
605
650
  "ground_truth": DatasetReference(
606
651
  location=DatasetLocation.S3,
607
652
  path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
608
- format="parquet",
609
653
  schema_path="datasets/schema.yaml",
610
654
  description="10,000 CV/resume pairs with ground truth extractions"
611
655
  )
rem/models/core/rem_query.py CHANGED
@@ -112,7 +112,7 @@ class SearchParameters(BaseModel):
112
112
  table_name: str = Field(..., description="Table to search (resources, moments, etc.)")
113
113
  limit: int = Field(default=10, gt=0, description="Maximum results")
114
114
  min_similarity: float = Field(
115
- default=0.7, ge=0.0, le=1.0, description="Minimum similarity score"
115
+ default=0.3, ge=0.0, le=1.0, description="Minimum similarity score (0.3 recommended for general queries)"
116
116
  )
117
117
 
118
118
 
@@ -198,7 +198,10 @@ class RemQuery(BaseModel):
198
198
  | SQLParameters
199
199
  | TraverseParameters
200
200
  ) = Field(..., description="Query parameters")
201
- user_id: str = Field(..., description="User identifier for isolation")
201
+ user_id: Optional[str] = Field(
202
+ default=None,
203
+ description="User identifier (UUID5 hash of email). None = anonymous (shared/public data only)"
204
+ )
202
205
 
203
206
 
204
207
  class TraverseStage(BaseModel):
rem/models/entities/__init__.py CHANGED
@@ -5,6 +5,9 @@ Core entity types for the REM system:
5
5
  - Resources: Base content units (documents, conversations, artifacts)
6
6
  - ImageResources: Image-specific resources with CLIP embeddings
7
7
  - Messages: Communication content
8
+ - Sessions: Conversation sessions (normal or evaluation mode)
9
+ - SharedSessions: Session sharing between users for collaboration
10
+ - Feedback: User feedback on messages/sessions with trace integration
8
11
  - Users: User entities
9
12
  - Files: File metadata and tracking
10
13
  - Moments: Temporal narratives (meetings, coding sessions, conversations)
@@ -19,6 +22,8 @@ All entities inherit from CoreModel and support:
19
22
  - Natural language labels for conversational queries
20
23
  """
21
24
 
25
+ from .domain_resource import DomainResource
26
+ from .feedback import Feedback, FeedbackCategory
22
27
  from .file import File
23
28
  from .image_resource import ImageResource
24
29
  from .message import Message
@@ -27,14 +32,34 @@ from .ontology import Ontology
27
32
  from .ontology_config import OntologyConfig
28
33
  from .resource import Resource
29
34
  from .schema import Schema
35
+ from .session import Session, SessionMode
36
+ from .shared_session import (
37
+ SharedSession,
38
+ SharedSessionCreate,
39
+ SharedWithMeResponse,
40
+ SharedWithMeSummary,
41
+ )
42
+ from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
30
43
  from .user import User, UserTier
31
44
 
32
45
  __all__ = [
33
46
  "Resource",
47
+ "DomainResource",
34
48
  "ImageResource",
35
49
  "Message",
50
+ "Session",
51
+ "SessionMode",
52
+ "SharedSession",
53
+ "SharedSessionCreate",
54
+ "SharedWithMeResponse",
55
+ "SharedWithMeSummary",
56
+ "Feedback",
57
+ "FeedbackCategory",
36
58
  "User",
37
59
  "UserTier",
60
+ "Subscriber",
61
+ "SubscriberStatus",
62
+ "SubscriberOrigin",
38
63
  "File",
39
64
  "Moment",
40
65
  "Schema",
rem/models/entities/domain_resource.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ DomainResource - Curated internal knowledge in REM.
3
+
4
+ DomainResources are a specialized subclass of Resource for storing curated,
5
+ domain-specific internal knowledge that is not part of general knowledge.
6
+ This includes proprietary information, internal documentation, institutional
7
+ knowledge, and other content that requires more careful curation.
8
+
9
+ Key Differences from Resource:
10
+ - Intended for curated, internal knowledge (not raw ingested content)
11
+ - Higher quality bar - content is reviewed/vetted before ingestion
12
+ - May contain proprietary or sensitive information
13
+ - Subject to different retention/governance policies
14
+
15
+ Use Cases:
16
+ - Internal documentation and procedures
17
+ - Proprietary research and analysis
18
+ - Institutional knowledge bases
19
+ - Domain-specific ontologies and taxonomies
20
+ - Curated best practices and guidelines
21
+ """
22
+
23
+ from .resource import Resource
24
+
25
+
26
+ class DomainResource(Resource):
27
+ """
28
+ Curated domain-specific knowledge resource.
29
+
30
+ Inherits all fields from Resource but stored in a separate table
31
+ (domain_resources) to distinguish curated internal knowledge from
32
+ general ingested content.
33
+
34
+ The schema is identical to Resource, allowing seamless migration
35
+ of content between tables as curation status changes.
36
+ """
37
+
38
+ pass
rem/models/entities/feedback.py ADDED
@@ -0,0 +1,123 @@
1
+ """
2
+ Feedback - User feedback on chat messages and sessions.
3
+
4
+ Feedback allows users to rate and categorize responses, providing
5
+ data for evaluation and model improvement. Feedback can be attached
6
+ to specific messages or entire sessions.
7
+
8
+ Trace Integration:
9
+ - Feedback references trace_id/span_id for OTEL/Phoenix integration
10
+ - Can attach annotations to Phoenix spans for unified observability
11
+
12
+ Predefined Categories (system-defined, extensible):
13
+ - INCOMPLETE: Response lacks expected information
14
+ - INACCURATE: Response contains factual errors
15
+ - POOR_TONE: Inappropriate or unprofessional tone
16
+ - OFF_TOPIC: Response doesn't address the question
17
+ - TOO_VERBOSE: Unnecessarily long response
18
+ - TOO_BRIEF: Insufficiently detailed response
19
+ - HELPFUL: Positive feedback marker
20
+ - EXCELLENT: Exceptionally good response
21
+ """
22
+
23
+ from enum import Enum
24
+ from typing import Any
25
+
26
+ from pydantic import Field
27
+
28
+ from ..core import CoreModel
29
+
30
+
31
+ class FeedbackCategory(str, Enum):
32
+ """Predefined feedback categories (system-defined)."""
33
+
34
+ # Negative categories
35
+ INCOMPLETE = "incomplete"
36
+ INACCURATE = "inaccurate"
37
+ POOR_TONE = "poor_tone"
38
+ OFF_TOPIC = "off_topic"
39
+ TOO_VERBOSE = "too_verbose"
40
+ TOO_BRIEF = "too_brief"
41
+ CONFUSING = "confusing"
42
+ UNSAFE = "unsafe"
43
+
44
+ # Positive categories
45
+ HELPFUL = "helpful"
46
+ EXCELLENT = "excellent"
47
+ ACCURATE = "accurate"
48
+ WELL_WRITTEN = "well_written"
49
+
50
+ # Neutral/Other
51
+ OTHER = "other"
52
+
53
+
54
+ class Feedback(CoreModel):
55
+ """
56
+ User feedback on a message or session.
57
+
58
+ Captures structured feedback including:
59
+ - Rating (1-5 scale or thumbs up/down)
60
+ - Categories (predefined or custom)
61
+ - Free-text comment
62
+ - Trace reference for OTEL/Phoenix integration
63
+
64
+ The feedback can be attached to:
65
+ - A specific message (message_id set)
66
+ - An entire session (session_id set, message_id null)
67
+ """
68
+
69
+ # Target reference (at least one required)
70
+ session_id: str = Field(
71
+ ...,
72
+ description="Session ID this feedback relates to",
73
+ )
74
+ message_id: str | None = Field(
75
+ default=None,
76
+ description="Specific message ID (null for session-level feedback)",
77
+ )
78
+
79
+ # Rating (flexible: 1-5, or -1/1 for thumbs)
80
+ rating: int | None = Field(
81
+ default=None,
82
+ ge=-1,
83
+ le=5,
84
+ description="Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale",
85
+ )
86
+
87
+ # Categories (can select multiple)
88
+ categories: list[str] = Field(
89
+ default_factory=list,
90
+ description="Selected feedback categories (from FeedbackCategory or custom)",
91
+ )
92
+
93
+ # Free-text comment
94
+ comment: str | None = Field(
95
+ default=None,
96
+ description="Optional free-text feedback comment",
97
+ )
98
+
99
+ # Trace reference for OTEL/Phoenix integration
100
+ trace_id: str | None = Field(
101
+ default=None,
102
+ description="OTEL trace ID for linking to observability",
103
+ )
104
+ span_id: str | None = Field(
105
+ default=None,
106
+ description="OTEL span ID for specific span feedback",
107
+ )
108
+
109
+ # Phoenix annotation status
110
+ phoenix_synced: bool = Field(
111
+ default=False,
112
+ description="Whether feedback has been synced to Phoenix as annotation",
113
+ )
114
+ phoenix_annotation_id: str | None = Field(
115
+ default=None,
116
+ description="Phoenix annotation ID after sync",
117
+ )
118
+
119
+ # Annotator info
120
+ annotator_kind: str = Field(
121
+ default="HUMAN",
122
+ description="Annotator type: HUMAN, LLM, CODE",
123
+ )
rem/models/entities/message.py CHANGED
@@ -6,6 +6,11 @@ that can be grouped into conversations or moments.
6
6
 
7
7
  Messages are simpler than Resources but share the same graph connectivity
8
8
  through CoreModel inheritance.
9
+
10
+ Trace Integration:
11
+ - trace_id: OTEL trace ID for linking to observability
12
+ - span_id: OTEL span ID for specific span reference
13
+ - These enable feedback to be attached to Phoenix annotations
9
14
  """
10
15
 
11
16
  from pydantic import Field
@@ -19,6 +24,9 @@ class Message(CoreModel):
19
24
 
20
25
  Represents individual messages in conversations, chats, or other
21
26
  communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
27
+
28
+ Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
29
+ for observability and feedback annotation.
22
30
  """
23
31
 
24
32
  content: str = Field(
@@ -27,9 +35,30 @@ class Message(CoreModel):
27
35
  )
28
36
  message_type: str | None = Field(
29
37
  default=None,
30
- description="Message type e.g role",
38
+ description="Message type e.g. role: 'user', 'assistant', 'system', 'tool'",
31
39
  )
32
40
  session_id: str | None = Field(
33
41
  default=None,
34
42
  description="Session identifier for tracking message context",
35
43
  )
44
+ prompt: str | None = Field(
45
+ default=None,
46
+ description="Custom prompt used for this message (if overridden from default)",
47
+ )
48
+ model: str | None = Field(
49
+ default=None,
50
+ description="Model used for generating this message (provider:model format)",
51
+ )
52
+ token_count: int | None = Field(
53
+ default=None,
54
+ description="Token count for this message",
55
+ )
56
+ # OTEL/Phoenix trace integration
57
+ trace_id: str | None = Field(
58
+ default=None,
59
+ description="OTEL trace ID for observability integration",
60
+ )
61
+ span_id: str | None = Field(
62
+ default=None,
63
+ description="OTEL span ID for specific span reference",
64
+ )
rem/models/entities/ontology.py CHANGED
@@ -129,7 +129,7 @@ class Ontology(CoreModel):
129
129
  file_id="file-uuid-456",
130
130
  agent_schema_id="contract-parser-v2",
131
131
  provider_name="openai",
132
- model_name="gpt-4o",
132
+ model_name="gpt-4.1",
133
133
  extracted_data={
134
134
  "contract_type": "supplier_agreement",
135
135
  "parties": [
rem/models/entities/ontology_config.py CHANGED
@@ -74,7 +74,7 @@ class OntologyConfig(CoreModel):
74
74
  priority=200, # Higher priority = runs first
75
75
  enabled=True,
76
76
  provider_name="openai", # Override default provider
77
- model_name="gpt-4o",
77
+ model_name="gpt-4.1",
78
78
  tenant_id="acme-corp",
79
79
  tags=["legal", "procurement"]
80
80
  )
rem/models/entities/session.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Session - Conversation sessions in REM.
3
+
4
+ Sessions group related messages together and can have different modes:
5
+ - normal: Standard conversation session
6
+ - evaluation: For LLM evaluation, stores original trace and overridden settings
7
+
8
+ Sessions allow overriding settings like model, temperature, and custom prompts
9
+ for evaluation and experimentation purposes.
10
+ """
11
+
12
+ from enum import Enum
13
+
14
+ from pydantic import Field
15
+
16
+ from ..core import CoreModel
17
+
18
+
19
+ class SessionMode(str, Enum):
20
+ """Session mode types."""
21
+
22
+ NORMAL = "normal"
23
+ EVALUATION = "evaluation"
24
+
25
+
26
+ class Session(CoreModel):
27
+ """
28
+ Conversation session container.
29
+
30
+ Groups messages together and supports different modes for normal conversations
31
+ and evaluation/experimentation scenarios.
32
+
33
+ For evaluation sessions, stores:
34
+ - original_trace_id: Reference to the original session being evaluated
35
+ - settings_overrides: Model, temperature, prompt overrides
36
+ - prompt: Custom prompt being tested
37
+
38
+ Default sessions are lightweight - just a session_id on messages.
39
+ Special sessions store additional metadata for experiments.
40
+ """
41
+
42
+ name: str = Field(
43
+ ...,
44
+ description="Session name/identifier",
45
+ json_schema_extra={"entity_key": True},
46
+ )
47
+ mode: SessionMode = Field(
48
+ default=SessionMode.NORMAL,
49
+ description="Session mode: 'normal' or 'evaluation'",
50
+ )
51
+ description: str | None = Field(
52
+ default=None,
53
+ description="Optional session description",
54
+ )
55
+ # Evaluation-specific fields
56
+ original_trace_id: str | None = Field(
57
+ default=None,
58
+ description="For evaluation mode: ID of the original session/trace being evaluated",
59
+ )
60
+ settings_overrides: dict | None = Field(
61
+ default=None,
62
+ description="Settings overrides (model, temperature, max_tokens, system_prompt)",
63
+ )
64
+ prompt: str | None = Field(
65
+ default=None,
66
+ description="Custom prompt for this session (can override agent prompt)",
67
+ )
68
+ # Agent context
69
+ agent_schema_uri: str | None = Field(
70
+ default=None,
71
+ description="Agent schema used for this session",
72
+ )
73
+ # Summary stats (updated as session progresses)
74
+ message_count: int = Field(
75
+ default=0,
76
+ description="Number of messages in this session",
77
+ )
78
+ total_tokens: int | None = Field(
79
+ default=None,
80
+ description="Total tokens used in this session",
81
+ )
82
+
83
+ model_config = {"use_enum_values": True}