remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (82)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +7 -5
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/providers/phoenix.py +32 -43
  9. rem/agentic/providers/pydantic_ai.py +84 -10
  10. rem/api/README.md +238 -1
  11. rem/api/deps.py +255 -0
  12. rem/api/main.py +70 -22
  13. rem/api/mcp_router/server.py +8 -1
  14. rem/api/mcp_router/tools.py +80 -0
  15. rem/api/middleware/tracking.py +172 -0
  16. rem/api/routers/admin.py +277 -0
  17. rem/api/routers/auth.py +124 -0
  18. rem/api/routers/chat/completions.py +123 -14
  19. rem/api/routers/chat/models.py +7 -3
  20. rem/api/routers/chat/sse_events.py +526 -0
  21. rem/api/routers/chat/streaming.py +468 -45
  22. rem/api/routers/dev.py +81 -0
  23. rem/api/routers/feedback.py +455 -0
  24. rem/api/routers/messages.py +473 -0
  25. rem/api/routers/models.py +78 -0
  26. rem/api/routers/shared_sessions.py +406 -0
  27. rem/auth/middleware.py +126 -27
  28. rem/cli/commands/ask.py +15 -11
  29. rem/cli/commands/configure.py +169 -94
  30. rem/cli/commands/db.py +53 -7
  31. rem/cli/commands/experiments.py +278 -96
  32. rem/cli/commands/process.py +8 -7
  33. rem/cli/commands/scaffold.py +47 -0
  34. rem/cli/commands/schema.py +9 -9
  35. rem/cli/main.py +10 -0
  36. rem/config.py +2 -2
  37. rem/models/core/core_model.py +7 -1
  38. rem/models/entities/__init__.py +21 -0
  39. rem/models/entities/domain_resource.py +38 -0
  40. rem/models/entities/feedback.py +123 -0
  41. rem/models/entities/message.py +30 -1
  42. rem/models/entities/session.py +83 -0
  43. rem/models/entities/shared_session.py +206 -0
  44. rem/models/entities/user.py +10 -3
  45. rem/registry.py +367 -0
  46. rem/schemas/agents/rem.yaml +7 -3
  47. rem/services/content/providers.py +94 -140
  48. rem/services/content/service.py +85 -16
  49. rem/services/dreaming/affinity_service.py +2 -16
  50. rem/services/dreaming/moment_service.py +2 -15
  51. rem/services/embeddings/api.py +20 -13
  52. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  53. rem/services/phoenix/client.py +252 -19
  54. rem/services/postgres/README.md +29 -10
  55. rem/services/postgres/repository.py +132 -0
  56. rem/services/postgres/schema_generator.py +86 -5
  57. rem/services/rate_limit.py +113 -0
  58. rem/services/rem/README.md +14 -0
  59. rem/services/session/compression.py +17 -1
  60. rem/services/user_service.py +98 -0
  61. rem/settings.py +115 -17
  62. rem/sql/background_indexes.sql +10 -0
  63. rem/sql/migrations/001_install.sql +152 -2
  64. rem/sql/migrations/002_install_models.sql +580 -231
  65. rem/sql/migrations/003_seed_default_user.sql +48 -0
  66. rem/utils/constants.py +97 -0
  67. rem/utils/date_utils.py +228 -0
  68. rem/utils/embeddings.py +17 -4
  69. rem/utils/files.py +167 -0
  70. rem/utils/mime_types.py +158 -0
  71. rem/utils/model_helpers.py +156 -1
  72. rem/utils/schema_loader.py +273 -14
  73. rem/utils/sql_types.py +3 -1
  74. rem/utils/vision.py +9 -14
  75. rem/workers/README.md +14 -14
  76. rem/workers/db_maintainer.py +74 -0
  77. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
  78. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
  79. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
  80. rem/sql/002_install_models.sql +0 -1068
  81. rem/sql/install_models.sql +0 -1038
  82. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
@@ -5,13 +5,20 @@ Provides synchronous and async wrappers for embedding generation using
5
5
  raw HTTP requests (no OpenAI SDK dependency).
6
6
  """
7
7
 
8
- import os
9
8
  from typing import Optional, cast
10
9
 
11
10
  import httpx
12
11
  import requests
13
12
  from loguru import logger
14
13
 
14
+ from rem.utils.constants import DEFAULT_EMBEDDING_DIMS, HTTP_TIMEOUT_DEFAULT
15
+
16
+
17
def _get_openai_api_key() -> Optional[str]:
    """Look up the OpenAI API key on the REM settings object.

    ``rem.settings`` is imported when this function is called rather than
    when the module is imported.

    Returns:
        The value of ``settings.llm.openai_api_key`` (typed as optional).
    """
    from rem.settings import settings as rem_settings

    llm_settings = rem_settings.llm
    return llm_settings.openai_api_key
21
+
15
22
 
16
23
  def generate_embedding(
17
24
  text: str,
@@ -26,16 +33,16 @@ def generate_embedding(
26
33
  text: Text to embed
27
34
  model: Model name (default: text-embedding-3-small)
28
35
  provider: Provider name (default: openai)
29
- api_key: API key (defaults to OPENAI_API_KEY env var)
36
+ api_key: API key (defaults to settings.llm.openai_api_key)
30
37
 
31
38
  Returns:
32
39
  Embedding vector (1536 dimensions for text-embedding-3-small)
33
40
  """
34
41
  if provider == "openai":
35
- api_key = api_key or os.getenv("OPENAI_API_KEY")
42
+ api_key = api_key or _get_openai_api_key()
36
43
  if not api_key:
37
44
  logger.warning("No OpenAI API key - returning zero vector")
38
- return [0.0] * 1536
45
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
39
46
 
40
47
  try:
41
48
  logger.info(f"Generating OpenAI embedding for text using {model}")
@@ -47,7 +54,7 @@ def generate_embedding(
47
54
  "Content-Type": "application/json",
48
55
  },
49
56
  json={"input": [text], "model": model},
50
- timeout=30,
57
+ timeout=HTTP_TIMEOUT_DEFAULT,
51
58
  )
52
59
  response.raise_for_status()
53
60
 
@@ -58,11 +65,11 @@ def generate_embedding(
58
65
 
59
66
  except Exception as e:
60
67
  logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
61
- return [0.0] * 1536
68
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
62
69
 
63
70
  else:
64
71
  logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
65
- return [0.0] * 1536
72
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
66
73
 
67
74
 
68
75
  async def generate_embedding_async(
@@ -78,16 +85,16 @@ async def generate_embedding_async(
78
85
  text: Text to embed
79
86
  model: Model name (default: text-embedding-3-small)
80
87
  provider: Provider name (default: openai)
81
- api_key: API key (defaults to OPENAI_API_KEY env var)
88
+ api_key: API key (defaults to settings.llm.openai_api_key)
82
89
 
83
90
  Returns:
84
91
  Embedding vector (1536 dimensions for text-embedding-3-small)
85
92
  """
86
93
  if provider == "openai":
87
- api_key = api_key or os.getenv("OPENAI_API_KEY")
94
+ api_key = api_key or _get_openai_api_key()
88
95
  if not api_key:
89
96
  logger.warning("No OpenAI API key - returning zero vector")
90
- return [0.0] * 1536
97
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
91
98
 
92
99
  try:
93
100
  logger.info(f"Generating OpenAI embedding for text using {model}")
@@ -100,7 +107,7 @@ async def generate_embedding_async(
100
107
  "Content-Type": "application/json",
101
108
  },
102
109
  json={"input": [text], "model": model},
103
- timeout=30.0,
110
+ timeout=HTTP_TIMEOUT_DEFAULT,
104
111
  )
105
112
  response.raise_for_status()
106
113
 
@@ -113,8 +120,8 @@ async def generate_embedding_async(
113
120
 
114
121
  except Exception as e:
115
122
  logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
116
- return [0.0] * 1536
123
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
117
124
 
118
125
  else:
119
126
  logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
120
- return [0.0] * 1536
127
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
@@ -164,7 +164,7 @@ cp curated-queries.csv experiments/rem-001/validation/production/
164
164
  **Option C: Curated Engrams**
165
165
  ```bash
166
166
  # Generate engrams from REM data
167
- rem dreaming full --user-id test-user --tenant-id acme --generate-test-cases
167
+ rem dreaming full --user-id test-user --generate-test-cases
168
168
 
169
169
  # Review and select high-quality engrams
170
170
  rem engram list --quality high --limit 100 --output engrams.csv
@@ -357,7 +357,7 @@ Level 4 (Mature): Multiple cycles, full query capabilities
357
357
  # Generate engrams from REM data
358
358
  rem dreaming full \
359
359
  --user-id test-user \
360
- --tenant-id acme \
360
+ \
361
361
  --generate-test-cases \
362
362
  --quality-level 3
363
363
 
@@ -1027,7 +1027,7 @@ rem experiments experiment run rem-lookup-ask_rem-golden \
1027
1027
 
1028
1028
  ```bash
1029
1029
  # 1. Generate high-quality engrams
1030
- rem dreaming full --tenant-id acme --generate-test-cases --quality-level 4
1030
+ rem dreaming full --generate-test-cases --quality-level 4
1031
1031
 
1032
1032
  # 2. Export engrams
1033
1033
  rem engram export rem-engrams-mature-mixed --output engrams.csv --format phoenix
@@ -53,7 +53,7 @@ from datetime import datetime
53
53
  from pathlib import Path
54
54
  from typing import Any, Callable, TYPE_CHECKING, cast
55
55
 
56
- import pandas as pd
56
+ import polars as pl
57
57
  from loguru import logger
58
58
 
59
59
  from .config import PhoenixConfig
@@ -64,6 +64,95 @@ if TYPE_CHECKING:
64
64
  from phoenix.client.resources.experiments.types import RanExperiment
65
65
 
66
66
 
67
def dataframe_to_phoenix_dataset(
    client: "PhoenixClient",
    df: pl.DataFrame,
    dataset_name: str,
    input_keys: list[str] | None = None,
    output_keys: list[str] | None = None,
    metadata_keys: list[str] | None = None,
    description: str | None = None,
) -> "Dataset":
    """Convert a Polars DataFrame to a Phoenix Dataset.

    Each DataFrame row becomes one dataset example made of three dicts:

    1. inputs (what agents receive)
    2. outputs (ground truth / expected output)
    3. metadata (optional labels, difficulty, etc.)

    If column keys are not specified, defaults are derived from column names
    (case-insensitive substring match):

    - input_keys: columns containing 'input', 'query', 'question', or
      'prompt'; if none match, the first column.
    - output_keys: columns containing 'output', 'expected', 'answer',
      'response', or 'reference'; if none match, the second column, or the
      first column not already used as an input when the second column is
      itself an input.
    - metadata_keys: every column not used as an input or output.

    Args:
        client: PhoenixClient instance used to create the dataset.
        df: Polars DataFrame with experiment data.
        dataset_name: Name for the created Phoenix dataset.
        input_keys: Optional list of column names for inputs.
        output_keys: Optional list of column names for outputs (ground truth).
        metadata_keys: Optional list of column names for metadata.
        description: Optional dataset description.

    Returns:
        Whatever ``client.create_dataset_from_data`` returns (annotated as a
        Phoenix ``Dataset``).

    Example:
        >>> df = pl.read_csv("golden_set.csv")
        >>> dataset = dataframe_to_phoenix_dataset(
        ...     client=phoenix_client,
        ...     df=df,
        ...     dataset_name="my-golden-set",
        ...     input_keys=["query"],
        ...     output_keys=["expected_output"],
        ...     metadata_keys=["difficulty"]
        ... )
    """
    columns = df.columns

    def _columns_matching(keywords: tuple[str, ...]) -> list[str]:
        # Case-insensitive substring match of any keyword against each column.
        return [c for c in columns if any(k in c.lower() for k in keywords)]

    # Explicit keys that are not real columns would otherwise turn into
    # all-None fields via row.get(k) below; surface that instead of hiding it.
    known_columns = set(columns)
    for role, keys in (
        ("input", input_keys),
        ("output", output_keys),
        ("metadata", metadata_keys),
    ):
        missing = [k for k in (keys or []) if k not in known_columns]
        if missing:
            logger.warning(
                f"Columns {missing} requested as {role} keys are not in the "
                f"DataFrame; their values will be None"
            )

    # Smart defaults for column detection
    if input_keys is None:
        input_keys = _columns_matching(("input", "query", "question", "prompt"))
        if not input_keys:
            # Fallback: first column
            input_keys = [columns[0]] if columns else []

    if output_keys is None:
        output_keys = _columns_matching(
            ("output", "expected", "answer", "response", "reference")
        )
        if not output_keys and len(columns) > 1:
            # Fallback: second column, unless it is already an input; then
            # take the first column that is not an input, so the same column
            # is never silently used as both input and expected output.
            taken = set(input_keys)
            if columns[1] not in taken:
                output_keys = [columns[1]]
            else:
                output_keys = [c for c in columns if c not in taken][:1]

    if metadata_keys is None:
        used_keys = set(input_keys) | set(output_keys)
        metadata_keys = [c for c in columns if c not in used_keys]

    logger.debug(
        f"DataFrame to Phoenix Dataset: inputs={input_keys}, "
        f"outputs={output_keys}, metadata={metadata_keys}"
    )

    # Convert to list of dicts (one dict per row, keyed by column name)
    records = df.to_dicts()

    inputs = [{k: row.get(k) for k in input_keys} for row in records]
    outputs = [{k: row.get(k) for k in output_keys} for row in records]
    # No metadata columns -> pass None rather than a list of empty dicts.
    metadata = (
        [{k: row.get(k) for k in metadata_keys} for row in records]
        if metadata_keys
        else None
    )

    # Create Phoenix dataset
    return client.create_dataset_from_data(
        name=dataset_name,
        inputs=inputs,
        outputs=outputs,
        metadata=metadata,
        description=description,
    )
154
+
155
+
67
156
  class PhoenixClient:
68
157
  """High-level Phoenix client for REM evaluation workflows.
69
158
 
@@ -260,19 +349,22 @@ class PhoenixClient:
260
349
  "SEARCH semantic AI engineer",sarah-chen,person,medium,SEARCH
261
350
  """
262
351
  try:
263
- # Load CSV
264
- df = pd.read_csv(csv_file_path)
352
+ # Load CSV with Polars
353
+ df = pl.read_csv(csv_file_path)
354
+
355
+ # Convert to list of dicts
356
+ records = df.to_dicts()
265
357
 
266
358
  # Extract inputs
267
- inputs = cast(list[dict[str, Any]], df[input_keys].to_dict("records"))
359
+ inputs = [{k: row.get(k) for k in input_keys} for row in records]
268
360
 
269
361
  # Extract outputs
270
- outputs = cast(list[dict[str, Any]], df[output_keys].to_dict("records"))
362
+ outputs = [{k: row.get(k) for k in output_keys} for row in records]
271
363
 
272
364
  # Extract metadata if specified
273
365
  metadata = None
274
366
  if metadata_keys:
275
- metadata = cast(list[dict[str, Any]], df[metadata_keys].to_dict("records"))
367
+ metadata = [{k: row.get(k) for k in metadata_keys} for row in records]
276
368
 
277
369
  return self.create_dataset_from_data(
278
370
  name=name,
@@ -331,13 +423,16 @@ class PhoenixClient:
331
423
 
332
424
  def run_experiment(
333
425
  self,
334
- dataset: "Dataset" | str,
426
+ dataset: "Dataset" | str | pl.DataFrame,
335
427
  task: Callable[[Any], Any] | None = None,
336
428
  evaluators: list[Callable[[Any], Any]] | None = None,
337
429
  experiment_name: str | None = None,
338
430
  experiment_description: str | None = None,
339
431
  experiment_metadata: dict[str, Any] | None = None,
340
432
  experiment_config: Any | None = None,
433
+ input_keys: list[str] | None = None,
434
+ output_keys: list[str] | None = None,
435
+ metadata_keys: list[str] | None = None,
341
436
  ) -> "RanExperiment":
342
437
  """Run an evaluation experiment.
343
438
 
@@ -346,14 +441,22 @@ class PhoenixClient:
346
441
  2. Agent run: Provide task function to execute agents on dataset
347
442
  3. Evaluator run: Provide evaluators to score existing outputs
348
443
 
444
+ Dataset can be:
445
+ - Phoenix Dataset instance
446
+ - Dataset name (string) - will be loaded from Phoenix
447
+ - Polars DataFrame - will be converted to Phoenix Dataset
448
+
349
449
  Args:
350
- dataset: Dataset instance or name (required unless experiment_config provided)
450
+ dataset: Dataset instance, name, or Polars DataFrame
351
451
  task: Optional task function to run on each example (agent execution)
352
452
  evaluators: Optional list of evaluator functions
353
453
  experiment_name: Optional experiment name
354
454
  experiment_description: Optional description
355
455
  experiment_metadata: Optional metadata dict
356
456
  experiment_config: Optional ExperimentConfig instance (overrides other params)
457
+ input_keys: Column names for inputs (required if dataset is DataFrame)
458
+ output_keys: Column names for outputs (required if dataset is DataFrame)
459
+ metadata_keys: Optional column names for metadata
357
460
 
358
461
  Returns:
359
462
  RanExperiment with results
@@ -369,6 +472,16 @@ class PhoenixClient:
369
472
  ... experiment_name="rem-v1-baseline"
370
473
  ... )
371
474
 
475
+ Example - With Polars DataFrame:
476
+ >>> df = pl.read_csv("golden_set.csv")
477
+ >>> experiment = client.run_experiment(
478
+ ... dataset=df,
479
+ ... task=run_agent,
480
+ ... experiment_name="rem-v1-baseline",
481
+ ... input_keys=["query"],
482
+ ... output_keys=["expected_output"]
483
+ ... )
484
+
372
485
  Example - Evaluator Run (Phase 2b):
373
486
  >>> experiment = client.run_experiment(
374
487
  ... dataset=agent_results,
@@ -407,6 +520,21 @@ class PhoenixClient:
407
520
  else:
408
521
  dataset = dataset_ref.path
409
522
 
523
+ # Convert Polars DataFrame to Phoenix Dataset
524
+ if isinstance(dataset, pl.DataFrame):
525
+ dataset_name_for_phoenix = f"{experiment_name or 'experiment'}-dataset-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
526
+ logger.info(f"Converting Polars DataFrame to Phoenix Dataset: {dataset_name_for_phoenix}")
527
+ dataset = dataframe_to_phoenix_dataset(
528
+ client=self,
529
+ df=dataset,
530
+ dataset_name=dataset_name_for_phoenix,
531
+ input_keys=input_keys,
532
+ output_keys=output_keys,
533
+ metadata_keys=metadata_keys,
534
+ description=f"Auto-created from DataFrame for experiment: {experiment_name}",
535
+ )
536
+ logger.info(f"✓ Created Phoenix Dataset: {dataset_name_for_phoenix}")
537
+
410
538
  # Load dataset if name provided
411
539
  if isinstance(dataset, str):
412
540
  dataset = self.get_dataset(dataset)
@@ -454,7 +582,7 @@ class PhoenixClient:
454
582
  root_spans_only: bool = True,
455
583
  trace_id: str | None = None,
456
584
  span_id: str | None = None,
457
- ) -> pd.DataFrame:
585
+ ) -> pl.DataFrame:
458
586
  """Query traces from Phoenix.
459
587
 
460
588
  Args:
@@ -467,7 +595,7 @@ class PhoenixClient:
467
595
  span_id: Filter by specific span ID
468
596
 
469
597
  Returns:
470
- DataFrame with trace data
598
+ Polars DataFrame with trace data
471
599
 
472
600
  Example:
473
601
  >>> traces = client.get_traces(
@@ -492,8 +620,11 @@ class PhoenixClient:
492
620
  if span_id:
493
621
  query_params["span_id"] = span_id
494
622
 
495
- # Query traces
496
- traces_df = self._client.query_spans(limit=limit, **query_params) # type: ignore[attr-defined]
623
+ # Query traces (Phoenix returns pandas DataFrame)
624
+ pandas_df = self._client.query_spans(limit=limit, **query_params) # type: ignore[attr-defined]
625
+
626
+ # Convert pandas to Polars
627
+ traces_df = pl.from_pandas(pandas_df)
497
628
 
498
629
  logger.debug(f"Retrieved {len(traces_df)} traces")
499
630
  return traces_df
@@ -535,7 +666,7 @@ class PhoenixClient:
535
666
  ... )
536
667
  """
537
668
  try:
538
- # Query traces
669
+ # Query traces (returns Polars DataFrame)
539
670
  traces_df = self.get_traces(
540
671
  project_name=project_name,
541
672
  start_time=start_time,
@@ -547,12 +678,15 @@ class PhoenixClient:
547
678
  if len(traces_df) == 0:
548
679
  raise ValueError("No traces found matching criteria")
549
680
 
681
+ # Convert to list of dicts for iteration
682
+ records = traces_df.to_dicts()
683
+
550
684
  # Extract inputs and outputs from traces
551
685
  inputs = []
552
686
  outputs = []
553
687
  metadata = []
554
688
 
555
- for _, row in traces_df.iterrows():
689
+ for row in records:
556
690
  # Extract input
557
691
  span_input = row.get("attributes.input")
558
692
  if span_input:
@@ -658,29 +792,128 @@ class PhoenixClient:
658
792
  label: str | None = None,
659
793
  score: float | None = None,
660
794
  explanation: str | None = None,
661
- ) -> None:
795
+ metadata: dict[str, Any] | None = None,
796
+ ) -> str | None:
662
797
  """Add feedback annotation to a span.
663
798
 
664
799
  Args:
665
800
  span_id: Span ID to annotate
666
- annotation_name: Name of the annotation (e.g., "correctness")
801
+ annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
667
802
  annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
668
- label: Optional label (e.g., "correct", "incorrect")
803
+ label: Optional label (e.g., "correct", "incorrect", "helpful")
669
804
  score: Optional numeric score (0.0-1.0)
670
805
  explanation: Optional explanation text
806
+ metadata: Optional additional metadata dict
807
+
808
+ Returns:
809
+ Annotation ID if successful, None otherwise
671
810
  """
672
811
  try:
673
- self._client.add_span_annotation( # type: ignore[attr-defined]
812
+ result = self._client.add_span_annotation( # type: ignore[attr-defined]
674
813
  span_id=span_id,
675
814
  name=annotation_name,
676
815
  annotator_kind=annotator_kind,
677
816
  label=label,
678
817
  score=score,
679
818
  explanation=explanation,
819
+ metadata=metadata,
680
820
  )
681
821
 
682
- logger.info(f"Added {annotator_kind} feedback to span {span_id}")
822
+ annotation_id = getattr(result, "id", None) if result else None
823
+ logger.info(f"Added {annotator_kind} feedback to span {span_id} -> {annotation_id}")
824
+
825
+ return annotation_id
683
826
 
684
827
  except Exception as e:
685
828
  logger.error(f"Failed to add span feedback: {e}")
686
829
  raise
830
+
831
+ def sync_user_feedback(
832
+ self,
833
+ span_id: str,
834
+ rating: int | None = None,
835
+ categories: list[str] | None = None,
836
+ comment: str | None = None,
837
+ feedback_id: str | None = None,
838
+ ) -> str | None:
839
+ """Sync user feedback to Phoenix as a span annotation.
840
+
841
+ Convenience method for syncing Feedback entities to Phoenix.
842
+ Converts REM feedback format to Phoenix annotation format.
843
+
844
+ Args:
845
+ span_id: OTEL span ID to annotate
846
+ rating: User rating (-1, 1-5 scale)
847
+ categories: List of feedback categories
848
+ comment: Free-text comment
849
+ feedback_id: Optional REM feedback ID for reference
850
+
851
+ Returns:
852
+ Phoenix annotation ID if successful
853
+
854
+ Example:
855
+ >>> client.sync_user_feedback(
856
+ ... span_id="abc123",
857
+ ... rating=4,
858
+ ... categories=["helpful", "accurate"],
859
+ ... comment="Great response!"
860
+ ... )
861
+ """
862
+ # Convert rating to 0-1 score
863
+ score = None
864
+ if rating is not None:
865
+ if rating == -1:
866
+ score = 0.0
867
+ elif 1 <= rating <= 5:
868
+ score = rating / 5.0
869
+
870
+ # Use primary category as label
871
+ label = categories[0] if categories else None
872
+
873
+ # Build explanation from comment and additional categories
874
+ explanation = comment
875
+ if categories and len(categories) > 1:
876
+ cats_str = ", ".join(categories[1:])
877
+ if explanation:
878
+ explanation = f"{explanation} [Categories: {cats_str}]"
879
+ else:
880
+ explanation = f"Categories: {cats_str}"
881
+
882
+ # Build metadata
883
+ metadata = {
884
+ "rating": rating,
885
+ "categories": categories or [],
886
+ }
887
+ if feedback_id:
888
+ metadata["rem_feedback_id"] = feedback_id
889
+
890
+ return self.add_span_feedback(
891
+ span_id=span_id,
892
+ annotation_name="user_feedback",
893
+ annotator_kind="HUMAN",
894
+ label=label,
895
+ score=score,
896
+ explanation=explanation,
897
+ metadata=metadata,
898
+ )
899
+
900
def get_span_annotations(
    self,
    span_id: str,
    annotation_name: str | None = None,
) -> list[dict[str, Any]]:
    """Return the annotations recorded for a span (placeholder).

    This is currently a stub: it logs a warning and returns an empty list.
    Neither argument is used yet.

    Args:
        span_id: Span ID to query
        annotation_name: Optional filter by annotation name

    Returns:
        List of annotation dicts (always empty for now)

    TODO: Implement once Phoenix client exposes this method
    """
    # TODO: Phoenix client doesn't expose annotation query yet;
    # kept as a stub for future implementation.
    no_annotations: list[dict[str, Any]] = []
    logger.warning("get_span_annotations not yet implemented in Phoenix client")
    return no_annotations
@@ -516,32 +516,51 @@ results = await service.vector_search(
516
516
 
517
517
  ## Migrations
518
518
 
519
- Run migrations in order:
519
+ ### Using the CLI (Recommended)
520
520
 
521
521
  ```bash
522
- psql -d remdb -f sql/migrations/001_setup_extensions.sql
523
- psql -d remdb -f sql/migrations/002_kv_store_cache.sql
524
- psql -d remdb -f sql/generated_schema.sql
522
+ # Apply all migrations
523
+ rem db migrate
524
+
525
+ # Check migration status
526
+ rem db status
525
527
  ```
526
528
 
527
- Background indexes (after data load):
529
+ ### Migration Files
528
530
 
531
+ Located in `src/rem/sql/migrations/`:
532
+ - `001_install.sql` - Core infrastructure (extensions, functions, kv_store)
533
+ - `002_install_models.sql` - Entity tables (auto-generated from Pydantic models)
534
+ - `003_seed_default_user.sql` - Default user setup
535
+
536
+ Background indexes (after data load):
529
537
  ```bash
530
- psql -d remdb -f sql/background_indexes.sql
538
+ rem db migrate --background-indexes
531
539
  ```
532
540
 
533
541
  ## CLI Usage
534
542
 
535
- Generate schema from models:
543
+ ### Generate Schema from Models
544
+
545
+ When you add or modify Pydantic models, regenerate the schema:
536
546
 
537
547
  ```bash
538
- rem schema generate --models src/rem/models/entities --output sql/schema.sql
548
+ # Generate 002_install_models.sql from entity models
549
+ rem db schema generate --models src/rem/models/entities
550
+
551
+ # Output: src/rem/sql/migrations/002_install_models.sql
552
+ # Then apply: rem db migrate
539
553
  ```
540
554
 
541
- Validate models:
555
+ **Workflow for adding new models:**
556
+ 1. Add/modify models in `src/rem/models/entities/`
557
+ 2. Run `rem db schema generate -m src/rem/models/entities`
558
+ 3. Run `rem db migrate` to apply changes
559
+
560
+ ### Validate Models
542
561
 
543
562
  ```bash
544
- rem schema validate --models src/rem/models/entities
563
+ rem db schema validate --models src/rem/models/entities
545
564
  ```
546
565
 
547
566
  ## Configuration