remdb-0.3.0-py3-none-any.whl → remdb-0.3.114-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. More details are available on the page this diff was taken from.

Files changed (98)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/otel/setup.py +92 -4
  9. rem/agentic/providers/phoenix.py +32 -43
  10. rem/agentic/providers/pydantic_ai.py +142 -22
  11. rem/agentic/schema.py +358 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +238 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +151 -37
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +17 -2
  18. rem/api/mcp_router/tools.py +143 -7
  19. rem/api/middleware/tracking.py +172 -0
  20. rem/api/routers/admin.py +277 -0
  21. rem/api/routers/auth.py +124 -0
  22. rem/api/routers/chat/completions.py +152 -16
  23. rem/api/routers/chat/models.py +7 -3
  24. rem/api/routers/chat/sse_events.py +526 -0
  25. rem/api/routers/chat/streaming.py +608 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +148 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +357 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +201 -70
  34. rem/cli/commands/ask.py +13 -10
  35. rem/cli/commands/cluster.py +1359 -0
  36. rem/cli/commands/configure.py +4 -3
  37. rem/cli/commands/db.py +350 -137
  38. rem/cli/commands/experiments.py +76 -72
  39. rem/cli/commands/process.py +22 -15
  40. rem/cli/commands/scaffold.py +47 -0
  41. rem/cli/commands/schema.py +95 -49
  42. rem/cli/main.py +29 -6
  43. rem/config.py +2 -2
  44. rem/models/core/core_model.py +7 -1
  45. rem/models/core/rem_query.py +5 -2
  46. rem/models/entities/__init__.py +21 -0
  47. rem/models/entities/domain_resource.py +38 -0
  48. rem/models/entities/feedback.py +123 -0
  49. rem/models/entities/message.py +30 -1
  50. rem/models/entities/session.py +83 -0
  51. rem/models/entities/shared_session.py +180 -0
  52. rem/models/entities/user.py +10 -3
  53. rem/registry.py +373 -0
  54. rem/schemas/agents/rem.yaml +7 -3
  55. rem/services/content/providers.py +94 -140
  56. rem/services/content/service.py +92 -20
  57. rem/services/dreaming/affinity_service.py +2 -16
  58. rem/services/dreaming/moment_service.py +2 -15
  59. rem/services/embeddings/api.py +24 -17
  60. rem/services/embeddings/worker.py +16 -16
  61. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  62. rem/services/phoenix/client.py +252 -19
  63. rem/services/postgres/README.md +159 -15
  64. rem/services/postgres/__init__.py +2 -1
  65. rem/services/postgres/diff_service.py +426 -0
  66. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  67. rem/services/postgres/repository.py +132 -0
  68. rem/services/postgres/schema_generator.py +86 -5
  69. rem/services/postgres/service.py +6 -6
  70. rem/services/rate_limit.py +113 -0
  71. rem/services/rem/README.md +14 -0
  72. rem/services/rem/parser.py +44 -9
  73. rem/services/rem/service.py +36 -2
  74. rem/services/session/compression.py +17 -1
  75. rem/services/session/reload.py +1 -1
  76. rem/services/user_service.py +98 -0
  77. rem/settings.py +169 -17
  78. rem/sql/background_indexes.sql +21 -16
  79. rem/sql/migrations/001_install.sql +231 -54
  80. rem/sql/migrations/002_install_models.sql +457 -393
  81. rem/sql/migrations/003_optional_extensions.sql +326 -0
  82. rem/utils/constants.py +97 -0
  83. rem/utils/date_utils.py +228 -0
  84. rem/utils/embeddings.py +17 -4
  85. rem/utils/files.py +167 -0
  86. rem/utils/mime_types.py +158 -0
  87. rem/utils/model_helpers.py +156 -1
  88. rem/utils/schema_loader.py +191 -35
  89. rem/utils/sql_types.py +3 -1
  90. rem/utils/vision.py +9 -14
  91. rem/workers/README.md +14 -14
  92. rem/workers/db_maintainer.py +74 -0
  93. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
  94. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
  95. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
  96. rem/sql/002_install_models.sql +0 -1068
  97. rem/sql/install_models.sql +0 -1038
  98. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
@@ -5,13 +5,20 @@ Provides synchronous and async wrappers for embedding generation using
5
5
  raw HTTP requests (no OpenAI SDK dependency).
6
6
  """
7
7
 
8
- import os
9
8
  from typing import Optional, cast
10
9
 
11
10
  import httpx
12
11
  import requests
13
12
  from loguru import logger
14
13
 
14
+ from rem.utils.constants import DEFAULT_EMBEDDING_DIMS, HTTP_TIMEOUT_DEFAULT
15
+
16
+
17
+ def _get_openai_api_key() -> Optional[str]:
18
+ """Get OpenAI API key from settings."""
19
+ from rem.settings import settings
20
+ return settings.llm.openai_api_key
21
+
15
22
 
16
23
  def generate_embedding(
17
24
  text: str,
@@ -26,19 +33,19 @@ def generate_embedding(
26
33
  text: Text to embed
27
34
  model: Model name (default: text-embedding-3-small)
28
35
  provider: Provider name (default: openai)
29
- api_key: API key (defaults to OPENAI_API_KEY env var)
36
+ api_key: API key (defaults to settings.llm.openai_api_key)
30
37
 
31
38
  Returns:
32
39
  Embedding vector (1536 dimensions for text-embedding-3-small)
33
40
  """
34
41
  if provider == "openai":
35
- api_key = api_key or os.getenv("OPENAI_API_KEY")
42
+ api_key = api_key or _get_openai_api_key()
36
43
  if not api_key:
37
44
  logger.warning("No OpenAI API key - returning zero vector")
38
- return [0.0] * 1536
45
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
39
46
 
40
47
  try:
41
- logger.info(f"Generating OpenAI embedding for text using {model}")
48
+ logger.debug(f"Generating OpenAI embedding for text using {model}")
42
49
 
43
50
  response = requests.post(
44
51
  "https://api.openai.com/v1/embeddings",
@@ -47,22 +54,22 @@ def generate_embedding(
47
54
  "Content-Type": "application/json",
48
55
  },
49
56
  json={"input": [text], "model": model},
50
- timeout=30,
57
+ timeout=HTTP_TIMEOUT_DEFAULT,
51
58
  )
52
59
  response.raise_for_status()
53
60
 
54
61
  data = response.json()
55
62
  embedding = data["data"][0]["embedding"]
56
- logger.info(f"Successfully generated embedding (dimension: {len(embedding)})")
63
+ logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
57
64
  return cast(list[float], embedding)
58
65
 
59
66
  except Exception as e:
60
67
  logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
61
- return [0.0] * 1536
68
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
62
69
 
63
70
  else:
64
71
  logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
65
- return [0.0] * 1536
72
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
66
73
 
67
74
 
68
75
  async def generate_embedding_async(
@@ -78,19 +85,19 @@ async def generate_embedding_async(
78
85
  text: Text to embed
79
86
  model: Model name (default: text-embedding-3-small)
80
87
  provider: Provider name (default: openai)
81
- api_key: API key (defaults to OPENAI_API_KEY env var)
88
+ api_key: API key (defaults to settings.llm.openai_api_key)
82
89
 
83
90
  Returns:
84
91
  Embedding vector (1536 dimensions for text-embedding-3-small)
85
92
  """
86
93
  if provider == "openai":
87
- api_key = api_key or os.getenv("OPENAI_API_KEY")
94
+ api_key = api_key or _get_openai_api_key()
88
95
  if not api_key:
89
96
  logger.warning("No OpenAI API key - returning zero vector")
90
- return [0.0] * 1536
97
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
91
98
 
92
99
  try:
93
- logger.info(f"Generating OpenAI embedding for text using {model}")
100
+ logger.debug(f"Generating OpenAI embedding for text using {model}")
94
101
 
95
102
  async with httpx.AsyncClient() as client:
96
103
  response = await client.post(
@@ -100,21 +107,21 @@ async def generate_embedding_async(
100
107
  "Content-Type": "application/json",
101
108
  },
102
109
  json={"input": [text], "model": model},
103
- timeout=30.0,
110
+ timeout=HTTP_TIMEOUT_DEFAULT,
104
111
  )
105
112
  response.raise_for_status()
106
113
 
107
114
  data = response.json()
108
115
  embedding = data["data"][0]["embedding"]
109
- logger.info(
116
+ logger.debug(
110
117
  f"Successfully generated embedding (dimension: {len(embedding)})"
111
118
  )
112
119
  return cast(list[float], embedding)
113
120
 
114
121
  except Exception as e:
115
122
  logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
116
- return [0.0] * 1536
123
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
117
124
 
118
125
  else:
119
126
  logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
120
- return [0.0] * 1536
127
+ return [0.0] * DEFAULT_EMBEDDING_DIMS
@@ -69,7 +69,7 @@ def get_global_embedding_worker(postgres_service: Any = None) -> "EmbeddingWorke
69
69
  if postgres_service is None:
70
70
  raise RuntimeError("Must provide postgres_service on first call to get_global_embedding_worker")
71
71
  _global_worker = EmbeddingWorker(postgres_service=postgres_service)
72
- logger.info("Created global EmbeddingWorker singleton")
72
+ logger.debug("Created global EmbeddingWorker singleton")
73
73
 
74
74
  return _global_worker
75
75
 
@@ -117,7 +117,7 @@ class EmbeddingWorker:
117
117
  "No OpenAI API key provided - embeddings will use zero vectors"
118
118
  )
119
119
 
120
- logger.info(
120
+ logger.debug(
121
121
  f"Initialized EmbeddingWorker: {num_workers} workers, "
122
122
  f"batch_size={batch_size}, timeout={batch_timeout}s"
123
123
  )
@@ -125,17 +125,17 @@ class EmbeddingWorker:
125
125
  async def start(self) -> None:
126
126
  """Start worker pool."""
127
127
  if self.running:
128
- logger.warning("EmbeddingWorker already running")
128
+ logger.debug("EmbeddingWorker already running")
129
129
  return
130
130
 
131
131
  self.running = True
132
- logger.info(f"Starting {self.num_workers} embedding workers")
132
+ logger.debug(f"Starting {self.num_workers} embedding workers")
133
133
 
134
134
  for i in range(self.num_workers):
135
135
  worker = asyncio.create_task(self._worker_loop(i))
136
136
  self.workers.append(worker)
137
137
 
138
- logger.info("EmbeddingWorker started")
138
+ logger.debug("EmbeddingWorker started")
139
139
 
140
140
  async def stop(self) -> None:
141
141
  """Stop worker pool gracefully - processes remaining queue before stopping."""
@@ -143,7 +143,7 @@ class EmbeddingWorker:
143
143
  return
144
144
 
145
145
  queue_size = self.task_queue.qsize()
146
- logger.info(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
146
+ logger.debug(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
147
147
 
148
148
  # Wait for queue to drain (with timeout)
149
149
  max_wait = 30 # 30 seconds max
@@ -171,7 +171,7 @@ class EmbeddingWorker:
171
171
  await asyncio.gather(*self.workers, return_exceptions=True)
172
172
 
173
173
  self.workers.clear()
174
- logger.info("EmbeddingWorker stopped")
174
+ logger.debug("EmbeddingWorker stopped")
175
175
 
176
176
  async def queue_task(self, task: EmbeddingTask) -> None:
177
177
  """
@@ -195,7 +195,7 @@ class EmbeddingWorker:
195
195
  Args:
196
196
  worker_id: Unique worker identifier
197
197
  """
198
- logger.info(f"Worker {worker_id} started")
198
+ logger.debug(f"Worker {worker_id} started")
199
199
 
200
200
  while self.running:
201
201
  try:
@@ -205,7 +205,7 @@ class EmbeddingWorker:
205
205
  if not batch:
206
206
  continue
207
207
 
208
- logger.info(f"Worker {worker_id} processing batch of {len(batch)} tasks")
208
+ logger.debug(f"Worker {worker_id} processing batch of {len(batch)} tasks")
209
209
 
210
210
  # Generate embeddings for batch
211
211
  await self._process_batch(batch)
@@ -213,14 +213,14 @@ class EmbeddingWorker:
213
213
  logger.debug(f"Worker {worker_id} completed batch")
214
214
 
215
215
  except asyncio.CancelledError:
216
- logger.info(f"Worker {worker_id} cancelled")
216
+ logger.debug(f"Worker {worker_id} cancelled")
217
217
  break
218
218
  except Exception as e:
219
219
  logger.error(f"Worker {worker_id} error: {e}", exc_info=True)
220
220
  # Continue processing (don't crash worker on error)
221
221
  await asyncio.sleep(1)
222
222
 
223
- logger.info(f"Worker {worker_id} stopped")
223
+ logger.debug(f"Worker {worker_id} stopped")
224
224
 
225
225
  async def _collect_batch(self) -> list[EmbeddingTask]:
226
226
  """
@@ -284,10 +284,10 @@ class EmbeddingWorker:
284
284
  )
285
285
 
286
286
  # Upsert to database
287
- logger.info(f"Upserting {len(embeddings)} embeddings to database...")
287
+ logger.debug(f"Upserting {len(embeddings)} embeddings to database...")
288
288
  await self._upsert_embeddings(batch, embeddings)
289
289
 
290
- logger.info(
290
+ logger.debug(
291
291
  f"Successfully generated and stored {len(embeddings)} embeddings "
292
292
  f"(provider={provider}, model={model})"
293
293
  )
@@ -315,7 +315,7 @@ class EmbeddingWorker:
315
315
  """
316
316
  if provider == "openai" and self.openai_api_key:
317
317
  try:
318
- logger.info(
318
+ logger.debug(
319
319
  f"Generating OpenAI embeddings for {len(texts)} texts using {model}"
320
320
  )
321
321
 
@@ -336,7 +336,7 @@ class EmbeddingWorker:
336
336
  data = response.json()
337
337
  embeddings = [item["embedding"] for item in data["data"]]
338
338
 
339
- logger.info(
339
+ logger.debug(
340
340
  f"Successfully generated {len(embeddings)} embeddings from OpenAI"
341
341
  )
342
342
  return embeddings
@@ -409,7 +409,7 @@ class EmbeddingWorker:
409
409
  ),
410
410
  )
411
411
 
412
- logger.info(
412
+ logger.debug(
413
413
  f"Upserted embedding: {task.table_name}.{task.entity_id}.{task.field_name}"
414
414
  )
415
415
 
@@ -164,7 +164,7 @@ cp curated-queries.csv experiments/rem-001/validation/production/
164
164
  **Option C: Curated Engrams**
165
165
  ```bash
166
166
  # Generate engrams from REM data
167
- rem dreaming full --user-id test-user --tenant-id acme --generate-test-cases
167
+ rem dreaming full --user-id test-user --generate-test-cases
168
168
 
169
169
  # Review and select high-quality engrams
170
170
  rem engram list --quality high --limit 100 --output engrams.csv
@@ -357,7 +357,7 @@ Level 4 (Mature): Multiple cycles, full query capabilities
357
357
  # Generate engrams from REM data
358
358
  rem dreaming full \
359
359
  --user-id test-user \
360
- --tenant-id acme \
360
+ \
361
361
  --generate-test-cases \
362
362
  --quality-level 3
363
363
 
@@ -1027,7 +1027,7 @@ rem experiments experiment run rem-lookup-ask_rem-golden \
1027
1027
 
1028
1028
  ```bash
1029
1029
  # 1. Generate high-quality engrams
1030
- rem dreaming full --tenant-id acme --generate-test-cases --quality-level 4
1030
+ rem dreaming full --generate-test-cases --quality-level 4
1031
1031
 
1032
1032
  # 2. Export engrams
1033
1033
  rem engram export rem-engrams-mature-mixed --output engrams.csv --format phoenix
@@ -53,7 +53,7 @@ from datetime import datetime
53
53
  from pathlib import Path
54
54
  from typing import Any, Callable, TYPE_CHECKING, cast
55
55
 
56
- import pandas as pd
56
+ import polars as pl
57
57
  from loguru import logger
58
58
 
59
59
  from .config import PhoenixConfig
@@ -64,6 +64,95 @@ if TYPE_CHECKING:
64
64
  from phoenix.client.resources.experiments.types import RanExperiment
65
65
 
66
66
 
67
+ def dataframe_to_phoenix_dataset(
68
+ client: "PhoenixClient",
69
+ df: pl.DataFrame,
70
+ dataset_name: str,
71
+ input_keys: list[str] | None = None,
72
+ output_keys: list[str] | None = None,
73
+ metadata_keys: list[str] | None = None,
74
+ description: str | None = None,
75
+ ) -> "Dataset":
76
+ """Convert a Polars DataFrame to a Phoenix Dataset.
77
+
78
+ This function transforms a Polars DataFrame into a Phoenix Dataset by:
79
+ 1. Extracting input columns (what agents receive)
80
+ 2. Extracting output columns (ground truth/expected output)
81
+ 3. Extracting metadata columns (optional labels, difficulty, etc.)
82
+
83
+ If column keys are not specified, uses smart defaults:
84
+ - input_keys: columns containing 'input', 'query', 'question', or 'prompt'
85
+ - output_keys: columns containing 'output', 'expected', 'answer', or 'response'
86
+ - metadata_keys: remaining columns
87
+
88
+ Args:
89
+ client: PhoenixClient instance
90
+ df: Polars DataFrame with experiment data
91
+ dataset_name: Name for the created Phoenix dataset
92
+ input_keys: Optional list of column names for inputs
93
+ output_keys: Optional list of column names for outputs (ground truth)
94
+ metadata_keys: Optional list of column names for metadata
95
+ description: Optional dataset description
96
+
97
+ Returns:
98
+ Phoenix Dataset instance
99
+
100
+ Example:
101
+ >>> df = pl.read_csv("golden_set.csv")
102
+ >>> dataset = dataframe_to_phoenix_dataset(
103
+ ... client=phoenix_client,
104
+ ... df=df,
105
+ ... dataset_name="my-golden-set",
106
+ ... input_keys=["query"],
107
+ ... output_keys=["expected_output"],
108
+ ... metadata_keys=["difficulty"]
109
+ ... )
110
+ """
111
+ columns = df.columns
112
+
113
+ # Smart defaults for column detection
114
+ if input_keys is None:
115
+ input_keys = [c for c in columns if any(
116
+ k in c.lower() for k in ["input", "query", "question", "prompt"]
117
+ )]
118
+ if not input_keys:
119
+ # Fallback: first column
120
+ input_keys = [columns[0]] if columns else []
121
+
122
+ if output_keys is None:
123
+ output_keys = [c for c in columns if any(
124
+ k in c.lower() for k in ["output", "expected", "answer", "response", "reference"]
125
+ )]
126
+ if not output_keys:
127
+ # Fallback: second column
128
+ output_keys = [columns[1]] if len(columns) > 1 else []
129
+
130
+ if metadata_keys is None:
131
+ used_keys = set(input_keys) | set(output_keys)
132
+ metadata_keys = [c for c in columns if c not in used_keys]
133
+
134
+ logger.debug(
135
+ f"DataFrame to Phoenix Dataset: inputs={input_keys}, "
136
+ f"outputs={output_keys}, metadata={metadata_keys}"
137
+ )
138
+
139
+ # Convert to list of dicts
140
+ records = df.to_dicts()
141
+
142
+ inputs = [{k: row.get(k) for k in input_keys} for row in records]
143
+ outputs = [{k: row.get(k) for k in output_keys} for row in records]
144
+ metadata = [{k: row.get(k) for k in metadata_keys} for row in records] if metadata_keys else None
145
+
146
+ # Create Phoenix dataset
147
+ return client.create_dataset_from_data(
148
+ name=dataset_name,
149
+ inputs=inputs,
150
+ outputs=outputs,
151
+ metadata=metadata,
152
+ description=description,
153
+ )
154
+
155
+
67
156
  class PhoenixClient:
68
157
  """High-level Phoenix client for REM evaluation workflows.
69
158
 
@@ -260,19 +349,22 @@ class PhoenixClient:
260
349
  "SEARCH semantic AI engineer",sarah-chen,person,medium,SEARCH
261
350
  """
262
351
  try:
263
- # Load CSV
264
- df = pd.read_csv(csv_file_path)
352
+ # Load CSV with Polars
353
+ df = pl.read_csv(csv_file_path)
354
+
355
+ # Convert to list of dicts
356
+ records = df.to_dicts()
265
357
 
266
358
  # Extract inputs
267
- inputs = cast(list[dict[str, Any]], df[input_keys].to_dict("records"))
359
+ inputs = [{k: row.get(k) for k in input_keys} for row in records]
268
360
 
269
361
  # Extract outputs
270
- outputs = cast(list[dict[str, Any]], df[output_keys].to_dict("records"))
362
+ outputs = [{k: row.get(k) for k in output_keys} for row in records]
271
363
 
272
364
  # Extract metadata if specified
273
365
  metadata = None
274
366
  if metadata_keys:
275
- metadata = cast(list[dict[str, Any]], df[metadata_keys].to_dict("records"))
367
+ metadata = [{k: row.get(k) for k in metadata_keys} for row in records]
276
368
 
277
369
  return self.create_dataset_from_data(
278
370
  name=name,
@@ -331,13 +423,16 @@ class PhoenixClient:
331
423
 
332
424
  def run_experiment(
333
425
  self,
334
- dataset: "Dataset" | str,
426
+ dataset: "Dataset" | str | pl.DataFrame,
335
427
  task: Callable[[Any], Any] | None = None,
336
428
  evaluators: list[Callable[[Any], Any]] | None = None,
337
429
  experiment_name: str | None = None,
338
430
  experiment_description: str | None = None,
339
431
  experiment_metadata: dict[str, Any] | None = None,
340
432
  experiment_config: Any | None = None,
433
+ input_keys: list[str] | None = None,
434
+ output_keys: list[str] | None = None,
435
+ metadata_keys: list[str] | None = None,
341
436
  ) -> "RanExperiment":
342
437
  """Run an evaluation experiment.
343
438
 
@@ -346,14 +441,22 @@ class PhoenixClient:
346
441
  2. Agent run: Provide task function to execute agents on dataset
347
442
  3. Evaluator run: Provide evaluators to score existing outputs
348
443
 
444
+ Dataset can be:
445
+ - Phoenix Dataset instance
446
+ - Dataset name (string) - will be loaded from Phoenix
447
+ - Polars DataFrame - will be converted to Phoenix Dataset
448
+
349
449
  Args:
350
- dataset: Dataset instance or name (required unless experiment_config provided)
450
+ dataset: Dataset instance, name, or Polars DataFrame
351
451
  task: Optional task function to run on each example (agent execution)
352
452
  evaluators: Optional list of evaluator functions
353
453
  experiment_name: Optional experiment name
354
454
  experiment_description: Optional description
355
455
  experiment_metadata: Optional metadata dict
356
456
  experiment_config: Optional ExperimentConfig instance (overrides other params)
457
+ input_keys: Column names for inputs (required if dataset is DataFrame)
458
+ output_keys: Column names for outputs (required if dataset is DataFrame)
459
+ metadata_keys: Optional column names for metadata
357
460
 
358
461
  Returns:
359
462
  RanExperiment with results
@@ -369,6 +472,16 @@ class PhoenixClient:
369
472
  ... experiment_name="rem-v1-baseline"
370
473
  ... )
371
474
 
475
+ Example - With Polars DataFrame:
476
+ >>> df = pl.read_csv("golden_set.csv")
477
+ >>> experiment = client.run_experiment(
478
+ ... dataset=df,
479
+ ... task=run_agent,
480
+ ... experiment_name="rem-v1-baseline",
481
+ ... input_keys=["query"],
482
+ ... output_keys=["expected_output"]
483
+ ... )
484
+
372
485
  Example - Evaluator Run (Phase 2b):
373
486
  >>> experiment = client.run_experiment(
374
487
  ... dataset=agent_results,
@@ -407,6 +520,21 @@ class PhoenixClient:
407
520
  else:
408
521
  dataset = dataset_ref.path
409
522
 
523
+ # Convert Polars DataFrame to Phoenix Dataset
524
+ if isinstance(dataset, pl.DataFrame):
525
+ dataset_name_for_phoenix = f"{experiment_name or 'experiment'}-dataset-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
526
+ logger.info(f"Converting Polars DataFrame to Phoenix Dataset: {dataset_name_for_phoenix}")
527
+ dataset = dataframe_to_phoenix_dataset(
528
+ client=self,
529
+ df=dataset,
530
+ dataset_name=dataset_name_for_phoenix,
531
+ input_keys=input_keys,
532
+ output_keys=output_keys,
533
+ metadata_keys=metadata_keys,
534
+ description=f"Auto-created from DataFrame for experiment: {experiment_name}",
535
+ )
536
+ logger.info(f"✓ Created Phoenix Dataset: {dataset_name_for_phoenix}")
537
+
410
538
  # Load dataset if name provided
411
539
  if isinstance(dataset, str):
412
540
  dataset = self.get_dataset(dataset)
@@ -454,7 +582,7 @@ class PhoenixClient:
454
582
  root_spans_only: bool = True,
455
583
  trace_id: str | None = None,
456
584
  span_id: str | None = None,
457
- ) -> pd.DataFrame:
585
+ ) -> pl.DataFrame:
458
586
  """Query traces from Phoenix.
459
587
 
460
588
  Args:
@@ -467,7 +595,7 @@ class PhoenixClient:
467
595
  span_id: Filter by specific span ID
468
596
 
469
597
  Returns:
470
- DataFrame with trace data
598
+ Polars DataFrame with trace data
471
599
 
472
600
  Example:
473
601
  >>> traces = client.get_traces(
@@ -492,8 +620,11 @@ class PhoenixClient:
492
620
  if span_id:
493
621
  query_params["span_id"] = span_id
494
622
 
495
- # Query traces
496
- traces_df = self._client.query_spans(limit=limit, **query_params) # type: ignore[attr-defined]
623
+ # Query traces (Phoenix returns pandas DataFrame)
624
+ pandas_df = self._client.query_spans(limit=limit, **query_params) # type: ignore[attr-defined]
625
+
626
+ # Convert pandas to Polars
627
+ traces_df = pl.from_pandas(pandas_df)
497
628
 
498
629
  logger.debug(f"Retrieved {len(traces_df)} traces")
499
630
  return traces_df
@@ -535,7 +666,7 @@ class PhoenixClient:
535
666
  ... )
536
667
  """
537
668
  try:
538
- # Query traces
669
+ # Query traces (returns Polars DataFrame)
539
670
  traces_df = self.get_traces(
540
671
  project_name=project_name,
541
672
  start_time=start_time,
@@ -547,12 +678,15 @@ class PhoenixClient:
547
678
  if len(traces_df) == 0:
548
679
  raise ValueError("No traces found matching criteria")
549
680
 
681
+ # Convert to list of dicts for iteration
682
+ records = traces_df.to_dicts()
683
+
550
684
  # Extract inputs and outputs from traces
551
685
  inputs = []
552
686
  outputs = []
553
687
  metadata = []
554
688
 
555
- for _, row in traces_df.iterrows():
689
+ for row in records:
556
690
  # Extract input
557
691
  span_input = row.get("attributes.input")
558
692
  if span_input:
@@ -658,29 +792,128 @@ class PhoenixClient:
658
792
  label: str | None = None,
659
793
  score: float | None = None,
660
794
  explanation: str | None = None,
661
- ) -> None:
795
+ metadata: dict[str, Any] | None = None,
796
+ ) -> str | None:
662
797
  """Add feedback annotation to a span.
663
798
 
664
799
  Args:
665
800
  span_id: Span ID to annotate
666
- annotation_name: Name of the annotation (e.g., "correctness")
801
+ annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
667
802
  annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
668
- label: Optional label (e.g., "correct", "incorrect")
803
+ label: Optional label (e.g., "correct", "incorrect", "helpful")
669
804
  score: Optional numeric score (0.0-1.0)
670
805
  explanation: Optional explanation text
806
+ metadata: Optional additional metadata dict
807
+
808
+ Returns:
809
+ Annotation ID if successful, None otherwise
671
810
  """
672
811
  try:
673
- self._client.add_span_annotation( # type: ignore[attr-defined]
812
+ result = self._client.add_span_annotation( # type: ignore[attr-defined]
674
813
  span_id=span_id,
675
814
  name=annotation_name,
676
815
  annotator_kind=annotator_kind,
677
816
  label=label,
678
817
  score=score,
679
818
  explanation=explanation,
819
+ metadata=metadata,
680
820
  )
681
821
 
682
- logger.info(f"Added {annotator_kind} feedback to span {span_id}")
822
+ annotation_id = getattr(result, "id", None) if result else None
823
+ logger.info(f"Added {annotator_kind} feedback to span {span_id} -> {annotation_id}")
824
+
825
+ return annotation_id
683
826
 
684
827
  except Exception as e:
685
828
  logger.error(f"Failed to add span feedback: {e}")
686
829
  raise
830
+
831
+ def sync_user_feedback(
832
+ self,
833
+ span_id: str,
834
+ rating: int | None = None,
835
+ categories: list[str] | None = None,
836
+ comment: str | None = None,
837
+ feedback_id: str | None = None,
838
+ ) -> str | None:
839
+ """Sync user feedback to Phoenix as a span annotation.
840
+
841
+ Convenience method for syncing Feedback entities to Phoenix.
842
+ Converts REM feedback format to Phoenix annotation format.
843
+
844
+ Args:
845
+ span_id: OTEL span ID to annotate
846
+ rating: User rating (-1, 1-5 scale)
847
+ categories: List of feedback categories
848
+ comment: Free-text comment
849
+ feedback_id: Optional REM feedback ID for reference
850
+
851
+ Returns:
852
+ Phoenix annotation ID if successful
853
+
854
+ Example:
855
+ >>> client.sync_user_feedback(
856
+ ... span_id="abc123",
857
+ ... rating=4,
858
+ ... categories=["helpful", "accurate"],
859
+ ... comment="Great response!"
860
+ ... )
861
+ """
862
+ # Convert rating to 0-1 score
863
+ score = None
864
+ if rating is not None:
865
+ if rating == -1:
866
+ score = 0.0
867
+ elif 1 <= rating <= 5:
868
+ score = rating / 5.0
869
+
870
+ # Use primary category as label
871
+ label = categories[0] if categories else None
872
+
873
+ # Build explanation from comment and additional categories
874
+ explanation = comment
875
+ if categories and len(categories) > 1:
876
+ cats_str = ", ".join(categories[1:])
877
+ if explanation:
878
+ explanation = f"{explanation} [Categories: {cats_str}]"
879
+ else:
880
+ explanation = f"Categories: {cats_str}"
881
+
882
+ # Build metadata
883
+ metadata = {
884
+ "rating": rating,
885
+ "categories": categories or [],
886
+ }
887
+ if feedback_id:
888
+ metadata["rem_feedback_id"] = feedback_id
889
+
890
+ return self.add_span_feedback(
891
+ span_id=span_id,
892
+ annotation_name="user_feedback",
893
+ annotator_kind="HUMAN",
894
+ label=label,
895
+ score=score,
896
+ explanation=explanation,
897
+ metadata=metadata,
898
+ )
899
+
900
+ def get_span_annotations(
901
+ self,
902
+ span_id: str,
903
+ annotation_name: str | None = None,
904
+ ) -> list[dict[str, Any]]:
905
+ """Get annotations for a span.
906
+
907
+ Args:
908
+ span_id: Span ID to query
909
+ annotation_name: Optional filter by annotation name
910
+
911
+ Returns:
912
+ List of annotation dicts
913
+
914
+ TODO: Implement once Phoenix client exposes this method
915
+ """
916
+ # TODO: Phoenix client doesn't expose annotation query yet
917
+ # This is a stub for future implementation
918
+ logger.warning("get_span_annotations not yet implemented in Phoenix client")
919
+ return []