remdb 0.3.0__py3-none-any.whl → 0.3.114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +28 -22
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +142 -22
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +151 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +17 -2
- rem/api/mcp_router/tools.py +143 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +277 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +152 -16
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +608 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +148 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +357 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +201 -70
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1359 -0
- rem/cli/commands/configure.py +4 -3
- rem/cli/commands/db.py +350 -137
- rem/cli/commands/experiments.py +76 -72
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +95 -49
- rem/cli/main.py +29 -6
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +426 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +86 -5
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +17 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +169 -17
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +231 -54
- rem/sql/migrations/002_install_models.sql +457 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +191 -35
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/db_maintainer.py +74 -0
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
rem/services/embeddings/api.py
CHANGED
|
@@ -5,13 +5,20 @@ Provides synchronous and async wrappers for embedding generation using
|
|
|
5
5
|
raw HTTP requests (no OpenAI SDK dependency).
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import os
|
|
9
8
|
from typing import Optional, cast
|
|
10
9
|
|
|
11
10
|
import httpx
|
|
12
11
|
import requests
|
|
13
12
|
from loguru import logger
|
|
14
13
|
|
|
14
|
+
from rem.utils.constants import DEFAULT_EMBEDDING_DIMS, HTTP_TIMEOUT_DEFAULT
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _get_openai_api_key() -> Optional[str]:
|
|
18
|
+
"""Get OpenAI API key from settings."""
|
|
19
|
+
from rem.settings import settings
|
|
20
|
+
return settings.llm.openai_api_key
|
|
21
|
+
|
|
15
22
|
|
|
16
23
|
def generate_embedding(
|
|
17
24
|
text: str,
|
|
@@ -26,19 +33,19 @@ def generate_embedding(
|
|
|
26
33
|
text: Text to embed
|
|
27
34
|
model: Model name (default: text-embedding-3-small)
|
|
28
35
|
provider: Provider name (default: openai)
|
|
29
|
-
api_key: API key (defaults to
|
|
36
|
+
api_key: API key (defaults to settings.llm.openai_api_key)
|
|
30
37
|
|
|
31
38
|
Returns:
|
|
32
39
|
Embedding vector (1536 dimensions for text-embedding-3-small)
|
|
33
40
|
"""
|
|
34
41
|
if provider == "openai":
|
|
35
|
-
api_key = api_key or
|
|
42
|
+
api_key = api_key or _get_openai_api_key()
|
|
36
43
|
if not api_key:
|
|
37
44
|
logger.warning("No OpenAI API key - returning zero vector")
|
|
38
|
-
return [0.0] *
|
|
45
|
+
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
39
46
|
|
|
40
47
|
try:
|
|
41
|
-
logger.
|
|
48
|
+
logger.debug(f"Generating OpenAI embedding for text using {model}")
|
|
42
49
|
|
|
43
50
|
response = requests.post(
|
|
44
51
|
"https://api.openai.com/v1/embeddings",
|
|
@@ -47,22 +54,22 @@ def generate_embedding(
|
|
|
47
54
|
"Content-Type": "application/json",
|
|
48
55
|
},
|
|
49
56
|
json={"input": [text], "model": model},
|
|
50
|
-
timeout=
|
|
57
|
+
timeout=HTTP_TIMEOUT_DEFAULT,
|
|
51
58
|
)
|
|
52
59
|
response.raise_for_status()
|
|
53
60
|
|
|
54
61
|
data = response.json()
|
|
55
62
|
embedding = data["data"][0]["embedding"]
|
|
56
|
-
logger.
|
|
63
|
+
logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
|
|
57
64
|
return cast(list[float], embedding)
|
|
58
65
|
|
|
59
66
|
except Exception as e:
|
|
60
67
|
logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
|
|
61
|
-
return [0.0] *
|
|
68
|
+
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
62
69
|
|
|
63
70
|
else:
|
|
64
71
|
logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
|
|
65
|
-
return [0.0] *
|
|
72
|
+
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
66
73
|
|
|
67
74
|
|
|
68
75
|
async def generate_embedding_async(
|
|
@@ -78,19 +85,19 @@ async def generate_embedding_async(
|
|
|
78
85
|
text: Text to embed
|
|
79
86
|
model: Model name (default: text-embedding-3-small)
|
|
80
87
|
provider: Provider name (default: openai)
|
|
81
|
-
api_key: API key (defaults to
|
|
88
|
+
api_key: API key (defaults to settings.llm.openai_api_key)
|
|
82
89
|
|
|
83
90
|
Returns:
|
|
84
91
|
Embedding vector (1536 dimensions for text-embedding-3-small)
|
|
85
92
|
"""
|
|
86
93
|
if provider == "openai":
|
|
87
|
-
api_key = api_key or
|
|
94
|
+
api_key = api_key or _get_openai_api_key()
|
|
88
95
|
if not api_key:
|
|
89
96
|
logger.warning("No OpenAI API key - returning zero vector")
|
|
90
|
-
return [0.0] *
|
|
97
|
+
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
91
98
|
|
|
92
99
|
try:
|
|
93
|
-
logger.
|
|
100
|
+
logger.debug(f"Generating OpenAI embedding for text using {model}")
|
|
94
101
|
|
|
95
102
|
async with httpx.AsyncClient() as client:
|
|
96
103
|
response = await client.post(
|
|
@@ -100,21 +107,21 @@ async def generate_embedding_async(
|
|
|
100
107
|
"Content-Type": "application/json",
|
|
101
108
|
},
|
|
102
109
|
json={"input": [text], "model": model},
|
|
103
|
-
timeout=
|
|
110
|
+
timeout=HTTP_TIMEOUT_DEFAULT,
|
|
104
111
|
)
|
|
105
112
|
response.raise_for_status()
|
|
106
113
|
|
|
107
114
|
data = response.json()
|
|
108
115
|
embedding = data["data"][0]["embedding"]
|
|
109
|
-
logger.
|
|
116
|
+
logger.debug(
|
|
110
117
|
f"Successfully generated embedding (dimension: {len(embedding)})"
|
|
111
118
|
)
|
|
112
119
|
return cast(list[float], embedding)
|
|
113
120
|
|
|
114
121
|
except Exception as e:
|
|
115
122
|
logger.error(f"Failed to generate embedding from OpenAI: {e}", exc_info=True)
|
|
116
|
-
return [0.0] *
|
|
123
|
+
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
117
124
|
|
|
118
125
|
else:
|
|
119
126
|
logger.warning(f"Unsupported provider '{provider}' - returning zero vector")
|
|
120
|
-
return [0.0] *
|
|
127
|
+
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
@@ -69,7 +69,7 @@ def get_global_embedding_worker(postgres_service: Any = None) -> "EmbeddingWorke
|
|
|
69
69
|
if postgres_service is None:
|
|
70
70
|
raise RuntimeError("Must provide postgres_service on first call to get_global_embedding_worker")
|
|
71
71
|
_global_worker = EmbeddingWorker(postgres_service=postgres_service)
|
|
72
|
-
logger.
|
|
72
|
+
logger.debug("Created global EmbeddingWorker singleton")
|
|
73
73
|
|
|
74
74
|
return _global_worker
|
|
75
75
|
|
|
@@ -117,7 +117,7 @@ class EmbeddingWorker:
|
|
|
117
117
|
"No OpenAI API key provided - embeddings will use zero vectors"
|
|
118
118
|
)
|
|
119
119
|
|
|
120
|
-
logger.
|
|
120
|
+
logger.debug(
|
|
121
121
|
f"Initialized EmbeddingWorker: {num_workers} workers, "
|
|
122
122
|
f"batch_size={batch_size}, timeout={batch_timeout}s"
|
|
123
123
|
)
|
|
@@ -125,17 +125,17 @@ class EmbeddingWorker:
|
|
|
125
125
|
async def start(self) -> None:
|
|
126
126
|
"""Start worker pool."""
|
|
127
127
|
if self.running:
|
|
128
|
-
logger.
|
|
128
|
+
logger.debug("EmbeddingWorker already running")
|
|
129
129
|
return
|
|
130
130
|
|
|
131
131
|
self.running = True
|
|
132
|
-
logger.
|
|
132
|
+
logger.debug(f"Starting {self.num_workers} embedding workers")
|
|
133
133
|
|
|
134
134
|
for i in range(self.num_workers):
|
|
135
135
|
worker = asyncio.create_task(self._worker_loop(i))
|
|
136
136
|
self.workers.append(worker)
|
|
137
137
|
|
|
138
|
-
logger.
|
|
138
|
+
logger.debug("EmbeddingWorker started")
|
|
139
139
|
|
|
140
140
|
async def stop(self) -> None:
|
|
141
141
|
"""Stop worker pool gracefully - processes remaining queue before stopping."""
|
|
@@ -143,7 +143,7 @@ class EmbeddingWorker:
|
|
|
143
143
|
return
|
|
144
144
|
|
|
145
145
|
queue_size = self.task_queue.qsize()
|
|
146
|
-
logger.
|
|
146
|
+
logger.debug(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
|
|
147
147
|
|
|
148
148
|
# Wait for queue to drain (with timeout)
|
|
149
149
|
max_wait = 30 # 30 seconds max
|
|
@@ -171,7 +171,7 @@ class EmbeddingWorker:
|
|
|
171
171
|
await asyncio.gather(*self.workers, return_exceptions=True)
|
|
172
172
|
|
|
173
173
|
self.workers.clear()
|
|
174
|
-
logger.
|
|
174
|
+
logger.debug("EmbeddingWorker stopped")
|
|
175
175
|
|
|
176
176
|
async def queue_task(self, task: EmbeddingTask) -> None:
|
|
177
177
|
"""
|
|
@@ -195,7 +195,7 @@ class EmbeddingWorker:
|
|
|
195
195
|
Args:
|
|
196
196
|
worker_id: Unique worker identifier
|
|
197
197
|
"""
|
|
198
|
-
logger.
|
|
198
|
+
logger.debug(f"Worker {worker_id} started")
|
|
199
199
|
|
|
200
200
|
while self.running:
|
|
201
201
|
try:
|
|
@@ -205,7 +205,7 @@ class EmbeddingWorker:
|
|
|
205
205
|
if not batch:
|
|
206
206
|
continue
|
|
207
207
|
|
|
208
|
-
logger.
|
|
208
|
+
logger.debug(f"Worker {worker_id} processing batch of {len(batch)} tasks")
|
|
209
209
|
|
|
210
210
|
# Generate embeddings for batch
|
|
211
211
|
await self._process_batch(batch)
|
|
@@ -213,14 +213,14 @@ class EmbeddingWorker:
|
|
|
213
213
|
logger.debug(f"Worker {worker_id} completed batch")
|
|
214
214
|
|
|
215
215
|
except asyncio.CancelledError:
|
|
216
|
-
logger.
|
|
216
|
+
logger.debug(f"Worker {worker_id} cancelled")
|
|
217
217
|
break
|
|
218
218
|
except Exception as e:
|
|
219
219
|
logger.error(f"Worker {worker_id} error: {e}", exc_info=True)
|
|
220
220
|
# Continue processing (don't crash worker on error)
|
|
221
221
|
await asyncio.sleep(1)
|
|
222
222
|
|
|
223
|
-
logger.
|
|
223
|
+
logger.debug(f"Worker {worker_id} stopped")
|
|
224
224
|
|
|
225
225
|
async def _collect_batch(self) -> list[EmbeddingTask]:
|
|
226
226
|
"""
|
|
@@ -284,10 +284,10 @@ class EmbeddingWorker:
|
|
|
284
284
|
)
|
|
285
285
|
|
|
286
286
|
# Upsert to database
|
|
287
|
-
logger.
|
|
287
|
+
logger.debug(f"Upserting {len(embeddings)} embeddings to database...")
|
|
288
288
|
await self._upsert_embeddings(batch, embeddings)
|
|
289
289
|
|
|
290
|
-
logger.
|
|
290
|
+
logger.debug(
|
|
291
291
|
f"Successfully generated and stored {len(embeddings)} embeddings "
|
|
292
292
|
f"(provider={provider}, model={model})"
|
|
293
293
|
)
|
|
@@ -315,7 +315,7 @@ class EmbeddingWorker:
|
|
|
315
315
|
"""
|
|
316
316
|
if provider == "openai" and self.openai_api_key:
|
|
317
317
|
try:
|
|
318
|
-
logger.
|
|
318
|
+
logger.debug(
|
|
319
319
|
f"Generating OpenAI embeddings for {len(texts)} texts using {model}"
|
|
320
320
|
)
|
|
321
321
|
|
|
@@ -336,7 +336,7 @@ class EmbeddingWorker:
|
|
|
336
336
|
data = response.json()
|
|
337
337
|
embeddings = [item["embedding"] for item in data["data"]]
|
|
338
338
|
|
|
339
|
-
logger.
|
|
339
|
+
logger.debug(
|
|
340
340
|
f"Successfully generated {len(embeddings)} embeddings from OpenAI"
|
|
341
341
|
)
|
|
342
342
|
return embeddings
|
|
@@ -409,7 +409,7 @@ class EmbeddingWorker:
|
|
|
409
409
|
),
|
|
410
410
|
)
|
|
411
411
|
|
|
412
|
-
logger.
|
|
412
|
+
logger.debug(
|
|
413
413
|
f"Upserted embedding: {task.table_name}.{task.entity_id}.{task.field_name}"
|
|
414
414
|
)
|
|
415
415
|
|
|
@@ -164,7 +164,7 @@ cp curated-queries.csv experiments/rem-001/validation/production/
|
|
|
164
164
|
**Option C: Curated Engrams**
|
|
165
165
|
```bash
|
|
166
166
|
# Generate engrams from REM data
|
|
167
|
-
rem dreaming full --user-id test-user
|
|
167
|
+
rem dreaming full --user-id test-user --generate-test-cases
|
|
168
168
|
|
|
169
169
|
# Review and select high-quality engrams
|
|
170
170
|
rem engram list --quality high --limit 100 --output engrams.csv
|
|
@@ -357,7 +357,7 @@ Level 4 (Mature): Multiple cycles, full query capabilities
|
|
|
357
357
|
# Generate engrams from REM data
|
|
358
358
|
rem dreaming full \
|
|
359
359
|
--user-id test-user \
|
|
360
|
-
|
|
360
|
+
\
|
|
361
361
|
--generate-test-cases \
|
|
362
362
|
--quality-level 3
|
|
363
363
|
|
|
@@ -1027,7 +1027,7 @@ rem experiments experiment run rem-lookup-ask_rem-golden \
|
|
|
1027
1027
|
|
|
1028
1028
|
```bash
|
|
1029
1029
|
# 1. Generate high-quality engrams
|
|
1030
|
-
rem dreaming full
|
|
1030
|
+
rem dreaming full --generate-test-cases --quality-level 4
|
|
1031
1031
|
|
|
1032
1032
|
# 2. Export engrams
|
|
1033
1033
|
rem engram export rem-engrams-mature-mixed --output engrams.csv --format phoenix
|
rem/services/phoenix/client.py
CHANGED
|
@@ -53,7 +53,7 @@ from datetime import datetime
|
|
|
53
53
|
from pathlib import Path
|
|
54
54
|
from typing import Any, Callable, TYPE_CHECKING, cast
|
|
55
55
|
|
|
56
|
-
import
|
|
56
|
+
import polars as pl
|
|
57
57
|
from loguru import logger
|
|
58
58
|
|
|
59
59
|
from .config import PhoenixConfig
|
|
@@ -64,6 +64,95 @@ if TYPE_CHECKING:
|
|
|
64
64
|
from phoenix.client.resources.experiments.types import RanExperiment
|
|
65
65
|
|
|
66
66
|
|
|
67
|
+
def dataframe_to_phoenix_dataset(
|
|
68
|
+
client: "PhoenixClient",
|
|
69
|
+
df: pl.DataFrame,
|
|
70
|
+
dataset_name: str,
|
|
71
|
+
input_keys: list[str] | None = None,
|
|
72
|
+
output_keys: list[str] | None = None,
|
|
73
|
+
metadata_keys: list[str] | None = None,
|
|
74
|
+
description: str | None = None,
|
|
75
|
+
) -> "Dataset":
|
|
76
|
+
"""Convert a Polars DataFrame to a Phoenix Dataset.
|
|
77
|
+
|
|
78
|
+
This function transforms a Polars DataFrame into a Phoenix Dataset by:
|
|
79
|
+
1. Extracting input columns (what agents receive)
|
|
80
|
+
2. Extracting output columns (ground truth/expected output)
|
|
81
|
+
3. Extracting metadata columns (optional labels, difficulty, etc.)
|
|
82
|
+
|
|
83
|
+
If column keys are not specified, uses smart defaults:
|
|
84
|
+
- input_keys: columns containing 'input', 'query', 'question', or 'prompt'
|
|
85
|
+
- output_keys: columns containing 'output', 'expected', 'answer', or 'response'
|
|
86
|
+
- metadata_keys: remaining columns
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
client: PhoenixClient instance
|
|
90
|
+
df: Polars DataFrame with experiment data
|
|
91
|
+
dataset_name: Name for the created Phoenix dataset
|
|
92
|
+
input_keys: Optional list of column names for inputs
|
|
93
|
+
output_keys: Optional list of column names for outputs (ground truth)
|
|
94
|
+
metadata_keys: Optional list of column names for metadata
|
|
95
|
+
description: Optional dataset description
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
Phoenix Dataset instance
|
|
99
|
+
|
|
100
|
+
Example:
|
|
101
|
+
>>> df = pl.read_csv("golden_set.csv")
|
|
102
|
+
>>> dataset = dataframe_to_phoenix_dataset(
|
|
103
|
+
... client=phoenix_client,
|
|
104
|
+
... df=df,
|
|
105
|
+
... dataset_name="my-golden-set",
|
|
106
|
+
... input_keys=["query"],
|
|
107
|
+
... output_keys=["expected_output"],
|
|
108
|
+
... metadata_keys=["difficulty"]
|
|
109
|
+
... )
|
|
110
|
+
"""
|
|
111
|
+
columns = df.columns
|
|
112
|
+
|
|
113
|
+
# Smart defaults for column detection
|
|
114
|
+
if input_keys is None:
|
|
115
|
+
input_keys = [c for c in columns if any(
|
|
116
|
+
k in c.lower() for k in ["input", "query", "question", "prompt"]
|
|
117
|
+
)]
|
|
118
|
+
if not input_keys:
|
|
119
|
+
# Fallback: first column
|
|
120
|
+
input_keys = [columns[0]] if columns else []
|
|
121
|
+
|
|
122
|
+
if output_keys is None:
|
|
123
|
+
output_keys = [c for c in columns if any(
|
|
124
|
+
k in c.lower() for k in ["output", "expected", "answer", "response", "reference"]
|
|
125
|
+
)]
|
|
126
|
+
if not output_keys:
|
|
127
|
+
# Fallback: second column
|
|
128
|
+
output_keys = [columns[1]] if len(columns) > 1 else []
|
|
129
|
+
|
|
130
|
+
if metadata_keys is None:
|
|
131
|
+
used_keys = set(input_keys) | set(output_keys)
|
|
132
|
+
metadata_keys = [c for c in columns if c not in used_keys]
|
|
133
|
+
|
|
134
|
+
logger.debug(
|
|
135
|
+
f"DataFrame to Phoenix Dataset: inputs={input_keys}, "
|
|
136
|
+
f"outputs={output_keys}, metadata={metadata_keys}"
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Convert to list of dicts
|
|
140
|
+
records = df.to_dicts()
|
|
141
|
+
|
|
142
|
+
inputs = [{k: row.get(k) for k in input_keys} for row in records]
|
|
143
|
+
outputs = [{k: row.get(k) for k in output_keys} for row in records]
|
|
144
|
+
metadata = [{k: row.get(k) for k in metadata_keys} for row in records] if metadata_keys else None
|
|
145
|
+
|
|
146
|
+
# Create Phoenix dataset
|
|
147
|
+
return client.create_dataset_from_data(
|
|
148
|
+
name=dataset_name,
|
|
149
|
+
inputs=inputs,
|
|
150
|
+
outputs=outputs,
|
|
151
|
+
metadata=metadata,
|
|
152
|
+
description=description,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
|
|
67
156
|
class PhoenixClient:
|
|
68
157
|
"""High-level Phoenix client for REM evaluation workflows.
|
|
69
158
|
|
|
@@ -260,19 +349,22 @@ class PhoenixClient:
|
|
|
260
349
|
"SEARCH semantic AI engineer",sarah-chen,person,medium,SEARCH
|
|
261
350
|
"""
|
|
262
351
|
try:
|
|
263
|
-
# Load CSV
|
|
264
|
-
df =
|
|
352
|
+
# Load CSV with Polars
|
|
353
|
+
df = pl.read_csv(csv_file_path)
|
|
354
|
+
|
|
355
|
+
# Convert to list of dicts
|
|
356
|
+
records = df.to_dicts()
|
|
265
357
|
|
|
266
358
|
# Extract inputs
|
|
267
|
-
inputs =
|
|
359
|
+
inputs = [{k: row.get(k) for k in input_keys} for row in records]
|
|
268
360
|
|
|
269
361
|
# Extract outputs
|
|
270
|
-
outputs =
|
|
362
|
+
outputs = [{k: row.get(k) for k in output_keys} for row in records]
|
|
271
363
|
|
|
272
364
|
# Extract metadata if specified
|
|
273
365
|
metadata = None
|
|
274
366
|
if metadata_keys:
|
|
275
|
-
metadata =
|
|
367
|
+
metadata = [{k: row.get(k) for k in metadata_keys} for row in records]
|
|
276
368
|
|
|
277
369
|
return self.create_dataset_from_data(
|
|
278
370
|
name=name,
|
|
@@ -331,13 +423,16 @@ class PhoenixClient:
|
|
|
331
423
|
|
|
332
424
|
def run_experiment(
|
|
333
425
|
self,
|
|
334
|
-
dataset: "Dataset" | str,
|
|
426
|
+
dataset: "Dataset" | str | pl.DataFrame,
|
|
335
427
|
task: Callable[[Any], Any] | None = None,
|
|
336
428
|
evaluators: list[Callable[[Any], Any]] | None = None,
|
|
337
429
|
experiment_name: str | None = None,
|
|
338
430
|
experiment_description: str | None = None,
|
|
339
431
|
experiment_metadata: dict[str, Any] | None = None,
|
|
340
432
|
experiment_config: Any | None = None,
|
|
433
|
+
input_keys: list[str] | None = None,
|
|
434
|
+
output_keys: list[str] | None = None,
|
|
435
|
+
metadata_keys: list[str] | None = None,
|
|
341
436
|
) -> "RanExperiment":
|
|
342
437
|
"""Run an evaluation experiment.
|
|
343
438
|
|
|
@@ -346,14 +441,22 @@ class PhoenixClient:
|
|
|
346
441
|
2. Agent run: Provide task function to execute agents on dataset
|
|
347
442
|
3. Evaluator run: Provide evaluators to score existing outputs
|
|
348
443
|
|
|
444
|
+
Dataset can be:
|
|
445
|
+
- Phoenix Dataset instance
|
|
446
|
+
- Dataset name (string) - will be loaded from Phoenix
|
|
447
|
+
- Polars DataFrame - will be converted to Phoenix Dataset
|
|
448
|
+
|
|
349
449
|
Args:
|
|
350
|
-
dataset: Dataset instance
|
|
450
|
+
dataset: Dataset instance, name, or Polars DataFrame
|
|
351
451
|
task: Optional task function to run on each example (agent execution)
|
|
352
452
|
evaluators: Optional list of evaluator functions
|
|
353
453
|
experiment_name: Optional experiment name
|
|
354
454
|
experiment_description: Optional description
|
|
355
455
|
experiment_metadata: Optional metadata dict
|
|
356
456
|
experiment_config: Optional ExperimentConfig instance (overrides other params)
|
|
457
|
+
input_keys: Column names for inputs (required if dataset is DataFrame)
|
|
458
|
+
output_keys: Column names for outputs (required if dataset is DataFrame)
|
|
459
|
+
metadata_keys: Optional column names for metadata
|
|
357
460
|
|
|
358
461
|
Returns:
|
|
359
462
|
RanExperiment with results
|
|
@@ -369,6 +472,16 @@ class PhoenixClient:
|
|
|
369
472
|
... experiment_name="rem-v1-baseline"
|
|
370
473
|
... )
|
|
371
474
|
|
|
475
|
+
Example - With Polars DataFrame:
|
|
476
|
+
>>> df = pl.read_csv("golden_set.csv")
|
|
477
|
+
>>> experiment = client.run_experiment(
|
|
478
|
+
... dataset=df,
|
|
479
|
+
... task=run_agent,
|
|
480
|
+
... experiment_name="rem-v1-baseline",
|
|
481
|
+
... input_keys=["query"],
|
|
482
|
+
... output_keys=["expected_output"]
|
|
483
|
+
... )
|
|
484
|
+
|
|
372
485
|
Example - Evaluator Run (Phase 2b):
|
|
373
486
|
>>> experiment = client.run_experiment(
|
|
374
487
|
... dataset=agent_results,
|
|
@@ -407,6 +520,21 @@ class PhoenixClient:
|
|
|
407
520
|
else:
|
|
408
521
|
dataset = dataset_ref.path
|
|
409
522
|
|
|
523
|
+
# Convert Polars DataFrame to Phoenix Dataset
|
|
524
|
+
if isinstance(dataset, pl.DataFrame):
|
|
525
|
+
dataset_name_for_phoenix = f"{experiment_name or 'experiment'}-dataset-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
|
|
526
|
+
logger.info(f"Converting Polars DataFrame to Phoenix Dataset: {dataset_name_for_phoenix}")
|
|
527
|
+
dataset = dataframe_to_phoenix_dataset(
|
|
528
|
+
client=self,
|
|
529
|
+
df=dataset,
|
|
530
|
+
dataset_name=dataset_name_for_phoenix,
|
|
531
|
+
input_keys=input_keys,
|
|
532
|
+
output_keys=output_keys,
|
|
533
|
+
metadata_keys=metadata_keys,
|
|
534
|
+
description=f"Auto-created from DataFrame for experiment: {experiment_name}",
|
|
535
|
+
)
|
|
536
|
+
logger.info(f"✓ Created Phoenix Dataset: {dataset_name_for_phoenix}")
|
|
537
|
+
|
|
410
538
|
# Load dataset if name provided
|
|
411
539
|
if isinstance(dataset, str):
|
|
412
540
|
dataset = self.get_dataset(dataset)
|
|
@@ -454,7 +582,7 @@ class PhoenixClient:
|
|
|
454
582
|
root_spans_only: bool = True,
|
|
455
583
|
trace_id: str | None = None,
|
|
456
584
|
span_id: str | None = None,
|
|
457
|
-
) ->
|
|
585
|
+
) -> pl.DataFrame:
|
|
458
586
|
"""Query traces from Phoenix.
|
|
459
587
|
|
|
460
588
|
Args:
|
|
@@ -467,7 +595,7 @@ class PhoenixClient:
|
|
|
467
595
|
span_id: Filter by specific span ID
|
|
468
596
|
|
|
469
597
|
Returns:
|
|
470
|
-
DataFrame with trace data
|
|
598
|
+
Polars DataFrame with trace data
|
|
471
599
|
|
|
472
600
|
Example:
|
|
473
601
|
>>> traces = client.get_traces(
|
|
@@ -492,8 +620,11 @@ class PhoenixClient:
|
|
|
492
620
|
if span_id:
|
|
493
621
|
query_params["span_id"] = span_id
|
|
494
622
|
|
|
495
|
-
# Query traces
|
|
496
|
-
|
|
623
|
+
# Query traces (Phoenix returns pandas DataFrame)
|
|
624
|
+
pandas_df = self._client.query_spans(limit=limit, **query_params) # type: ignore[attr-defined]
|
|
625
|
+
|
|
626
|
+
# Convert pandas to Polars
|
|
627
|
+
traces_df = pl.from_pandas(pandas_df)
|
|
497
628
|
|
|
498
629
|
logger.debug(f"Retrieved {len(traces_df)} traces")
|
|
499
630
|
return traces_df
|
|
@@ -535,7 +666,7 @@ class PhoenixClient:
|
|
|
535
666
|
... )
|
|
536
667
|
"""
|
|
537
668
|
try:
|
|
538
|
-
# Query traces
|
|
669
|
+
# Query traces (returns Polars DataFrame)
|
|
539
670
|
traces_df = self.get_traces(
|
|
540
671
|
project_name=project_name,
|
|
541
672
|
start_time=start_time,
|
|
@@ -547,12 +678,15 @@ class PhoenixClient:
|
|
|
547
678
|
if len(traces_df) == 0:
|
|
548
679
|
raise ValueError("No traces found matching criteria")
|
|
549
680
|
|
|
681
|
+
# Convert to list of dicts for iteration
|
|
682
|
+
records = traces_df.to_dicts()
|
|
683
|
+
|
|
550
684
|
# Extract inputs and outputs from traces
|
|
551
685
|
inputs = []
|
|
552
686
|
outputs = []
|
|
553
687
|
metadata = []
|
|
554
688
|
|
|
555
|
-
for
|
|
689
|
+
for row in records:
|
|
556
690
|
# Extract input
|
|
557
691
|
span_input = row.get("attributes.input")
|
|
558
692
|
if span_input:
|
|
@@ -658,29 +792,128 @@ class PhoenixClient:
|
|
|
658
792
|
label: str | None = None,
|
|
659
793
|
score: float | None = None,
|
|
660
794
|
explanation: str | None = None,
|
|
661
|
-
|
|
795
|
+
metadata: dict[str, Any] | None = None,
|
|
796
|
+
) -> str | None:
|
|
662
797
|
"""Add feedback annotation to a span.
|
|
663
798
|
|
|
664
799
|
Args:
|
|
665
800
|
span_id: Span ID to annotate
|
|
666
|
-
annotation_name: Name of the annotation (e.g., "correctness")
|
|
801
|
+
annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
|
|
667
802
|
annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
|
|
668
|
-
label: Optional label (e.g., "correct", "incorrect")
|
|
803
|
+
label: Optional label (e.g., "correct", "incorrect", "helpful")
|
|
669
804
|
score: Optional numeric score (0.0-1.0)
|
|
670
805
|
explanation: Optional explanation text
|
|
806
|
+
metadata: Optional additional metadata dict
|
|
807
|
+
|
|
808
|
+
Returns:
|
|
809
|
+
Annotation ID if successful, None otherwise
|
|
671
810
|
"""
|
|
672
811
|
try:
|
|
673
|
-
self._client.add_span_annotation( # type: ignore[attr-defined]
|
|
812
|
+
result = self._client.add_span_annotation( # type: ignore[attr-defined]
|
|
674
813
|
span_id=span_id,
|
|
675
814
|
name=annotation_name,
|
|
676
815
|
annotator_kind=annotator_kind,
|
|
677
816
|
label=label,
|
|
678
817
|
score=score,
|
|
679
818
|
explanation=explanation,
|
|
819
|
+
metadata=metadata,
|
|
680
820
|
)
|
|
681
821
|
|
|
682
|
-
|
|
822
|
+
annotation_id = getattr(result, "id", None) if result else None
|
|
823
|
+
logger.info(f"Added {annotator_kind} feedback to span {span_id} -> {annotation_id}")
|
|
824
|
+
|
|
825
|
+
return annotation_id
|
|
683
826
|
|
|
684
827
|
except Exception as e:
|
|
685
828
|
logger.error(f"Failed to add span feedback: {e}")
|
|
686
829
|
raise
|
|
830
|
+
|
|
831
|
+
def sync_user_feedback(
|
|
832
|
+
self,
|
|
833
|
+
span_id: str,
|
|
834
|
+
rating: int | None = None,
|
|
835
|
+
categories: list[str] | None = None,
|
|
836
|
+
comment: str | None = None,
|
|
837
|
+
feedback_id: str | None = None,
|
|
838
|
+
) -> str | None:
|
|
839
|
+
"""Sync user feedback to Phoenix as a span annotation.
|
|
840
|
+
|
|
841
|
+
Convenience method for syncing Feedback entities to Phoenix.
|
|
842
|
+
Converts REM feedback format to Phoenix annotation format.
|
|
843
|
+
|
|
844
|
+
Args:
|
|
845
|
+
span_id: OTEL span ID to annotate
|
|
846
|
+
rating: User rating (-1, 1-5 scale)
|
|
847
|
+
categories: List of feedback categories
|
|
848
|
+
comment: Free-text comment
|
|
849
|
+
feedback_id: Optional REM feedback ID for reference
|
|
850
|
+
|
|
851
|
+
Returns:
|
|
852
|
+
Phoenix annotation ID if successful
|
|
853
|
+
|
|
854
|
+
Example:
|
|
855
|
+
>>> client.sync_user_feedback(
|
|
856
|
+
... span_id="abc123",
|
|
857
|
+
... rating=4,
|
|
858
|
+
... categories=["helpful", "accurate"],
|
|
859
|
+
... comment="Great response!"
|
|
860
|
+
... )
|
|
861
|
+
"""
|
|
862
|
+
# Convert rating to 0-1 score
|
|
863
|
+
score = None
|
|
864
|
+
if rating is not None:
|
|
865
|
+
if rating == -1:
|
|
866
|
+
score = 0.0
|
|
867
|
+
elif 1 <= rating <= 5:
|
|
868
|
+
score = rating / 5.0
|
|
869
|
+
|
|
870
|
+
# Use primary category as label
|
|
871
|
+
label = categories[0] if categories else None
|
|
872
|
+
|
|
873
|
+
# Build explanation from comment and additional categories
|
|
874
|
+
explanation = comment
|
|
875
|
+
if categories and len(categories) > 1:
|
|
876
|
+
cats_str = ", ".join(categories[1:])
|
|
877
|
+
if explanation:
|
|
878
|
+
explanation = f"{explanation} [Categories: {cats_str}]"
|
|
879
|
+
else:
|
|
880
|
+
explanation = f"Categories: {cats_str}"
|
|
881
|
+
|
|
882
|
+
# Build metadata
|
|
883
|
+
metadata = {
|
|
884
|
+
"rating": rating,
|
|
885
|
+
"categories": categories or [],
|
|
886
|
+
}
|
|
887
|
+
if feedback_id:
|
|
888
|
+
metadata["rem_feedback_id"] = feedback_id
|
|
889
|
+
|
|
890
|
+
return self.add_span_feedback(
|
|
891
|
+
span_id=span_id,
|
|
892
|
+
annotation_name="user_feedback",
|
|
893
|
+
annotator_kind="HUMAN",
|
|
894
|
+
label=label,
|
|
895
|
+
score=score,
|
|
896
|
+
explanation=explanation,
|
|
897
|
+
metadata=metadata,
|
|
898
|
+
)
|
|
899
|
+
|
|
900
|
+
def get_span_annotations(
|
|
901
|
+
self,
|
|
902
|
+
span_id: str,
|
|
903
|
+
annotation_name: str | None = None,
|
|
904
|
+
) -> list[dict[str, Any]]:
|
|
905
|
+
"""Get annotations for a span.
|
|
906
|
+
|
|
907
|
+
Args:
|
|
908
|
+
span_id: Span ID to query
|
|
909
|
+
annotation_name: Optional filter by annotation name
|
|
910
|
+
|
|
911
|
+
Returns:
|
|
912
|
+
List of annotation dicts
|
|
913
|
+
|
|
914
|
+
TODO: Implement once Phoenix client exposes this method
|
|
915
|
+
"""
|
|
916
|
+
# TODO: Phoenix client doesn't expose annotation query yet
|
|
917
|
+
# This is a stub for future implementation
|
|
918
|
+
logger.warning("get_span_annotations not yet implemented in Phoenix client")
|
|
919
|
+
return []
|