remdb 0.3.0__py3-none-any.whl → 0.3.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -25
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +5 -6
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +293 -73
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +302 -28
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +313 -29
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +282 -35
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/METADATA +464 -289
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/RECORD +104 -73
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
rem/services/phoenix/client.py
CHANGED
|
@@ -53,7 +53,7 @@ from datetime import datetime
|
|
|
53
53
|
from pathlib import Path
|
|
54
54
|
from typing import Any, Callable, TYPE_CHECKING, cast
|
|
55
55
|
|
|
56
|
-
import
|
|
56
|
+
import polars as pl
|
|
57
57
|
from loguru import logger
|
|
58
58
|
|
|
59
59
|
from .config import PhoenixConfig
|
|
@@ -64,6 +64,95 @@ if TYPE_CHECKING:
|
|
|
64
64
|
from phoenix.client.resources.experiments.types import RanExperiment
|
|
65
65
|
|
|
66
66
|
|
|
67
|
+
def dataframe_to_phoenix_dataset(
|
|
68
|
+
client: "PhoenixClient",
|
|
69
|
+
df: pl.DataFrame,
|
|
70
|
+
dataset_name: str,
|
|
71
|
+
input_keys: list[str] | None = None,
|
|
72
|
+
output_keys: list[str] | None = None,
|
|
73
|
+
metadata_keys: list[str] | None = None,
|
|
74
|
+
description: str | None = None,
|
|
75
|
+
) -> "Dataset":
|
|
76
|
+
"""Convert a Polars DataFrame to a Phoenix Dataset.
|
|
77
|
+
|
|
78
|
+
This function transforms a Polars DataFrame into a Phoenix Dataset by:
|
|
79
|
+
1. Extracting input columns (what agents receive)
|
|
80
|
+
2. Extracting output columns (ground truth/expected output)
|
|
81
|
+
3. Extracting metadata columns (optional labels, difficulty, etc.)
|
|
82
|
+
|
|
83
|
+
If column keys are not specified, uses smart defaults:
|
|
84
|
+
- input_keys: columns containing 'input', 'query', 'question', or 'prompt'
|
|
85
|
+
- output_keys: columns containing 'output', 'expected', 'answer', 'response', or 'reference'
|
|
86
|
+
- metadata_keys: remaining columns
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
client: PhoenixClient instance
|
|
90
|
+
df: Polars DataFrame with experiment data
|
|
91
|
+
dataset_name: Name for the created Phoenix dataset
|
|
92
|
+
input_keys: Optional list of column names for inputs
|
|
93
|
+
output_keys: Optional list of column names for outputs (ground truth)
|
|
94
|
+
metadata_keys: Optional list of column names for metadata
|
|
95
|
+
description: Optional dataset description
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
Phoenix Dataset instance
|
|
99
|
+
|
|
100
|
+
Example:
|
|
101
|
+
>>> df = pl.read_csv("golden_set.csv")
|
|
102
|
+
>>> dataset = dataframe_to_phoenix_dataset(
|
|
103
|
+
... client=phoenix_client,
|
|
104
|
+
... df=df,
|
|
105
|
+
... dataset_name="my-golden-set",
|
|
106
|
+
... input_keys=["query"],
|
|
107
|
+
... output_keys=["expected_output"],
|
|
108
|
+
... metadata_keys=["difficulty"]
|
|
109
|
+
... )
|
|
110
|
+
"""
|
|
111
|
+
columns = df.columns
|
|
112
|
+
|
|
113
|
+
# Smart defaults for column detection
|
|
114
|
+
if input_keys is None:
|
|
115
|
+
input_keys = [c for c in columns if any(
|
|
116
|
+
k in c.lower() for k in ["input", "query", "question", "prompt"]
|
|
117
|
+
)]
|
|
118
|
+
if not input_keys:
|
|
119
|
+
# Fallback: first column
|
|
120
|
+
input_keys = [columns[0]] if columns else []
|
|
121
|
+
|
|
122
|
+
if output_keys is None:
|
|
123
|
+
output_keys = [c for c in columns if any(
|
|
124
|
+
k in c.lower() for k in ["output", "expected", "answer", "response", "reference"]
|
|
125
|
+
)]
|
|
126
|
+
if not output_keys:
|
|
127
|
+
# Fallback: second column
|
|
128
|
+
output_keys = [columns[1]] if len(columns) > 1 else []
|
|
129
|
+
|
|
130
|
+
if metadata_keys is None:
|
|
131
|
+
used_keys = set(input_keys) | set(output_keys)
|
|
132
|
+
metadata_keys = [c for c in columns if c not in used_keys]
|
|
133
|
+
|
|
134
|
+
logger.debug(
|
|
135
|
+
f"DataFrame to Phoenix Dataset: inputs={input_keys}, "
|
|
136
|
+
f"outputs={output_keys}, metadata={metadata_keys}"
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Convert to list of dicts
|
|
140
|
+
records = df.to_dicts()
|
|
141
|
+
|
|
142
|
+
inputs = [{k: row.get(k) for k in input_keys} for row in records]
|
|
143
|
+
outputs = [{k: row.get(k) for k in output_keys} for row in records]
|
|
144
|
+
metadata = [{k: row.get(k) for k in metadata_keys} for row in records] if metadata_keys else None
|
|
145
|
+
|
|
146
|
+
# Create Phoenix dataset
|
|
147
|
+
return client.create_dataset_from_data(
|
|
148
|
+
name=dataset_name,
|
|
149
|
+
inputs=inputs,
|
|
150
|
+
outputs=outputs,
|
|
151
|
+
metadata=metadata,
|
|
152
|
+
description=description,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
|
|
67
156
|
class PhoenixClient:
|
|
68
157
|
"""High-level Phoenix client for REM evaluation workflows.
|
|
69
158
|
|
|
@@ -260,19 +349,22 @@ class PhoenixClient:
|
|
|
260
349
|
"SEARCH semantic AI engineer",sarah-chen,person,medium,SEARCH
|
|
261
350
|
"""
|
|
262
351
|
try:
|
|
263
|
-
# Load CSV
|
|
264
|
-
df =
|
|
352
|
+
# Load CSV with Polars
|
|
353
|
+
df = pl.read_csv(csv_file_path)
|
|
354
|
+
|
|
355
|
+
# Convert to list of dicts
|
|
356
|
+
records = df.to_dicts()
|
|
265
357
|
|
|
266
358
|
# Extract inputs
|
|
267
|
-
inputs =
|
|
359
|
+
inputs = [{k: row.get(k) for k in input_keys} for row in records]
|
|
268
360
|
|
|
269
361
|
# Extract outputs
|
|
270
|
-
outputs =
|
|
362
|
+
outputs = [{k: row.get(k) for k in output_keys} for row in records]
|
|
271
363
|
|
|
272
364
|
# Extract metadata if specified
|
|
273
365
|
metadata = None
|
|
274
366
|
if metadata_keys:
|
|
275
|
-
metadata =
|
|
367
|
+
metadata = [{k: row.get(k) for k in metadata_keys} for row in records]
|
|
276
368
|
|
|
277
369
|
return self.create_dataset_from_data(
|
|
278
370
|
name=name,
|
|
@@ -331,13 +423,16 @@ class PhoenixClient:
|
|
|
331
423
|
|
|
332
424
|
def run_experiment(
|
|
333
425
|
self,
|
|
334
|
-
dataset: "Dataset" | str,
|
|
426
|
+
dataset: "Dataset" | str | pl.DataFrame,
|
|
335
427
|
task: Callable[[Any], Any] | None = None,
|
|
336
428
|
evaluators: list[Callable[[Any], Any]] | None = None,
|
|
337
429
|
experiment_name: str | None = None,
|
|
338
430
|
experiment_description: str | None = None,
|
|
339
431
|
experiment_metadata: dict[str, Any] | None = None,
|
|
340
432
|
experiment_config: Any | None = None,
|
|
433
|
+
input_keys: list[str] | None = None,
|
|
434
|
+
output_keys: list[str] | None = None,
|
|
435
|
+
metadata_keys: list[str] | None = None,
|
|
341
436
|
) -> "RanExperiment":
|
|
342
437
|
"""Run an evaluation experiment.
|
|
343
438
|
|
|
@@ -346,14 +441,22 @@ class PhoenixClient:
|
|
|
346
441
|
2. Agent run: Provide task function to execute agents on dataset
|
|
347
442
|
3. Evaluator run: Provide evaluators to score existing outputs
|
|
348
443
|
|
|
444
|
+
Dataset can be:
|
|
445
|
+
- Phoenix Dataset instance
|
|
446
|
+
- Dataset name (string) - will be loaded from Phoenix
|
|
447
|
+
- Polars DataFrame - will be converted to Phoenix Dataset
|
|
448
|
+
|
|
349
449
|
Args:
|
|
350
|
-
dataset: Dataset instance
|
|
450
|
+
dataset: Dataset instance, name, or Polars DataFrame
|
|
351
451
|
task: Optional task function to run on each example (agent execution)
|
|
352
452
|
evaluators: Optional list of evaluator functions
|
|
353
453
|
experiment_name: Optional experiment name
|
|
354
454
|
experiment_description: Optional description
|
|
355
455
|
experiment_metadata: Optional metadata dict
|
|
356
456
|
experiment_config: Optional ExperimentConfig instance (overrides other params)
|
|
457
|
+
input_keys: Column names for inputs (only used if dataset is a DataFrame; auto-detected from column names when omitted)
|
|
458
|
+
output_keys: Column names for outputs (only used if dataset is a DataFrame; auto-detected from column names when omitted)
|
|
459
|
+
metadata_keys: Optional column names for metadata
|
|
357
460
|
|
|
358
461
|
Returns:
|
|
359
462
|
RanExperiment with results
|
|
@@ -369,6 +472,16 @@ class PhoenixClient:
|
|
|
369
472
|
... experiment_name="rem-v1-baseline"
|
|
370
473
|
... )
|
|
371
474
|
|
|
475
|
+
Example - With Polars DataFrame:
|
|
476
|
+
>>> df = pl.read_csv("golden_set.csv")
|
|
477
|
+
>>> experiment = client.run_experiment(
|
|
478
|
+
... dataset=df,
|
|
479
|
+
... task=run_agent,
|
|
480
|
+
... experiment_name="rem-v1-baseline",
|
|
481
|
+
... input_keys=["query"],
|
|
482
|
+
... output_keys=["expected_output"]
|
|
483
|
+
... )
|
|
484
|
+
|
|
372
485
|
Example - Evaluator Run (Phase 2b):
|
|
373
486
|
>>> experiment = client.run_experiment(
|
|
374
487
|
... dataset=agent_results,
|
|
@@ -407,6 +520,21 @@ class PhoenixClient:
|
|
|
407
520
|
else:
|
|
408
521
|
dataset = dataset_ref.path
|
|
409
522
|
|
|
523
|
+
# Convert Polars DataFrame to Phoenix Dataset
|
|
524
|
+
if isinstance(dataset, pl.DataFrame):
|
|
525
|
+
dataset_name_for_phoenix = f"{experiment_name or 'experiment'}-dataset-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
|
|
526
|
+
logger.info(f"Converting Polars DataFrame to Phoenix Dataset: {dataset_name_for_phoenix}")
|
|
527
|
+
dataset = dataframe_to_phoenix_dataset(
|
|
528
|
+
client=self,
|
|
529
|
+
df=dataset,
|
|
530
|
+
dataset_name=dataset_name_for_phoenix,
|
|
531
|
+
input_keys=input_keys,
|
|
532
|
+
output_keys=output_keys,
|
|
533
|
+
metadata_keys=metadata_keys,
|
|
534
|
+
description=f"Auto-created from DataFrame for experiment: {experiment_name}",
|
|
535
|
+
)
|
|
536
|
+
logger.info(f"✓ Created Phoenix Dataset: {dataset_name_for_phoenix}")
|
|
537
|
+
|
|
410
538
|
# Load dataset if name provided
|
|
411
539
|
if isinstance(dataset, str):
|
|
412
540
|
dataset = self.get_dataset(dataset)
|
|
@@ -454,7 +582,7 @@ class PhoenixClient:
|
|
|
454
582
|
root_spans_only: bool = True,
|
|
455
583
|
trace_id: str | None = None,
|
|
456
584
|
span_id: str | None = None,
|
|
457
|
-
) ->
|
|
585
|
+
) -> pl.DataFrame:
|
|
458
586
|
"""Query traces from Phoenix.
|
|
459
587
|
|
|
460
588
|
Args:
|
|
@@ -467,7 +595,7 @@ class PhoenixClient:
|
|
|
467
595
|
span_id: Filter by specific span ID
|
|
468
596
|
|
|
469
597
|
Returns:
|
|
470
|
-
DataFrame with trace data
|
|
598
|
+
Polars DataFrame with trace data
|
|
471
599
|
|
|
472
600
|
Example:
|
|
473
601
|
>>> traces = client.get_traces(
|
|
@@ -492,8 +620,11 @@ class PhoenixClient:
|
|
|
492
620
|
if span_id:
|
|
493
621
|
query_params["span_id"] = span_id
|
|
494
622
|
|
|
495
|
-
# Query traces
|
|
496
|
-
|
|
623
|
+
# Query traces (Phoenix returns pandas DataFrame)
|
|
624
|
+
pandas_df = self._client.query_spans(limit=limit, **query_params) # type: ignore[attr-defined]
|
|
625
|
+
|
|
626
|
+
# Convert pandas to Polars
|
|
627
|
+
traces_df = pl.from_pandas(pandas_df)
|
|
497
628
|
|
|
498
629
|
logger.debug(f"Retrieved {len(traces_df)} traces")
|
|
499
630
|
return traces_df
|
|
@@ -535,7 +666,7 @@ class PhoenixClient:
|
|
|
535
666
|
... )
|
|
536
667
|
"""
|
|
537
668
|
try:
|
|
538
|
-
# Query traces
|
|
669
|
+
# Query traces (returns Polars DataFrame)
|
|
539
670
|
traces_df = self.get_traces(
|
|
540
671
|
project_name=project_name,
|
|
541
672
|
start_time=start_time,
|
|
@@ -547,12 +678,15 @@ class PhoenixClient:
|
|
|
547
678
|
if len(traces_df) == 0:
|
|
548
679
|
raise ValueError("No traces found matching criteria")
|
|
549
680
|
|
|
681
|
+
# Convert to list of dicts for iteration
|
|
682
|
+
records = traces_df.to_dicts()
|
|
683
|
+
|
|
550
684
|
# Extract inputs and outputs from traces
|
|
551
685
|
inputs = []
|
|
552
686
|
outputs = []
|
|
553
687
|
metadata = []
|
|
554
688
|
|
|
555
|
-
for
|
|
689
|
+
for row in records:
|
|
556
690
|
# Extract input
|
|
557
691
|
span_input = row.get("attributes.input")
|
|
558
692
|
if span_input:
|
|
@@ -658,29 +792,169 @@ class PhoenixClient:
|
|
|
658
792
|
label: str | None = None,
|
|
659
793
|
score: float | None = None,
|
|
660
794
|
explanation: str | None = None,
|
|
661
|
-
|
|
662
|
-
|
|
795
|
+
metadata: dict[str, Any] | None = None,
|
|
796
|
+
trace_id: str | None = None,
|
|
797
|
+
) -> str | None:
|
|
798
|
+
"""Add feedback annotation to a span via Phoenix REST API.
|
|
799
|
+
|
|
800
|
+
Uses direct HTTP POST to /v1/span_annotations for reliability
|
|
801
|
+
(Phoenix Python client API changes frequently).
|
|
663
802
|
|
|
664
803
|
Args:
|
|
665
|
-
span_id: Span ID to annotate
|
|
666
|
-
annotation_name: Name of the annotation (e.g., "correctness")
|
|
804
|
+
span_id: Span ID to annotate (hex string)
|
|
805
|
+
annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
|
|
667
806
|
annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
|
|
668
|
-
label: Optional label (e.g., "correct", "incorrect")
|
|
807
|
+
label: Optional label (e.g., "correct", "incorrect", "helpful")
|
|
669
808
|
score: Optional numeric score (0.0-1.0)
|
|
670
809
|
explanation: Optional explanation text
|
|
810
|
+
metadata: Optional additional metadata dict
|
|
811
|
+
trace_id: Optional trace ID (included in the annotation payload when provided)
|
|
812
|
+
|
|
813
|
+
Returns:
|
|
814
|
+
The span_id (returned as the annotation reference) if successful, None otherwise
|
|
671
815
|
"""
|
|
816
|
+
import httpx
|
|
817
|
+
|
|
672
818
|
try:
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
819
|
+
# Build annotation payload for Phoenix REST API
|
|
820
|
+
annotation_data = {
|
|
821
|
+
"span_id": span_id,
|
|
822
|
+
"name": annotation_name,
|
|
823
|
+
"annotator_kind": annotator_kind,
|
|
824
|
+
"result": {
|
|
825
|
+
"label": label,
|
|
826
|
+
"score": score,
|
|
827
|
+
"explanation": explanation,
|
|
828
|
+
},
|
|
829
|
+
"metadata": metadata or {},
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
# Add trace_id if provided
|
|
833
|
+
if trace_id:
|
|
834
|
+
annotation_data["trace_id"] = trace_id
|
|
835
|
+
|
|
836
|
+
# POST to Phoenix REST API
|
|
837
|
+
annotations_endpoint = f"{self.config.base_url}/v1/span_annotations"
|
|
838
|
+
headers = {}
|
|
839
|
+
if self.config.api_key:
|
|
840
|
+
headers["Authorization"] = f"Bearer {self.config.api_key}"
|
|
841
|
+
|
|
842
|
+
with httpx.Client(timeout=5.0) as client:
|
|
843
|
+
response = client.post(
|
|
844
|
+
annotations_endpoint,
|
|
845
|
+
json={"data": [annotation_data]},
|
|
846
|
+
headers=headers,
|
|
847
|
+
)
|
|
848
|
+
response.raise_for_status()
|
|
681
849
|
|
|
682
850
|
logger.info(f"Added {annotator_kind} feedback to span {span_id}")
|
|
851
|
+
return span_id # Return span_id as annotation reference
|
|
683
852
|
|
|
853
|
+
except httpx.HTTPStatusError as e:
|
|
854
|
+
logger.error(
|
|
855
|
+
f"Failed to add span feedback (HTTP {e.response.status_code}): "
|
|
856
|
+
f"{e.response.text if hasattr(e, 'response') else 'N/A'}"
|
|
857
|
+
)
|
|
858
|
+
return None
|
|
684
859
|
except Exception as e:
|
|
685
860
|
logger.error(f"Failed to add span feedback: {e}")
|
|
686
|
-
|
|
861
|
+
return None
|
|
862
|
+
|
|
863
|
+
def sync_user_feedback(
|
|
864
|
+
self,
|
|
865
|
+
span_id: str,
|
|
866
|
+
rating: int | None = None,
|
|
867
|
+
categories: list[str] | None = None,
|
|
868
|
+
comment: str | None = None,
|
|
869
|
+
feedback_id: str | None = None,
|
|
870
|
+
trace_id: str | None = None,
|
|
871
|
+
) -> str | None:
|
|
872
|
+
"""Sync user feedback to Phoenix as a span annotation.
|
|
873
|
+
|
|
874
|
+
Convenience method for syncing Feedback entities to Phoenix.
|
|
875
|
+
Converts REM feedback format to Phoenix annotation format.
|
|
876
|
+
|
|
877
|
+
Args:
|
|
878
|
+
span_id: OTEL span ID to annotate
|
|
879
|
+
rating: User rating (-1 = thumbs down, 1 = thumbs up, 2-5 = star rating)
|
|
880
|
+
categories: List of feedback categories
|
|
881
|
+
comment: Free-text comment
|
|
882
|
+
feedback_id: Optional REM feedback ID for reference
|
|
883
|
+
trace_id: Optional trace ID for the span
|
|
884
|
+
|
|
885
|
+
Returns:
|
|
886
|
+
The span_id (used as the annotation reference) if successful, None otherwise
|
|
887
|
+
|
|
888
|
+
Example:
|
|
889
|
+
>>> client.sync_user_feedback(
|
|
890
|
+
... span_id="abc123",
|
|
891
|
+
... rating=4,
|
|
892
|
+
... categories=["helpful", "accurate"],
|
|
893
|
+
... comment="Great response!"
|
|
894
|
+
... )
|
|
895
|
+
"""
|
|
896
|
+
# Convert rating to 0-1 score
|
|
897
|
+
# Rating scheme:
|
|
898
|
+
# -1 = thumbs down → score 0.0
|
|
899
|
+
# 1 = thumbs up → score 1.0
|
|
900
|
+
# 2-5 = star rating → normalized to 0-1 range
|
|
901
|
+
score = None
|
|
902
|
+
if rating is not None:
|
|
903
|
+
if rating == -1:
|
|
904
|
+
score = 0.0
|
|
905
|
+
elif rating == 1:
|
|
906
|
+
score = 1.0 # Thumbs up
|
|
907
|
+
elif 2 <= rating <= 5:
|
|
908
|
+
score = (rating - 1) / 4.0 # 2→0.25, 3→0.5, 4→0.75, 5→1.0
|
|
909
|
+
|
|
910
|
+
# Use primary category as label
|
|
911
|
+
label = categories[0] if categories else None
|
|
912
|
+
|
|
913
|
+
# Build explanation from comment and additional categories
|
|
914
|
+
explanation = comment
|
|
915
|
+
if categories and len(categories) > 1:
|
|
916
|
+
cats_str = ", ".join(categories[1:])
|
|
917
|
+
if explanation:
|
|
918
|
+
explanation = f"{explanation} [Categories: {cats_str}]"
|
|
919
|
+
else:
|
|
920
|
+
explanation = f"Categories: {cats_str}"
|
|
921
|
+
|
|
922
|
+
# Build metadata
|
|
923
|
+
metadata: dict[str, Any] = {
|
|
924
|
+
"rating": rating,
|
|
925
|
+
"categories": categories or [],
|
|
926
|
+
}
|
|
927
|
+
if feedback_id:
|
|
928
|
+
metadata["rem_feedback_id"] = feedback_id
|
|
929
|
+
|
|
930
|
+
return self.add_span_feedback(
|
|
931
|
+
span_id=span_id,
|
|
932
|
+
annotation_name="user_feedback",
|
|
933
|
+
annotator_kind="HUMAN",
|
|
934
|
+
label=label,
|
|
935
|
+
score=score,
|
|
936
|
+
explanation=explanation,
|
|
937
|
+
metadata=metadata,
|
|
938
|
+
trace_id=trace_id,
|
|
939
|
+
)
|
|
940
|
+
|
|
941
|
+
def get_span_annotations(
|
|
942
|
+
self,
|
|
943
|
+
span_id: str,
|
|
944
|
+
annotation_name: str | None = None,
|
|
945
|
+
) -> list[dict[str, Any]]:
|
|
946
|
+
"""Get annotations for a span.
|
|
947
|
+
|
|
948
|
+
Args:
|
|
949
|
+
span_id: Span ID to query
|
|
950
|
+
annotation_name: Optional filter by annotation name
|
|
951
|
+
|
|
952
|
+
Returns:
|
|
953
|
+
List of annotation dicts
|
|
954
|
+
|
|
955
|
+
TODO: Implement once Phoenix client exposes this method
|
|
956
|
+
"""
|
|
957
|
+
# TODO: Phoenix client doesn't expose annotation query yet
|
|
958
|
+
# This is a stub for future implementation
|
|
959
|
+
logger.warning("get_span_annotations not yet implemented in Phoenix client")
|
|
960
|
+
return []
|
rem/services/postgres/README.md
CHANGED
|
@@ -348,8 +348,27 @@ results = await service.vector_search(
|
|
|
348
348
|
|
|
349
349
|
### Initialize Service
|
|
350
350
|
|
|
351
|
+
There are two ways to initialize the PostgresService:
|
|
352
|
+
|
|
353
|
+
**Option 1: Factory function (recommended for apps using remdb as a library)**
|
|
354
|
+
|
|
355
|
+
```python
|
|
356
|
+
from rem.services.postgres import get_postgres_service
|
|
357
|
+
|
|
358
|
+
# Uses POSTGRES__CONNECTION_STRING from environment
|
|
359
|
+
pg = get_postgres_service()
|
|
360
|
+
if pg is None:
|
|
361
|
+
raise RuntimeError("Database not configured - set POSTGRES__CONNECTION_STRING")
|
|
362
|
+
|
|
363
|
+
await pg.connect()
|
|
364
|
+
# ... use pg ...
|
|
365
|
+
await pg.disconnect()
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
**Option 2: Direct instantiation**
|
|
369
|
+
|
|
351
370
|
```python
|
|
352
|
-
from rem.services.postgres import PostgresService
|
|
371
|
+
from rem.services.postgres import PostgresService
|
|
353
372
|
|
|
354
373
|
service = PostgresService(
|
|
355
374
|
connection_string="postgresql://user:pass@localhost/remdb",
|
|
@@ -359,6 +378,9 @@ service = PostgresService(
|
|
|
359
378
|
await service.connect()
|
|
360
379
|
```
|
|
361
380
|
|
|
381
|
+
> **Note**: `get_postgres_service()` returns the service directly. It does NOT support
|
|
382
|
+
> `async with` context manager syntax. Always call `connect()` and `disconnect()` explicitly.
|
|
383
|
+
|
|
362
384
|
### Using Repository Pattern
|
|
363
385
|
|
|
364
386
|
**Generic Repository** for simple CRUD operations:
|
|
@@ -514,34 +536,156 @@ results = await service.vector_search(
|
|
|
514
536
|
- HNSW parameters: `m=16, ef_construction=64` (tunable)
|
|
515
537
|
- Monitor shared_buffers and work_mem
|
|
516
538
|
|
|
517
|
-
##
|
|
539
|
+
## Schema Management
|
|
518
540
|
|
|
519
|
-
|
|
541
|
+
REM uses a **code-as-source-of-truth** approach. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.
|
|
520
542
|
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
543
|
+
### File Structure
|
|
544
|
+
|
|
545
|
+
```
|
|
546
|
+
src/rem/sql/
|
|
547
|
+
├── migrations/
|
|
548
|
+
│ ├── 001_install.sql # Core infrastructure (manual)
|
|
549
|
+
│ └── 002_install_models.sql # Entity tables (auto-generated)
|
|
550
|
+
└── background_indexes.sql # HNSW vector indexes (optional)
|
|
525
551
|
```
|
|
526
552
|
|
|
527
|
-
|
|
553
|
+
**Key principle**: Only two migration files. No incremental `003_`, `004_` files.
|
|
554
|
+
|
|
555
|
+
### CLI Commands
|
|
528
556
|
|
|
529
557
|
```bash
|
|
530
|
-
|
|
558
|
+
# Apply migrations (installs extensions, core tables, entity tables)
|
|
559
|
+
rem db migrate
|
|
560
|
+
|
|
561
|
+
# Check migration status
|
|
562
|
+
rem db status
|
|
563
|
+
|
|
564
|
+
# Generate schema SQL from models (for remdb development)
|
|
565
|
+
rem db schema generate --models src/rem/models/entities
|
|
566
|
+
|
|
567
|
+
# Validate models for schema generation
|
|
568
|
+
rem db schema validate --models src/rem/models/entities
|
|
531
569
|
```
|
|
532
570
|
|
|
533
|
-
|
|
571
|
+
### Model Registry
|
|
534
572
|
|
|
535
|
-
|
|
573
|
+
Models are discovered via the registry:
|
|
536
574
|
|
|
537
|
-
```
|
|
538
|
-
|
|
575
|
+
```python
|
|
576
|
+
import rem
|
|
577
|
+
from rem.models.core import CoreModel
|
|
578
|
+
|
|
579
|
+
@rem.register_model
|
|
580
|
+
class MyEntity(CoreModel):
|
|
581
|
+
name: str
|
|
582
|
+
description: str # Auto-embeds
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
## Using REM as a Library (Downstream Apps)
|
|
586
|
+
|
|
587
|
+
When building an application that **depends on remdb as a package** (e.g., `pip install remdb`),
|
|
588
|
+
there are important differences from developing remdb itself.
|
|
589
|
+
|
|
590
|
+
### What Works Out of the Box
|
|
591
|
+
|
|
592
|
+
1. **All core entity tables** - Resources, Messages, Users, Sessions, etc.
|
|
593
|
+
2. **PostgresService** - Full database access via `get_postgres_service()`
|
|
594
|
+
3. **Repository pattern** - CRUD operations for core entities
|
|
595
|
+
4. **Migrations** - `rem db migrate` applies the bundled SQL files
|
|
596
|
+
|
|
597
|
+
```python
|
|
598
|
+
# In your downstream app (e.g., myapp/main.py)
|
|
599
|
+
from rem.services.postgres import get_postgres_service
|
|
600
|
+
from rem.models.entities import Message, Resource
|
|
601
|
+
|
|
602
|
+
pg = get_postgres_service()
|
|
603
|
+
await pg.connect()
|
|
604
|
+
|
|
605
|
+
# Use core entities - tables already exist
|
|
606
|
+
messages = await pg.query(Message, {"session_id": "abc"})
|
|
539
607
|
```
|
|
540
608
|
|
|
541
|
-
|
|
609
|
+
### Custom Models in Downstream Apps
|
|
610
|
+
|
|
611
|
+
The `@rem.register_model` decorator registers models in the **runtime registry**, which is useful for:
|
|
612
|
+
- Schema introspection at runtime
|
|
613
|
+
- Future tooling that reads the registry
|
|
614
|
+
|
|
615
|
+
However, **`rem db migrate` only applies SQL files bundled in the remdb package**.
|
|
616
|
+
Custom models from downstream apps do NOT automatically get tables created.
|
|
617
|
+
|
|
618
|
+
**Options for custom model tables:**
|
|
619
|
+
|
|
620
|
+
**Option A: Use core entities with metadata**
|
|
621
|
+
|
|
622
|
+
Store custom data in the `metadata` JSONB field of existing entities:
|
|
623
|
+
|
|
624
|
+
```python
|
|
625
|
+
resource = Resource(
|
|
626
|
+
name="my-custom-thing",
|
|
627
|
+
content="...",
|
|
628
|
+
metadata={"custom_field": "value", "another": 123}
|
|
629
|
+
)
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
**Option B: Create tables manually**
|
|
633
|
+
|
|
634
|
+
Write and apply your own SQL:
|
|
635
|
+
|
|
636
|
+
```sql
|
|
637
|
+
-- myapp/sql/custom_tables.sql
|
|
638
|
+
CREATE TABLE IF NOT EXISTS conversation_summaries (
|
|
639
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
640
|
+
session_ref TEXT NOT NULL,
|
|
641
|
+
summary TEXT NOT NULL,
|
|
642
|
+
-- ... include CoreModel fields for compatibility
|
|
643
|
+
user_id VARCHAR(256),
|
|
644
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
645
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
646
|
+
);
|
|
647
|
+
```
|
|
542
648
|
|
|
543
649
|
```bash
|
|
544
|
-
|
|
650
|
+
psql $DATABASE_URL -f myapp/sql/custom_tables.sql
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
**Option C: Contribute upstream**
|
|
654
|
+
|
|
655
|
+
If your model is generally useful, contribute it to remdb so it's included in
|
|
656
|
+
the next release and `rem db migrate` creates it automatically.
|
|
657
|
+
|
|
658
|
+
### Example: Downstream App Structure
|
|
659
|
+
|
|
660
|
+
```
|
|
661
|
+
myapp/
|
|
662
|
+
├── main.py # Import models, start API
|
|
663
|
+
├── models/
|
|
664
|
+
│ └── __init__.py # @rem.register_model decorators
|
|
665
|
+
├── sql/
|
|
666
|
+
│ └── custom.sql # Manual migrations for custom tables
|
|
667
|
+
├── .env # POSTGRES__CONNECTION_STRING, LLM keys
|
|
668
|
+
└── pyproject.toml # dependencies = ["remdb>=0.3.110"]
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
```python
|
|
672
|
+
# myapp/models/__init__.py
|
|
673
|
+
import rem
|
|
674
|
+
from rem.models.core import CoreModel
|
|
675
|
+
|
|
676
|
+
@rem.register_model
|
|
677
|
+
class ConversationSummary(CoreModel):
|
|
678
|
+
"""Registered for introspection, but table created via sql/custom.sql"""
|
|
679
|
+
session_ref: str
|
|
680
|
+
summary: str
|
|
681
|
+
```
|
|
682
|
+
|
|
683
|
+
```python
|
|
684
|
+
# myapp/main.py
|
|
685
|
+
import models # Registers custom models
|
|
686
|
+
|
|
687
|
+
from rem.api.main import app # Use REM's FastAPI app
|
|
688
|
+
# Or build your own app using rem.services
|
|
545
689
|
```
|
|
546
690
|
|
|
547
691
|
## Configuration
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
PostgreSQL service for CloudNativePG database operations.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
from .diff_service import DiffService, SchemaDiff
|
|
5
6
|
from .repository import Repository
|
|
6
7
|
from .service import PostgresService
|
|
7
8
|
|
|
@@ -20,4 +21,4 @@ def get_postgres_service() -> PostgresService | None:
|
|
|
20
21
|
return PostgresService()
|
|
21
22
|
|
|
22
23
|
|
|
23
|
-
__all__ = ["PostgresService", "get_postgres_service", "Repository"]
|
|
24
|
+
__all__ = ["PostgresService", "get_postgres_service", "Repository", "DiffService", "SchemaDiff"]
|