remdb 0.3.118-py3-none-any.whl → 0.3.146-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +23 -3
- rem/agentic/mcp/tool_wrapper.py +126 -15
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +122 -43
- rem/agentic/schema.py +4 -1
- rem/api/mcp_router/tools.py +13 -2
- rem/api/routers/chat/completions.py +250 -4
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +35 -1
- rem/api/routers/feedback.py +134 -14
- rem/auth/middleware.py +66 -1
- rem/cli/commands/cluster.py +590 -82
- rem/cli/commands/configure.py +3 -4
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
- rem/services/session/compression.py +7 -0
- rem/settings.py +260 -17
- rem/sql/migrations/002_install_models.sql +91 -91
- rem/sql/migrations/004_cache_system.sql +1 -1
- rem/utils/README.md +45 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +94 -3
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +2 -1
- rem/workers/db_listener.py +579 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
rem/utils/files.py
CHANGED
@@ -3,13 +3,18 @@ File utilities for consistent file handling throughout REM.
 
 Provides context managers and helpers for temporary file operations,
 ensuring proper cleanup and consistent patterns.
+
+Also provides DataFrame I/O utilities using Polars with automatic
+format detection based on file extension.
 """
 
 import tempfile
 from contextlib import contextmanager
+from io import BytesIO
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Union
 
+import polars as pl
 from loguru import logger
 
 
@@ -165,3 +170,154 @@ def safe_delete(path: Path) -> bool:
     except Exception as e:
         logger.warning(f"Failed to delete {path}: {e}")
         return False
+
+
+# Extension to Polars reader mapping
+_EXTENSION_READERS = {
+    ".csv": pl.read_csv,
+    ".tsv": lambda p, **kw: pl.read_csv(p, separator="\t", **kw),
+    ".parquet": pl.read_parquet,
+    ".pq": pl.read_parquet,
+    ".json": pl.read_json,
+    ".jsonl": pl.read_ndjson,
+    ".ndjson": pl.read_ndjson,
+    ".avro": pl.read_avro,
+    ".xlsx": pl.read_excel,
+    ".xls": pl.read_excel,
+    ".ods": pl.read_ods,
+    ".ipc": pl.read_ipc,
+    ".arrow": pl.read_ipc,
+    ".feather": pl.read_ipc,
+}
+
+# Extension to Polars writer mapping
+_EXTENSION_WRITERS = {
+    ".csv": "write_csv",
+    ".tsv": "write_csv",  # with separator="\t"
+    ".parquet": "write_parquet",
+    ".pq": "write_parquet",
+    ".json": "write_json",
+    ".jsonl": "write_ndjson",
+    ".ndjson": "write_ndjson",
+    ".avro": "write_avro",
+    ".xlsx": "write_excel",
+    ".ipc": "write_ipc",
+    ".arrow": "write_ipc",
+    ".feather": "write_ipc",
+}
+
+
+def read_dataframe(
+    source: Union[str, Path, bytes],
+    filename: Optional[str] = None,
+    **kwargs,
+) -> pl.DataFrame:
+    """
+    Read a DataFrame from a file, inferring format from extension.
+
+    Supports all Polars-compatible formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx, .xls)
+    - OpenDocument (.ods)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        source: File path (str/Path) or bytes content
+        filename: Required when source is bytes, to determine format
+        **kwargs: Additional arguments passed to the Polars reader
+
+    Returns:
+        Polars DataFrame
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> df = read_dataframe("data.csv")
+        >>> df = read_dataframe("data.parquet")
+        >>> df = read_dataframe(csv_bytes, filename="data.csv")
+    """
+    # Determine the file extension
+    if isinstance(source, bytes):
+        if not filename:
+            raise ValueError("filename is required when source is bytes")
+        ext = Path(filename).suffix.lower()
+        # For bytes, we need to wrap in BytesIO
+        file_like = BytesIO(source)
+    else:
+        path = Path(source)
+        ext = path.suffix.lower()
+        file_like = path
+
+    # Get the appropriate reader
+    reader = _EXTENSION_READERS.get(ext)
+    if reader is None:
+        supported = ", ".join(sorted(_EXTENSION_READERS.keys()))
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    try:
+        return reader(file_like, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to read DataFrame from {ext} format: {e}")
+        raise
+
+
+def write_dataframe(
+    df: pl.DataFrame,
+    dest: Union[str, Path],
+    **kwargs,
+) -> None:
+    """
+    Write a DataFrame to a file, inferring format from extension.
+
+    Supports most Polars-writable formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        df: Polars DataFrame to write
+        dest: Destination file path
+        **kwargs: Additional arguments passed to the Polars writer
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> write_dataframe(df, "output.csv")
+        >>> write_dataframe(df, "output.parquet")
+        >>> write_dataframe(df, "output.jsonl")
+    """
+    path = Path(dest)
+    ext = path.suffix.lower()
+
+    writer_method = _EXTENSION_WRITERS.get(ext)
+    if writer_method is None:
+        supported = ", ".join(sorted(_EXTENSION_WRITERS.keys()))
+        raise ValueError(
+            f"Unsupported file format for writing: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    # Ensure parent directory exists
+    ensure_parent_exists(path)
+
+    # Handle TSV special case
+    if ext == ".tsv":
+        kwargs.setdefault("separator", "\t")
+
+    try:
+        writer = getattr(df, writer_method)
+        writer(path, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to write DataFrame to {ext} format: {e}")
+        raise
rem/utils/schema_loader.py
CHANGED
@@ -195,7 +195,7 @@ def load_agent_schema(
     """
     Load agent schema from YAML file with unified search logic and caching.
 
-    Schema names are case-invariant - "
+    Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.
 
     Filesystem schemas are cached indefinitely (immutable, versioned with code).
     Database schemas (future) will be cached with TTL for invalidation.
@@ -271,10 +271,20 @@ def load_agent_schema(
     # 2. Normalize name for package resource search (lowercase)
     base_name = cache_key
 
-    # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var)
+    # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
     from ..registry import get_schema_paths
 
     custom_paths = get_schema_paths()
+
+    # Auto-detect local folders if they exist (convention over configuration)
+    auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
+    for auto_folder in auto_detect_folders:
+        auto_path = Path(auto_folder)
+        if auto_path.exists() and auto_path.is_dir():
+            resolved = str(auto_path.resolve())
+            if resolved not in custom_paths:
+                custom_paths.insert(0, resolved)
+                logger.debug(f"Auto-detected schema directory: {auto_folder}")
     for custom_dir in custom_paths:
         # Try various patterns within each custom directory
         for pattern in [
@@ -400,9 +410,20 @@ async def load_agent_schema_async(
 
     base_name = cache_key
 
-    # Try custom schema paths
+    # Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
     from ..registry import get_schema_paths
     custom_paths = get_schema_paths()
+
+    # Auto-detect local folders if they exist (convention over configuration)
+    auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
+    for auto_folder in auto_detect_folders:
+        auto_path = Path(auto_folder)
+        if auto_path.exists() and auto_path.is_dir():
+            resolved = str(auto_path.resolve())
+            if resolved not in custom_paths:
+                custom_paths.insert(0, resolved)
+                logger.debug(f"Auto-detected schema directory: {auto_folder}")
+
     for custom_dir in custom_paths:
         for pattern in [f"{base_name}.yaml", f"{base_name}.yml", f"agents/{base_name}.yaml"]:
             custom_path = Path(custom_dir) / pattern
@@ -490,3 +511,73 @@ def validate_agent_schema(schema: dict[str, Any]) -> bool:
 
     logger.debug("Schema validation passed")
     return True
+
+
+def get_evaluator_schema_path(evaluator_name: str) -> Path | None:
+    """
+    Find the file path to an evaluator schema.
+
+    Searches standard locations for the evaluator schema YAML file:
+    - ./evaluators/{name}.yaml (local project)
+    - Custom schema paths from registry
+    - Package resources: schemas/evaluators/{name}.yaml
+
+    Args:
+        evaluator_name: Name of the evaluator (e.g., "mental-health-classifier")
+
+    Returns:
+        Path to the evaluator schema file, or None if not found
+
+    Example:
+        >>> path = get_evaluator_schema_path("mental-health-classifier")
+        >>> if path:
+        ...     print(f"Found evaluator at: {path}")
+    """
+    from ..registry import get_schema_paths
+
+    base_name = evaluator_name.lower().replace('.yaml', '').replace('.yml', '')
+
+    # 1. Try custom schema paths (from registry + auto-detected)
+    custom_paths = get_schema_paths()
+
+    # Auto-detect local folders
+    auto_detect_folders = ["./evaluators", "./schemas", "./agents"]
+    for auto_folder in auto_detect_folders:
+        auto_path = Path(auto_folder)
+        if auto_path.exists() and auto_path.is_dir():
+            resolved = str(auto_path.resolve())
+            if resolved not in custom_paths:
+                custom_paths.insert(0, resolved)
+
+    for custom_dir in custom_paths:
+        # Try various patterns within each custom directory
+        for pattern in [
+            f"{base_name}.yaml",
+            f"{base_name}.yml",
+            f"evaluators/{base_name}.yaml",
+        ]:
+            custom_path = Path(custom_dir) / pattern
+            if custom_path.exists():
+                logger.debug(f"Found evaluator schema: {custom_path}")
+                return custom_path
+
+    # 2. Try package resources
+    evaluator_search_paths = [
+        f"schemas/evaluators/{base_name}.yaml",
+        f"schemas/evaluators/rem/{base_name}.yaml",
+    ]
+
+    for search_path in evaluator_search_paths:
+        try:
+            schema_ref = importlib.resources.files("rem") / search_path
+            schema_path = Path(str(schema_ref))
+
+            if schema_path.exists():
+                logger.debug(f"Found evaluator schema in package: {schema_path}")
+                return schema_path
+        except Exception as e:
+            logger.debug(f"Could not check {search_path}: {e}")
+            continue
+
+    logger.warning(f"Evaluator schema not found: {evaluator_name}")
+    return None
rem/utils/vision.py
CHANGED
rem/workers/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 """Background workers for processing tasks."""
 
+from .db_listener import DBListener
 from .sqs_file_processor import SQSFileProcessor
 from .unlogged_maintainer import UnloggedMaintainer
 
-__all__ = ["SQSFileProcessor", "UnloggedMaintainer"]
+__all__ = ["DBListener", "SQSFileProcessor", "UnloggedMaintainer"]