remdb 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of remdb may be problematic.
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +103 -5
- rem/agentic/context_builder.py +36 -9
- rem/agentic/mcp/tool_wrapper.py +161 -18
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +172 -30
- rem/agentic/schema.py +8 -4
- rem/api/deps.py +3 -5
- rem/api/main.py +26 -4
- rem/api/mcp_router/resources.py +15 -10
- rem/api/mcp_router/server.py +11 -3
- rem/api/mcp_router/tools.py +418 -4
- rem/api/middleware/tracking.py +5 -5
- rem/api/routers/admin.py +218 -1
- rem/api/routers/auth.py +349 -6
- rem/api/routers/chat/completions.py +255 -7
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +126 -19
- rem/api/routers/feedback.py +134 -14
- rem/api/routers/messages.py +24 -15
- rem/api/routers/query.py +6 -3
- rem/auth/__init__.py +13 -3
- rem/auth/jwt.py +352 -0
- rem/auth/middleware.py +115 -10
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +42 -0
- rem/cli/commands/cluster.py +617 -168
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +66 -22
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/schema.py +6 -5
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +513 -0
- rem/services/email/templates.py +360 -0
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +127 -6
- rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
- rem/services/postgres/repository.py +5 -4
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/session/compression.py +120 -50
- rem/services/session/reload.py +14 -7
- rem/services/user_service.py +41 -9
- rem/settings.py +442 -23
- rem/sql/migrations/001_install.sql +156 -0
- rem/sql/migrations/002_install_models.sql +1951 -88
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +139 -10
- rem/utils/sql_paths.py +146 -0
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/METADATA +218 -180
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/RECORD +83 -68
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/WHEEL +0 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/entry_points.txt +0 -0
rem/utils/README.md
CHANGED

@@ -4,6 +4,7 @@
 
 1. [SQL Types](#sql-types-sql_typespy) - Pydantic to PostgreSQL type mapping
 2. [Embeddings](#embeddings-embeddingspy) - Vector embeddings generation
+3. [Files](#files-filespy) - File utilities and DataFrame I/O
 
 ## SQL Types (`sql_types.py`)
 
@@ -581,3 +582,47 @@ This will demonstrate:
 - `sql_types.py` - Use `embedding_provider` in json_schema_extra for TEXT fields
 - OpenAI Embeddings API: https://platform.openai.com/docs/api-reference/embeddings
 - pgvector Documentation: https://github.com/pgvector/pgvector
+
+---
+
+## Files (`files.py`)
+
+File utilities including temporary file handling and DataFrame I/O with automatic format detection.
+
+### DataFrame I/O
+
+Read and write DataFrames with format auto-detected from file extension:
+
+```python
+from rem.utils.files import read_dataframe, write_dataframe
+
+# Read - format inferred from extension
+df = read_dataframe("data.csv")
+df = read_dataframe("data.parquet")
+df = read_dataframe("data.xlsx")
+
+# Read from bytes (e.g., from S3)
+df = read_dataframe(content_bytes, filename="data.csv")
+
+# Write - format inferred from extension
+write_dataframe(df, "output.parquet")
+```
+
+**Supported formats**: `.csv`, `.tsv`, `.parquet`, `.json`, `.jsonl`, `.avro`, `.xlsx`, `.xls`, `.ods`, `.ipc`, `.arrow`, `.feather`
+
+Note: Some formats require optional dependencies (e.g., `fastexcel` for Excel).
+
+### Temporary File Utilities
+
+```python
+from rem.utils.files import temp_file_from_bytes, temp_directory
+
+# Create temp file from bytes, auto-cleanup
+with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
+    result = process_pdf(tmp_path)
+
+# Create temp directory, auto-cleanup
+with temp_directory() as tmp_dir:
+    # Work with files in tmp_dir
+    pass
+```
rem/utils/__init__.py
CHANGED

@@ -5,6 +5,7 @@ Utility functions and helpers for the REM system:
 - sql_types: Pydantic to PostgreSQL type mapping
 - embeddings: Vector embeddings generation using requests library
 - user_id: Deterministic UUID generation from email addresses
+- sql_paths: SQL file path resolution for packages and user migrations
 """
 
 from .embeddings import (
@@ -24,6 +25,15 @@ from .user_id import (
     is_valid_uuid,
     user_id_to_uuid,
 )
+from .sql_paths import (
+    USER_SQL_DIR_CONVENTION,
+    get_package_sql_dir,
+    get_package_migrations_dir,
+    get_user_sql_dir,
+    list_package_migrations,
+    list_user_migrations,
+    list_all_migrations,
+)
 
 __all__ = [
     # SQL Types
@@ -40,4 +50,12 @@ __all__ = [
     "email_to_user_id",
     "user_id_to_uuid",
     "is_valid_uuid",
+    # SQL Paths
+    "USER_SQL_DIR_CONVENTION",
+    "get_package_sql_dir",
+    "get_package_migrations_dir",
+    "get_user_sql_dir",
+    "list_package_migrations",
+    "list_user_migrations",
+    "list_all_migrations",
 ]
rem/utils/files.py
CHANGED

@@ -3,13 +3,18 @@ File utilities for consistent file handling throughout REM.
 
 Provides context managers and helpers for temporary file operations,
 ensuring proper cleanup and consistent patterns.
+
+Also provides DataFrame I/O utilities using Polars with automatic
+format detection based on file extension.
 """
 
 import tempfile
 from contextlib import contextmanager
+from io import BytesIO
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Union
 
+import polars as pl
 from loguru import logger
 
 
@@ -165,3 +170,154 @@ def safe_delete(path: Path) -> bool:
     except Exception as e:
         logger.warning(f"Failed to delete {path}: {e}")
         return False
+
+
+# Extension to Polars reader mapping
+_EXTENSION_READERS = {
+    ".csv": pl.read_csv,
+    ".tsv": lambda p, **kw: pl.read_csv(p, separator="\t", **kw),
+    ".parquet": pl.read_parquet,
+    ".pq": pl.read_parquet,
+    ".json": pl.read_json,
+    ".jsonl": pl.read_ndjson,
+    ".ndjson": pl.read_ndjson,
+    ".avro": pl.read_avro,
+    ".xlsx": pl.read_excel,
+    ".xls": pl.read_excel,
+    ".ods": pl.read_ods,
+    ".ipc": pl.read_ipc,
+    ".arrow": pl.read_ipc,
+    ".feather": pl.read_ipc,
+}
+
+# Extension to Polars writer mapping
+_EXTENSION_WRITERS = {
+    ".csv": "write_csv",
+    ".tsv": "write_csv",  # with separator="\t"
+    ".parquet": "write_parquet",
+    ".pq": "write_parquet",
+    ".json": "write_json",
+    ".jsonl": "write_ndjson",
+    ".ndjson": "write_ndjson",
+    ".avro": "write_avro",
+    ".xlsx": "write_excel",
+    ".ipc": "write_ipc",
+    ".arrow": "write_ipc",
+    ".feather": "write_ipc",
+}
+
+
+def read_dataframe(
+    source: Union[str, Path, bytes],
+    filename: Optional[str] = None,
+    **kwargs,
+) -> pl.DataFrame:
+    """
+    Read a DataFrame from a file, inferring format from extension.
+
+    Supports all Polars-compatible formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx, .xls)
+    - OpenDocument (.ods)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        source: File path (str/Path) or bytes content
+        filename: Required when source is bytes, to determine format
+        **kwargs: Additional arguments passed to the Polars reader
+
+    Returns:
+        Polars DataFrame
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> df = read_dataframe("data.csv")
+        >>> df = read_dataframe("data.parquet")
+        >>> df = read_dataframe(csv_bytes, filename="data.csv")
+    """
+    # Determine the file extension
+    if isinstance(source, bytes):
+        if not filename:
+            raise ValueError("filename is required when source is bytes")
+        ext = Path(filename).suffix.lower()
+        # For bytes, we need to wrap in BytesIO
+        file_like = BytesIO(source)
+    else:
+        path = Path(source)
+        ext = path.suffix.lower()
+        file_like = path
+
+    # Get the appropriate reader
+    reader = _EXTENSION_READERS.get(ext)
+    if reader is None:
+        supported = ", ".join(sorted(_EXTENSION_READERS.keys()))
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    try:
+        return reader(file_like, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to read DataFrame from {ext} format: {e}")
+        raise
+
+
+def write_dataframe(
+    df: pl.DataFrame,
+    dest: Union[str, Path],
+    **kwargs,
+) -> None:
+    """
+    Write a DataFrame to a file, inferring format from extension.
+
+    Supports most Polars-writable formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        df: Polars DataFrame to write
+        dest: Destination file path
+        **kwargs: Additional arguments passed to the Polars writer
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> write_dataframe(df, "output.csv")
+        >>> write_dataframe(df, "output.parquet")
+        >>> write_dataframe(df, "output.jsonl")
+    """
+    path = Path(dest)
+    ext = path.suffix.lower()
+
+    writer_method = _EXTENSION_WRITERS.get(ext)
+    if writer_method is None:
+        supported = ", ".join(sorted(_EXTENSION_WRITERS.keys()))
+        raise ValueError(
+            f"Unsupported file format for writing: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    # Ensure parent directory exists
+    ensure_parent_exists(path)
+
+    # Handle TSV special case
+    if ext == ".tsv":
+        kwargs.setdefault("separator", "\t")
+
+    try:
+        writer = getattr(df, writer_method)
+        writer(path, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to write DataFrame to {ext} format: {e}")
+        raise
rem/utils/schema_loader.py
CHANGED

@@ -132,13 +132,51 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
     # Check if we're already in an async context
     try:
         loop = asyncio.get_running_loop()
-        # We're in an async context -
-
-
-
-        "
-
-
+        # We're in an async context - use thread executor to run async code
+        import concurrent.futures
+
+        async def _async_lookup():
+            """Async helper to query database."""
+            from rem.services.postgres import get_postgres_service
+
+            db = get_postgres_service()
+            if not db:
+                logger.debug("PostgreSQL service not available for schema lookup")
+                return None
+
+            try:
+                await db.connect()
+
+                query = """
+                    SELECT spec FROM schemas
+                    WHERE LOWER(name) = LOWER($1)
+                    AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
+                    LIMIT 1
+                """
+                logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
+
+                row = await db.fetchrow(query, schema_name, user_id)
+
+                if row:
+                    spec = row.get("spec")
+                    if spec and isinstance(spec, dict):
+                        logger.debug(f"Found schema in database: {schema_name}")
+                        return spec
+
+                logger.debug(f"Schema not found in database: {schema_name}")
+                return None
+
+            except Exception as e:
+                logger.debug(f"Database schema lookup error: {e}")
+                return None
+            finally:
+                await db.disconnect()
+
+        # Run in thread pool to avoid blocking the event loop
+        with concurrent.futures.ThreadPoolExecutor() as pool:
+            future = pool.submit(asyncio.run, _async_lookup())
+            return future.result(timeout=10)
+
     except RuntimeError:
         # Not in async context - safe to use asyncio.run()
         pass
@@ -195,7 +233,7 @@ def load_agent_schema(
     """
     Load agent schema from YAML file with unified search logic and caching.
 
-    Schema names are case-invariant - "
+    Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.
 
     Filesystem schemas are cached indefinitely (immutable, versioned with code).
     Database schemas (future) will be cached with TTL for invalidation.
@@ -271,10 +309,20 @@
     # 2. Normalize name for package resource search (lowercase)
     base_name = cache_key
 
-    # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var)
+    # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
     from ..registry import get_schema_paths
 
     custom_paths = get_schema_paths()
+
+    # Auto-detect local folders if they exist (convention over configuration)
+    auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
+    for auto_folder in auto_detect_folders:
+        auto_path = Path(auto_folder)
+        if auto_path.exists() and auto_path.is_dir():
+            resolved = str(auto_path.resolve())
+            if resolved not in custom_paths:
+                custom_paths.insert(0, resolved)
+                logger.debug(f"Auto-detected schema directory: {auto_folder}")
     for custom_dir in custom_paths:
         # Try various patterns within each custom directory
         for pattern in [
@@ -400,9 +448,20 @@ async def load_agent_schema_async(
 
     base_name = cache_key
 
-    # Try custom schema paths
+    # Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
     from ..registry import get_schema_paths
     custom_paths = get_schema_paths()
+
+    # Auto-detect local folders if they exist (convention over configuration)
+    auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
+    for auto_folder in auto_detect_folders:
+        auto_path = Path(auto_folder)
+        if auto_path.exists() and auto_path.is_dir():
+            resolved = str(auto_path.resolve())
+            if resolved not in custom_paths:
+                custom_paths.insert(0, resolved)
+                logger.debug(f"Auto-detected schema directory: {auto_folder}")
+
     for custom_dir in custom_paths:
         for pattern in [f"{base_name}.yaml", f"{base_name}.yml", f"agents/{base_name}.yaml"]:
             custom_path = Path(custom_dir) / pattern
@@ -490,3 +549,73 @@ def validate_agent_schema(schema: dict[str, Any]) -> bool:
 
     logger.debug("Schema validation passed")
     return True
+
+
+def get_evaluator_schema_path(evaluator_name: str) -> Path | None:
+    """
+    Find the file path to an evaluator schema.
+
+    Searches standard locations for the evaluator schema YAML file:
+    - ./evaluators/{name}.yaml (local project)
+    - Custom schema paths from registry
+    - Package resources: schemas/evaluators/{name}.yaml
+
+    Args:
+        evaluator_name: Name of the evaluator (e.g., "mental-health-classifier")
+
+    Returns:
+        Path to the evaluator schema file, or None if not found
+
+    Example:
+        >>> path = get_evaluator_schema_path("mental-health-classifier")
+        >>> if path:
+        ...     print(f"Found evaluator at: {path}")
+    """
+    from ..registry import get_schema_paths
+
+    base_name = evaluator_name.lower().replace('.yaml', '').replace('.yml', '')
+
+    # 1. Try custom schema paths (from registry + auto-detected)
+    custom_paths = get_schema_paths()
+
+    # Auto-detect local folders
+    auto_detect_folders = ["./evaluators", "./schemas", "./agents"]
+    for auto_folder in auto_detect_folders:
+        auto_path = Path(auto_folder)
+        if auto_path.exists() and auto_path.is_dir():
+            resolved = str(auto_path.resolve())
+            if resolved not in custom_paths:
+                custom_paths.insert(0, resolved)
+
+    for custom_dir in custom_paths:
+        # Try various patterns within each custom directory
+        for pattern in [
+            f"{base_name}.yaml",
+            f"{base_name}.yml",
+            f"evaluators/{base_name}.yaml",
+        ]:
+            custom_path = Path(custom_dir) / pattern
+            if custom_path.exists():
+                logger.debug(f"Found evaluator schema: {custom_path}")
+                return custom_path
+
+    # 2. Try package resources
+    evaluator_search_paths = [
+        f"schemas/evaluators/{base_name}.yaml",
+        f"schemas/evaluators/rem/{base_name}.yaml",
+    ]
+
+    for search_path in evaluator_search_paths:
+        try:
+            schema_ref = importlib.resources.files("rem") / search_path
+            schema_path = Path(str(schema_ref))
+
+            if schema_path.exists():
+                logger.debug(f"Found evaluator schema in package: {schema_path}")
+                return schema_path
+        except Exception as e:
+            logger.debug(f"Could not check {search_path}: {e}")
+            continue
+
+    logger.warning(f"Evaluator schema not found: {evaluator_name}")
+    return None
rem/utils/sql_paths.py
ADDED

@@ -0,0 +1,146 @@
+"""Utilities for resolving SQL file paths.
+
+Handles package SQL directory resolution and user migrations.
+
+Convention for user migrations:
+    Place custom SQL files in `./sql/migrations/` relative to your project root.
+    Files should be numbered (e.g., `100_custom_table.sql`) to control execution order.
+    Package migrations (001-099) run first, then user migrations (100+).
+"""
+
+from pathlib import Path
+from typing import List, Optional
+import importlib.resources
+
+# Convention: Default location for user-maintained migrations
+USER_SQL_DIR_CONVENTION = "sql"
+
+
+def get_package_sql_dir() -> Path:
+    """Get the SQL directory from the installed rem package.
+
+    Returns:
+        Path to the package's sql directory
+
+    Raises:
+        FileNotFoundError: If the SQL directory cannot be found
+    """
+    try:
+        # Use importlib.resources for Python 3.9+
+        sql_ref = importlib.resources.files("rem") / "sql"
+        package_sql = Path(str(sql_ref))
+        if package_sql.exists():
+            return package_sql
+    except (AttributeError, TypeError):
+        pass
+
+    # Fallback: use __file__ to find package location
+    try:
+        import rem
+        package_sql = Path(rem.__file__).parent / "sql"
+        if package_sql.exists():
+            return package_sql
+    except (ImportError, AttributeError):
+        pass
+
+    # Development fallback: check relative to cwd
+    dev_sql = Path("src/rem/sql")
+    if dev_sql.exists():
+        return dev_sql
+
+    raise FileNotFoundError(
+        "Could not locate rem SQL directory. "
+        "Ensure remdb is properly installed or run from the source directory."
+    )
+
+
+def get_package_migrations_dir() -> Path:
+    """Get the migrations directory from the installed rem package.
+
+    Returns:
+        Path to the package's migrations directory
+    """
+    return get_package_sql_dir() / "migrations"
+
+
+def get_user_sql_dir() -> Optional[Path]:
+    """Get the conventional user SQL directory if it exists.
+
+    Looks for `./sql/` relative to the current working directory.
+    This follows the convention for user-maintained migrations.
+
+    Returns:
+        Path to user sql directory if it exists, None otherwise
+    """
+    user_sql = Path.cwd() / USER_SQL_DIR_CONVENTION
+    if user_sql.exists() and user_sql.is_dir():
+        return user_sql
+    return None
+
+
+def list_package_migrations() -> List[Path]:
+    """List all migration files in the package.
+
+    Returns:
+        Sorted list of migration file paths
+    """
+    try:
+        migrations_dir = get_package_migrations_dir()
+        if migrations_dir.exists():
+            return sorted(
+                f for f in migrations_dir.glob("*.sql")
+                if f.name[0].isdigit()  # Only numbered migrations
+            )
+    except FileNotFoundError:
+        pass
+
+    return []
+
+
+def list_user_migrations() -> List[Path]:
+    """List all migration files in the user's sql/migrations directory.
+
+    Returns:
+        Sorted list of user migration file paths
+    """
+    user_sql = get_user_sql_dir()
+    if user_sql:
+        migrations_dir = user_sql / "migrations"
+        if migrations_dir.exists():
+            return sorted(
+                f for f in migrations_dir.glob("*.sql")
+                if f.name[0].isdigit()  # Only numbered migrations
+            )
+    return []
+
+
+def list_all_migrations() -> List[Path]:
+    """List all migration files from package and user directories.
+
+    Collects migrations from:
+    1. Package migrations directory
+    2. User directory (./sql/migrations/) if it exists
+
+    Files are sorted by name, so use numbered prefixes to control order:
+    - 001-099: Reserved for package migrations
+    - 100+: Recommended for user migrations
+
+    Returns:
+        Sorted list of all migration file paths (by filename)
+    """
+    all_migrations = []
+    seen_names = set()
+
+    # Package migrations first
+    for f in list_package_migrations():
+        if f.name not in seen_names:
+            all_migrations.append(f)
+            seen_names.add(f.name)
+
+    # User migrations second
+    for f in list_user_migrations():
+        if f.name not in seen_names:
+            all_migrations.append(f)
+            seen_names.add(f.name)
+
+    return sorted(all_migrations, key=lambda p: p.name)
rem/utils/vision.py
CHANGED
rem/workers/__init__.py
CHANGED

@@ -1,5 +1,7 @@
 """Background workers for processing tasks."""
 
+from .db_listener import DBListener
 from .sqs_file_processor import SQSFileProcessor
+from .unlogged_maintainer import UnloggedMaintainer
 
-__all__ = ["SQSFileProcessor"]
+__all__ = ["DBListener", "SQSFileProcessor", "UnloggedMaintainer"]