remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +32 -2
- rem/agentic/agents/agent_manager.py +310 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/context_builder.py +5 -3
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +155 -18
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +280 -57
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +28 -5
- rem/api/mcp_router/tools.py +555 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +278 -4
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +697 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/__init__.py +13 -3
- rem/auth/middleware.py +186 -22
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +58 -14
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +25 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/registry.py +10 -4
- rem/schemas/agents/core/agent-builder.yaml +134 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +92 -19
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +459 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +197 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +547 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +137 -51
- rem/services/session/reload.py +15 -8
- rem/settings.py +515 -27
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2304 -377
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/files.py +157 -1
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0

rem/utils/README.md
CHANGED

````diff
@@ -4,6 +4,7 @@
 
 1. [SQL Types](#sql-types-sql_typespy) - Pydantic to PostgreSQL type mapping
 2. [Embeddings](#embeddings-embeddingspy) - Vector embeddings generation
+3. [Files](#files-filespy) - File utilities and DataFrame I/O
 
 ## SQL Types (`sql_types.py`)
 
@@ -581,3 +582,47 @@ This will demonstrate:
 - `sql_types.py` - Use `embedding_provider` in json_schema_extra for TEXT fields
 - OpenAI Embeddings API: https://platform.openai.com/docs/api-reference/embeddings
 - pgvector Documentation: https://github.com/pgvector/pgvector
+
+---
+
+## Files (`files.py`)
+
+File utilities including temporary file handling and DataFrame I/O with automatic format detection.
+
+### DataFrame I/O
+
+Read and write DataFrames with format auto-detected from file extension:
+
+```python
+from rem.utils.files import read_dataframe, write_dataframe
+
+# Read - format inferred from extension
+df = read_dataframe("data.csv")
+df = read_dataframe("data.parquet")
+df = read_dataframe("data.xlsx")
+
+# Read from bytes (e.g., from S3)
+df = read_dataframe(content_bytes, filename="data.csv")
+
+# Write - format inferred from extension
+write_dataframe(df, "output.parquet")
+```
+
+**Supported formats**: `.csv`, `.tsv`, `.parquet`, `.json`, `.jsonl`, `.avro`, `.xlsx`, `.xls`, `.ods`, `.ipc`, `.arrow`, `.feather`
+
+Note: Some formats require optional dependencies (e.g., `fastexcel` for Excel).
+
+### Temporary File Utilities
+
+```python
+from rem.utils.files import temp_file_from_bytes, temp_directory
+
+# Create temp file from bytes, auto-cleanup
+with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
+    result = process_pdf(tmp_path)
+
+# Create temp directory, auto-cleanup
+with temp_directory() as tmp_dir:
+    # Work with files in tmp_dir
+    pass
+```
````

rem/utils/__init__.py
CHANGED

```diff
@@ -5,6 +5,7 @@ Utility functions and helpers for the REM system:
 - sql_types: Pydantic to PostgreSQL type mapping
 - embeddings: Vector embeddings generation using requests library
 - user_id: Deterministic UUID generation from email addresses
+- sql_paths: SQL file path resolution for packages and user migrations
 """
 
 from .embeddings import (
@@ -24,6 +25,15 @@ from .user_id import (
     is_valid_uuid,
     user_id_to_uuid,
 )
+from .sql_paths import (
+    USER_SQL_DIR_CONVENTION,
+    get_package_sql_dir,
+    get_package_migrations_dir,
+    get_user_sql_dir,
+    list_package_migrations,
+    list_user_migrations,
+    list_all_migrations,
+)
 
 __all__ = [
     # SQL Types
@@ -40,4 +50,12 @@ __all__ = [
     "email_to_user_id",
     "user_id_to_uuid",
     "is_valid_uuid",
+    # SQL Paths
+    "USER_SQL_DIR_CONVENTION",
+    "get_package_sql_dir",
+    "get_package_migrations_dir",
+    "get_user_sql_dir",
+    "list_package_migrations",
+    "list_user_migrations",
+    "list_all_migrations",
 ]
```

rem/utils/date_utils.py
CHANGED

```diff
@@ -14,7 +14,7 @@ Convention:
 See CLAUDE.md Section 1 (Datetime Convention) for details.
 """
 
-from datetime import datetime, timedelta
+from datetime import UTC, datetime, timedelta
 from typing import Optional
 
 
@@ -30,7 +30,7 @@ def utc_now() -> datetime:
     >>> now.tzinfo is None
     True
     """
-    return datetime.utcnow()
+    return datetime.now(UTC).replace(tzinfo=None)
 
 
 def to_iso(dt: datetime) -> str:
```

rem/utils/files.py
CHANGED

```diff
@@ -3,13 +3,18 @@ File utilities for consistent file handling throughout REM.
 
 Provides context managers and helpers for temporary file operations,
 ensuring proper cleanup and consistent patterns.
+
+Also provides DataFrame I/O utilities using Polars with automatic
+format detection based on file extension.
 """
 
 import tempfile
 from contextlib import contextmanager
+from io import BytesIO
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Union
 
+import polars as pl
 from loguru import logger
 
 
@@ -165,3 +170,154 @@ def safe_delete(path: Path) -> bool:
     except Exception as e:
         logger.warning(f"Failed to delete {path}: {e}")
         return False
+
+
+# Extension to Polars reader mapping
+_EXTENSION_READERS = {
+    ".csv": pl.read_csv,
+    ".tsv": lambda p, **kw: pl.read_csv(p, separator="\t", **kw),
+    ".parquet": pl.read_parquet,
+    ".pq": pl.read_parquet,
+    ".json": pl.read_json,
+    ".jsonl": pl.read_ndjson,
+    ".ndjson": pl.read_ndjson,
+    ".avro": pl.read_avro,
+    ".xlsx": pl.read_excel,
+    ".xls": pl.read_excel,
+    ".ods": pl.read_ods,
+    ".ipc": pl.read_ipc,
+    ".arrow": pl.read_ipc,
+    ".feather": pl.read_ipc,
+}
+
+# Extension to Polars writer mapping
+_EXTENSION_WRITERS = {
+    ".csv": "write_csv",
+    ".tsv": "write_csv",  # with separator="\t"
+    ".parquet": "write_parquet",
+    ".pq": "write_parquet",
+    ".json": "write_json",
+    ".jsonl": "write_ndjson",
+    ".ndjson": "write_ndjson",
+    ".avro": "write_avro",
+    ".xlsx": "write_excel",
+    ".ipc": "write_ipc",
+    ".arrow": "write_ipc",
+    ".feather": "write_ipc",
+}
+
+
+def read_dataframe(
+    source: Union[str, Path, bytes],
+    filename: Optional[str] = None,
+    **kwargs,
+) -> pl.DataFrame:
+    """
+    Read a DataFrame from a file, inferring format from extension.
+
+    Supports all Polars-compatible formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx, .xls)
+    - OpenDocument (.ods)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        source: File path (str/Path) or bytes content
+        filename: Required when source is bytes, to determine format
+        **kwargs: Additional arguments passed to the Polars reader
+
+    Returns:
+        Polars DataFrame
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> df = read_dataframe("data.csv")
+        >>> df = read_dataframe("data.parquet")
+        >>> df = read_dataframe(csv_bytes, filename="data.csv")
+    """
+    # Determine the file extension
+    if isinstance(source, bytes):
+        if not filename:
+            raise ValueError("filename is required when source is bytes")
+        ext = Path(filename).suffix.lower()
+        # For bytes, we need to wrap in BytesIO
+        file_like = BytesIO(source)
+    else:
+        path = Path(source)
+        ext = path.suffix.lower()
+        file_like = path
+
+    # Get the appropriate reader
+    reader = _EXTENSION_READERS.get(ext)
+    if reader is None:
+        supported = ", ".join(sorted(_EXTENSION_READERS.keys()))
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    try:
+        return reader(file_like, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to read DataFrame from {ext} format: {e}")
+        raise
+
+
+def write_dataframe(
+    df: pl.DataFrame,
+    dest: Union[str, Path],
+    **kwargs,
+) -> None:
+    """
+    Write a DataFrame to a file, inferring format from extension.
+
+    Supports most Polars-writable formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        df: Polars DataFrame to write
+        dest: Destination file path
+        **kwargs: Additional arguments passed to the Polars writer
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> write_dataframe(df, "output.csv")
+        >>> write_dataframe(df, "output.parquet")
+        >>> write_dataframe(df, "output.jsonl")
+    """
+    path = Path(dest)
+    ext = path.suffix.lower()
+
+    writer_method = _EXTENSION_WRITERS.get(ext)
+    if writer_method is None:
+        supported = ", ".join(sorted(_EXTENSION_WRITERS.keys()))
+        raise ValueError(
+            f"Unsupported file format for writing: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    # Ensure parent directory exists
+    ensure_parent_exists(path)
+
+    # Handle TSV special case
+    if ext == ".tsv":
+        kwargs.setdefault("separator", "\t")
+
+    try:
+        writer = getattr(df, writer_method)
+        writer(path, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to write DataFrame to {ext} format: {e}")
+        raise
```

rem/utils/model_helpers.py
CHANGED

```diff
@@ -16,8 +16,12 @@ Embedding Field Detection:
 Table Name Inference:
 1. model_config.json_schema_extra.table_name
 2. CamelCase → snake_case + pluralization
+
+Model Resolution:
+- model_from_arbitrary_casing: Resolve model class from flexible input casing
 """
 
+import re
 from typing import Any, Type
 
 from loguru import logger
@@ -94,7 +98,9 @@ def get_table_name(model: Type[BaseModel]) -> str:
     if isinstance(model_config, dict):
         json_extra = model_config.get("json_schema_extra", {})
         if isinstance(json_extra, dict) and "table_name" in json_extra:
-            return json_extra["table_name"]
+            table_name = json_extra["table_name"]
+            if isinstance(table_name, str):
+                return table_name
 
     # Infer from class name
     name = model.__name__
@@ -234,3 +240,152 @@ def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
         "entity_key_field": get_entity_key_field(model),
         "embeddable_fields": get_embeddable_fields(model),
     }
+
+
+def normalize_to_title_case(name: str) -> str:
+    """
+    Normalize arbitrary casing to TitleCase (PascalCase).
+
+    Handles various input formats:
+    - kebab-case: domain-resource → DomainResource
+    - snake_case: domain_resource → DomainResource
+    - lowercase: domainresource → Domainresource (single word)
+    - TitleCase: DomainResource → DomainResource (passthrough)
+    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource
+
+    Args:
+        name: Input name in any casing format
+
+    Returns:
+        TitleCase (PascalCase) version of the name
+
+    Example:
+        >>> normalize_to_title_case("domain-resource")
+        'DomainResource'
+        >>> normalize_to_title_case("domain_resources")
+        'DomainResources'
+        >>> normalize_to_title_case("DomainResource")
+        'DomainResource'
+    """
+    # If already TitleCase (starts with uppercase, has no delimiters, and has
+    # at least one lowercase letter), return as-is
+    if (
+        name
+        and name[0].isupper()
+        and '-' not in name
+        and '_' not in name
+        and any(c.islower() for c in name)
+    ):
+        return name
+
+    # Split on common delimiters (hyphen, underscore)
+    parts = re.split(r'[-_]', name)
+
+    # Capitalize first letter of each part, lowercase the rest
+    normalized_parts = [part.capitalize() for part in parts if part]
+
+    return "".join(normalized_parts)
+
+
+def model_from_arbitrary_casing(
+    name: str,
+    registry: dict[str, Type[BaseModel]] | None = None,
+) -> Type[BaseModel]:
+    """
+    Resolve a model class from arbitrary casing input.
+
+    REM entity models use strict TitleCase (PascalCase) naming. This function
+    allows flexible input formats while maintaining consistency:
+
+    Input formats supported:
+    - kebab-case: domain-resource, domain-resources
+    - snake_case: domain_resource, domain_resources
+    - lowercase: resource, domainresource
+    - TitleCase: Resource, DomainResource
+
+    Args:
+        name: Model name in any supported casing format
+        registry: Optional dict mapping TitleCase names to model classes.
+            If not provided, uses rem.models.entities module.
+
+    Returns:
+        The resolved Pydantic model class
+
+    Raises:
+        ValueError: If no model matches the normalized name
+
+    Example:
+        >>> model = model_from_arbitrary_casing("domain-resources")
+        >>> model.__name__
+        'DomainResource'
+        >>> model = model_from_arbitrary_casing("Resource")
+        >>> model.__name__
+        'Resource'
+    """
+    # Build default registry from entities module if not provided
+    if registry is None:
+        from rem.models.entities import (
+            DomainResource,
+            Feedback,
+            File,
+            ImageResource,
+            Message,
+            Moment,
+            Ontology,
+            OntologyConfig,
+            Resource,
+            Schema,
+            Session,
+            User,
+        )
+
+        registry = {
+            "Resource": Resource,
+            "Resources": Resource,  # Plural alias
+            "DomainResource": DomainResource,
+            "DomainResources": DomainResource,  # Plural alias
+            "ImageResource": ImageResource,
+            "ImageResources": ImageResource,
+            "File": File,
+            "Files": File,
+            "Message": Message,
+            "Messages": Message,
+            "Moment": Moment,
+            "Moments": Moment,
+            "Session": Session,
+            "Sessions": Session,
+            "Feedback": Feedback,
+            "User": User,
+            "Users": User,
+            "Schema": Schema,
+            "Schemas": Schema,
+            "Ontology": Ontology,
+            "Ontologies": Ontology,
+            "OntologyConfig": OntologyConfig,
+            "OntologyConfigs": OntologyConfig,
+        }
+
+    # Normalize input to TitleCase
+    normalized = normalize_to_title_case(name)
+
+    # Look up in registry
+    if normalized in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[normalized].__name__}")
+        return registry[normalized]
+
+    # Try without trailing 's' (singular form)
+    if normalized.endswith("s") and normalized[:-1] in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[normalized[:-1]].__name__} (singular)")
+        return registry[normalized[:-1]]
+
+    # Try with trailing 's' (plural form)
+    plural = normalized + "s"
+    if plural in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[plural].__name__} (plural)")
+        return registry[plural]
+
+    available = sorted(set(m.__name__ for m in registry.values()))
+    raise ValueError(
+        f"Unknown model: '{name}' (normalized: '{normalized}'). "
+        f"Available models: {', '.join(available)}"
+    )
```
