remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic Model Helper Utilities.
|
|
3
|
+
|
|
4
|
+
Utilities for working with REM Pydantic models following our conventions:
|
|
5
|
+
|
|
6
|
+
Business Key (entity_key) Detection:
|
|
7
|
+
1. Field with json_schema_extra={"entity_key": True}
|
|
8
|
+
2. Common business key fields: name, uri, key, label, title
|
|
9
|
+
3. Fallback to "id" (unique by UUID only)
|
|
10
|
+
|
|
11
|
+
Embedding Field Detection:
|
|
12
|
+
1. Field with json_schema_extra={"embed": True}
|
|
13
|
+
2. Common content fields: content, description, summary, etc.
|
|
14
|
+
3. Explicit disable with json_schema_extra={"embed": False}
|
|
15
|
+
|
|
16
|
+
Table Name Inference:
|
|
17
|
+
1. model_config.json_schema_extra.table_name
|
|
18
|
+
2. CamelCase → snake_case + pluralization
|
|
19
|
+
|
|
20
|
+
Model Resolution:
|
|
21
|
+
- model_from_arbitrary_casing: Resolve model class from flexible input casing
|
|
22
|
+
|
|
23
|
+
Data Validation:
|
|
24
|
+
- validate_data_for_model: Validate row data against a Pydantic model with clear error reporting
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import re
|
|
28
|
+
from typing import Any, Type
|
|
29
|
+
|
|
30
|
+
from loguru import logger
|
|
31
|
+
from pydantic import BaseModel
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_entity_key_field(model: Type[BaseModel]) -> str:
    """
    Pick the field that acts as a model's business key for KV store lookups.

    Resolution order:
    1. The first field whose json_schema_extra is a dict with "entity_key" set to True
    2. The first of "name", "uri", "key", "label", "title" declared on the model
    3. "id" as a last resort (a warning is logged in this case)

    Args:
        model: Pydantic model class

    Returns:
        Field name to use as entity_key
    """
    declared = model.model_fields

    # An explicit marker always wins over naming conventions.
    for field_name, field_info in declared.items():
        extra = getattr(field_info, "json_schema_extra", None)
        if not extra or not isinstance(extra, dict):
            continue
        if extra.get("entity_key") is True:
            logger.debug(f"Using explicit entity_key field: {field_name}")
            return field_name

    # Conventional names, checked in priority order.
    conventional = ("name", "uri", "key", "label", "title")
    candidate = next((c for c in conventional if c in declared), None)
    if candidate is not None:
        logger.debug(f"Using conventional entity_key field: {candidate}")
        return candidate

    # Nothing suitable: the record is only unique by its UUID.
    logger.warning(
        f"No business key found for {model.__name__}, using 'id' (UUID only)"
    )
    return "id"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_table_name(model: Type[BaseModel]) -> str:
    """
    Resolve the database table name for a Pydantic model.

    Resolution order:
    1. model_config["json_schema_extra"]["table_name"], when it is a string
    2. The class name converted from CamelCase to snake_case and pluralized

    Pluralization is deliberately simple: names already ending in "s" are
    kept, a trailing "y" becomes "ies", anything else gets an "s" appended.

    Args:
        model: Pydantic model class

    Returns:
        Table name
    """
    # An explicit table name in the model config takes precedence.
    config = getattr(model, "model_config", None)
    if isinstance(config, dict):
        extra = config.get("json_schema_extra", {})
        if isinstance(extra, dict) and "table_name" in extra:
            explicit = extra["table_name"]
            if isinstance(explicit, str):
                return explicit

    # CamelCase -> snake_case in two passes: first split before a capitalized
    # word, then split a lowercase/digit followed by a capital.
    snake = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", model.__name__)
    snake = re.sub("([a-z0-9])([A-Z])", r"\1_\2", snake).lower()

    if snake.endswith("s"):
        return snake
    if snake.endswith("y"):
        # NOTE(review): vowel + "y" names (e.g. "survey") also become "...ies";
        # left as-is because generated table names may already be persisted.
        return snake[:-1] + "ies"  # category -> categories
    return snake + "s"  # resource -> resources
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def get_embeddable_fields(model: Type[BaseModel]) -> list[str]:
    """
    List the fields of a model that should have embeddings generated.

    Decision per field, in order:
    1. json_schema_extra={"embed": True}  -> included
    2. json_schema_extra={"embed": False} -> excluded
    3. Lower-cased field name is a common content field -> included
    4. Anything else -> excluded

    Args:
        model: Pydantic model class

    Returns:
        Field names to embed, in model field declaration order
    """
    # Content-like field names that are embedded unless explicitly disabled.
    default_names = frozenset(
        {"content", "description", "summary", "text", "body", "message", "notes"}
    )

    selected: list[str] = []
    for field_name, field_info in model.model_fields.items():
        extra = getattr(field_info, "json_schema_extra", None)
        # Only a dict-shaped json_schema_extra can carry an explicit flag.
        flag = extra.get("embed") if extra and isinstance(extra, dict) else None

        if flag is False:
            continue  # explicitly disabled
        if flag is True or field_name.lower() in default_names:
            selected.append(field_name)

    return selected
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def should_skip_field(field_name: str) -> bool:
    """
    Tell whether a field is a system field to skip during SQL generation.

    The skipped names are: id, tenant_id, user_id, created_at, updated_at,
    deleted_at, graph_edges, metadata, tags and column. The comparison is
    an exact, case-sensitive match.

    Args:
        field_name: Name of the field

    Returns:
        True if field should be skipped

    Example:
        >>> should_skip_field("id")
        True
        >>> should_skip_field("name")
        False
    """
    system_fields = frozenset(
        (
            # identity and ownership
            "id",
            "tenant_id",
            "user_id",
            # timestamps
            "created_at",
            "updated_at",
            "deleted_at",
            # JSONB / CoreModel fields
            "graph_edges",
            "metadata",
            "tags",
            "column",
        )
    )
    return field_name in system_fields
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
    """
    Collect the REM-specific metadata of a Pydantic model in one dict.

    Args:
        model: Pydantic model class

    Returns:
        Dict with the keys, in this order:
        - model_name: Original model class name
        - table_name: Result of get_table_name(model)
        - entity_key_field: Result of get_entity_key_field(model)
        - embeddable_fields: Result of get_embeddable_fields(model)
    """
    metadata: dict[str, Any] = {"model_name": model.__name__}
    # Helpers are invoked in the same order as the keys are inserted.
    metadata["table_name"] = get_table_name(model)
    metadata["entity_key_field"] = get_entity_key_field(model)
    metadata["embeddable_fields"] = get_embeddable_fields(model)
    return metadata
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def normalize_to_title_case(name: str) -> str:
    """
    Normalize arbitrary casing to TitleCase (PascalCase).

    The name is split on hyphens, underscores and whitespace; every
    non-empty part is capitalized and the parts are joined back together.

    Handles various input formats:
    - kebab-case: domain-resource → DomainResource
    - snake_case: domain_resource → DomainResource
    - lowercase: domainresource → Domainresource (single word)
    - TitleCase: DomainResource → DomainResource (passthrough)
    - camelCase: domainResource → DomainResource
    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource

    Args:
        name: Input name in any casing format

    Returns:
        TitleCase (PascalCase) version of the name; "" for empty or
        delimiter-only input

    Example:
        >>> normalize_to_title_case("domain-resource")
        'DomainResource'
        >>> normalize_to_title_case("domain_resources")
        'DomainResources'
        >>> normalize_to_title_case("DomainResource")
        'DomainResource'
        >>> normalize_to_title_case("domainResource")
        'DomainResource'
    """
    normalized_parts = []
    for part in re.split(r"[-_\s]+", name):
        if not part:
            continue  # leading/trailing or repeated delimiters
        has_lower = any(c.islower() for c in part)
        has_upper = any(c.isupper() for c in part)
        if has_lower and has_upper:
            # Mixed case already carries word boundaries (camelCase or
            # PascalCase): only force the first letter up, because
            # str.capitalize() would flatten the inner capitals.
            normalized_parts.append(part[0].upper() + part[1:])
        else:
            # All-lower or all-upper part: first letter up, rest down.
            normalized_parts.append(part.capitalize())

    return "".join(normalized_parts)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def model_from_arbitrary_casing(
    name: str,
    registry: dict[str, Type[BaseModel]] | None = None,
) -> Type[BaseModel]:
    """
    Resolve a model class from arbitrary casing input.

    REM entity models use strict TitleCase (PascalCase) naming. This function
    allows flexible input formats while maintaining consistency:

    Input formats supported:
    - kebab-case: domain-resource, domain-resources
    - snake_case: domain_resource, domain_resources
    - lowercase: resource, domainresource
    - TitleCase: Resource, DomainResource

    Lookup order, using the name normalized by normalize_to_title_case:
    1. Exact key
    2. Key without a trailing "s" (singular form)
    3. Key with an added "s" (plural form)
    4. Key with a trailing "ies" replaced by "y" (categories → Category)
    5. The same candidates again, compared case-insensitively against the
       registry keys (this is what resolves "domainresource")

    Args:
        name: Model name in any supported casing format
        registry: Optional dict mapping TitleCase names to model classes.
                  If not provided, uses rem.models.entities module.

    Returns:
        The resolved Pydantic model class

    Raises:
        ValueError: If no model matches the normalized name

    Example:
        >>> model = model_from_arbitrary_casing("domain-resources")
        >>> model.__name__
        'DomainResource'
        >>> model = model_from_arbitrary_casing("Resource")
        >>> model.__name__
        'Resource'
    """
    # Build default registry from entities module if not provided
    if registry is None:
        from rem.models.entities import (
            DomainResource,
            Feedback,
            File,
            ImageResource,
            Message,
            Moment,
            Ontology,
            OntologyConfig,
            Resource,
            Schema,
            Session,
            User,
        )

        registry = {
            "Resource": Resource,
            "Resources": Resource,  # Plural alias
            "DomainResource": DomainResource,
            "DomainResources": DomainResource,  # Plural alias
            "ImageResource": ImageResource,
            "ImageResources": ImageResource,
            "File": File,
            "Files": File,
            "Message": Message,
            "Messages": Message,
            "Moment": Moment,
            "Moments": Moment,
            "Session": Session,
            "Sessions": Session,
            "Feedback": Feedback,
            "User": User,
            "Users": User,
            "Schema": Schema,
            "Schemas": Schema,
            "Ontology": Ontology,
            "Ontologies": Ontology,
            "OntologyConfig": OntologyConfig,
            "OntologyConfigs": OntologyConfig,
        }

    # Normalize input to TitleCase
    normalized = normalize_to_title_case(name)

    # Candidate keys in priority order, each with the suffix used in the
    # debug message. The first three reproduce the historical lookup order.
    candidates: list[tuple[str, str]] = [(normalized, "")]
    if normalized.endswith("s"):
        candidates.append((normalized[:-1], " (singular)"))
    candidates.append((normalized + "s", " (plural)"))
    if normalized.endswith("ies"):
        # Inverse of the "y" -> "ies" pluralization used for table names.
        candidates.append((normalized[:-3] + "y", " (singular)"))

    for key, note in candidates:
        if key in registry:
            logger.debug(f"Resolved model '{name}' → {registry[key].__name__}{note}")
            return registry[key]

    # Case-insensitive fallback: a single lowercase word such as
    # "domainresource" normalizes to "Domainresource", which cannot match the
    # "DomainResource" key exactly. The first registry key wins on collisions.
    folded: dict[str, Type[BaseModel]] = {}
    for key, model in registry.items():
        folded.setdefault(key.casefold(), model)

    for key, note in candidates:
        match = folded.get(key.casefold())
        if match is not None:
            logger.debug(
                f"Resolved model '{name}' → {match.__name__}{note} (case-insensitive)"
            )
            return match

    available = sorted({m.__name__ for m in registry.values()})
    raise ValueError(
        f"Unknown model: '{name}' (normalized: '{normalized}'). "
        f"Available models: {', '.join(available)}"
    )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
class ValidationResult:
    """Outcome of validating one data dict against a Pydantic model.

    Holds the validity flag, the constructed instance (if any), error
    messages, and the sets of missing, extra, required and optional fields.
    """

    def __init__(
        self,
        valid: bool,
        instance: BaseModel | None = None,
        errors: list[str] | None = None,
        missing_required: set[str] | None = None,
        extra_fields: set[str] | None = None,
        required_fields: set[str] | None = None,
        optional_fields: set[str] | None = None,
    ):
        self.valid = valid
        self.instance = instance
        # Falsy arguments (None or empty) are replaced by fresh empty containers.
        self.errors = errors if errors else []
        self.missing_required = missing_required if missing_required else set()
        self.extra_fields = extra_fields if extra_fields else set()
        self.required_fields = required_fields if required_fields else set()
        self.optional_fields = optional_fields if optional_fields else set()

    def log_errors(self, row_label: str = "Row") -> None:
        """Log the failure details through loguru; does nothing when valid.

        Args:
            row_label: Prefix identifying the row in the first log line
        """
        if self.valid:
            return

        # Collect (log function, message) pairs first, then emit them in order.
        report = [(logger.error, f"{row_label}: Validation failed")]
        if self.missing_required:
            report.append((logger.error, f"  Missing required: {self.missing_required}"))
        if self.extra_fields:
            report.append(
                (logger.warning, f"  Unknown fields (ignored): {self.extra_fields}")
            )
        report.extend((logger.error, f"  - {err}") for err in self.errors)
        report.append((logger.info, f"  Required: {self.required_fields or '(none)'}"))
        report.append((logger.info, f"  Optional: {self.optional_fields}"))

        for emit, message in report:
            emit(message)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def validate_data_for_model(
    model: Type[BaseModel],
    data: dict[str, Any],
) -> ValidationResult:
    """
    Validate a data dict against a Pydantic model and report the details.

    The model is instantiated with the dict's items as keyword arguments.
    Independently of the outcome, the dict's keys are compared with the
    model's field names to determine missing required and unknown fields.

    Args:
        model: Pydantic model class to validate against
        data: Dictionary of field values

    Returns:
        ValidationResult with validation status and detailed field info.
        On success it carries the instance; on failure it carries the error
        messages, excluding those whose location is a missing required field
        (those are reported via missing_required instead).
    """
    from pydantic import ValidationError

    field_infos = model.model_fields
    known = set(field_infos.keys())
    required = {fname for fname, info in field_infos.items() if info.is_required()}
    optional = known - required

    supplied = set(data.keys())
    missing_required = required - supplied
    extra_fields = supplied - known

    try:
        instance = model(**data)
    except ValidationError as exc:
        # Skip errors already covered by missing_required to avoid
        # double-reporting.
        errors = [
            f"{location}: {err['msg']}"
            for err in exc.errors()
            if (location := ".".join(str(p) for p in err["loc"]))
            not in missing_required
        ]
        return ValidationResult(
            valid=False,
            errors=errors,
            missing_required=missing_required,
            extra_fields=extra_fields,
            required_fields=required,
            optional_fields=optional,
        )
    else:
        return ValidationResult(
            valid=True,
            instance=instance,
            required_fields=required,
            optional_fields=optional,
            extra_fields=extra_fields,
        )
|