remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,492 @@
1
+ """
2
+ Pydantic Model Helper Utilities.
3
+
4
+ Utilities for working with REM Pydantic models following our conventions:
5
+
6
+ Business Key (entity_key) Detection:
7
+ 1. Field with json_schema_extra={"entity_key": True}
8
+ 2. Common business key fields: name, uri, key, label, title
9
+ 3. Fallback to "id" (unique by UUID only)
10
+
11
+ Embedding Field Detection:
12
+ 1. Field with json_schema_extra={"embed": True}
13
+ 2. Common content fields: content, description, summary, etc.
14
+ 3. Explicit disable with json_schema_extra={"embed": False}
15
+
16
+ Table Name Inference:
17
+ 1. model_config.json_schema_extra.table_name
18
+ 2. CamelCase → snake_case + pluralization
19
+
20
+ Model Resolution:
21
+ - model_from_arbitrary_casing: Resolve model class from flexible input casing
22
+
23
+ Data Validation:
24
+ - validate_data_for_model: Validate row data against a Pydantic model with clear error reporting
25
+ """
26
+
27
+ import re
28
+ from typing import Any, Type
29
+
30
+ from loguru import logger
31
+ from pydantic import BaseModel
32
+
33
+
34
def get_entity_key_field(model: Type[BaseModel]) -> str:
    """
    Resolve which field of a model serves as its business key for KV lookups.

    Resolution order:
        1. The first field whose json_schema_extra is a dict containing
           ``"entity_key": True``
        2. The first of "name", "uri", "key", "label", "title" that the
           model declares (checked in that order)
        3. "id" as a last resort (a warning is logged)

    Args:
        model: Pydantic model class

    Returns:
        Field name to use as entity_key
    """
    declared = model.model_fields

    # An explicit marker on a field always wins over naming conventions.
    for declared_name, info in declared.items():
        extra = getattr(info, "json_schema_extra", None)
        if not extra or not isinstance(extra, dict):
            continue
        if extra.get("entity_key") is True:
            logger.debug(f"Using explicit entity_key field: {declared_name}")
            return declared_name

    # Otherwise take the first conventional business-key name that exists.
    conventional = next(
        (c for c in ("name", "uri", "key", "label", "title") if c in declared),
        None,
    )
    if conventional is not None:
        logger.debug(f"Using conventional entity_key field: {conventional}")
        return conventional

    # Nothing matched: the row is only addressable by its id.
    logger.warning(
        f"No business key found for {model.__name__}, using 'id' (UUID only)"
    )
    return "id"
75
+
76
+
77
def get_table_name(model: Type[BaseModel]) -> str:
    """
    Get table name for a Pydantic model.

    Resolution order:
        1. model_config["json_schema_extra"]["table_name"] when it is a string
        2. The class name converted CamelCase → snake_case, then pluralized

    Pluralization rules (applied to the snake_case name):
        - names already ending in "s" are returned unchanged
        - consonant + "y" → "ies"  (category → categories)
        - vowel + "y"     → "ys"   (day → days, survey → surveys)
        - anything else   → append "s" (resource → resources)

    Args:
        model: Pydantic model class

    Returns:
        Table name

    Example:
        A class named ``DomainResource`` with no explicit table_name yields
        'domain_resources'.
    """
    # Explicit table_name takes precedence over inference.
    model_config = getattr(model, "model_config", None)
    if isinstance(model_config, dict):
        json_extra = model_config.get("json_schema_extra", {})
        if isinstance(json_extra, dict):
            table_name = json_extra.get("table_name")
            if isinstance(table_name, str):
                return table_name

    # Convert CamelCase to snake_case (two passes so acronym runs such as
    # "HTTPServer" become "http_server").
    name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", model.__name__)
    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

    # Pluralize
    if name.endswith("s"):
        return name
    if name.endswith("y"):
        # Only consonant + "y" takes "ies"; vowel + "y" just takes "s",
        # otherwise "day" would become "daies".
        if len(name) > 1 and name[-2] in "aeiou":
            return name + "s"
        return name[:-1] + "ies"
    return name + "s"
123
+
124
+
125
def get_embeddable_fields(model: Type[BaseModel]) -> list[str]:
    """
    List the fields of a model that should have embeddings generated.

    Decision per field, in order:
        1. json_schema_extra is a dict with ``"embed": True``  → embed
        2. json_schema_extra is a dict with ``"embed": False`` → skip
        3. Lower-cased field name is one of the default content names
           (content, description, summary, text, body, message, notes) → embed
        4. Otherwise → skip

    Args:
        model: Pydantic model class

    Returns:
        Field names to generate embeddings for, in model declaration order
    """
    default_names = frozenset(
        ("content", "description", "summary", "text", "body", "message", "notes")
    )

    def _wants_embedding(field_name: str, field_info: Any) -> bool:
        # An explicit True/False flag on the field overrides the name heuristic;
        # any other value (or no dict at all) falls through to the name check.
        extra = getattr(field_info, "json_schema_extra", None)
        if extra and isinstance(extra, dict):
            flag = extra.get("embed")
            if flag is True:
                return True
            if flag is False:
                return False
        return field_name.lower() in default_names

    return [
        field_name
        for field_name, field_info in model.model_fields.items()
        if _wants_embedding(field_name, field_info)
    ]
177
+
178
+
179
def should_skip_field(field_name: str) -> bool:
    """
    Tell whether a field is a system field to skip during SQL generation.

    The skipped names are: id, tenant_id, user_id, created_at, updated_at,
    deleted_at, graph_edges, metadata, tags, column. The comparison is
    exact and case-sensitive.

    Args:
        field_name: Name of the field

    Returns:
        True if field should be skipped

    Example:
        >>> should_skip_field("id")
        True
        >>> should_skip_field("name")
        False
    """
    system_fields = frozenset(
        (
            # identity / ownership
            "id",
            "tenant_id",
            "user_id",
            # timestamps
            "created_at",
            "updated_at",
            "deleted_at",
            # remaining system columns
            "graph_edges",
            "metadata",
            "tags",
            "column",
        )
    )
    return field_name in system_fields
217
+
218
+
219
def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
    """
    Collect the REM-specific metadata for a Pydantic model in one dict.

    Args:
        model: Pydantic model class

    Returns:
        Dict with:
            - model_name: the model class's ``__name__``
            - table_name: result of get_table_name(model)
            - entity_key_field: result of get_entity_key_field(model)
            - embeddable_fields: result of get_embeddable_fields(model)
    """
    model_name = model.__name__
    table = get_table_name(model)
    key_field = get_entity_key_field(model)
    embed_fields = get_embeddable_fields(model)

    return dict(
        model_name=model_name,
        table_name=table,
        entity_key_field=key_field,
        embeddable_fields=embed_fields,
    )
246
+
247
+
248
def normalize_to_title_case(name: str) -> str:
    """
    Normalize arbitrary casing to TitleCase (PascalCase).

    The name is split on hyphens and underscores; empty parts are dropped
    and each remaining part is normalized on its own:

    - a part that mixes upper- and lower-case letters is assumed to already
      carry word boundaries (camelCase / PascalCase), so only its first
      character is upper-cased and the interior capitals are kept
    - a part that is entirely lower- or upper-case is capitalized
      (first letter upper, rest lower)

    Handles various input formats:
        - kebab-case: domain-resource → DomainResource
        - snake_case: domain_resource → DomainResource
        - lowercase: domainresource → Domainresource (single word)
        - camelCase: domainResource → DomainResource
        - TitleCase: DomainResource → DomainResource (passthrough)
        - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource

    Args:
        name: Input name in any casing format

    Returns:
        TitleCase (PascalCase) version of the name; an empty string when
        the input has no characters other than delimiters

    Example:
        >>> normalize_to_title_case("domain-resource")
        'DomainResource'
        >>> normalize_to_title_case("domain_resources")
        'DomainResources'
        >>> normalize_to_title_case("domainResource")
        'DomainResource'
        >>> normalize_to_title_case("DomainResource")
        'DomainResource'
    """

    def _title_part(part: str) -> str:
        has_lower = any(ch.islower() for ch in part)
        has_upper = any(ch.isupper() for ch in part)
        if has_lower and has_upper:
            # str.capitalize() would lower-case the interior capitals and
            # destroy the word boundaries ("domainResource" → "Domainresource").
            return part[0].upper() + part[1:]
        return part.capitalize()

    # Split on common delimiters (hyphen, underscore), dropping empty parts
    # produced by leading, trailing or doubled delimiters.
    return "".join(_title_part(part) for part in re.split(r"[-_]", name) if part)
291
+
292
+
293
def model_from_arbitrary_casing(
    name: str,
    registry: dict[str, Type[BaseModel]] | None = None,
) -> Type[BaseModel]:
    """
    Resolve a model class from a name given in arbitrary casing.

    The name is first normalized with normalize_to_title_case, then looked
    up in the registry using three attempts, in order:

        1. the normalized name as-is
        2. the normalized name without a trailing "s" (singular form)
        3. the normalized name with an extra trailing "s" (plural form)

    Args:
        name: Model name in kebab-case, snake_case, lowercase or TitleCase
        registry: Optional dict mapping TitleCase names to model classes.
            When omitted, a default registry is built from the classes
            imported from rem.models.entities, with plural aliases.

    Returns:
        The resolved Pydantic model class

    Raises:
        ValueError: If none of the three attempts matches a registry key
    """
    # Build default registry from entities module if not provided
    if registry is None:
        from rem.models.entities import (
            DomainResource,
            Feedback,
            File,
            ImageResource,
            Message,
            Moment,
            Ontology,
            OntologyConfig,
            Resource,
            Schema,
            Session,
            User,
        )

        # Singular key first, then its plural alias where one is defined.
        registry = {
            "Resource": Resource, "Resources": Resource,
            "DomainResource": DomainResource, "DomainResources": DomainResource,
            "ImageResource": ImageResource, "ImageResources": ImageResource,
            "File": File, "Files": File,
            "Message": Message, "Messages": Message,
            "Moment": Moment, "Moments": Moment,
            "Session": Session, "Sessions": Session,
            "Feedback": Feedback,
            "User": User, "Users": User,
            "Schema": Schema, "Schemas": Schema,
            "Ontology": Ontology, "Ontologies": Ontology,
            "OntologyConfig": OntologyConfig, "OntologyConfigs": OntologyConfig,
        }

    normalized = normalize_to_title_case(name)

    # Attempt 1: exact match on the normalized name
    if normalized in registry:
        exact_match = registry[normalized]
        logger.debug(f"Resolved model '{name}' → {exact_match.__name__}")
        return exact_match

    # Attempt 2: strip a trailing 's' (singular form)
    singular = normalized[:-1]
    if normalized.endswith("s") and singular in registry:
        singular_match = registry[singular]
        logger.debug(f"Resolved model '{name}' → {singular_match.__name__} (singular)")
        return singular_match

    # Attempt 3: append a trailing 's' (plural form)
    plural = normalized + "s"
    if plural in registry:
        plural_match = registry[plural]
        logger.debug(f"Resolved model '{name}' → {plural_match.__name__} (plural)")
        return plural_match

    # Aliases map to the same class, so de-duplicate by class name.
    available = sorted({model_cls.__name__ for model_cls in registry.values()})
    raise ValueError(
        f"Unknown model: '{name}' (normalized: '{normalized}'). "
        f"Available models: {', '.join(available)}"
    )
395
+
396
+
397
class ValidationResult:
    """
    Outcome of validating a data dict against a Pydantic model.

    Attributes are plain values set by the constructor:
        valid: whether validation succeeded
        instance: the constructed model instance, or None
        errors: list of error message strings
        missing_required / extra_fields / required_fields / optional_fields:
            sets of field names
    """

    def __init__(
        self,
        valid: bool,
        instance: BaseModel | None = None,
        errors: list[str] | None = None,
        missing_required: set[str] | None = None,
        extra_fields: set[str] | None = None,
        required_fields: set[str] | None = None,
        optional_fields: set[str] | None = None,
    ):
        """Store the result; omitted (or empty) collections become fresh empty ones."""
        self.valid = valid
        self.instance = instance
        self.errors = errors if errors else []
        self.missing_required = missing_required if missing_required else set()
        self.extra_fields = extra_fields if extra_fields else set()
        self.required_fields = required_fields if required_fields else set()
        self.optional_fields = optional_fields if optional_fields else set()

    def log_errors(self, row_label: str = "Row") -> None:
        """
        Log the validation failure through loguru; does nothing when valid.

        Args:
            row_label: Prefix identifying the row in the headline message
        """
        if not self.valid:
            logger.error(f"{row_label}: Validation failed")

            if self.missing_required:
                logger.error(f" Missing required: {self.missing_required}")
            if self.extra_fields:
                logger.warning(f" Unknown fields (ignored): {self.extra_fields}")

            for err in self.errors:
                logger.error(f" - {err}")

            # Finish with the model's field summary to help fix the input.
            logger.info(f" Required: {self.required_fields or '(none)'}")
            logger.info(f" Optional: {self.optional_fields}")
432
+
433
+
434
def validate_data_for_model(
    model: Type[BaseModel],
    data: dict[str, Any],
) -> ValidationResult:
    """
    Validate a data dict against a Pydantic model and report field details.

    Field bookkeeping is computed from ``model.model_fields`` and the keys
    of ``data`` before the model is instantiated, so the required, optional
    and extra field sets are available whether or not validation succeeds.

    Args:
        model: Pydantic model class to validate against
        data: Dictionary of field values

    Returns:
        ValidationResult. On success it carries the constructed instance;
        on failure it carries the error messages (as "<location>: <message>")
        and the set of missing required fields. Errors whose location is
        one of the missing required fields are left out of ``errors`` so
        they are not reported twice.
    """
    from pydantic import ValidationError

    field_infos = model.model_fields
    known = set(field_infos.keys())
    required = {
        field_name for field_name, info in field_infos.items() if info.is_required()
    }
    optional = known - required

    supplied = set(data.keys())
    missing_required = required - supplied
    extra_fields = supplied - known

    try:
        instance = model(**data)
    except ValidationError as exc:
        problems = []
        for detail in exc.errors():
            location = ".".join(map(str, detail["loc"]))
            if location in missing_required:
                # Already surfaced through missing_required.
                continue
            problems.append(f"{location}: {detail['msg']}")

        return ValidationResult(
            valid=False,
            errors=problems,
            missing_required=missing_required,
            extra_fields=extra_fields,
            required_fields=required,
            optional_fields=optional,
        )

    return ValidationResult(
        valid=True,
        instance=instance,
        required_fields=required,
        optional_fields=optional,
        extra_fields=extra_fields,
    )