remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +7 -5
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +84 -10
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +70 -22
- rem/api/mcp_router/server.py +8 -1
- rem/api/mcp_router/tools.py +80 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +277 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +123 -14
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +468 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +455 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/ask.py +15 -11
- rem/cli/commands/configure.py +169 -94
- rem/cli/commands/db.py +53 -7
- rem/cli/commands/experiments.py +278 -96
- rem/cli/commands/process.py +8 -7
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +9 -9
- rem/cli/main.py +10 -0
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +206 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +367 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +85 -16
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +20 -13
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +29 -10
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +86 -5
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/session/compression.py +17 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +115 -17
- rem/sql/background_indexes.sql +10 -0
- rem/sql/migrations/001_install.sql +152 -2
- rem/sql/migrations/002_install_models.sql +580 -231
- rem/sql/migrations/003_seed_default_user.sql +48 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +273 -14
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/db_maintainer.py +74 -0
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
rem/utils/mime_types.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized MIME type mappings for file format detection.
|
|
3
|
+
|
|
4
|
+
Provides bidirectional mappings between file extensions and MIME types.
|
|
5
|
+
Use these constants throughout the codebase instead of inline dictionaries.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Forward lookup table: file extension -> MIME type.
# Keys are lowercase and always carry the leading dot.
EXTENSION_TO_MIME: dict[str, str] = {
    # --- Images ---
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".tiff": "image/tiff",
    ".svg": "image/svg+xml",
    # --- Office / portable documents ---
    ".pdf": "application/pdf",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".doc": "application/msword",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    ".ppt": "application/vnd.ms-powerpoint",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".xls": "application/vnd.ms-excel",
    # --- Audio ---
    ".wav": "audio/wav",
    ".mp3": "audio/mpeg",
    ".m4a": "audio/x-m4a",
    ".flac": "audio/flac",
    ".ogg": "audio/ogg",
    ".aac": "audio/aac",
    # --- Video ---
    ".mp4": "video/mp4",
    ".webm": "video/webm",
    ".avi": "video/x-msvideo",
    ".mov": "video/quicktime",
    # --- Text and source code ---
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".markdown": "text/markdown",
    ".json": "application/json",
    ".yaml": "application/x-yaml",
    ".yml": "application/x-yaml",
    ".xml": "application/xml",
    ".html": "text/html",
    ".css": "text/css",
    ".js": "application/javascript",
    ".py": "text/x-python",
    ".ts": "application/typescript",
    ".csv": "text/csv",
}
|
|
54
|
+
|
|
55
|
+
# Reverse lookup table: MIME type -> one canonical file extension.
# Where several extensions share a MIME type a single representative is
# listed (".jpg", ".md", ".yaml"). "audio/mp4" is an extra alias that maps
# to ".m4a" even though the forward table emits "audio/x-m4a" for it.
MIME_TO_EXTENSION: dict[str, str] = {
    # --- Images ---
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/bmp": ".bmp",
    "image/tiff": ".tiff",
    "image/svg+xml": ".svg",
    # --- Office / portable documents ---
    "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/msword": ".doc",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
    "application/vnd.ms-powerpoint": ".ppt",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.ms-excel": ".xls",
    # --- Audio ---
    "audio/wav": ".wav",
    "audio/mpeg": ".mp3",
    "audio/x-m4a": ".m4a",
    "audio/mp4": ".m4a",
    "audio/flac": ".flac",
    "audio/ogg": ".ogg",
    "audio/aac": ".aac",
    # --- Video ---
    "video/mp4": ".mp4",
    "video/webm": ".webm",
    "video/x-msvideo": ".avi",
    "video/quicktime": ".mov",
    # --- Text and source code ---
    "text/plain": ".txt",
    "text/markdown": ".md",
    "application/json": ".json",
    "application/x-yaml": ".yaml",
    "application/xml": ".xml",
    "text/html": ".html",
    "text/css": ".css",
    "application/javascript": ".js",
    "text/x-python": ".py",
    "application/typescript": ".ts",
    "text/csv": ".csv",
}
|
|
99
|
+
|
|
100
|
+
# Extension sets per media category (lowercase, leading dot included).
IMAGE_EXTENSIONS: set[str] = {
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg",
}
DOCUMENT_EXTENSIONS: set[str] = {
    ".pdf", ".docx", ".doc", ".pptx", ".ppt", ".xlsx", ".xls",
}
AUDIO_EXTENSIONS: set[str] = {".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac"}
VIDEO_EXTENSIONS: set[str] = {".mp4", ".webm", ".avi", ".mov"}
TEXT_EXTENSIONS: set[str] = {
    ".txt", ".md", ".markdown", ".json", ".yaml", ".yml", ".xml",
    ".html", ".css", ".js", ".py", ".ts", ".csv",
}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_extension(mime_type: str, default: str = ".bin") -> str:
    """
    Get file extension for a MIME type.

    The lookup ignores letter case, surrounding whitespace and any
    parameters after a semicolon, so a raw header value such as
    "Text/HTML; charset=utf-8" resolves the same way as "text/html".

    Args:
        mime_type: MIME type string (e.g., "image/png")
        default: Default extension if MIME type not found

    Returns:
        File extension with leading dot (e.g., ".png")
    """
    # Keep only the "type/subtype" part; media types are case-insensitive.
    essence = mime_type.partition(";")[0].strip().lower()
    return MIME_TO_EXTENSION.get(essence, default)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def get_mime_type(extension: str, default: str = "application/octet-stream") -> str:
    """
    Look up the MIME type registered for a file extension.

    Args:
        extension: File extension, given either as ".png" or as "png";
            letter case is ignored
        default: Value returned when the extension has no entry

    Returns:
        MIME type string (e.g., "image/png")
    """
    # Add the leading dot when the caller omitted it, then fold the case so
    # the key matches the lowercase keys of the lookup table.
    dotted = extension if extension.startswith(".") else "." + extension
    try:
        return EXTENSION_TO_MIME[dotted.lower()]
    except KeyError:
        return default
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def is_image(extension_or_mime: str) -> bool:
    """
    Check if extension or MIME type represents an image.

    Args:
        extension_or_mime: Either a file extension with leading dot
            (e.g., ".png") or a MIME type (e.g., "image/png"). Letter case
            is ignored in both forms.

    Returns:
        True for a known image extension or any "image/*" MIME type
    """
    # Fold case once so both branches behave the same; MIME types are
    # case-insensitive, so "IMAGE/PNG" must be accepted too.
    value = extension_or_mime.lower()
    if value.startswith("."):
        return value in IMAGE_EXTENSIONS
    return value.startswith("image/")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def is_audio(extension_or_mime: str) -> bool:
    """
    Check if extension or MIME type represents audio.

    Args:
        extension_or_mime: Either a file extension with leading dot
            (e.g., ".mp3") or a MIME type (e.g., "audio/mpeg"). Letter case
            is ignored in both forms.

    Returns:
        True for a known audio extension or any "audio/*" MIME type
    """
    # Fold case once so both branches behave the same; MIME types are
    # case-insensitive, so "Audio/MPEG" must be accepted too.
    value = extension_or_mime.lower()
    if value.startswith("."):
        return value in AUDIO_EXTENSIONS
    return value.startswith("audio/")
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def is_document(extension_or_mime: str) -> bool:
    """
    Check if extension or MIME type represents a document.

    Args:
        extension_or_mime: Either a file extension with leading dot
            (e.g., ".pdf") or a MIME type (e.g., "application/pdf"). Letter
            case is ignored in both forms.

    Returns:
        True for a known document extension, for PDF and legacy MS Office
        MIME types, or for any OpenXML "officedocument" MIME type
    """
    value = extension_or_mime.lower()
    if value.startswith("."):
        return value in DOCUMENT_EXTENSIONS
    # The legacy PowerPoint/Excel types are listed explicitly: ".ppt" and
    # ".xls" count as document extensions, so their MIME types must too.
    doc_mimes = {
        "application/pdf",
        "application/msword",
        "application/vnd.ms-powerpoint",
        "application/vnd.ms-excel",
    }
    # Every OpenXML format (docx/pptx/xlsx) carries "officedocument".
    return value in doc_mimes or "officedocument" in value
|
rem/utils/model_helpers.py
CHANGED
|
@@ -16,8 +16,12 @@ Embedding Field Detection:
|
|
|
16
16
|
Table Name Inference:
|
|
17
17
|
1. model_config.json_schema_extra.table_name
|
|
18
18
|
2. CamelCase → snake_case + pluralization
|
|
19
|
+
|
|
20
|
+
Model Resolution:
|
|
21
|
+
- model_from_arbitrary_casing: Resolve model class from flexible input casing
|
|
19
22
|
"""
|
|
20
23
|
|
|
24
|
+
import re
|
|
21
25
|
from typing import Any, Type
|
|
22
26
|
|
|
23
27
|
from loguru import logger
|
|
@@ -94,7 +98,9 @@ def get_table_name(model: Type[BaseModel]) -> str:
|
|
|
94
98
|
if isinstance(model_config, dict):
|
|
95
99
|
json_extra = model_config.get("json_schema_extra", {})
|
|
96
100
|
if isinstance(json_extra, dict) and "table_name" in json_extra:
|
|
97
|
-
|
|
101
|
+
table_name = json_extra["table_name"]
|
|
102
|
+
if isinstance(table_name, str):
|
|
103
|
+
return table_name
|
|
98
104
|
|
|
99
105
|
# Infer from class name
|
|
100
106
|
name = model.__name__
|
|
@@ -234,3 +240,152 @@ def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
|
|
|
234
240
|
"entity_key_field": get_entity_key_field(model),
|
|
235
241
|
"embeddable_fields": get_embeddable_fields(model),
|
|
236
242
|
}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def normalize_to_title_case(name: str) -> str:
    """
    Normalize arbitrary casing to TitleCase (PascalCase).

    Handles various input formats:
    - kebab-case: domain-resource → DomainResource
    - snake_case: domain_resource → DomainResource
    - camelCase: domainResource → DomainResource
    - lowercase: domainresource → Domainresource (single word)
    - TitleCase: DomainResource → DomainResource (passthrough)
    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource

    Args:
        name: Input name in any casing format

    Returns:
        TitleCase (PascalCase) version of the name; an empty string when
        the input has no characters other than delimiters

    Example:
        >>> normalize_to_title_case("domain-resource")
        'DomainResource'
        >>> normalize_to_title_case("domain_resources")
        'DomainResources'
        >>> normalize_to_title_case("DomainResource")
        'DomainResource'
        >>> normalize_to_title_case("domainResource")
        'DomainResource'
    """

    def _pascal_part(part: str) -> str:
        # A part written in a single case has no word boundaries of its
        # own, so fold it to "Xxxx".
        if part.isupper() or part.islower():
            return part.capitalize()
        # A mixed-case part already marks its word boundaries with
        # capitals. str.capitalize() would erase them, so only the first
        # character is raised.
        return part[0].upper() + part[1:]

    # Split on hyphen/underscore; empty fragments from doubled, leading or
    # trailing delimiters are dropped.
    return "".join(_pascal_part(part) for part in re.split(r"[-_]", name) if part)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def model_from_arbitrary_casing(
    name: str,
    registry: dict[str, Type[BaseModel]] | None = None,
) -> Type[BaseModel]:
    """
    Resolve a model class from arbitrary casing input.

    REM entity models use strict TitleCase (PascalCase) naming. This function
    allows flexible input formats while maintaining consistency:

    Input formats supported:
    - kebab-case: domain-resource, domain-resources
    - snake_case: domain_resource, domain_resources
    - lowercase: resource, domainresource
    - TitleCase: Resource, DomainResource

    Resolution order: the normalized name, then its singular form (trailing
    "s" removed), then its plural form (trailing "s" added). Each is first
    matched exactly; if none matches, the same three candidates are retried
    ignoring letter case, which is what lets single-word lowercase input
    such as "domainresource" reach DomainResource.

    Args:
        name: Model name in any supported casing format
        registry: Optional dict mapping TitleCase names to model classes.
                  If not provided, uses rem.models.entities module.

    Returns:
        The resolved Pydantic model class

    Raises:
        ValueError: If no model matches the normalized name

    Example:
        >>> model = model_from_arbitrary_casing("domain-resources")
        >>> model.__name__
        'DomainResource'
        >>> model = model_from_arbitrary_casing("Resource")
        >>> model.__name__
        'Resource'
    """
    # Build default registry from entities module if not provided.
    # Imported here rather than at module top.
    if registry is None:
        from rem.models.entities import (
            DomainResource,
            Feedback,
            File,
            ImageResource,
            Message,
            Moment,
            Ontology,
            OntologyConfig,
            Resource,
            Schema,
            Session,
            User,
        )

        registry = {
            "Resource": Resource,
            "Resources": Resource,  # Plural alias
            "DomainResource": DomainResource,
            "DomainResources": DomainResource,  # Plural alias
            "ImageResource": ImageResource,
            "ImageResources": ImageResource,
            "File": File,
            "Files": File,
            "Message": Message,
            "Messages": Message,
            "Moment": Moment,
            "Moments": Moment,
            "Session": Session,
            "Sessions": Session,
            "Feedback": Feedback,
            "User": User,
            "Users": User,
            "Schema": Schema,
            "Schemas": Schema,
            "Ontology": Ontology,
            "Ontologies": Ontology,
            "OntologyConfig": OntologyConfig,
            "OntologyConfigs": OntologyConfig,
        }

    # Normalize input to TitleCase
    normalized = normalize_to_title_case(name)

    # Candidate keys in priority order, each with the suffix used in the
    # debug log line.
    candidates: list[tuple[str, str]] = [(normalized, "")]
    if normalized.endswith("s"):
        candidates.append((normalized[:-1], " (singular)"))
    candidates.append((normalized + "s", " (plural)"))

    # Pass 1: exact key match.
    for key, note in candidates:
        if key in registry:
            logger.debug(f"Resolved model '{name}' → {registry[key].__name__}{note}")
            return registry[key]

    # Pass 2: case-insensitive match. Delimiter-free input carries no word
    # boundaries, so "domainresource" normalizes to "Domainresource" and can
    # only be matched by ignoring case. On a case collision between registry
    # keys the first-inserted key wins.
    folded: dict[str, Type[BaseModel]] = {}
    for key, model in registry.items():
        folded.setdefault(key.lower(), model)
    for key, note in candidates:
        model = folded.get(key.lower())
        if model is not None:
            logger.debug(
                f"Resolved model '{name}' → {model.__name__}{note} (case-insensitive)"
            )
            return model

    available = sorted({m.__name__ for m in registry.values()})
    raise ValueError(
        f"Unknown model: '{name}' (normalized: '{normalized}'). "
        f"Available models: {', '.join(available)}"
    )
|