@elizaos/python 2.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +26 -0
- package/README.md +239 -0
- package/elizaos/__init__.py +280 -0
- package/elizaos/action_docs.py +149 -0
- package/elizaos/advanced_capabilities/__init__.py +85 -0
- package/elizaos/advanced_capabilities/actions/__init__.py +54 -0
- package/elizaos/advanced_capabilities/actions/add_contact.py +139 -0
- package/elizaos/advanced_capabilities/actions/follow_room.py +151 -0
- package/elizaos/advanced_capabilities/actions/image_generation.py +148 -0
- package/elizaos/advanced_capabilities/actions/mute_room.py +164 -0
- package/elizaos/advanced_capabilities/actions/remove_contact.py +145 -0
- package/elizaos/advanced_capabilities/actions/roles.py +207 -0
- package/elizaos/advanced_capabilities/actions/schedule_follow_up.py +154 -0
- package/elizaos/advanced_capabilities/actions/search_contacts.py +145 -0
- package/elizaos/advanced_capabilities/actions/send_message.py +187 -0
- package/elizaos/advanced_capabilities/actions/settings.py +151 -0
- package/elizaos/advanced_capabilities/actions/unfollow_room.py +164 -0
- package/elizaos/advanced_capabilities/actions/unmute_room.py +164 -0
- package/elizaos/advanced_capabilities/actions/update_contact.py +164 -0
- package/elizaos/advanced_capabilities/actions/update_entity.py +161 -0
- package/elizaos/advanced_capabilities/evaluators/__init__.py +18 -0
- package/elizaos/advanced_capabilities/evaluators/reflection.py +134 -0
- package/elizaos/advanced_capabilities/evaluators/relationship_extraction.py +203 -0
- package/elizaos/advanced_capabilities/providers/__init__.py +36 -0
- package/elizaos/advanced_capabilities/providers/agent_settings.py +60 -0
- package/elizaos/advanced_capabilities/providers/contacts.py +77 -0
- package/elizaos/advanced_capabilities/providers/facts.py +82 -0
- package/elizaos/advanced_capabilities/providers/follow_ups.py +113 -0
- package/elizaos/advanced_capabilities/providers/knowledge.py +83 -0
- package/elizaos/advanced_capabilities/providers/relationships.py +112 -0
- package/elizaos/advanced_capabilities/providers/roles.py +97 -0
- package/elizaos/advanced_capabilities/providers/settings.py +51 -0
- package/elizaos/advanced_capabilities/services/__init__.py +18 -0
- package/elizaos/advanced_capabilities/services/follow_up.py +138 -0
- package/elizaos/advanced_capabilities/services/rolodex.py +244 -0
- package/elizaos/advanced_memory/__init__.py +3 -0
- package/elizaos/advanced_memory/evaluators.py +97 -0
- package/elizaos/advanced_memory/memory_service.py +556 -0
- package/elizaos/advanced_memory/plugin.py +30 -0
- package/elizaos/advanced_memory/prompts.py +12 -0
- package/elizaos/advanced_memory/providers.py +90 -0
- package/elizaos/advanced_memory/types.py +65 -0
- package/elizaos/advanced_planning/__init__.py +10 -0
- package/elizaos/advanced_planning/actions.py +145 -0
- package/elizaos/advanced_planning/message_classifier.py +127 -0
- package/elizaos/advanced_planning/planning_service.py +712 -0
- package/elizaos/advanced_planning/plugin.py +40 -0
- package/elizaos/advanced_planning/prompts.py +4 -0
- package/elizaos/basic_capabilities/__init__.py +66 -0
- package/elizaos/basic_capabilities/actions/__init__.py +24 -0
- package/elizaos/basic_capabilities/actions/choice.py +140 -0
- package/elizaos/basic_capabilities/actions/ignore.py +66 -0
- package/elizaos/basic_capabilities/actions/none.py +56 -0
- package/elizaos/basic_capabilities/actions/reply.py +120 -0
- package/elizaos/basic_capabilities/providers/__init__.py +54 -0
- package/elizaos/basic_capabilities/providers/action_state.py +113 -0
- package/elizaos/basic_capabilities/providers/actions.py +263 -0
- package/elizaos/basic_capabilities/providers/attachments.py +76 -0
- package/elizaos/basic_capabilities/providers/capabilities.py +62 -0
- package/elizaos/basic_capabilities/providers/character.py +113 -0
- package/elizaos/basic_capabilities/providers/choice.py +73 -0
- package/elizaos/basic_capabilities/providers/context_bench.py +44 -0
- package/elizaos/basic_capabilities/providers/current_time.py +58 -0
- package/elizaos/basic_capabilities/providers/entities.py +99 -0
- package/elizaos/basic_capabilities/providers/evaluators.py +54 -0
- package/elizaos/basic_capabilities/providers/providers_list.py +55 -0
- package/elizaos/basic_capabilities/providers/recent_messages.py +85 -0
- package/elizaos/basic_capabilities/providers/time.py +45 -0
- package/elizaos/basic_capabilities/providers/world.py +93 -0
- package/elizaos/basic_capabilities/services/__init__.py +18 -0
- package/elizaos/basic_capabilities/services/embedding.py +122 -0
- package/elizaos/basic_capabilities/services/task.py +178 -0
- package/elizaos/bootstrap/__init__.py +12 -0
- package/elizaos/bootstrap/actions/__init__.py +68 -0
- package/elizaos/bootstrap/actions/add_contact.py +149 -0
- package/elizaos/bootstrap/actions/choice.py +147 -0
- package/elizaos/bootstrap/actions/follow_room.py +151 -0
- package/elizaos/bootstrap/actions/ignore.py +80 -0
- package/elizaos/bootstrap/actions/image_generation.py +135 -0
- package/elizaos/bootstrap/actions/mute_room.py +151 -0
- package/elizaos/bootstrap/actions/none.py +71 -0
- package/elizaos/bootstrap/actions/remove_contact.py +159 -0
- package/elizaos/bootstrap/actions/reply.py +140 -0
- package/elizaos/bootstrap/actions/roles.py +193 -0
- package/elizaos/bootstrap/actions/schedule_follow_up.py +164 -0
- package/elizaos/bootstrap/actions/search_contacts.py +159 -0
- package/elizaos/bootstrap/actions/send_message.py +173 -0
- package/elizaos/bootstrap/actions/settings.py +165 -0
- package/elizaos/bootstrap/actions/unfollow_room.py +151 -0
- package/elizaos/bootstrap/actions/unmute_room.py +151 -0
- package/elizaos/bootstrap/actions/update_contact.py +178 -0
- package/elizaos/bootstrap/actions/update_entity.py +175 -0
- package/elizaos/bootstrap/autonomy/__init__.py +18 -0
- package/elizaos/bootstrap/autonomy/action.py +197 -0
- package/elizaos/bootstrap/autonomy/providers.py +165 -0
- package/elizaos/bootstrap/autonomy/routes.py +171 -0
- package/elizaos/bootstrap/autonomy/service.py +562 -0
- package/elizaos/bootstrap/autonomy/types.py +18 -0
- package/elizaos/bootstrap/evaluators/__init__.py +19 -0
- package/elizaos/bootstrap/evaluators/reflection.py +118 -0
- package/elizaos/bootstrap/evaluators/relationship_extraction.py +192 -0
- package/elizaos/bootstrap/plugin.py +140 -0
- package/elizaos/bootstrap/providers/__init__.py +80 -0
- package/elizaos/bootstrap/providers/action_state.py +71 -0
- package/elizaos/bootstrap/providers/actions.py +256 -0
- package/elizaos/bootstrap/providers/agent_settings.py +63 -0
- package/elizaos/bootstrap/providers/attachments.py +76 -0
- package/elizaos/bootstrap/providers/capabilities.py +66 -0
- package/elizaos/bootstrap/providers/character.py +128 -0
- package/elizaos/bootstrap/providers/choice.py +77 -0
- package/elizaos/bootstrap/providers/contacts.py +78 -0
- package/elizaos/bootstrap/providers/context_bench.py +49 -0
- package/elizaos/bootstrap/providers/current_time.py +56 -0
- package/elizaos/bootstrap/providers/entities.py +99 -0
- package/elizaos/bootstrap/providers/evaluators.py +58 -0
- package/elizaos/bootstrap/providers/facts.py +86 -0
- package/elizaos/bootstrap/providers/follow_ups.py +116 -0
- package/elizaos/bootstrap/providers/knowledge.py +73 -0
- package/elizaos/bootstrap/providers/providers_list.py +59 -0
- package/elizaos/bootstrap/providers/recent_messages.py +85 -0
- package/elizaos/bootstrap/providers/relationships.py +106 -0
- package/elizaos/bootstrap/providers/roles.py +95 -0
- package/elizaos/bootstrap/providers/settings.py +55 -0
- package/elizaos/bootstrap/providers/time.py +45 -0
- package/elizaos/bootstrap/providers/world.py +97 -0
- package/elizaos/bootstrap/services/__init__.py +26 -0
- package/elizaos/bootstrap/services/embedding.py +122 -0
- package/elizaos/bootstrap/services/follow_up.py +138 -0
- package/elizaos/bootstrap/services/rolodex.py +244 -0
- package/elizaos/bootstrap/services/task.py +585 -0
- package/elizaos/bootstrap/types.py +54 -0
- package/elizaos/bootstrap/utils/__init__.py +7 -0
- package/elizaos/bootstrap/utils/xml.py +69 -0
- package/elizaos/character.py +149 -0
- package/elizaos/logger.py +179 -0
- package/elizaos/media/__init__.py +45 -0
- package/elizaos/media/mime.py +315 -0
- package/elizaos/media/search.py +161 -0
- package/elizaos/media/tests/__init__.py +1 -0
- package/elizaos/media/tests/test_mime.py +117 -0
- package/elizaos/media/tests/test_search.py +156 -0
- package/elizaos/plugin.py +191 -0
- package/elizaos/prompts.py +1071 -0
- package/elizaos/py.typed +0 -0
- package/elizaos/runtime.py +2572 -0
- package/elizaos/services/__init__.py +49 -0
- package/elizaos/services/hook_service.py +511 -0
- package/elizaos/services/message_service.py +1248 -0
- package/elizaos/settings.py +182 -0
- package/elizaos/streaming_context.py +159 -0
- package/elizaos/trajectory_context.py +18 -0
- package/elizaos/types/__init__.py +512 -0
- package/elizaos/types/agent.py +31 -0
- package/elizaos/types/components.py +208 -0
- package/elizaos/types/database.py +64 -0
- package/elizaos/types/environment.py +46 -0
- package/elizaos/types/events.py +47 -0
- package/elizaos/types/memory.py +45 -0
- package/elizaos/types/model.py +393 -0
- package/elizaos/types/plugin.py +188 -0
- package/elizaos/types/primitives.py +100 -0
- package/elizaos/types/runtime.py +460 -0
- package/elizaos/types/service.py +113 -0
- package/elizaos/types/service_interfaces.py +244 -0
- package/elizaos/types/state.py +188 -0
- package/elizaos/types/task.py +29 -0
- package/elizaos/utils/__init__.py +108 -0
- package/elizaos/utils/spec_examples.py +48 -0
- package/elizaos/utils/streaming.py +426 -0
- package/elizaos_atropos_shared/__init__.py +1 -0
- package/elizaos_atropos_shared/canonical_eliza.py +282 -0
- package/package.json +19 -0
- package/pyproject.toml +143 -0
- package/requirements-dev.in +11 -0
- package/requirements-dev.lock +134 -0
- package/requirements.in +9 -0
- package/requirements.lock +64 -0
- package/tests/__init__.py +0 -0
- package/tests/test_action_parameters.py +154 -0
- package/tests/test_actions_provider_examples.py +39 -0
- package/tests/test_advanced_memory_behavior.py +96 -0
- package/tests/test_advanced_memory_flag.py +30 -0
- package/tests/test_advanced_planning_behavior.py +225 -0
- package/tests/test_advanced_planning_flag.py +26 -0
- package/tests/test_autonomy.py +445 -0
- package/tests/test_bootstrap_initialize.py +37 -0
- package/tests/test_character.py +163 -0
- package/tests/test_character_provider.py +231 -0
- package/tests/test_dynamic_prompt_exec.py +561 -0
- package/tests/test_logger_redaction.py +43 -0
- package/tests/test_plugin.py +117 -0
- package/tests/test_runtime.py +422 -0
- package/tests/test_salt_production_enforcement.py +22 -0
- package/tests/test_settings_crypto.py +118 -0
- package/tests/test_streaming.py +295 -0
- package/tests/test_types.py +221 -0
- package/tests/test_uuid_parity.py +46 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MIME type detection and media utilities for Eliza.
|
|
3
|
+
|
|
4
|
+
Provides robust MIME type detection from file buffers, headers, and extensions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
# Try to import python-magic for MIME sniffing, fallback to filetype
|
|
14
|
+
try:
|
|
15
|
+
import magic
|
|
16
|
+
|
|
17
|
+
HAS_MAGIC = True
|
|
18
|
+
except ImportError:
|
|
19
|
+
HAS_MAGIC = False
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
import filetype
|
|
23
|
+
|
|
24
|
+
HAS_FILETYPE = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
HAS_FILETYPE = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MediaKind(str, Enum):
    """Coarse category a piece of media is sorted into.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``MediaKind.IMAGE == "image"``).
    """

    # Categories that map directly onto a MIME top-level type.
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"

    # Document-like content.
    DOCUMENT = "document"

    # Fallback when no other category applies.
    UNKNOWN = "unknown"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Preferred file extension (leading dot included) for each known MIME type.
EXT_BY_MIME: dict[str, str] = {
    # Images
    "image/heic": ".heic",
    "image/heif": ".heif",
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
    "image/gif": ".gif",
    # Audio
    "audio/ogg": ".ogg",
    "audio/mpeg": ".mp3",
    "audio/x-m4a": ".m4a",
    "audio/mp4": ".m4a",
    # Video
    "video/mp4": ".mp4",
    "video/quicktime": ".mov",
    # Generic application payloads and archives
    "application/pdf": ".pdf",
    "application/json": ".json",
    "application/zip": ".zip",
    "application/gzip": ".gz",
    "application/x-tar": ".tar",
    "application/x-7z-compressed": ".7z",
    "application/vnd.rar": ".rar",
    # Office documents (legacy and OpenXML)
    "application/msword": ".doc",
    "application/vnd.ms-excel": ".xls",
    "application/vnd.ms-powerpoint": ".ppt",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
    # Text
    "text/csv": ".csv",
    "text/plain": ".txt",
    "text/markdown": ".md",
}

# Inverse lookup: extension -> MIME type. When two MIME types share an
# extension the one listed later above wins (".m4a" resolves to "audio/mp4").
# ".jpeg" is added as an alias because only ".jpg" appears in EXT_BY_MIME.
MIME_BY_EXT: dict[str, str] = {
    **{extension: mime_type for mime_type, extension in EXT_BY_MIME.items()},
    ".jpeg": "image/jpeg",
}

# Extensions treated as audio files by is_audio_filename().
AUDIO_FILE_EXTENSIONS: set[str] = {
    ".aac",
    ".flac",
    ".m4a",
    ".mp3",
    ".oga",
    ".ogg",
    ".opus",
    ".wav",
}

# Subset of audio extensions considered voice-compatible (Opus/Ogg).
VOICE_AUDIO_EXTENSIONS: set[str] = {".oga", ".ogg", ".opus"}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _normalize_header_mime(mime: str | None) -> str | None:
|
|
83
|
+
"""Normalize a MIME type from HTTP headers."""
|
|
84
|
+
if not mime:
|
|
85
|
+
return None
|
|
86
|
+
cleaned = mime.split(";")[0].strip().lower()
|
|
87
|
+
return cleaned or None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _sniff_mime(buffer: bytes | None) -> str | None:
|
|
91
|
+
"""Detect MIME type from a buffer using magic bytes."""
|
|
92
|
+
if not buffer:
|
|
93
|
+
return None
|
|
94
|
+
|
|
95
|
+
# Try python-magic first
|
|
96
|
+
if HAS_MAGIC:
|
|
97
|
+
try:
|
|
98
|
+
mime = magic.from_buffer(buffer, mime=True)
|
|
99
|
+
return mime if mime else None
|
|
100
|
+
except Exception:
|
|
101
|
+
pass
|
|
102
|
+
|
|
103
|
+
# Fallback to filetype
|
|
104
|
+
if HAS_FILETYPE:
|
|
105
|
+
try:
|
|
106
|
+
kind = filetype.guess(buffer)
|
|
107
|
+
return kind.mime if kind else None
|
|
108
|
+
except Exception:
|
|
109
|
+
pass
|
|
110
|
+
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_file_extension(file_path: str | None) -> str | None:
    """
    Get the file extension from a path or URL.

    Only the final path component is inspected, so dots that appear in
    directory names or in a URL's host name are never mistaken for an
    extension. For ``http(s)`` URLs the query string and fragment are ignored.

    Args:
        file_path: File path or URL

    Returns:
        Lower-cased file extension including the dot (e.g., ".jpg"), or None
        when the final path component has no extension
    """
    if not file_path:
        return None

    path = file_path
    if re.match(r"^https?://", file_path, re.IGNORECASE):
        try:
            path = urlparse(file_path).path
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): treat the raw
            # string as a plain path instead of failing.
            path = file_path

    # Keep only the last component; accept both POSIX and Windows separators.
    name = re.split(r"[\\/]", path)[-1]
    _stem, dot, ext = name.rpartition(".")
    if not dot or not ext:
        # No dot at all, or a trailing dot with nothing after it.
        return None
    return f".{ext.lower()}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _is_generic_mime(mime: str | None) -> bool:
|
|
146
|
+
"""Check if a MIME type is generic/container type."""
|
|
147
|
+
if not mime:
|
|
148
|
+
return True
|
|
149
|
+
m = mime.lower()
|
|
150
|
+
return m == "application/octet-stream" or m == "application/zip"
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def detect_mime(
    buffer: bytes | None = None,
    header_mime: str | None = None,
    file_path: str | None = None,
) -> str | None:
    """
    Work out a MIME type from file contents, an HTTP header and/or a path.

    Resolution order:
      1. The type sniffed from ``buffer``, unless it is a generic container
         type and the file extension maps to a known type (e.g. XLSX vs ZIP),
         in which case the extension mapping is used.
      2. The type mapped from the file extension.
      3. The normalized header value.

    Args:
        buffer: File contents for magic byte detection
        header_mime: MIME type from HTTP headers
        file_path: File path or URL for extension-based detection

    Returns:
        Detected MIME type or None
    """
    extension = get_file_extension(file_path)
    from_extension = MIME_BY_EXT.get(extension) if extension else None
    from_header = _normalize_header_mime(header_mime)
    from_content = _sniff_mime(buffer)

    if from_content:
        # A generic sniff result only yields to a known extension mapping;
        # it is never overridden by the header.
        if from_extension and _is_generic_mime(from_content):
            return from_extension
        return from_content

    # Nothing sniffed: the header is consulted only when the extension is
    # unknown, and is returned as-is even when it is a generic type.
    return from_extension or from_header
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def extension_for_mime(mime: str | None) -> str | None:
    """
    Look up the preferred file extension for a MIME type.

    The lookup is case-insensitive and uses the EXT_BY_MIME table.

    Args:
        mime: MIME type string

    Returns:
        File extension including the dot (e.g., ".jpg"), or None when the
        input is missing or the type is not in the table
    """
    return EXT_BY_MIME.get(mime.lower()) if mime else None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def is_audio_filename(filename: str | None) -> bool:
    """
    Decide from the extension alone whether a file looks like audio.

    Args:
        filename: File name or path

    Returns:
        True if the extension is listed in AUDIO_FILE_EXTENSIONS
    """
    extension = get_file_extension(filename)
    if not extension:
        return False
    return extension in AUDIO_FILE_EXTENSIONS
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def is_gif_media(
    content_type: str | None = None,
    filename: str | None = None,
) -> bool:
    """
    Decide whether a piece of media is a GIF.

    The content type is checked first (case-insensitive exact match against
    "image/gif"); the file extension is only consulted when that fails.

    Args:
        content_type: MIME type / content type header
        filename: File name or path

    Returns:
        True if either the content type or the extension identifies a GIF
    """
    if (content_type or "").lower() == "image/gif":
        return True
    return get_file_extension(filename) == ".gif"
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def is_voice_compatible_audio(
    content_type: str | None = None,
    filename: str | None = None,
) -> bool:
    """
    Decide whether audio is in a voice-compatible (Opus/Ogg) format.

    A content type containing "ogg" or "opus" (case-insensitive substring
    match) is accepted outright; otherwise the file extension must be listed
    in VOICE_AUDIO_EXTENSIONS.

    Args:
        content_type: MIME type / content type header
        filename: File name or path

    Returns:
        True if the audio is in Opus/Ogg format
    """
    lowered = (content_type or "").lower()
    if "ogg" in lowered or "opus" in lowered:
        return True

    extension = get_file_extension(filename)
    return bool(extension) and extension in VOICE_AUDIO_EXTENSIONS
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def media_kind_from_mime(mime: str | None) -> MediaKind:
    """
    Classify a MIME type into a MediaKind.

    Matching is done on lower-cased prefixes: ``image/``, ``audio/`` and
    ``video/`` map to their own kinds; PDF, MS Office, OpenXML and any
    ``text/`` type map to DOCUMENT.

    Args:
        mime: MIME type string

    Returns:
        MediaKind enum value (UNKNOWN for missing or unrecognized input)
    """
    if not mime:
        return MediaKind.UNKNOWN

    lowered = mime.lower()

    # Top-level types that translate directly into a kind.
    for prefix, kind in (
        ("image/", MediaKind.IMAGE),
        ("audio/", MediaKind.AUDIO),
        ("video/", MediaKind.VIDEO),
    ):
        if lowered.startswith(prefix):
            return kind

    document_prefixes = (
        "application/pdf",
        "application/msword",
        "application/vnd.ms-",
        "application/vnd.openxmlformats",
        "text/",
    )
    if lowered.startswith(document_prefixes):
        return MediaKind.DOCUMENT

    return MediaKind.UNKNOWN
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def image_mime_from_format(format_name: str | None) -> str | None:
    """
    Translate an image format name into its MIME type.

    The name is matched case-insensitively. Recognized formats are jpg, jpeg,
    heic, heif, png, webp and gif.

    Args:
        format_name: Image format name (e.g., "jpg", "png")

    Returns:
        MIME type string, or None for missing or unrecognized formats
    """
    if not format_name:
        return None

    key = format_name.lower()
    if key in ("jpg", "jpeg"):
        # Both spellings share one MIME type.
        return "image/jpeg"
    if key in ("heic", "heif", "png", "webp", "gif"):
        # For these formats the subtype equals the format name.
        return f"image/{key}"
    return None
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hybrid search utilities for combining vector and keyword search results.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class HybridVectorResult:
    """A single hit returned by vector similarity search."""

    # Key used by merge_hybrid_results() to pair this hit with a keyword hit.
    id: str
    # Location fields (presumably file path and line span of the matched chunk -- confirm with the producer).
    path: str
    start_line: int
    end_line: int
    # Carried through to the merged result unchanged.
    source: str
    snippet: str
    # Similarity score from the vector search; multiplied by vector_weight when merging.
    vector_score: float
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class HybridKeywordResult:
    """A single hit returned by keyword (BM25) search."""

    # Key used by merge_hybrid_results() to pair this hit with a vector hit.
    id: str
    # Location fields (presumably file path and line span of the matched chunk -- confirm with the producer).
    path: str
    start_line: int
    end_line: int
    # Carried through to the merged result.
    source: str
    # When non-empty, replaces the vector hit's snippet during merging.
    snippet: str
    # Keyword relevance score; multiplied by text_weight when merging.
    text_score: float
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class HybridMergedResult:
    """One entry of the combined (vector + keyword) result list."""

    # Location fields copied from the originating hit.
    path: str
    start_line: int
    end_line: int
    # Weighted sum of the vector and text scores computed by merge_hybrid_results().
    score: float
    # Snippet and source label copied from the originating hit(s).
    snippet: str
    source: str
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_fts_query(raw: str) -> str | None:
    """
    Turn free-form search text into a strict FTS (Full-Text Search) query.

    Runs of ASCII letters, digits and underscores are taken as terms; every
    other character acts as a separator. Each term is wrapped in double
    quotes and the terms are joined with AND, so all of them must match.

    Args:
        raw: The raw search query string

    Returns:
        The FTS query string, or None if no valid tokens found
    """
    # Terms can never contain whitespace or quote characters, so no further
    # cleaning is required before quoting them.
    terms = re.findall(r"[A-Za-z0-9_]+", raw)
    if not terms:
        return None
    return " AND ".join(f'"{term}"' for term in terms)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def bm25_rank_to_score(rank: float) -> float:
    """
    Map a BM25 rank onto a score in the range (0, 1].

    The score is ``1 / (1 + rank)``, so a lower rank gives a higher score.
    Negative ranks are clamped to 0 and non-finite ranks (NaN, +/-inf) are
    treated as 999.

    NOTE(review): because of the clamp every negative rank scores exactly
    1.0. Some engines (e.g. SQLite FTS5 ``bm25()``) report better matches as
    more negative numbers -- confirm the rank convention used by callers.

    Args:
        rank: The BM25 rank value

    Returns:
        A normalized score where 1 is best and values near 0 are worst
    """
    import math

    if math.isfinite(rank):
        distance = max(0.0, rank)
    else:
        # NaN / infinity: push the result close to zero.
        distance = 999.0
    return 1.0 / (1.0 + distance)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def merge_hybrid_results(
    vector: list[HybridVectorResult],
    keyword: list[HybridKeywordResult],
    vector_weight: float = 0.7,
    text_weight: float = 0.3,
) -> list[HybridMergedResult]:
    """
    Combine vector-similarity hits and keyword (BM25) hits into one ranking.

    Hits are paired by ``id``. Each merged entry is scored as
    ``vector_weight * vector_score + text_weight * text_score``, where a
    score missing from one side counts as 0.0, so entries found by both
    searches rank higher. When a hit appears in both lists, the location and
    source come from the vector hit and the snippet from the keyword hit
    (if it has one).

    Args:
        vector: Results from vector similarity search
        keyword: Results from keyword (BM25) search
        vector_weight: Weight for vector similarity scores (default: 0.7)
        text_weight: Weight for keyword/text scores (default: 0.3)

    Returns:
        Merged results sorted by combined score, highest first
    """
    # Seed the table with the vector hits; a repeated id keeps its first
    # position but takes the values of the last occurrence.
    fused: dict[str, dict] = {
        hit.id: {
            "id": hit.id,
            "path": hit.path,
            "start_line": hit.start_line,
            "end_line": hit.end_line,
            "source": hit.source,
            "snippet": hit.snippet,
            "vector_score": hit.vector_score,
            "text_score": 0.0,
        }
        for hit in vector
    }

    for hit in keyword:
        entry = fused.get(hit.id)
        if entry is None:
            # Keyword-only hit: no vector contribution.
            fused[hit.id] = {
                "id": hit.id,
                "path": hit.path,
                "start_line": hit.start_line,
                "end_line": hit.end_line,
                "source": hit.source,
                "snippet": hit.snippet,
                "vector_score": 0.0,
                "text_score": hit.text_score,
            }
            continue

        entry["text_score"] = hit.text_score
        # Prefer keyword snippet if available (may have highlights)
        if hit.snippet:
            entry["snippet"] = hit.snippet

    combined = [
        HybridMergedResult(
            path=entry["path"],
            start_line=entry["start_line"],
            end_line=entry["end_line"],
            score=vector_weight * entry["vector_score"] + text_weight * entry["text_score"],
            snippet=entry["snippet"],
            source=entry["source"],
        )
        for entry in fused.values()
    ]

    # Stable sort: entries with equal scores keep their insertion order.
    return sorted(combined, key=lambda item: item.score, reverse=True)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Tests for media utilities
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Tests for MIME utilities."""
|
|
2
|
+
|
|
3
|
+
from elizaos.media.mime import (
|
|
4
|
+
MediaKind,
|
|
5
|
+
extension_for_mime,
|
|
6
|
+
get_file_extension,
|
|
7
|
+
image_mime_from_format,
|
|
8
|
+
is_audio_filename,
|
|
9
|
+
is_gif_media,
|
|
10
|
+
is_voice_compatible_audio,
|
|
11
|
+
media_kind_from_mime,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestGetFileExtension:
    """Checks for get_file_extension on plain paths, URLs and missing input."""

    def test_simple_path(self):
        """A bare file name yields its extension with the leading dot."""
        result = get_file_extension("test.jpg")
        assert result == ".jpg"

    def test_path_with_directory(self):
        """Directories are ignored and the extension is lower-cased."""
        result = get_file_extension("path/to/file.PNG")
        assert result == ".png"

    def test_no_extension(self):
        """A name without a dot has no extension."""
        result = get_file_extension("noext")
        assert result is None

    def test_none_input(self):
        """None is passed through as None."""
        result = get_file_extension(None)
        assert result is None

    def test_url(self):
        """The extension is read from the path part of a URL."""
        result = get_file_extension("https://example.com/image.webp")
        assert result == ".webp"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TestMediaKindFromMime:
    """Checks that media_kind_from_mime maps MIME types to the right MediaKind."""

    def test_image(self):
        """image/* types are images."""
        for mime in ("image/jpeg", "image/png"):
            assert media_kind_from_mime(mime) == MediaKind.IMAGE

    def test_audio(self):
        """audio/* types are audio."""
        for mime in ("audio/mp3", "audio/ogg"):
            assert media_kind_from_mime(mime) == MediaKind.AUDIO

    def test_video(self):
        """video/* types are video."""
        kind = media_kind_from_mime("video/mp4")
        assert kind == MediaKind.VIDEO

    def test_document(self):
        """PDF and text types are documents."""
        for mime in ("application/pdf", "text/plain"):
            assert media_kind_from_mime(mime) == MediaKind.DOCUMENT

    def test_unknown(self):
        """Generic binary data and missing input are unknown."""
        for mime in ("application/octet-stream", None):
            assert media_kind_from_mime(mime) == MediaKind.UNKNOWN
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TestIsAudioFilename:
    """Checks for is_audio_filename."""

    def test_audio_files(self):
        """Names with audio extensions are recognized."""
        for name in ("song.mp3", "voice.ogg", "music.wav"):
            assert is_audio_filename(name) is True

    def test_non_audio_files(self):
        """Image and document names are rejected."""
        for name in ("image.jpg", "document.pdf"):
            assert is_audio_filename(name) is False

    def test_none(self):
        """Missing input is not audio."""
        outcome = is_audio_filename(None)
        assert outcome is False
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class TestIsGifMedia:
    """Checks for is_gif_media."""

    def test_gif_content_type(self):
        """The GIF content type alone is sufficient."""
        outcome = is_gif_media(content_type="image/gif")
        assert outcome is True

    def test_gif_filename(self):
        """A .gif file name alone is sufficient."""
        outcome = is_gif_media(filename="animation.gif")
        assert outcome is True

    def test_non_gif(self):
        """Neither a JPEG content type nor a .jpg name counts as GIF."""
        by_type = is_gif_media(content_type="image/jpeg")
        assert by_type is False
        by_name = is_gif_media(filename="photo.jpg")
        assert by_name is False
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TestIsVoiceCompatibleAudio:
    """Checks for is_voice_compatible_audio."""

    def test_ogg_content_type(self):
        """Ogg and Opus content types are voice-compatible."""
        for mime in ("audio/ogg", "audio/opus"):
            assert is_voice_compatible_audio(content_type=mime) is True

    def test_ogg_filename(self):
        """.ogg and .opus file names are voice-compatible."""
        for name in ("voice.ogg", "voice.opus"):
            assert is_voice_compatible_audio(filename=name) is True

    def test_non_voice_compatible(self):
        """MP3 is rejected by content type and by file name."""
        by_type = is_voice_compatible_audio(content_type="audio/mp3")
        assert by_type is False
        by_name = is_voice_compatible_audio(filename="song.mp3")
        assert by_name is False
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class TestExtensionForMime:
    """Checks for extension_for_mime."""

    def test_known_mimes(self):
        """Known MIME types map to their preferred extension."""
        expected = (
            ("image/jpeg", ".jpg"),
            ("audio/mpeg", ".mp3"),
            ("video/mp4", ".mp4"),
        )
        for mime, extension in expected:
            assert extension_for_mime(mime) == extension

    def test_none(self):
        """Missing input yields None."""
        result = extension_for_mime(None)
        assert result is None

    def test_unknown_mime(self):
        """A MIME type that is not in the table yields None."""
        result = extension_for_mime("application/x-unknown")
        assert result is None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class TestImageMimeFromFormat:
    """Checks for image_mime_from_format."""

    def test_known_formats(self):
        """Known format names map to their image MIME type."""
        expected = (
            ("jpg", "image/jpeg"),
            ("jpeg", "image/jpeg"),
            ("png", "image/png"),
            ("gif", "image/gif"),
        )
        for fmt, mime in expected:
            assert image_mime_from_format(fmt) == mime

    def test_none(self):
        """Missing input yields None."""
        result = image_mime_from_format(None)
        assert result is None

    def test_unknown_format(self):
        """A format that is not in the mapping yields None."""
        result = image_mime_from_format("bmp")
        assert result is None
|