remdb 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -25
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/mcp/tool_wrapper.py +112 -17
  9. rem/agentic/otel/setup.py +93 -4
  10. rem/agentic/providers/phoenix.py +314 -132
  11. rem/agentic/providers/pydantic_ai.py +215 -26
  12. rem/agentic/schema.py +361 -21
  13. rem/agentic/tools/rem_tools.py +3 -3
  14. rem/api/README.md +238 -1
  15. rem/api/deps.py +255 -0
  16. rem/api/main.py +154 -37
  17. rem/api/mcp_router/resources.py +1 -1
  18. rem/api/mcp_router/server.py +26 -5
  19. rem/api/mcp_router/tools.py +465 -7
  20. rem/api/middleware/tracking.py +172 -0
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +124 -0
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +642 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/middleware.py +126 -27
  35. rem/cli/commands/README.md +237 -64
  36. rem/cli/commands/ask.py +13 -10
  37. rem/cli/commands/cluster.py +1808 -0
  38. rem/cli/commands/configure.py +5 -6
  39. rem/cli/commands/db.py +396 -139
  40. rem/cli/commands/experiments.py +469 -74
  41. rem/cli/commands/process.py +22 -15
  42. rem/cli/commands/scaffold.py +47 -0
  43. rem/cli/commands/schema.py +97 -50
  44. rem/cli/main.py +29 -6
  45. rem/config.py +10 -3
  46. rem/models/core/core_model.py +7 -1
  47. rem/models/core/experiment.py +54 -0
  48. rem/models/core/rem_query.py +5 -2
  49. rem/models/entities/__init__.py +21 -0
  50. rem/models/entities/domain_resource.py +38 -0
  51. rem/models/entities/feedback.py +123 -0
  52. rem/models/entities/message.py +30 -1
  53. rem/models/entities/session.py +83 -0
  54. rem/models/entities/shared_session.py +180 -0
  55. rem/models/entities/user.py +10 -3
  56. rem/registry.py +373 -0
  57. rem/schemas/agents/rem.yaml +7 -3
  58. rem/services/content/providers.py +92 -133
  59. rem/services/content/service.py +92 -20
  60. rem/services/dreaming/affinity_service.py +2 -16
  61. rem/services/dreaming/moment_service.py +2 -15
  62. rem/services/embeddings/api.py +24 -17
  63. rem/services/embeddings/worker.py +16 -16
  64. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  65. rem/services/phoenix/client.py +302 -28
  66. rem/services/postgres/README.md +159 -15
  67. rem/services/postgres/__init__.py +2 -1
  68. rem/services/postgres/diff_service.py +531 -0
  69. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  70. rem/services/postgres/repository.py +132 -0
  71. rem/services/postgres/schema_generator.py +291 -9
  72. rem/services/postgres/service.py +6 -6
  73. rem/services/rate_limit.py +113 -0
  74. rem/services/rem/README.md +14 -0
  75. rem/services/rem/parser.py +44 -9
  76. rem/services/rem/service.py +36 -2
  77. rem/services/session/compression.py +24 -1
  78. rem/services/session/reload.py +1 -1
  79. rem/services/user_service.py +98 -0
  80. rem/settings.py +399 -29
  81. rem/sql/background_indexes.sql +21 -16
  82. rem/sql/migrations/001_install.sql +387 -54
  83. rem/sql/migrations/002_install_models.sql +2320 -393
  84. rem/sql/migrations/003_optional_extensions.sql +326 -0
  85. rem/sql/migrations/004_cache_system.sql +548 -0
  86. rem/utils/__init__.py +18 -0
  87. rem/utils/constants.py +97 -0
  88. rem/utils/date_utils.py +228 -0
  89. rem/utils/embeddings.py +17 -4
  90. rem/utils/files.py +167 -0
  91. rem/utils/mime_types.py +158 -0
  92. rem/utils/model_helpers.py +156 -1
  93. rem/utils/schema_loader.py +282 -35
  94. rem/utils/sql_paths.py +146 -0
  95. rem/utils/sql_types.py +3 -1
  96. rem/utils/vision.py +9 -14
  97. rem/workers/README.md +14 -14
  98. rem/workers/__init__.py +3 -1
  99. rem/workers/db_listener.py +579 -0
  100. rem/workers/db_maintainer.py +74 -0
  101. rem/workers/unlogged_maintainer.py +463 -0
  102. {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/METADATA +460 -303
  103. {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/RECORD +105 -74
  104. {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
  105. rem/sql/002_install_models.sql +0 -1068
  106. rem/sql/install_models.sql +0 -1038
  107. {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,228 @@
1
+ """
2
+ Centralized datetime utilities for consistent UTC-naive datetime handling.
3
+
4
+ IMPORTANT: REM uses UTC-naive datetimes throughout the codebase.
5
+ PostgreSQL stores TIMESTAMP WITHOUT TIME ZONE, so all Python datetime
6
+ operations should use UTC-naive datetimes to avoid comparison errors.
7
+
8
+ Convention:
9
+ - All timestamps are implicitly UTC
10
+ - Use utc_now() instead of datetime.utcnow() or datetime.now(timezone.utc)
11
+ - Use parse_iso() to parse ISO format strings (handles "Z" suffix)
12
+ - Use to_iso() to format datetimes as ISO strings
13
+
14
+ See CLAUDE.md Section 1 (Datetime Convention) for details.
15
+ """
16
+
17
+ from datetime import UTC, datetime, timedelta
18
+ from typing import Optional
19
+
20
+
21
+ def utc_now() -> datetime:
22
+ """
23
+ Get current UTC time as a naive datetime.
24
+
25
+ Returns:
26
+ UTC-naive datetime representing current time.
27
+
28
+ Example:
29
+ >>> now = utc_now()
30
+ >>> now.tzinfo is None
31
+ True
32
+ """
33
+ return datetime.now(UTC).replace(tzinfo=None)
34
+
35
+
36
def to_iso(dt: datetime) -> str:
    """
    Render a datetime as an ISO 8601 string.

    Args:
        dt: Datetime to format (should be UTC-naive)

    Returns:
        ISO format string (e.g., "2024-01-15T10:30:00")

    Example:
        >>> dt = datetime(2024, 1, 15, 10, 30, 0)
        >>> to_iso(dt)
        '2024-01-15T10:30:00'
    """
    iso_text = dt.isoformat()
    return iso_text
52
+
53
+
54
def to_iso_with_z(dt: datetime) -> str:
    """
    Convert datetime to ISO 8601 format with Z suffix.

    Use this when interfacing with external APIs that expect
    the Z suffix to indicate UTC.

    A timezone-aware input is first converted to UTC and stripped of its
    tzinfo. Without that step ``isoformat()`` would already include an
    offset and appending "Z" would yield an invalid string such as
    "2024-01-15T10:30:00+00:00Z".

    Args:
        dt: Datetime to format (UTC-naive, or timezone-aware)

    Returns:
        ISO format string with Z suffix (e.g., "2024-01-15T10:30:00Z")
    """
    if dt.tzinfo is not None and dt.utcoffset() is not None:
        # Local import keeps this block independent of the module's import list.
        from datetime import timezone

        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt.isoformat() + "Z"
68
+
69
+
70
def parse_iso(iso_string: str) -> datetime:
    """
    Turn an ISO 8601 string into a UTC-naive datetime.

    Accepted inputs:
        - Standard ISO format: "2024-01-15T10:30:00"
        - Z suffix: "2024-01-15T10:30:00Z"
        - Timezone offset: "2024-01-15T10:30:00+00:00" (converts to naive)
        - Microseconds: "2024-01-15T10:30:00.123456"

    Args:
        iso_string: ISO format datetime string

    Returns:
        UTC-naive datetime

    Raises:
        ValueError: If string cannot be parsed

    Example:
        >>> parse_iso("2024-01-15T10:30:00Z")
        datetime.datetime(2024, 1, 15, 10, 30)
        >>> parse_iso("2024-01-15T10:30:00+00:00")
        datetime.datetime(2024, 1, 15, 10, 30)
    """
    # A trailing "Z" is rewritten as an explicit zero offset before parsing.
    normalized = iso_string[:-1] + "+00:00" if iso_string.endswith("Z") else iso_string

    parsed = datetime.fromisoformat(normalized)

    # Naive values are taken to be UTC already and returned untouched.
    if parsed.tzinfo is None:
        return parsed

    # Aware values are shifted to UTC, then the tzinfo is dropped.
    from datetime import timezone

    return parsed.astimezone(timezone.utc).replace(tzinfo=None)
109
+
110
+
111
def parse_iso_safe(iso_string: Optional[str], default: Optional[datetime] = None) -> Optional[datetime]:
    """
    Safely parse ISO string, returning default on failure.

    Empty, None and non-string inputs short-circuit to ``default``. The
    non-string guard matters because ``parse_iso`` calls ``str`` methods
    on its argument, so a truthy non-string (for example an already-parsed
    datetime) would otherwise raise ``AttributeError``, which the handler
    below does not catch.

    Args:
        iso_string: ISO format string or None
        default: Default value if parsing fails

    Returns:
        Parsed datetime or default value
    """
    if not iso_string or not isinstance(iso_string, str):
        return default
    try:
        return parse_iso(iso_string)
    except (ValueError, TypeError):
        return default
128
+
129
+
130
def format_timestamp(dt: Optional[datetime] = None) -> str:
    """
    Produce a human-readable timestamp for display or log output.

    Args:
        dt: Datetime to format (defaults to current UTC time)

    Returns:
        Formatted string like "2024-01-15 10:30:00 UTC"
    """
    moment = utc_now() if dt is None else dt
    stamp = moment.strftime("%Y-%m-%d %H:%M:%S")
    return stamp + " UTC"
143
+
144
+
145
def format_timestamp_compact(dt: Optional[datetime] = None) -> str:
    """
    Produce a compact, separator-light timestamp suitable for filenames/IDs.

    Args:
        dt: Datetime to format (defaults to current UTC time)

    Returns:
        Formatted string like "20240115_103000"
    """
    moment = utc_now() if dt is None else dt
    return moment.strftime("%Y%m%d_%H%M%S")
158
+
159
+
160
def format_timestamp_for_experiment(dt: Optional[datetime] = None) -> str:
    """
    Produce the date-dash-time stamp used in experiment names.

    Args:
        dt: Datetime to format (defaults to current UTC time)

    Returns:
        Formatted string like "20240115-103000"
    """
    moment = utc_now() if dt is None else dt
    return moment.strftime("%Y%m%d-%H%M%S")
173
+
174
+
175
def days_ago(days: int) -> datetime:
    """
    Compute the moment that lies ``days`` days before the current UTC time.

    Args:
        days: Number of days ago

    Returns:
        UTC-naive datetime
    """
    offset = timedelta(days=days)
    return utc_now() - offset
186
+
187
+
188
def hours_ago(hours: int) -> datetime:
    """
    Compute the moment that lies ``hours`` hours before the current UTC time.

    Args:
        hours: Number of hours ago

    Returns:
        UTC-naive datetime
    """
    offset = timedelta(hours=hours)
    return utc_now() - offset
199
+
200
+
201
def is_within_hours(dt: datetime, hours: int) -> bool:
    """
    Report whether ``dt`` is at or after the point ``hours`` hours before now.

    Only the lower bound is checked, so any value later than the cutoff
    (including one later than the current time) yields True.

    Args:
        dt: Datetime to check (should be UTC-naive)
        hours: Number of hours

    Returns:
        True if dt is within the time window
    """
    return dt >= hours_ago(hours)
214
+
215
+
216
def is_within_days(dt: datetime, days: int) -> bool:
    """
    Report whether ``dt`` is at or after the point ``days`` days before now.

    Only the lower bound is checked, so any value later than the cutoff
    (including one later than the current time) yields True.

    Args:
        dt: Datetime to check (should be UTC-naive)
        days: Number of days

    Returns:
        True if dt is within the time window
    """
    return dt >= days_ago(days)
rem/utils/embeddings.py CHANGED
@@ -20,7 +20,6 @@ Usage:
20
20
  embeddings = generate_embeddings("openai:text-embedding-3-small", texts)
21
21
  """
22
22
 
23
- import os
24
23
  from typing import Any, cast
25
24
 
26
25
  import requests
@@ -31,6 +30,16 @@ from tenacity import (
31
30
  wait_exponential,
32
31
  )
33
32
 
33
+ from rem.utils.constants import (
34
+ HTTP_TIMEOUT_LONG,
35
+ OPENAI_EMBEDDING_DIMS_SMALL,
36
+ OPENAI_EMBEDDING_DIMS_LARGE,
37
+ VOYAGE_EMBEDDING_DIMS,
38
+ RETRY_BACKOFF_MULTIPLIER,
39
+ RETRY_BACKOFF_MIN,
40
+ RETRY_BACKOFF_MAX,
41
+ )
42
+
34
43
 
35
44
  class EmbeddingError(Exception):
36
45
  """Base exception for embedding generation errors."""
@@ -166,7 +175,11 @@ def _create_retry_decorator(max_retries: int):
166
175
  return retry(
167
176
  retry=retry_if_exception_type(RateLimitError),
168
177
  stop=stop_after_attempt(max_retries),
169
- wait=wait_exponential(multiplier=1, min=1, max=60),
178
+ wait=wait_exponential(
179
+ multiplier=RETRY_BACKOFF_MULTIPLIER,
180
+ min=RETRY_BACKOFF_MIN,
181
+ max=RETRY_BACKOFF_MAX,
182
+ ),
170
183
  reraise=True,
171
184
  )
172
185
 
@@ -234,7 +247,7 @@ def _generate_openai_embeddings(
234
247
  }
235
248
 
236
249
  try:
237
- response = requests.post(url, json=payload, headers=headers, timeout=60)
250
+ response = requests.post(url, json=payload, headers=headers, timeout=HTTP_TIMEOUT_LONG)
238
251
 
239
252
  # Handle rate limits
240
253
  if response.status_code == 429:
@@ -334,7 +347,7 @@ def _generate_voyage_embeddings(
334
347
  }
335
348
 
336
349
  try:
337
- response = requests.post(url, json=payload, headers=headers, timeout=60)
350
+ response = requests.post(url, json=payload, headers=headers, timeout=HTTP_TIMEOUT_LONG)
338
351
 
339
352
  # Handle rate limits
340
353
  if response.status_code == 429:
rem/utils/files.py ADDED
@@ -0,0 +1,167 @@
1
+ """
2
+ File utilities for consistent file handling throughout REM.
3
+
4
+ Provides context managers and helpers for temporary file operations,
5
+ ensuring proper cleanup and consistent patterns.
6
+ """
7
+
8
+ import tempfile
9
+ from contextlib import contextmanager
10
+ from pathlib import Path
11
+ from typing import Generator, Optional
12
+
13
+ from loguru import logger
14
+
15
+
16
@contextmanager
def temp_file_from_bytes(
    content: bytes,
    suffix: str = "",
    prefix: str = "rem_",
    dir: Optional[str] = None,
) -> Generator[Path, None, None]:
    """
    Create a temporary file from bytes, yield path, cleanup automatically.

    This context manager ensures proper cleanup of temporary files even
    if an exception occurs during processing, including a failure while
    the initial content is being written.

    Args:
        content: Bytes to write to the temporary file
        suffix: File extension (e.g., ".pdf", ".wav")
        prefix: Prefix for the temp file name
        dir: Directory for temp file (uses system temp if None)

    Yields:
        Path to the temporary file

    Example:
        >>> with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
        ...     result = process_pdf(tmp_path)
        # File is automatically cleaned up after the block

    Note:
        The file is created with delete=False so we control cleanup.
        This allows the file to be read by external processes.
    """
    tmp_path: Optional[Path] = None
    try:
        with tempfile.NamedTemporaryFile(
            suffix=suffix,
            prefix=prefix,
            dir=dir,
            delete=False,
        ) as tmp:
            # Record the path BEFORE writing: the file already exists on disk
            # at this point, so if write() raises the finally-clause must
            # still know which file to remove.
            tmp_path = Path(tmp.name)
            tmp.write(content)

        # The handle is closed here, so the content is flushed before use.
        yield tmp_path

    finally:
        if tmp_path is not None:
            try:
                tmp_path.unlink(missing_ok=True)
            except Exception as e:
                # Cleanup is best-effort; never mask the caller's exception.
                logger.warning(f"Failed to cleanup temp file {tmp_path}: {e}")
66
+
67
+
68
@contextmanager
def temp_file_empty(
    suffix: str = "",
    prefix: str = "rem_",
    dir: Optional[str] = None,
) -> Generator[Path, None, None]:
    """
    Yield the path of a freshly created, zero-length temporary file.

    The file is created closed and left on disk for the duration of the
    ``with`` block, then removed when the block exits. Handy when the
    caller or an external process will write to the file afterwards.

    Args:
        suffix: File extension
        prefix: Prefix for the temp file name
        dir: Directory for temp file

    Yields:
        Path to the empty temporary file
    """
    created: Optional[Path] = None
    try:
        # delete=False hands removal over to the finally-clause below.
        handle = tempfile.NamedTemporaryFile(
            suffix=suffix,
            prefix=prefix,
            dir=dir,
            delete=False,
        )
        created = Path(handle.name)
        handle.close()

        yield created

    finally:
        if created is not None:
            try:
                created.unlink(missing_ok=True)
            except Exception as exc:
                logger.warning(f"Failed to cleanup temp file {created}: {exc}")
106
+
107
+
108
@contextmanager
def temp_directory(
    prefix: str = "rem_",
    dir: Optional[str] = None,
) -> Generator[Path, None, None]:
    """
    Yield the path of a new temporary directory and remove it afterwards.

    The whole directory tree is deleted when the ``with`` block exits,
    whether it exits normally or through an exception.

    Args:
        prefix: Prefix for the temp directory name
        dir: Parent directory for temp directory

    Yields:
        Path to the temporary directory
    """
    import shutil

    workdir: Optional[Path] = None
    try:
        raw_path = tempfile.mkdtemp(prefix=prefix, dir=dir)
        workdir = Path(raw_path)
        yield workdir

    finally:
        if workdir is not None:
            try:
                # ignore_errors=True makes removal best-effort; the handler
                # below is only a last-resort guard.
                shutil.rmtree(workdir, ignore_errors=True)
            except Exception as exc:
                logger.warning(f"Failed to cleanup temp directory {workdir}: {exc}")
136
+
137
+
138
def ensure_parent_exists(path: Path) -> Path:
    """
    Create the directory that contains ``path`` if it is missing.

    Intermediate directories are created as needed, and an already
    existing directory is not an error. The file itself is not created.

    Args:
        path: File path whose parent should exist

    Returns:
        The original path (for chaining)
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    return path
150
+
151
+
152
def safe_delete(path: Path) -> bool:
    """
    Remove a file without raising, reporting the outcome as a boolean.

    A path that does not exist counts as success. Any exception raised
    by the removal is logged as a warning and reported as failure.

    Args:
        path: Path to delete

    Returns:
        True if deleted or didn't exist, False on error
    """
    try:
        path.unlink(missing_ok=True)
    except Exception as exc:
        logger.warning(f"Failed to delete {path}: {exc}")
        return False
    else:
        return True
@@ -0,0 +1,158 @@
1
+ """
2
+ Centralized MIME type mappings for file format detection.
3
+
4
+ Provides bidirectional mappings between file extensions and MIME types.
5
+ Use these constants throughout the codebase instead of inline dictionaries.
6
+ """
7
+
8
# Extension to MIME type mapping (extension includes leading dot).
# Keys are lowercase; several extensions may share one MIME type
# (e.g. ".jpg"/".jpeg", ".md"/".markdown", ".yaml"/".yml").
EXTENSION_TO_MIME: dict[str, str] = {
    # Images
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".tiff": "image/tiff",
    ".svg": "image/svg+xml",
    # Documents
    ".pdf": "application/pdf",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".doc": "application/msword",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    ".ppt": "application/vnd.ms-powerpoint",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".xls": "application/vnd.ms-excel",
    # Audio
    ".wav": "audio/wav",
    ".mp3": "audio/mpeg",
    ".m4a": "audio/x-m4a",
    ".flac": "audio/flac",
    ".ogg": "audio/ogg",
    ".aac": "audio/aac",
    # Video
    ".mp4": "video/mp4",
    ".webm": "video/webm",
    ".avi": "video/x-msvideo",
    ".mov": "video/quicktime",
    # Text/Code
    ".txt": "text/plain",
    ".md": "text/markdown",
    ".markdown": "text/markdown",
    ".json": "application/json",
    ".yaml": "application/x-yaml",
    ".yml": "application/x-yaml",
    ".xml": "application/xml",
    ".html": "text/html",
    ".css": "text/css",
    ".js": "application/javascript",
    ".py": "text/x-python",
    ".ts": "application/typescript",
    ".csv": "text/csv",
}
54
+
55
# MIME type to extension mapping. This is the reverse of the extension
# table with one canonical extension chosen per MIME type (".jpg", ".md",
# ".yaml"), plus aliases for MIME types that other tools commonly emit for
# the same formats (e.g. "audio/mp4", "text/javascript", "audio/x-wav").
MIME_TO_EXTENSION: dict[str, str] = {
    # Images
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/bmp": ".bmp",
    "image/tiff": ".tiff",
    "image/svg+xml": ".svg",
    # Documents
    "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/msword": ".doc",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
    "application/vnd.ms-powerpoint": ".ppt",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.ms-excel": ".xls",
    # Audio
    "audio/wav": ".wav",
    "audio/x-wav": ".wav",  # alias
    "audio/mpeg": ".mp3",
    "audio/x-m4a": ".m4a",
    "audio/mp4": ".m4a",
    "audio/flac": ".flac",
    "audio/ogg": ".ogg",
    "audio/aac": ".aac",
    # Video
    "video/mp4": ".mp4",
    "video/webm": ".webm",
    "video/x-msvideo": ".avi",
    "video/quicktime": ".mov",
    # Text/Code
    "text/plain": ".txt",
    "text/markdown": ".md",
    "application/json": ".json",
    "application/x-yaml": ".yaml",
    "application/yaml": ".yaml",  # alias: IANA-registered YAML media type
    "text/yaml": ".yaml",  # alias
    "application/xml": ".xml",
    "text/xml": ".xml",  # alias
    "text/html": ".html",
    "text/css": ".css",
    "application/javascript": ".js",
    "text/javascript": ".js",  # alias: current IANA-registered JavaScript type
    "text/x-python": ".py",
    "application/typescript": ".ts",
    "text/csv": ".csv",
}
99
+
100
# Grouped by category for convenience.
# Each set holds lowercase extensions with a leading dot; the is_image /
# is_audio / is_document helpers in this module test membership in the
# corresponding set.
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"}
DOCUMENT_EXTENSIONS = {".pdf", ".docx", ".doc", ".pptx", ".ppt", ".xlsx", ".xls"}
AUDIO_EXTENSIONS = {".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac"}
VIDEO_EXTENSIONS = {".mp4", ".webm", ".avi", ".mov"}
# Text and source-code formats, including structured data (JSON/YAML/XML/CSV).
TEXT_EXTENSIONS = {".txt", ".md", ".markdown", ".json", ".yaml", ".yml", ".xml", ".html", ".css", ".js", ".py", ".ts", ".csv"}
106
+
107
+
108
def get_extension(mime_type: str, default: str = ".bin") -> str:
    """
    Get file extension for a MIME type.

    The lookup is tolerant of real-world Content-Type values: parameters
    after ";" (e.g. "; charset=utf-8") are dropped, and the type is
    lower-cased because media type names are case-insensitive.

    Args:
        mime_type: MIME type string (e.g., "image/png" or
            "text/html; charset=utf-8")
        default: Default extension if MIME type not found

    Returns:
        File extension with leading dot (e.g., ".png")
    """
    # Keep only the "type/subtype" part and normalize it for the table lookup.
    essence = mime_type.split(";", 1)[0].strip().lower()
    return MIME_TO_EXTENSION.get(essence, default)
120
+
121
+
122
def get_mime_type(extension: str, default: str = "application/octet-stream") -> str:
    """
    Look up the MIME type registered for a file extension.

    The extension is matched case-insensitively and may be given with or
    without its leading dot.

    Args:
        extension: File extension with or without leading dot
        default: Default MIME type if extension not found

    Returns:
        MIME type string (e.g., "image/png")
    """
    lookup_key = extension.lower()
    # Table keys always carry a leading dot, so add one when it is missing.
    if not lookup_key.startswith("."):
        lookup_key = "." + lookup_key
    return EXTENSION_TO_MIME.get(lookup_key, default)
136
+
137
+
138
def is_image(extension_or_mime: str) -> bool:
    """
    Check if extension or MIME type represents an image.

    Matching is case-insensitive for both forms: a value starting with "."
    is treated as an extension and checked against IMAGE_EXTENSIONS, and
    anything else is treated as a MIME type and must start with "image/".

    Args:
        extension_or_mime: Extension with leading dot (e.g., ".png") or
            MIME type (e.g., "image/png")

    Returns:
        True if the value denotes an image format
    """
    # Normalize once so "Image/PNG" and ".PNG" behave the same way.
    value = extension_or_mime.lower()
    if value.startswith("."):
        return value in IMAGE_EXTENSIONS
    return value.startswith("image/")
143
+
144
+
145
def is_audio(extension_or_mime: str) -> bool:
    """
    Check if extension or MIME type represents audio.

    Matching is case-insensitive for both forms: a value starting with "."
    is treated as an extension and checked against AUDIO_EXTENSIONS, and
    anything else is treated as a MIME type and must start with "audio/".

    Args:
        extension_or_mime: Extension with leading dot (e.g., ".mp3") or
            MIME type (e.g., "audio/mpeg")

    Returns:
        True if the value denotes an audio format
    """
    # Normalize once so "Audio/MPEG" and ".MP3" behave the same way.
    value = extension_or_mime.lower()
    if value.startswith("."):
        return value in AUDIO_EXTENSIONS
    return value.startswith("audio/")
150
+
151
+
152
def is_document(extension_or_mime: str) -> bool:
    """
    Check if extension or MIME type represents a document.

    A value starting with "." is treated as an extension and checked
    against DOCUMENT_EXTENSIONS. Anything else is treated as a MIME type
    and matched against the PDF and legacy Microsoft Office types, or any
    OpenXML "officedocument" type. Matching is case-insensitive.

    Args:
        extension_or_mime: Extension with leading dot (e.g., ".pdf") or
            MIME type (e.g., "application/pdf")

    Returns:
        True if the value denotes a document format
    """
    value = extension_or_mime.lower()
    if value.startswith("."):
        return value in DOCUMENT_EXTENSIONS
    # Legacy Office MIME types are listed explicitly so the MIME branch agrees
    # with the extension branch for .doc/.xls/.ppt; the OpenXML formats
    # (.docx/.xlsx/.pptx) all contain "officedocument" in their type.
    doc_mimes = {
        "application/pdf",
        "application/msword",
        "application/vnd.ms-excel",
        "application/vnd.ms-powerpoint",
    }
    return value in doc_mimes or "officedocument" in value