MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (124) hide show
  1. memoryos-0.1.13.dist-info/METADATA +288 -0
  2. memoryos-0.1.13.dist-info/RECORD +122 -0
  3. memos/__init__.py +20 -1
  4. memos/api/start_api.py +420 -0
  5. memos/chunkers/__init__.py +4 -0
  6. memos/chunkers/base.py +24 -0
  7. memos/chunkers/factory.py +22 -0
  8. memos/chunkers/sentence_chunker.py +35 -0
  9. memos/configs/__init__.py +0 -0
  10. memos/configs/base.py +82 -0
  11. memos/configs/chunker.py +45 -0
  12. memos/configs/embedder.py +53 -0
  13. memos/configs/graph_db.py +45 -0
  14. memos/configs/internet_retriever.py +81 -0
  15. memos/configs/llm.py +71 -0
  16. memos/configs/mem_chat.py +81 -0
  17. memos/configs/mem_cube.py +89 -0
  18. memos/configs/mem_os.py +74 -0
  19. memos/configs/mem_reader.py +53 -0
  20. memos/configs/mem_scheduler.py +78 -0
  21. memos/configs/memory.py +195 -0
  22. memos/configs/parser.py +38 -0
  23. memos/configs/utils.py +8 -0
  24. memos/configs/vec_db.py +64 -0
  25. memos/deprecation.py +262 -0
  26. memos/embedders/__init__.py +0 -0
  27. memos/embedders/base.py +15 -0
  28. memos/embedders/factory.py +23 -0
  29. memos/embedders/ollama.py +74 -0
  30. memos/embedders/sentence_transformer.py +40 -0
  31. memos/exceptions.py +30 -0
  32. memos/graph_dbs/__init__.py +0 -0
  33. memos/graph_dbs/base.py +215 -0
  34. memos/graph_dbs/factory.py +21 -0
  35. memos/graph_dbs/neo4j.py +827 -0
  36. memos/hello_world.py +97 -0
  37. memos/llms/__init__.py +0 -0
  38. memos/llms/base.py +16 -0
  39. memos/llms/factory.py +25 -0
  40. memos/llms/hf.py +231 -0
  41. memos/llms/ollama.py +82 -0
  42. memos/llms/openai.py +34 -0
  43. memos/llms/utils.py +14 -0
  44. memos/log.py +78 -0
  45. memos/mem_chat/__init__.py +0 -0
  46. memos/mem_chat/base.py +30 -0
  47. memos/mem_chat/factory.py +21 -0
  48. memos/mem_chat/simple.py +200 -0
  49. memos/mem_cube/__init__.py +0 -0
  50. memos/mem_cube/base.py +29 -0
  51. memos/mem_cube/general.py +146 -0
  52. memos/mem_cube/utils.py +24 -0
  53. memos/mem_os/client.py +5 -0
  54. memos/mem_os/core.py +819 -0
  55. memos/mem_os/main.py +503 -0
  56. memos/mem_os/product.py +89 -0
  57. memos/mem_reader/__init__.py +0 -0
  58. memos/mem_reader/base.py +27 -0
  59. memos/mem_reader/factory.py +21 -0
  60. memos/mem_reader/memory.py +298 -0
  61. memos/mem_reader/simple_struct.py +241 -0
  62. memos/mem_scheduler/__init__.py +0 -0
  63. memos/mem_scheduler/base_scheduler.py +164 -0
  64. memos/mem_scheduler/general_scheduler.py +305 -0
  65. memos/mem_scheduler/modules/__init__.py +0 -0
  66. memos/mem_scheduler/modules/base.py +74 -0
  67. memos/mem_scheduler/modules/dispatcher.py +103 -0
  68. memos/mem_scheduler/modules/monitor.py +82 -0
  69. memos/mem_scheduler/modules/redis_service.py +146 -0
  70. memos/mem_scheduler/modules/retriever.py +41 -0
  71. memos/mem_scheduler/modules/schemas.py +146 -0
  72. memos/mem_scheduler/scheduler_factory.py +21 -0
  73. memos/mem_scheduler/utils.py +26 -0
  74. memos/mem_user/user_manager.py +488 -0
  75. memos/memories/__init__.py +0 -0
  76. memos/memories/activation/__init__.py +0 -0
  77. memos/memories/activation/base.py +42 -0
  78. memos/memories/activation/item.py +25 -0
  79. memos/memories/activation/kv.py +232 -0
  80. memos/memories/base.py +19 -0
  81. memos/memories/factory.py +34 -0
  82. memos/memories/parametric/__init__.py +0 -0
  83. memos/memories/parametric/base.py +19 -0
  84. memos/memories/parametric/item.py +11 -0
  85. memos/memories/parametric/lora.py +41 -0
  86. memos/memories/textual/__init__.py +0 -0
  87. memos/memories/textual/base.py +89 -0
  88. memos/memories/textual/general.py +286 -0
  89. memos/memories/textual/item.py +167 -0
  90. memos/memories/textual/naive.py +185 -0
  91. memos/memories/textual/tree.py +321 -0
  92. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  93. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  94. memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
  95. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  96. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
  97. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
  98. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  99. memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
  100. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  101. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
  102. memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
  103. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
  104. memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
  105. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
  106. memos/parsers/__init__.py +0 -0
  107. memos/parsers/base.py +15 -0
  108. memos/parsers/factory.py +19 -0
  109. memos/parsers/markitdown.py +22 -0
  110. memos/settings.py +8 -0
  111. memos/templates/__init__.py +0 -0
  112. memos/templates/mem_reader_prompts.py +98 -0
  113. memos/templates/mem_scheduler_prompts.py +65 -0
  114. memos/templates/mos_prompts.py +63 -0
  115. memos/types.py +55 -0
  116. memos/vec_dbs/__init__.py +0 -0
  117. memos/vec_dbs/base.py +105 -0
  118. memos/vec_dbs/factory.py +21 -0
  119. memos/vec_dbs/item.py +43 -0
  120. memos/vec_dbs/qdrant.py +292 -0
  121. memoryos-0.0.1.dist-info/METADATA +0 -53
  122. memoryos-0.0.1.dist-info/RECORD +0 -5
  123. {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
  124. {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
memos/api/start_api.py ADDED
@@ -0,0 +1,420 @@
1
+ import logging
2
+ import os
3
+
4
+ from typing import Any, Generic, TypeVar
5
+
6
+ from dotenv import load_dotenv
7
+ from fastapi import FastAPI
8
+ from fastapi.requests import Request
9
+ from fastapi.responses import JSONResponse, RedirectResponse
10
+ from pydantic import BaseModel, Field
11
+
12
+ from memos.configs.mem_os import MOSConfig
13
+ from memos.mem_os.main import MOS
14
+ from memos.mem_user.user_manager import UserManager, UserRole
15
+
16
+
17
# Configure logging
# NOTE: this runs at import time, i.e. merely importing the module calls basicConfig.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Load environment variables
# Must happen before DEFAULT_CONFIG is built, because the os.getenv() calls below
# are evaluated once, right here at import time.
load_dotenv()

# Type parameter for the generic BaseResponse model.
T = TypeVar("T")

# Default configuration
# Every value is read from the environment with a literal fallback.
DEFAULT_CONFIG = {
    "user_id": os.getenv("MOS_USER_ID", "default_user"),
    "session_id": os.getenv("MOS_SESSION_ID", "default_session"),
    "enable_textual_memory": True,
    "enable_activation_memory": False,
    # int() raises ValueError during import if MOS_TOP_K is set to a non-integer string.
    "top_k": int(os.getenv("MOS_TOP_K", "5")),
    "chat_model": {
        "backend": os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai"),
        "config": {
            "model_name_or_path": os.getenv("MOS_CHAT_MODEL", "gpt-3.5-turbo"),
            # Falls back to the literal string "apikey" when OPENAI_API_KEY is unset.
            "api_key": os.getenv("OPENAI_API_KEY", "apikey"),
            # float() raises ValueError during import if the value is not numeric.
            "temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.7")),
            "api_base": os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1"),
        },
    },
}

# Initialize MOS instance with lazy initialization
# Stays None until get_mos_instance() or the /configure endpoint assigns it.
MOS_INSTANCE = None
46
+
47
+
48
def get_mos_instance():
    """Get or create MOS instance with default user creation.

    The first call builds a ``MOSConfig`` from ``DEFAULT_CONFIG``, makes sure
    the configured user exists (creating it with ``UserRole.USER`` when
    ``UserManager.validate_user`` reports it missing), constructs ``MOS`` and
    caches it in the module-level ``MOS_INSTANCE``. Later calls return the
    cached object unchanged.

    Returns:
        The module-level ``MOS`` instance.
    """
    global MOS_INSTANCE
    if MOS_INSTANCE is not None:
        return MOS_INSTANCE

    config = MOSConfig(**DEFAULT_CONFIG)

    # Only a UserManager is needed to bootstrap the default user, so use one
    # directly (as the /configure endpoint does) instead of assembling a
    # half-initialised MOS object by hand.
    user_manager = UserManager()
    if not user_manager.validate_user(config.user_id):
        user_manager.create_user(
            user_name=config.user_id, role=UserRole.USER, user_id=config.user_id
        )
        logger.info(f"Created default user: {config.user_id}")

    # The user now exists, so the real instance can be created and cached.
    MOS_INSTANCE = MOS(config=config)
    return MOS_INSTANCE
73
+
74
+
75
# Application object that the route and exception-handler decorators in this
# module register against. title/description/version are passed to FastAPI as metadata.
app = FastAPI(
    title="MemOS REST APIs",
    description="A REST API for managing and searching memories using MemOS.",
    version="1.0.0",
)
80
+
81
+
82
class BaseRequest(BaseModel):
    """Base model for all requests."""

    # Optional: resolves to None when the client leaves it out.
    user_id: str | None = Field(
        default=None,
        description="User ID for the request",
        json_schema_extra={"example": "user123"},
    )
88
+
89
+
90
class BaseResponse(BaseModel, Generic[T]):
    """Base model for all responses."""

    # Status code carried in the body; defaults to 200.
    code: int = Field(
        default=200,
        description="Response status code",
        json_schema_extra={"example": 200},
    )
    # Required human-readable message.
    message: str = Field(
        ...,
        description="Response message",
        json_schema_extra={"example": "Operation successful"},
    )
    # Payload whose type is fixed by the subclass through the T parameter.
    data: T | None = Field(default=None, description="Response data")
98
+
99
+
100
class Message(BaseModel):
    # One chat message; both fields are required.
    role: str = Field(
        ...,
        json_schema_extra={"example": "user"},
        description="Role of the message (user or assistant).",
    )
    content: str = Field(
        ...,
        json_schema_extra={"example": "Hello, how can I help you?"},
        description="Message content.",
    )
111
+
112
+
113
class MemoryCreate(BaseRequest):
    # Request body for POST /memories. All fields are optional at the schema
    # level; the endpoint itself rejects a request where messages,
    # memory_content and doc_path are all empty.
    messages: list[Message] | None = Field(
        default=None,
        description="List of messages to store.",
        json_schema_extra={"example": [{"role": "user", "content": "Hello"}]},
    )
    mem_cube_id: str | None = Field(
        default=None,
        description="ID of the memory cube",
        json_schema_extra={"example": "cube123"},
    )
    memory_content: str | None = Field(
        default=None,
        description="Content to store as memory",
        json_schema_extra={"example": "This is a memory content"},
    )
    doc_path: str | None = Field(
        default=None,
        description="Path to document to store",
        json_schema_extra={"example": "/path/to/document.txt"},
    )
132
+
133
+
134
class SearchRequest(BaseRequest):
    # Request body for POST /search.
    query: str = Field(
        ...,
        json_schema_extra={"example": "How to implement a feature?"},
        description="Search query.",
    )
    # Optional; passed to MOS.search as install_cube_ids.
    install_cube_ids: list[str] | None = Field(
        default=None,
        json_schema_extra={"example": ["cube123", "cube456"]},
        description="List of cube IDs to search in",
    )
145
+
146
+
147
class MemCubeRegister(BaseRequest):
    # Request body for POST /mem_cubes.
    mem_cube_name_or_path: str = Field(
        ...,
        json_schema_extra={"example": "/path/to/cube"},
        description="Name or path of the MemCube to register.",
    )
    mem_cube_id: str | None = Field(
        default=None,
        json_schema_extra={"example": "cube123"},
        description="ID for the MemCube",
    )
156
+
157
+
158
class ChatRequest(BaseRequest):
    # Request body for POST /chat; only the query is required.
    query: str = Field(
        ...,
        json_schema_extra={"example": "What is the latest update?"},
        description="Chat query message.",
    )
164
+
165
+
166
class UserCreate(BaseRequest):
    # Request body for POST /users.
    user_name: str | None = Field(
        default=None,
        description="Name of the user",
        json_schema_extra={"example": "john_doe"},
    )
    # Plain string here; the endpoint converts it with UserRole(...).
    role: str = Field(
        default="user",
        description="Role of the user",
        json_schema_extra={"example": "user"},
    )
    # Redeclared so that, unlike in BaseRequest, the ID is mandatory.
    user_id: str = Field(
        ...,
        description="User ID",
        json_schema_extra={"example": "user123"},
    )
172
+
173
+
174
class CubeShare(BaseRequest):
    # Request body for POST /mem_cubes/{cube_id}/share.
    target_user_id: str = Field(
        ...,
        json_schema_extra={"example": "user456"},
        description="Target user ID to share with",
    )
178
+
179
+
180
class SimpleResponse(BaseResponse[None]):
    """Simple response model for operations without data return."""

    # BaseResponse specialised with T = None; adds no fields of its own.
182
+
183
+
184
class ConfigResponse(BaseResponse[None]):
    """Response model for configuration endpoint."""

    # BaseResponse specialised with T = None; adds no fields of its own.
186
+
187
+
188
class MemoryResponse(BaseResponse[dict]):
    """Response model for memory operations."""

    # BaseResponse specialised with T = dict; adds no fields of its own.
190
+
191
+
192
class SearchResponse(BaseResponse[dict]):
    """Response model for search operations."""

    # BaseResponse specialised with T = dict; adds no fields of its own.
194
+
195
+
196
class ChatResponse(BaseResponse[str]):
    """Response model for chat operations."""

    # BaseResponse specialised with T = str; adds no fields of its own.
198
+
199
+
200
class UserResponse(BaseResponse[dict]):
    """Response model for user operations."""

    # BaseResponse specialised with T = dict; adds no fields of its own.
202
+
203
+
204
class UserListResponse(BaseResponse[list]):
    """Response model for user list operations."""

    # BaseResponse specialised with T = list; adds no fields of its own.
206
+
207
+
208
@app.post("/configure", summary="Configure MemOS", response_model=ConfigResponse)
async def set_config(config: MOSConfig):
    """Set MemOS configuration."""
    global MOS_INSTANCE

    # Ensure the user named in the submitted configuration exists before MOS
    # is constructed with it.
    users = UserManager()
    user_known = users.validate_user(config.user_id)
    if not user_known:
        users.create_user(
            user_name=config.user_id,
            role=UserRole.USER,
            user_id=config.user_id,
        )
        logger.info(f"Created default user: {config.user_id}")

    # Replace the module-level instance with one built from the new config.
    MOS_INSTANCE = MOS(config=config)
    return ConfigResponse(message="Configuration set successfully")
226
+
227
+
228
@app.post("/users", summary="Create a new user", response_model=UserResponse)
async def create_user(user_create: UserCreate):
    """Create a new user."""
    mos = get_mos_instance()
    # The request carries the role as a plain string; convert it to UserRole
    # before handing it to MOS.
    requested_role = UserRole(user_create.role)
    created_id = mos.create_user(
        user_id=user_create.user_id,
        role=requested_role,
        user_name=user_create.user_name,
    )
    payload = {"user_id": created_id}
    return UserResponse(message="User created successfully", data=payload)
237
+
238
+
239
@app.get("/users", summary="List all users", response_model=UserListResponse)
async def list_users():
    """List all active users."""
    # Whatever MOS.list_users() returns is passed through as the response data.
    return UserListResponse(
        message="Users retrieved successfully",
        data=get_mos_instance().list_users(),
    )
245
+
246
+
247
@app.get("/users/me", summary="Get current user info", response_model=UserResponse)
async def get_user_info():
    """Get current user information including accessible cubes."""
    # No user ID is taken from the request; MOS.get_user_info() is called without arguments.
    return UserResponse(
        message="User info retrieved successfully",
        data=get_mos_instance().get_user_info(),
    )
253
+
254
+
255
@app.post("/mem_cubes", summary="Register a MemCube", response_model=SimpleResponse)
async def register_mem_cube(mem_cube: MemCubeRegister):
    """Register a new MemCube."""
    mos = get_mos_instance()
    # Forward the three request fields unchanged.
    registration = {
        "mem_cube_name_or_path": mem_cube.mem_cube_name_or_path,
        "mem_cube_id": mem_cube.mem_cube_id,
        "user_id": mem_cube.user_id,
    }
    mos.register_mem_cube(**registration)
    return SimpleResponse(message="MemCube registered successfully")
265
+
266
+
267
@app.delete(
    "/mem_cubes/{mem_cube_id}",
    summary="Unregister a MemCube",
    response_model=SimpleResponse,
)
async def unregister_mem_cube(mem_cube_id: str, user_id: str | None = None):
    """Unregister a MemCube."""
    # mem_cube_id comes from the path; user_id is an optional query parameter.
    get_mos_instance().unregister_mem_cube(mem_cube_id=mem_cube_id, user_id=user_id)
    return SimpleResponse(message="MemCube unregistered successfully")
275
+
276
+
277
@app.post(
    "/mem_cubes/{cube_id}/share",
    summary="Share a cube with another user",
    response_model=SimpleResponse,
)
async def share_cube(cube_id: str, share_request: CubeShare):
    """Share a cube with another user."""
    shared = get_mos_instance().share_cube_with_user(cube_id, share_request.target_user_id)
    # A falsy result is reported as a ValueError, which the module's
    # ValueError handler converts into a 400 response.
    if not shared:
        raise ValueError("Failed to share cube")
    return SimpleResponse(message="Cube shared successfully")
290
+
291
+
292
@app.post("/memories", summary="Create memories", response_model=SimpleResponse)
async def add_memory(memory_create: MemoryCreate):
    """Store new memories in a MemCube."""
    has_content = bool(
        memory_create.messages or memory_create.memory_content or memory_create.doc_path
    )
    if not has_content:
        raise ValueError("Either messages, memory_content, or doc_path must be provided")

    mos = get_mos_instance()

    # Exactly one content source is used, in this priority order:
    # messages, then memory_content, then doc_path.
    if memory_create.messages:
        source = {"messages": [msg.model_dump() for msg in memory_create.messages]}
    elif memory_create.memory_content:
        source = {"memory_content": memory_create.memory_content}
    else:
        # The guard above guarantees doc_path is truthy when we get here.
        source = {"doc_path": memory_create.doc_path}

    mos.add(
        **source,
        mem_cube_id=memory_create.mem_cube_id,
        user_id=memory_create.user_id,
    )
    return SimpleResponse(message="Memories added successfully")
318
+
319
+
320
@app.get("/memories", summary="Get all memories", response_model=MemoryResponse)
async def get_all_memories(
    mem_cube_id: str | None = None,
    user_id: str | None = None,
):
    """Retrieve all memories from a MemCube."""
    # Both filters are optional query parameters and are forwarded as given.
    memories = get_mos_instance().get_all(mem_cube_id=mem_cube_id, user_id=user_id)
    return MemoryResponse(message="Memories retrieved successfully", data=memories)
329
+
330
+
331
@app.get(
    "/memories/{mem_cube_id}/{memory_id}",
    summary="Get a memory",
    response_model=MemoryResponse,
)
async def get_memory(mem_cube_id: str, memory_id: str, user_id: str | None = None):
    """Retrieve a specific memory by ID from a MemCube."""
    mos = get_mos_instance()
    memory = mos.get(
        mem_cube_id=mem_cube_id,
        memory_id=memory_id,
        user_id=user_id,
    )
    return MemoryResponse(message="Memory retrieved successfully", data=memory)
339
+
340
+
341
@app.post("/search", summary="Search memories", response_model=SearchResponse)
async def search_memories(search_req: SearchRequest):
    """Search for memories across MemCubes."""
    mos = get_mos_instance()
    # Forward the request fields unchanged.
    search_args = {
        "query": search_req.query,
        "user_id": search_req.user_id,
        "install_cube_ids": search_req.install_cube_ids,
    }
    hits = mos.search(**search_args)
    return SearchResponse(message="Search completed successfully", data=hits)
351
+
352
+
353
@app.put(
    "/memories/{mem_cube_id}/{memory_id}",
    summary="Update a memory",
    response_model=SimpleResponse,
)
async def update_memory(
    mem_cube_id: str, memory_id: str, updated_memory: dict[str, Any], user_id: str | None = None
):
    """Update an existing memory in a MemCube."""
    # The free-form updated_memory dict is handed to MOS as text_memory_item.
    update_args = {
        "mem_cube_id": mem_cube_id,
        "memory_id": memory_id,
        "text_memory_item": updated_memory,
        "user_id": user_id,
    }
    get_mos_instance().update(**update_args)
    return SimpleResponse(message="Memory updated successfully")
368
+
369
+
370
@app.delete(
    "/memories/{mem_cube_id}/{memory_id}",
    summary="Delete a memory",
    response_model=SimpleResponse,
)
async def delete_memory(mem_cube_id: str, memory_id: str, user_id: str | None = None):
    """Delete a specific memory from a MemCube."""
    mos = get_mos_instance()
    mos.delete(
        mem_cube_id=mem_cube_id,
        memory_id=memory_id,
        user_id=user_id,
    )
    return SimpleResponse(message="Memory deleted successfully")
378
+
379
+
380
@app.delete(
    "/memories/{mem_cube_id}",
    summary="Delete all memories",
    response_model=SimpleResponse,
)
async def delete_all_memories(mem_cube_id: str, user_id: str | None = None):
    """Delete all memories from a MemCube."""
    # Delegates entirely to MOS.delete_all for the cube named in the path.
    get_mos_instance().delete_all(mem_cube_id=mem_cube_id, user_id=user_id)
    return SimpleResponse(message="All memories deleted successfully")
386
+
387
+
388
@app.post("/chat", summary="Chat with MemOS", response_model=ChatResponse)
async def chat(chat_req: ChatRequest):
    """Chat with the MemOS system."""
    reply = get_mos_instance().chat(query=chat_req.query, user_id=chat_req.user_id)
    if reply is not None:
        return ChatResponse(message="Chat response generated", data=reply)
    # None means no answer was produced; report it through the ValueError handler.
    raise ValueError("No response generated")
396
+
397
+
398
@app.get("/", summary="Redirect to the OpenAPI documentation", include_in_schema=False)
async def home():
    """Redirect to the OpenAPI documentation."""
    # 307 redirect from the root path to /docs.
    docs_redirect = RedirectResponse(url="/docs", status_code=307)
    return docs_redirect
402
+
403
+
404
@app.exception_handler(ValueError)
async def value_error_handler(request: Request, exc: ValueError):
    """Handle ValueError exceptions globally."""
    # Same code/message/data keys as BaseResponse, with the exception text as message.
    body = {"code": 400, "message": str(exc), "data": None}
    return JSONResponse(status_code=400, content=body)
411
+
412
+
413
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Handle all unhandled exceptions globally."""
    # Pass the exception explicitly: this coroutine is not itself an ``except``
    # block, so relying on the implicit sys.exc_info() lookup could log no
    # traceback (or an unrelated one). exc_info=exc always logs this exception.
    logger.exception("Unhandled error:", exc_info=exc)
    # NOTE(review): str(exc) is returned to the client verbatim, which can expose
    # internal error text. Kept unchanged because callers may rely on the message.
    return JSONResponse(
        status_code=500,
        content={"code": 500, "message": str(exc), "data": None},
    )
memos/chunkers/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .factory import ChunkerFactory
2
+
3
+
4
+ __all__ = ["ChunkerFactory"]
memos/chunkers/base.py ADDED
@@ -0,0 +1,24 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from memos.configs.chunker import BaseChunkerConfig
4
+
5
+
6
class Chunk:
    """Class representing a text chunk."""

    def __init__(self, text: str, token_count: int, sentences: list[str]):
        """Store the chunk text, its token count and its sentences as attributes."""
        self.text, self.token_count, self.sentences = text, token_count, sentences
13
+
14
+
15
class BaseChunker(ABC):
    """Base class for all text chunkers.

    Both ``__init__`` and ``chunk`` are abstract, so a concrete chunker has to
    implement both before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, config: BaseChunkerConfig):
        """Initialize the chunker with the given configuration.

        Args:
            config: Chunker settings.
        """

    @abstractmethod
    def chunk(self, text: str) -> list[Chunk]:
        """Chunk the given text into smaller chunks.

        Args:
            text: The text to split.

        Returns:
            The resulting ``Chunk`` objects.
        """
memos/chunkers/factory.py ADDED
@@ -0,0 +1,22 @@
1
+ from typing import Any, ClassVar
2
+
3
+ from memos.configs.chunker import ChunkerConfigFactory
4
+
5
+ from .base import BaseChunker
6
+ from .sentence_chunker import SentenceChunker
7
+
8
+
9
class ChunkerFactory:
    """Factory class for creating chunker instances."""

    # Backend name -> chunker class.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "sentence": SentenceChunker,
    }

    @classmethod
    def from_config(cls, config_factory: ChunkerConfigFactory) -> BaseChunker:
        """Build the chunker registered for ``config_factory.backend``.

        Args:
            config_factory: Supplies the backend name and the config that is
                passed to the chunker constructor.

        Returns:
            An instance of the class registered in ``backend_to_class``.

        Raises:
            ValueError: If the backend name is not registered.
        """
        backend = config_factory.backend
        chunker_cls = cls.backend_to_class.get(backend)
        if chunker_cls is None:
            raise ValueError(f"Invalid backend: {backend}")
        return chunker_cls(config_factory.config)
memos/chunkers/sentence_chunker.py ADDED
@@ -0,0 +1,35 @@
1
+ from chonkie import SentenceChunker as ChonkieSentenceChunker
2
+
3
+ from memos.configs.chunker import SentenceChunkerConfig
4
+ from memos.log import get_logger
5
+
6
+ from .base import BaseChunker, Chunk
7
+
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
class SentenceChunker(BaseChunker):
    """Sentence-based text chunker."""

    def __init__(self, config: SentenceChunkerConfig):
        """Keep the config and build the underlying chonkie chunker from it."""
        self.config = config
        # The four config values are forwarded one-to-one to chonkie.
        chonkie_options = {
            "tokenizer_or_token_counter": config.tokenizer_or_token_counter,
            "chunk_size": config.chunk_size,
            "chunk_overlap": config.chunk_overlap,
            "min_sentences_per_chunk": config.min_sentences_per_chunk,
        }
        self.chunker = ChonkieSentenceChunker(**chonkie_options)
        logger.info(f"Initialized SentenceChunker with config: {config}")

    def chunk(self, text: str) -> list[Chunk]:
        """Chunk the given text into smaller chunks based on sentences."""
        # Copy text, token_count and sentences of each chonkie chunk into the
        # package's own Chunk type.
        chunks = [
            Chunk(text=piece.text, token_count=piece.token_count, sentences=piece.sentences)
            for piece in self.chunker.chunk(text)
        ]
        logger.debug(f"Generated {len(chunks)} chunks from input text")
        return chunks
memos/configs/__init__.py: File without changes
memos/configs/base.py ADDED
@@ -0,0 +1,82 @@
1
+ import os
2
+
3
+ from typing import Any
4
+
5
+ import yaml
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
8
+
9
+ from memos.log import get_logger
10
+
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
class BaseConfig(BaseModel):
    """Base configuration.

    All configurations should inherit from this class.
    This class uses Pydantic's ConfigDict to enforce strict validation
    and forbids extra fields."""

    # Excluded from dumps; overwritten by set_default_schema after validation.
    model_schema: str = Field(
        default="NOT_SET",
        description="Schema for configuration. This value will be automatically set.",
        exclude=True,
    )

    model_config = ConfigDict(extra="forbid", strict=True)

    @model_validator(mode="after")
    def set_default_schema(self) -> "BaseConfig":
        """Set ``model_schema`` to ``<module>.<class name>`` of the concrete class.

        A value that is neither the expected path nor ``"NOT_SET"`` is logged
        as a warning before it is overwritten.
        """
        expected = f"{self.__module__}.{self.__class__.__name__}"
        if self.model_schema != expected:
            if self.model_schema != "NOT_SET":
                logger.warning(
                    f"Schema is set to {self.model_schema}, but it should be {expected}. "
                    "Changing schema to the default value."
                )
            self.model_schema = expected
        return self

    @classmethod
    def from_json_file(cls, json_path: str) -> Any:
        """Load configuration from a JSON file."""
        with open(json_path, encoding="utf-8") as handle:
            raw_json = handle.read()
        return cls.model_validate_json(raw_json)

    def to_json_file(self, json_path: str) -> None:
        """Dump configuration to a JSON file."""
        # Create the parent directory when the path has one.
        parent = os.path.dirname(json_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(json_path, "w", encoding="utf-8") as handle:
            handle.write(self.model_dump_json(indent=2, warnings="none"))

    @classmethod
    def from_yaml_file(cls, yaml_path: str) -> Any:
        """Load configuration from a YAML file."""
        with open(yaml_path, encoding="utf-8") as handle:
            parsed = yaml.safe_load(handle)
        return cls.model_validate(parsed)

    def to_yaml_file(self, yaml_path: str) -> None:
        """Dump configuration to a YAML file."""
        # Create the parent directory when the path has one.
        parent = os.path.dirname(yaml_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(yaml_path, "w", encoding="utf-8") as handle:
            yaml.safe_dump(
                self.model_dump(mode="json", warnings="none"),
                handle,
                default_flow_style=False,
                allow_unicode=True,
                indent=2,
            )

    def get(self, key, default=None):
        """Return attribute ``key`` of this config, or ``default`` if it is missing."""
        return getattr(self, key, default)
memos/configs/chunker.py ADDED
@@ -0,0 +1,45 @@
1
+ from typing import Any, ClassVar
2
+
3
+ from pydantic import Field, field_validator, model_validator
4
+
5
+ from memos.configs.base import BaseConfig
6
+
7
+
8
class BaseChunkerConfig(BaseConfig):
    """Base configuration class for chunkers."""

    # Typed as str here, although the description also mentions a counting function.
    tokenizer_or_token_counter: str = Field(
        default="gpt2", description="Tokenizer model name or a token counting function"
    )
    # Size limits, in tokens per the descriptions below.
    chunk_size: int = Field(default=512, description="Maximum tokens per chunk")
    chunk_overlap: int = Field(default=128, description="Overlap between chunks")
    min_sentences_per_chunk: int = Field(default=1, description="Minimum sentences in each chunk")
17
+
18
+
19
class SentenceChunkerConfig(BaseChunkerConfig):
    """Configuration for sentence-based text chunker."""

    # Adds no fields; everything is inherited from BaseChunkerConfig.
21
+
22
+
23
class ChunkerConfigFactory(BaseConfig):
    """Factory class for creating chunker configurations."""

    backend: str = Field(..., description="Backend for chunker")
    # Supplied as a plain dict; create_config replaces it with a config object.
    config: dict[str, Any] = Field(..., description="Configuration for the chunker backend")

    # Backend name -> config class.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "sentence": SentenceChunkerConfig,
    }

    @field_validator("backend")
    @classmethod
    def validate_backend(cls, backend: str) -> str:
        """Validate the backend field."""
        if backend in cls.backend_to_class:
            return backend
        raise ValueError(f"Invalid backend: {backend}")

    @model_validator(mode="after")
    def create_config(self) -> "ChunkerConfigFactory":
        """Replace the raw ``config`` dict with an instance of the backend's config class."""
        target_cls = self.backend_to_class[self.backend]
        raw_options = self.config
        self.config = target_cls(**raw_options)
        return self
memos/configs/embedder.py ADDED
@@ -0,0 +1,53 @@
1
+ from typing import Any, ClassVar
2
+
3
+ from pydantic import Field, field_validator, model_validator
4
+
5
+ from memos.configs.base import BaseConfig
6
+
7
+
8
class BaseEmbedderConfig(BaseConfig):
    """Base configuration class for embedding models."""

    # Required.
    model_name_or_path: str = Field(..., description="Model name or path")
    # Optional; None when the caller does not specify a dimension count.
    embedding_dims: int | None = Field(
        default=None, description="Number of dimensions for the embedding"
    )
15
+
16
+
17
class OllamaEmbedderConfig(BaseEmbedderConfig):
    # Embedder config for the "ollama" backend; adds only the API base URL,
    # which defaults to a server on localhost port 11434.
    api_base: str = Field(default="http://localhost:11434", description="Base URL for Ollama API")
19
+
20
+
21
class SenTranEmbedderConfig(BaseEmbedderConfig):
    """Configuration class for Sentence Transformer embeddings."""

    # NOTE(review): defaults to True. Presumably this flag is forwarded to the
    # model loader, in which case remote model code is trusted unless the caller
    # opts out - confirm that an opt-out default is intended.
    trust_remote_code: bool = Field(
        default=True,
        description="Whether to trust remote code when loading the model",
    )
28
+
29
+
30
class EmbedderConfigFactory(BaseConfig):
    """Factory class for creating embedder configurations."""

    backend: str = Field(..., description="Backend for embedding model")
    # Supplied as a plain dict; create_config replaces it with a config object.
    config: dict[str, Any] = Field(..., description="Configuration for the embedding model backend")

    # Backend name -> config class.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "ollama": OllamaEmbedderConfig,
        "sentence_transformer": SenTranEmbedderConfig,
    }

    @field_validator("backend")
    @classmethod
    def validate_backend(cls, backend: str) -> str:
        """Validate the backend field."""
        if backend in cls.backend_to_class:
            return backend
        raise ValueError(f"Invalid backend: {backend}")

    @model_validator(mode="after")
    def create_config(self) -> "EmbedderConfigFactory":
        """Replace the raw ``config`` dict with an instance of the backend's config class."""
        target_cls = self.backend_to_class[self.backend]
        raw_options = self.config
        self.config = target_cls(**raw_options)
        return self