MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
memos/embedders/ark.py ADDED
@@ -0,0 +1,95 @@
1
+ from memos.configs.embedder import ArkEmbedderConfig
2
+ from memos.dependency import require_python_package
3
+ from memos.embedders.base import BaseEmbedder
4
+ from memos.log import get_logger
5
+
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
class ArkEmbedder(BaseEmbedder):
    """Embedder backed by the Volcengine Ark runtime client.

    Texts go through either ``client.embeddings`` or
    ``client.multimodal_embeddings``, selected by ``config.multi_modal``.
    """

    @require_python_package(
        import_name="volcenginesdkarkruntime",
        install_command="pip install 'volcengine-python-sdk[ark]'",
        install_link="https://www.volcengine.com/docs/82379/1541595",
    )
    def __init__(self, config: ArkEmbedderConfig):
        """Store ``config`` and build the Ark client.

        Args:
            config: Embedder configuration. It is modified in place:
                ``embedding_dims`` is reset to ``None`` (with a warning) and
                an empty ``model_name_or_path`` is replaced by a default.
        """
        from volcenginesdkarkruntime import Ark

        self.config = config

        if self.config.embedding_dims is not None:
            logger.warning(
                "Ark does not support specifying embedding dimensions. "
                "The embedding dimensions is determined by the model. "
                "`embedding_dims` will be set to None."
            )
            self.config.embedding_dims = None

        # Default model if not specified
        if not self.config.model_name_or_path:
            self.config.model_name_or_path = "doubao-embedding-vision-250615"

        # Initialize ark client
        self.client = Ark(api_key=self.config.api_key, base_url=self.config.api_base)

    def embed(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for the given texts.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embeddings, each represented as a list of floats.
        """
        from volcenginesdkarkruntime.types.multimodal_embedding import (
            MultimodalEmbeddingContentPartTextParam,
        )

        # Truncate texts if max_tokens is configured
        texts = self._truncate_texts(texts)

        if self.config.multi_modal:
            texts_input = [
                MultimodalEmbeddingContentPartTextParam(text=text, type="text") for text in texts
            ]
            return self.multimodal_embeddings(inputs=texts_input, chunk_size=self.config.chunk_size)
        return self.text_embedding(texts, chunk_size=self.config.chunk_size)

    def _resolve_chunk_size(self, chunk_size: int | None, total: int) -> int:
        """Return a usable batch size for ``range()``.

        Prefers the explicit ``chunk_size``, then ``config.chunk_size``. When
        neither is a positive number, everything is sent as a single batch
        instead of crashing on a ``None``/zero step.
        """
        size = chunk_size or self.config.chunk_size
        if not size or size < 1:
            return max(total, 1)
        return size

    @staticmethod
    def _extract_embeddings(data) -> list[list[float]]:
        """Collect embedding vectors from a response ``data`` payload.

        Accepts a single item or a sequence of items; each item may be a
        mapping with an ``"embedding"`` key or an object with an
        ``embedding`` attribute.
        """
        # NOTE(review): the exact payload shape depends on the SDK version;
        # both forms are accepted so either endpoint keeps working.
        if isinstance(data, dict) or hasattr(data, "embedding"):
            items = [data]
        else:
            items = data
        return [
            item["embedding"] if isinstance(item, dict) else item.embedding for item in items
        ]

    def text_embedding(self, inputs: list[str], chunk_size: int | None = None) -> list[list[float]]:
        """Embed ``inputs`` through ``client.embeddings`` in batches.

        Args:
            inputs: Texts to embed.
            chunk_size: Batch size per request; falls back to
                ``config.chunk_size`` when not given.

        Returns:
            The embeddings gathered from every batch, in request order.
        """
        chunk_size_ = self._resolve_chunk_size(chunk_size, len(inputs))
        embeddings: list[list[float]] = []
        for i in range(0, len(inputs), chunk_size_):
            response = self.client.embeddings.create(
                model=self.config.model_name_or_path,
                input=inputs[i : i + chunk_size_],
            )
            embeddings.extend(self._extract_embeddings(response.data))

        return embeddings

    def multimodal_embeddings(
        self, inputs: list, chunk_size: int | None = None
    ) -> list[list[float]]:
        """Embed ``inputs`` through ``client.multimodal_embeddings`` in batches.

        Args:
            inputs: Multimodal content parts to embed.
            chunk_size: Batch size per request; falls back to
                ``config.chunk_size`` when not given.

        Returns:
            The embeddings gathered from every batch, in request order.
        """
        from volcenginesdkarkruntime.types.multimodal_embedding import (
            MultimodalEmbeddingResponse,  # noqa: TC002
        )

        chunk_size_ = self._resolve_chunk_size(chunk_size, len(inputs))
        embeddings: list[list[float]] = []

        for i in range(0, len(inputs), chunk_size_):
            response: MultimodalEmbeddingResponse = self.client.multimodal_embeddings.create(
                model=self.config.model_name_or_path,
                input=inputs[i : i + chunk_size_],
            )
            embeddings.extend(self._extract_embeddings(response.data))

        return embeddings
memos/embedders/base.py ADDED
@@ -0,0 +1,106 @@
1
+ import re
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from memos.configs.embedder import BaseEmbedderConfig
6
+
7
+
8
def _count_tokens_for_embedding(text: str) -> int:
    """
    Estimate how many tokens ``text`` occupies, for embedding truncation.

    ``tiktoken`` is tried first (the ``gpt-4o-mini`` encoding, or
    ``cl100k_base`` if that lookup fails). If anything on that path raises,
    including the import itself, a character-based heuristic is used.

    Args:
        text: Text to count tokens for.

    Returns:
        Number of tokens.
    """
    try:
        import tiktoken

        try:
            encoder = tiktoken.encoding_for_model("gpt-4o-mini")
        except Exception:
            encoder = tiktoken.get_encoding("cl100k_base")
        token_ids = encoder.encode(text or "", disallowed_special=())
        return len(token_ids)
    except Exception:
        # Heuristic: each CJK ideograph counts as one token, every other run
        # of ~4 characters as one token (never fewer than one for that part).
        if not text:
            return 0
        cjk_count = len(re.findall(r"[\u4e00-\u9fff]", text))
        other_count = len(text) - cjk_count
        return cjk_count + max(1, other_count // 4)
35
+
36
+
37
def _truncate_text_to_tokens(text: str, max_tokens: int) -> str:
    """
    Return the longest prefix of ``text`` found to fit within ``max_tokens``.

    The prefix length is located by binary search, counting tokens with
    ``_count_tokens_for_embedding`` at each probe.

    Args:
        text: Text to truncate.
        max_tokens: Maximum number of tokens allowed. ``None`` or a
            non-positive value disables truncation.

    Returns:
        ``text`` unchanged when it is empty, unlimited, or already fits;
        otherwise a prefix of it, never shorter than one character.
    """
    if not text:
        return text
    if max_tokens is None or max_tokens <= 0:
        return text

    if _count_tokens_for_embedding(text) <= max_tokens:
        return text

    # `lo` is always a prefix length known to fit (0 means "none found yet").
    lo, hi = 0, len(text)
    while lo < hi:
        probe = (lo + hi + 1) // 2  # round up so the loop always makes progress
        if _count_tokens_for_embedding(text[:probe]) <= max_tokens:
            lo = probe
        else:
            hi = probe - 1

    # Keep at least one character even if no prefix fit.
    return text[: max(lo, 1)]
72
+
73
+
74
class BaseEmbedder(ABC):
    """Base class for all Embedding models."""

    @abstractmethod
    def __init__(self, config: BaseEmbedderConfig):
        """Initialize the embedding model with the given configuration."""
        self.config = config

    def _truncate_texts(
        self, texts: list[str], approx_char_per_token: float = 1.0
    ) -> list[str]:
        """
        Truncate texts to fit within max_tokens limit if configured.

        Truncation is character-based: each text is cut to at most
        ``max_tokens * approx_char_per_token`` characters.

        Args:
            texts: List of texts to truncate.
            approx_char_per_token: Assumed number of characters per token,
                used to convert the token limit into a character limit.

        Returns:
            ``texts`` itself when no positive ``max_tokens`` is configured;
            otherwise a new list with every text cut to the character limit.
        """
        config = getattr(self, "config", None)
        max_tokens = getattr(config, "max_tokens", None)
        # A missing or non-positive limit means "no limit", matching
        # _truncate_text_to_tokens; slicing with it would empty the texts.
        if max_tokens is None or max_tokens <= 0:
            return texts

        # Use the same character budget for the slice that the ratio implies,
        # and never go below one character.
        max_chars = max(1, int(max_tokens * approx_char_per_token))
        return [t[:max_chars] for t in texts]

    @abstractmethod
    def embed(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for the given texts."""
memos/embedders/factory.py ADDED
@@ -0,0 +1,29 @@
1
+ from typing import Any, ClassVar
2
+
3
+ from memos.configs.embedder import EmbedderConfigFactory
4
+ from memos.embedders.ark import ArkEmbedder
5
+ from memos.embedders.base import BaseEmbedder
6
+ from memos.embedders.ollama import OllamaEmbedder
7
+ from memos.embedders.sentence_transformer import SenTranEmbedder
8
+ from memos.embedders.universal_api import UniversalAPIEmbedder
9
+ from memos.memos_tools.singleton import singleton_factory
10
+
11
+
12
class EmbedderFactory(BaseEmbedder):
    """Builds the embedder that matches a configured backend name."""

    # Maps each supported backend identifier to its embedder class.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "ollama": OllamaEmbedder,
        "sentence_transformer": SenTranEmbedder,
        "ark": ArkEmbedder,
        "universal_api": UniversalAPIEmbedder,
    }

    @classmethod
    @singleton_factory()
    def from_config(cls, config_factory: EmbedderConfigFactory) -> BaseEmbedder:
        """Instantiate the embedder selected by ``config_factory.backend``.

        Args:
            config_factory: Supplies the backend name and the backend config
                passed to the embedder constructor.

        Returns:
            A new instance of the class registered for the backend.

        Raises:
            ValueError: If the backend is not in ``backend_to_class``.
        """
        # NOTE(review): `singleton_factory` presumably reuses instances for
        # equal configs -- confirm against memos.memos_tools.singleton.
        requested = config_factory.backend
        embedder_class = cls.backend_to_class.get(requested)
        if embedder_class is None:
            raise ValueError(f"Invalid backend: {requested}")
        return embedder_class(config_factory.config)
memos/embedders/ollama.py ADDED
@@ -0,0 +1,77 @@
1
+ from ollama import Client
2
+
3
+ from memos.configs.embedder import OllamaEmbedderConfig
4
+ from memos.embedders.base import BaseEmbedder
5
+ from memos.log import get_logger
6
+
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
11
class OllamaEmbedder(BaseEmbedder):
    """Ollama Embedder class."""

    def __init__(self, config: OllamaEmbedderConfig):
        """Store ``config``, build the Ollama client and check the model.

        Args:
            config: Embedder configuration. It is modified in place:
                ``embedding_dims`` is reset to ``None`` (with a warning) and
                an empty ``model_name_or_path`` is replaced by a default.
        """
        self.config = config
        self.api_base = config.api_base

        if self.config.embedding_dims is not None:
            logger.warning(
                "Ollama does not support specifying embedding dimensions. "
                "The embedding dimensions is determined by the model. "
                "`embedding_dims` will be set to None."
            )
            self.config.embedding_dims = None

        # Default model if not specified
        if not self.config.model_name_or_path:
            self.config.model_name_or_path = "nomic-embed-text:latest"

        # Initialize ollama client
        self.client = Client(host=self.api_base)

        # Ensure the model exists locally
        self._ensure_model_exists()

    def _list_models(self) -> list[str]:
        """
        List all models available in the Ollama client.

        Returns:
            List of model names.
        """
        local_models = self.client.list()["models"]
        return [model.model for model in local_models]

    def _ensure_model_exists(self):
        """
        Ensure the specified model exists locally. If not, pull it from Ollama.

        This is best-effort: any failure while listing or pulling is logged
        as a warning and not re-raised.
        """
        try:
            local_models = self._list_models()
            if self.config.model_name_or_path not in local_models:
                logger.warning(
                    f"Model {self.config.model_name_or_path} not found locally. Pulling from Ollama..."
                )
                self.client.pull(self.config.model_name_or_path)
        except Exception as e:
            logger.warning(f"Could not verify model existence: {e}")

    def embed(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for the given texts.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embeddings, each represented as a list of floats.
        """
        # Truncate texts if max_tokens is configured
        texts = self._truncate_texts(texts)

        response = self.client.embed(
            model=self.config.model_name_or_path,
            input=texts,
        )
        return response.embeddings
memos/embedders/sentence_transformer.py ADDED
@@ -0,0 +1,49 @@
1
+ from memos.configs.embedder import SenTranEmbedderConfig
2
+ from memos.dependency import require_python_package
3
+ from memos.embedders.base import BaseEmbedder
4
+ from memos.log import get_logger
5
+
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
class SenTranEmbedder(BaseEmbedder):
    """Sentence Transformer Embedder class."""

    @require_python_package(
        import_name="sentence_transformers",
        install_command="pip install sentence-transformers",
        install_link="https://www.sbert.net/docs/installation.html",
    )
    def __init__(self, config: SenTranEmbedderConfig):
        """Store ``config`` and load the SentenceTransformer model.

        Args:
            config: Embedder configuration; ``model_name_or_path`` and
                ``trust_remote_code`` are passed to ``SentenceTransformer``.
        """
        from sentence_transformers import SentenceTransformer

        self.config = config
        self.model = SentenceTransformer(
            self.config.model_name_or_path, trust_remote_code=self.config.trust_remote_code
        )

        if self.config.embedding_dims is not None:
            logger.warning(
                "SentenceTransformer does not support specifying embedding dimensions directly. "
                "The embedding dimension is determined by the model. "
                "`embedding_dims` will be ignored."
            )
            # Get embedding dimensions from the model
            # NOTE(review): nesting inferred from the flattened source; here
            # the value is only overwritten when the caller supplied one.
            self.config.embedding_dims = self.model.get_sentence_embedding_dimension()

    def embed(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for the given texts.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embeddings, each represented as a list of floats.
        """
        # Truncate texts if max_tokens is configured
        texts = self._truncate_texts(texts)

        embeddings = self.model.encode(texts, convert_to_numpy=True)
        return embeddings.tolist()
memos/embedders/universal_api.py ADDED
@@ -0,0 +1,51 @@
1
+ from openai import AzureOpenAI as AzureClient
2
+ from openai import OpenAI as OpenAIClient
3
+
4
+ from memos.configs.embedder import UniversalAPIEmbedderConfig
5
+ from memos.embedders.base import BaseEmbedder
6
+ from memos.log import get_logger
7
+ from memos.utils import timed_with_status
8
+
9
+
10
+ logger = get_logger(__name__)
11
+
12
+
13
class UniversalAPIEmbedder(BaseEmbedder):
    """Embedder that calls the embeddings endpoint of an OpenAI or Azure client."""

    def __init__(self, config: UniversalAPIEmbedderConfig):
        """Build the API client for ``config.provider``.

        Args:
            config: Embedder configuration supplying the provider name,
                API key, base URL and (for ``"openai"``) extra headers.

        Raises:
            ValueError: If the provider is neither ``"openai"`` nor ``"azure"``.
        """
        self.provider = config.provider
        self.config = config

        if self.provider == "openai":
            self.client = OpenAIClient(
                api_key=config.api_key,
                base_url=config.base_url,
                default_headers=config.headers_extra if config.headers_extra else None,
            )
        elif self.provider == "azure":
            self.client = AzureClient(
                azure_endpoint=config.base_url,
                api_version="2024-03-01-preview",
                api_key=config.api_key,
            )
        else:
            raise ValueError(f"Embeddings unsupported provider: {self.provider}")

    @timed_with_status(
        log_prefix="model_timed_embedding",
        log_extra_args={"model_name_or_path": "text-embedding-3-large"},
    )
    def embed(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for the given texts.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embeddings, one per item in the response data.

        Raises:
            ValueError: If ``self.provider`` is not a supported provider.
            EmbedderError: If the embeddings request fails; the original
                error is chained as the cause. ``EmbedderError`` derives from
                ``Exception``, so existing ``except Exception`` handlers
                still catch it.
        """
        # Imported locally so this method brings its own dependency.
        from memos.exceptions import EmbedderError

        # Truncate texts if max_tokens is configured
        texts = self._truncate_texts(texts)

        if self.provider not in ("openai", "azure"):
            raise ValueError(f"Embeddings unsupported provider: {self.provider}")

        try:
            response = self.client.embeddings.create(
                model=getattr(self.config, "model_name_or_path", "text-embedding-3-large"),
                input=texts,
            )
            return [r.embedding for r in response.data]
        except Exception as e:
            raise EmbedderError(f"Embeddings request ended with error: {e}") from e
memos/exceptions.py ADDED
@@ -0,0 +1,30 @@
1
+ """Custom exceptions for the MemOS library.
2
+
3
+ This module defines all custom exceptions used throughout the MemOS project.
4
+ All exceptions inherit from a base MemOSError class to provide a consistent
5
+ error handling interface.
6
+ """
7
+
8
+
9
class MemOSError(Exception):
    """Root of the MemOS exception hierarchy."""


class ConfigurationError(MemOSError):
    """MemOS error category for configuration problems."""


# NOTE: this name shadows the builtin ``MemoryError`` in any module that
# imports it unqualified.
class MemoryError(MemOSError):
    """MemOS error category for memory-related failures."""


class MemCubeError(MemOSError):
    """MemOS error category for MemCube failures."""


class VectorDBError(MemOSError):
    """MemOS error category for vector database failures."""


class LLMError(MemOSError):
    """MemOS error category for LLM failures."""


class EmbedderError(MemOSError):
    """MemOS error category for embedder failures."""


class ParserError(MemOSError):
    """MemOS error category for parser failures."""
File without changes