MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
memos/api/start_api.py
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from typing import Any, Generic, TypeVar
|
|
5
|
+
|
|
6
|
+
from dotenv import load_dotenv
|
|
7
|
+
from fastapi import FastAPI
|
|
8
|
+
from fastapi.requests import Request
|
|
9
|
+
from fastapi.responses import JSONResponse, RedirectResponse
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from memos.api.middleware.request_context import RequestContextMiddleware
|
|
13
|
+
from memos.configs.mem_os import MOSConfig
|
|
14
|
+
from memos.mem_os.main import MOS
|
|
15
|
+
from memos.mem_user.user_manager import UserManager, UserRole
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Configure logging
# Configures the root logger at INFO level with a timestamp/level/message format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Load environment variables
# NOTE(review): override=True presumably lets values from the .env file replace
# variables already present in the process environment — confirm against python-dotenv.
load_dotenv(override=True)

# Type parameter for the payload carried by the generic response model below.
T = TypeVar("T")
|
|
26
|
+
|
|
27
|
+
# Default MOS configuration, resolved once at import time.
# Each os.environ.get() call falls back to the literal default shown when the
# variable is unset; numeric settings are parsed eagerly, so a malformed value
# raises ValueError during import.
DEFAULT_CONFIG = {
    "user_id": os.environ.get("MOS_USER_ID", "default_user"),
    "session_id": os.environ.get("MOS_SESSION_ID", "default_session"),
    "enable_textual_memory": True,
    "enable_activation_memory": False,
    "top_k": int(os.environ.get("MOS_TOP_K", "5")),
    "chat_model": {
        "backend": os.environ.get("MOS_CHAT_MODEL_PROVIDER", "openai"),
        "config": {
            "model_name_or_path": os.environ.get("MOS_CHAT_MODEL", "gpt-3.5-turbo"),
            # "apikey" is only a placeholder used when OPENAI_API_KEY is unset.
            "api_key": os.environ.get("OPENAI_API_KEY", "apikey"),
            "temperature": float(os.environ.get("MOS_CHAT_TEMPERATURE", "0.7")),
            "api_base": os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1"),
        },
    },
}
|
|
44
|
+
|
|
45
|
+
# Initialize MOS instance with lazy initialization
# Stays None until get_mos_instance() builds the default instance or the
# /configure endpoint assigns one.
MOS_INSTANCE = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_mos_instance():
    """Return the shared MOS instance, creating it on first use.

    On the first call a ``MOSConfig`` is built from ``DEFAULT_CONFIG``, the
    configured default user is created through ``UserManager`` when
    ``validate_user`` reports it as missing, and the resulting ``MOS`` object
    is cached in the module-level ``MOS_INSTANCE``. Later calls return the
    cached object without touching the user store again.

    Returns:
        The module-wide ``MOS`` instance.
    """
    global MOS_INSTANCE
    if MOS_INSTANCE is not None:
        return MOS_INSTANCE

    config = MOSConfig(**DEFAULT_CONFIG)

    # A bare UserManager is all that is needed to bootstrap the default user;
    # no partially constructed MOS object is required for this step.
    user_manager = UserManager()
    if not user_manager.validate_user(config.user_id):
        user_manager.create_user(
            user_name=config.user_id, role=UserRole.USER, user_id=config.user_id
        )
        logger.info(f"Created default user: {config.user_id}")

    # NOTE(review): the user is created before MOS is constructed, presumably
    # because MOS(...) expects the configured user to exist — confirm.
    MOS_INSTANCE = MOS(config=config)
    return MOS_INSTANCE
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# FastAPI application object; the route and exception-handler decorators in
# this module register on it.
app = FastAPI(
    title="MemOS REST APIs",
    description="A REST API for managing and searching memories using MemOS.",
    version="1.0.0",
)

# Install the project's request-context middleware on the application.
app.add_middleware(RequestContextMiddleware)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class BaseRequest(BaseModel):
    """Common base for request bodies; carries an optional ``user_id``."""

    user_id: str | None = Field(
        default=None,
        description="User ID for the request",
        json_schema_extra={"example": "user123"},
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class BaseResponse(BaseModel, Generic[T]):
    """Generic response envelope: status ``code``, ``message`` and optional ``data``.

    ``code`` defaults to 200, ``message`` is required, and ``data`` holds a
    payload of type ``T`` or ``None``.
    """

    code: int = Field(
        default=200,
        description="Response status code",
        json_schema_extra={"example": 200},
    )
    message: str = Field(
        default=...,
        description="Response message",
        json_schema_extra={"example": "Operation successful"},
    )
    data: T | None = Field(default=None, description="Response data")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class Message(BaseModel):
    """A single chat message: a required ``role`` and required ``content``."""

    role: str = Field(
        default=...,
        description="Role of the message (user or assistant).",
        json_schema_extra={"example": "user"},
    )
    content: str = Field(
        default=...,
        description="Message content.",
        json_schema_extra={"example": "Hello, how can I help you?"},
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class MemoryCreate(BaseRequest):
    """Request body for storing memories.

    All fields are optional at the model level; the ``/memories`` POST handler
    rejects a request in which ``messages``, ``memory_content`` and
    ``doc_path`` are all empty.
    """

    messages: list[Message] | None = Field(
        default=None,
        description="List of messages to store.",
        json_schema_extra={"example": [{"role": "user", "content": "Hello"}]},
    )
    mem_cube_id: str | None = Field(
        default=None,
        description="ID of the memory cube",
        json_schema_extra={"example": "cube123"},
    )
    memory_content: str | None = Field(
        default=None,
        description="Content to store as memory",
        json_schema_extra={"example": "This is a memory content"},
    )
    doc_path: str | None = Field(
        default=None,
        description="Path to document to store",
        json_schema_extra={"example": "/path/to/document.txt"},
    )
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class SearchRequest(BaseRequest):
    """Request body for a memory search: required ``query``, optional cube filter."""

    query: str = Field(
        default=...,
        description="Search query.",
        json_schema_extra={"example": "How to implement a feature?"},
    )
    install_cube_ids: list[str] | None = Field(
        default=None,
        description="List of cube IDs to search in",
        json_schema_extra={"example": ["cube123", "cube456"]},
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class MemCubeRegister(BaseRequest):
    """Request body for registering a MemCube by name or path, with optional ID."""

    mem_cube_name_or_path: str = Field(
        default=...,
        description="Name or path of the MemCube to register.",
        json_schema_extra={"example": "/path/to/cube"},
    )
    mem_cube_id: str | None = Field(
        default=None,
        description="ID for the MemCube",
        json_schema_extra={"example": "cube123"},
    )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class ChatRequest(BaseRequest):
    """Request body for the chat endpoint; ``query`` is required."""

    query: str = Field(
        default=...,
        description="Chat query message.",
        json_schema_extra={"example": "What is the latest update?"},
    )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class UserCreate(BaseRequest):
    """Request body for creating a user.

    Redeclares ``user_id`` from ``BaseRequest`` as a required string; ``role``
    defaults to ``"user"`` and ``user_name`` is optional.
    """

    user_name: str | None = Field(
        default=None,
        description="Name of the user",
        json_schema_extra={"example": "john_doe"},
    )
    role: str = Field(
        default="user",
        description="Role of the user",
        json_schema_extra={"example": "user"},
    )
    user_id: str = Field(
        default=...,
        description="User ID",
        json_schema_extra={"example": "user123"},
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class CubeShare(BaseRequest):
    """Request body for sharing a cube; names the user to share with."""

    target_user_id: str = Field(
        default=...,
        description="Target user ID to share with",
        json_schema_extra={"example": "user456"},
    )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class SimpleResponse(BaseResponse[None]):
    """Response envelope for operations that return no payload (``data`` is ``None``)."""
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class ConfigResponse(BaseResponse[None]):
    """Response envelope for the configuration endpoint; carries no payload."""
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class MemoryResponse(BaseResponse[dict]):
    """Response envelope for memory retrieval; ``data`` is a dict."""
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class SearchResponse(BaseResponse[dict]):
    """Response envelope for search operations; ``data`` is a dict."""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class ChatResponse(BaseResponse[str]):
    """Response envelope for chat operations; ``data`` is the reply string."""
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class UserResponse(BaseResponse[dict]):
    """Response envelope for single-user operations; ``data`` is a dict."""
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class UserListResponse(BaseResponse[list]):
    """Response envelope for user listings; ``data`` is a list."""
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@app.post("/configure", summary="Configure MemOS", response_model=ConfigResponse)
async def set_config(config: MOSConfig):
    """Replace the shared MOS instance with one built from ``config``.

    Before the new instance is constructed, the user named by
    ``config.user_id`` is created through a fresh ``UserManager`` if
    ``validate_user`` reports it as missing.
    """
    global MOS_INSTANCE

    default_user = config.user_id
    users = UserManager()
    user_exists = users.validate_user(default_user)
    if not user_exists:
        users.create_user(user_name=default_user, role=UserRole.USER, user_id=default_user)
        logger.info(f"Created default user: {config.user_id}")

    # Rebinding the module global makes later get_mos_instance() calls return
    # the newly configured instance.
    MOS_INSTANCE = MOS(config=config)
    return ConfigResponse(message="Configuration set successfully")
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@app.post("/users", summary="Create a new user", response_model=UserResponse)
async def create_user(user_create: UserCreate):
    """Create a user via the shared MOS instance and return its ID.

    The ``role`` string is converted with ``UserRole(...)``.
    NOTE(review): presumably an unknown role raises ``ValueError`` there, which
    this module's ValueError handler would report as HTTP 400 — confirm.
    """
    mos = get_mos_instance()
    parsed_role = UserRole(user_create.role)
    created_id = mos.create_user(
        user_id=user_create.user_id,
        role=parsed_role,
        user_name=user_create.user_name,
    )
    payload = {"user_id": created_id}
    return UserResponse(message="User created successfully", data=payload)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
@app.get("/users", summary="List all users", response_model=UserListResponse)
async def list_users():
    """Return whatever ``list_users()`` on the shared MOS instance yields."""
    return UserListResponse(
        message="Users retrieved successfully",
        data=get_mos_instance().list_users(),
    )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@app.get("/users/me", summary="Get current user info", response_model=UserResponse)
async def get_user_info():
    """Return the result of ``get_user_info()`` on the shared MOS instance."""
    return UserResponse(
        message="User info retrieved successfully",
        data=get_mos_instance().get_user_info(),
    )
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@app.post("/mem_cubes", summary="Register a MemCube", response_model=SimpleResponse)
async def register_mem_cube(mem_cube: MemCubeRegister):
    """Register a MemCube, forwarding the request fields to MOS unchanged."""
    registration = {
        "mem_cube_name_or_path": mem_cube.mem_cube_name_or_path,
        "mem_cube_id": mem_cube.mem_cube_id,
        "user_id": mem_cube.user_id,
    }
    get_mos_instance().register_mem_cube(**registration)
    return SimpleResponse(message="MemCube registered successfully")
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@app.delete(
    "/mem_cubes/{mem_cube_id}",
    summary="Unregister a MemCube",
    response_model=SimpleResponse,
)
async def unregister_mem_cube(mem_cube_id: str, user_id: str | None = None):
    """Unregister the MemCube named in the path; ``user_id`` is passed through."""
    mos = get_mos_instance()
    mos.unregister_mem_cube(user_id=user_id, mem_cube_id=mem_cube_id)
    return SimpleResponse(message="MemCube unregistered successfully")
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@app.post(
    "/mem_cubes/{cube_id}/share",
    summary="Share a cube with another user",
    response_model=SimpleResponse,
)
async def share_cube(cube_id: str, share_request: CubeShare):
    """Share ``cube_id`` with ``share_request.target_user_id``.

    Raises:
        ValueError: If ``share_cube_with_user`` returns a falsy result.
    """
    shared = get_mos_instance().share_cube_with_user(cube_id, share_request.target_user_id)
    if not shared:
        raise ValueError("Failed to share cube")
    return SimpleResponse(message="Cube shared successfully")
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@app.post("/memories", summary="Create memories", response_model=SimpleResponse)
async def add_memory(memory_create: MemoryCreate):
    """Store new memories from exactly one source in the request.

    The first non-empty field among ``messages``, ``memory_content`` and
    ``doc_path`` (in that order) is forwarded to ``MOS.add`` together with
    ``mem_cube_id`` and ``user_id``.

    Raises:
        ValueError: If all three source fields are empty.
    """
    has_source = bool(
        memory_create.messages or memory_create.memory_content or memory_create.doc_path
    )
    if not has_source:
        raise ValueError("Either messages, memory_content, or doc_path must be provided")

    mos = get_mos_instance()

    # Pick the single source to forward, keeping the messages > content > path priority.
    if memory_create.messages:
        source = {"messages": [msg.model_dump() for msg in memory_create.messages]}
    elif memory_create.memory_content:
        source = {"memory_content": memory_create.memory_content}
    else:
        source = {"doc_path": memory_create.doc_path}

    mos.add(
        **source,
        mem_cube_id=memory_create.mem_cube_id,
        user_id=memory_create.user_id,
    )
    return SimpleResponse(message="Memories added successfully")
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
@app.get("/memories", summary="Get all memories", response_model=MemoryResponse)
async def get_all_memories(
    mem_cube_id: str | None = None,
    user_id: str | None = None,
):
    """Return the result of ``MOS.get_all`` for the given optional cube and user."""
    memories = get_mos_instance().get_all(user_id=user_id, mem_cube_id=mem_cube_id)
    return MemoryResponse(data=memories, message="Memories retrieved successfully")
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
@app.get(
    "/memories/{mem_cube_id}/{memory_id}",
    summary="Get a memory",
    response_model=MemoryResponse,
)
async def get_memory(mem_cube_id: str, memory_id: str, user_id: str | None = None):
    """Return one memory, looked up by cube ID and memory ID via ``MOS.get``."""
    lookup = {"mem_cube_id": mem_cube_id, "memory_id": memory_id, "user_id": user_id}
    memory = get_mos_instance().get(**lookup)
    return MemoryResponse(data=memory, message="Memory retrieved successfully")
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
@app.post("/search", summary="Search memories", response_model=SearchResponse)
async def search_memories(search_req: SearchRequest):
    """Run ``MOS.search`` with the request's query, user and cube filter."""
    mos = get_mos_instance()
    hits = mos.search(
        install_cube_ids=search_req.install_cube_ids,
        user_id=search_req.user_id,
        query=search_req.query,
    )
    return SearchResponse(data=hits, message="Search completed successfully")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@app.put(
    "/memories/{mem_cube_id}/{memory_id}",
    summary="Update a memory",
    response_model=SimpleResponse,
)
async def update_memory(
    mem_cube_id: str,
    memory_id: str,
    updated_memory: dict[str, Any],
    user_id: str | None = None,
):
    """Update a memory; ``updated_memory`` is passed to MOS as ``text_memory_item``."""
    update_args = {
        "mem_cube_id": mem_cube_id,
        "memory_id": memory_id,
        "text_memory_item": updated_memory,
        "user_id": user_id,
    }
    get_mos_instance().update(**update_args)
    return SimpleResponse(message="Memory updated successfully")
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
@app.delete(
    "/memories/{mem_cube_id}/{memory_id}",
    summary="Delete a memory",
    response_model=SimpleResponse,
)
async def delete_memory(mem_cube_id: str, memory_id: str, user_id: str | None = None):
    """Delete one memory, identified by cube ID and memory ID, via ``MOS.delete``."""
    mos = get_mos_instance()
    mos.delete(user_id=user_id, memory_id=memory_id, mem_cube_id=mem_cube_id)
    return SimpleResponse(message="Memory deleted successfully")
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
@app.delete(
    "/memories/{mem_cube_id}",
    summary="Delete all memories",
    response_model=SimpleResponse,
)
async def delete_all_memories(
    mem_cube_id: str,
    user_id: str | None = None,
):
    """Delete all memories from a MemCube."""
    # Delegates to MOS ``delete_all`` for the cube named in the path.
    get_mos_instance().delete_all(mem_cube_id=mem_cube_id, user_id=user_id)
    return SimpleResponse(message="All memories deleted successfully")
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@app.post("/chat", summary="Chat with MemOS", response_model=ChatResponse)
async def chat(chat_req: ChatRequest):
    """Chat with the MemOS system."""
    mos = get_mos_instance()
    reply = mos.chat(query=chat_req.query, user_id=chat_req.user_id)
    if reply is not None:
        return ChatResponse(message="Chat response generated", data=reply)
    # A missing reply is surfaced as ValueError; this file registers a
    # ValueError handler on the app that answers with a 400 JSON body.
    raise ValueError("No response generated")
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
@app.get("/", summary="Redirect to the OpenAPI documentation", include_in_schema=False)
async def home():
    """Send visitors of the root path to the interactive docs at ``/docs``."""
    # 307 is passed explicitly as the redirect status code.
    docs_redirect = RedirectResponse(url="/docs", status_code=307)
    return docs_redirect
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
@app.exception_handler(ValueError)
async def value_error_handler(request: Request, exc: ValueError):
    """Convert a ``ValueError`` into a 400 JSON response.

    The body carries ``code`` 400, the exception text as ``message`` and a
    ``data`` field set to ``None``. ``request`` is accepted but not read.
    """
    body = {"code": 400, "message": str(exc), "data": None}
    return JSONResponse(status_code=400, content=body)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log an otherwise unhandled exception and answer with a 500 JSON body.

    The body carries ``code`` 500, the exception text as ``message`` and a
    ``data`` field set to ``None``. ``request`` is accepted but not read.
    """
    logger.exception("Unhandled error:")
    # NOTE(review): str(exc) is returned to the caller verbatim, which may
    # expose internal details — confirm this is intended.
    body = {"code": 500, "message": str(exc), "data": None}
    return JSONResponse(status_code=500, content=body)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
if __name__ == "__main__":
    import argparse

    # Command-line options accepted when the module is run as a script.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port to run the server on",
    )
    parser.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="Host to run the server on",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )
    # NOTE(review): nothing visible in this block consumes `args` after it is
    # parsed — confirm whether a server start call is expected to follow.
    args = parser.parse_args()
|
memos/chunkers/base.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
from memos.configs.chunker import BaseChunkerConfig
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Chunk:
    """Class representing a text chunk.

    Attributes:
        text: The chunk's text.
        token_count: Token count recorded for the chunk.
        sentences: The sentences recorded for the chunk (annotated as
            ``list[str]``; the value is stored as given, without validation).
    """

    def __init__(self, text: str, token_count: int, sentences: list[str]):
        """Store the three fields on the instance unchanged."""
        self.text = text
        self.token_count = token_count
        self.sentences = sentences

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        # Without this, instances render as an opaque ``<Chunk object at 0x...>``.
        return (
            f"{type(self).__name__}(text={self.text!r}, "
            f"token_count={self.token_count!r}, sentences={self.sentences!r})"
        )
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseChunker(ABC):
    """Abstract interface that every text chunker implements.

    Both ``__init__`` and ``chunk`` are abstract, so a concrete subclass has
    to override each of them before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, config: BaseChunkerConfig):
        """Set up the chunker from ``config``."""

    @abstractmethod
    def chunk(self, text: str) -> list[Chunk]:
        """Split ``text`` into smaller chunks and return them."""
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from memos.configs.chunker import MarkdownChunkerConfig
|
|
2
|
+
from memos.dependency import require_python_package
|
|
3
|
+
from memos.log import get_logger
|
|
4
|
+
|
|
5
|
+
from .base import BaseChunker, Chunk
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = get_logger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CharacterTextChunker(BaseChunker):
    """Chunker that splits text by character count using a recursive splitter."""

    @require_python_package(
        import_name="langchain_text_splitters",
        install_command="pip install langchain_text_splitters==1.0.0",
        install_link="https://github.com/langchain-ai/langchain-text-splitters",
    )
    def __init__(
        self,
        config: MarkdownChunkerConfig | None = None,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        """Build the underlying ``RecursiveCharacterTextSplitter``.

        Args:
            config: Optional config object. When truthy, its ``chunk_size`` and
                ``chunk_overlap`` take precedence over the keyword arguments.
            chunk_size: Chunk size used when no config is given.
            chunk_overlap: Chunk overlap used when no config is given.
        """
        from langchain_text_splitters import RecursiveCharacterTextSplitter

        self.config = config

        if config:
            size, overlap = config.chunk_size, config.chunk_overlap
        else:
            size, overlap = chunk_size, chunk_overlap

        # Separators are tried in the listed order; lengths are measured with len().
        split_on = ["\n\n", "\n", "。", "!", "?", ". ", "! ", "? ", " ", ""]
        self.chunker = RecursiveCharacterTextSplitter(
            chunk_size=size,
            chunk_overlap=overlap,
            length_function=len,
            separators=split_on,
        )

    def chunk(self, text: str, **kwargs) -> list[str] | list[Chunk]:
        """Split ``text`` with the configured splitter and return the pieces.

        Extra keyword arguments are accepted but not used.
        """
        pieces = self.chunker.split_text(text)
        logger.debug(f"Generated {len(pieces)} chunks from input text")
        return pieces
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from typing import Any, ClassVar
|
|
2
|
+
|
|
3
|
+
from memos.configs.chunker import ChunkerConfigFactory
|
|
4
|
+
|
|
5
|
+
from .base import BaseChunker
|
|
6
|
+
from .markdown_chunker import MarkdownChunker
|
|
7
|
+
from .sentence_chunker import SentenceChunker
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ChunkerFactory:
    """Build chunker instances from a ``ChunkerConfigFactory``."""

    # Maps the backend name found in the config to the chunker class to build.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "sentence": SentenceChunker,
        "markdown": MarkdownChunker,
    }

    @classmethod
    def from_config(cls, config_factory: ChunkerConfigFactory) -> BaseChunker:
        """Instantiate the chunker registered for ``config_factory.backend``.

        Args:
            config_factory: Supplies the backend name and the config object
                passed to the chunker's constructor.

        Returns:
            The chunker built from ``config_factory.config``.

        Raises:
            ValueError: If the backend name is not in ``backend_to_class``.
        """
        backend = config_factory.backend
        chunker_cls = cls.backend_to_class.get(backend)
        if chunker_cls is None:
            raise ValueError(f"Invalid backend: {backend}")
        return chunker_cls(config_factory.config)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from memos.configs.chunker import MarkdownChunkerConfig
|
|
2
|
+
from memos.dependency import require_python_package
|
|
3
|
+
from memos.log import get_logger
|
|
4
|
+
|
|
5
|
+
from .base import BaseChunker, Chunk
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = get_logger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MarkdownChunker(BaseChunker):
    """Chunker that splits Markdown on headers, optionally re-splitting by size."""

    @require_python_package(
        import_name="langchain_text_splitters",
        install_command="pip install langchain_text_splitters==1.0.0",
        install_link="https://github.com/langchain-ai/langchain-text-splitters",
    )
    def __init__(
        self,
        config: MarkdownChunkerConfig | None = None,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        recursive: bool = False,
    ):
        """Build the header splitter and, if requested, a recursive splitter.

        Args:
            config: Optional config object. When truthy, its values take
                precedence over the keyword arguments for the settings it covers.
            chunk_size: Size for the recursive splitter when no config is given.
            chunk_overlap: Overlap for the recursive splitter when no config is given.
            recursive: Enable the recursive splitter; ``config.recursive`` being
                truthy enables it as well.
        """
        from langchain_text_splitters import (
            MarkdownHeaderTextSplitter,
            RecursiveCharacterTextSplitter,
        )

        self.config = config

        if config:
            headers = config.headers_to_split_on
        else:
            headers = [("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")]
        keep_stripped = config.strip_headers if config else False
        self.chunker = MarkdownHeaderTextSplitter(
            headers_to_split_on=headers,
            strip_headers=keep_stripped,
        )

        self.chunker_recursive = None
        logger.info(f"Initialized MarkdownHeaderTextSplitter with config: {config}")

        wants_recursive = (config and config.recursive) or recursive
        if wants_recursive:
            if config:
                size, overlap = config.chunk_size, config.chunk_overlap
            else:
                size, overlap = chunk_size, chunk_overlap
            self.chunker_recursive = RecursiveCharacterTextSplitter(
                chunk_size=size,
                chunk_overlap=overlap,
                length_function=len,
            )

    def chunk(self, text: str, **kwargs) -> list[str] | list[Chunk]:
        """Split Markdown ``text`` into string chunks.

        Each chunk is the document's metadata values joined by spaces, a
        newline, then the document's page content. If building that string
        fails, a warning is logged and the page content alone is used. Extra
        keyword arguments are accepted but not used.
        """
        docs = self.chunker.split_text(text)
        if self.chunker_recursive:
            docs = self.chunker_recursive.split_documents(docs)

        chunks = []
        for doc in docs:
            try:
                header_line = " ".join(doc.metadata.values())
                chunks.append(header_line + "\n" + doc.page_content)
            except Exception as e:
                logger.warning(f"warning chunking document: {e}")
                chunks.append(doc.page_content)

        logger.info(f"Generated chunks: {chunks[:5]}")
        logger.debug(f"Generated {len(chunks)} chunks from input text")
        return chunks
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from memos.configs.chunker import SentenceChunkerConfig
|
|
2
|
+
from memos.dependency import require_python_package
|
|
3
|
+
from memos.log import get_logger
|
|
4
|
+
|
|
5
|
+
from .base import BaseChunker, Chunk
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = get_logger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SentenceChunker(BaseChunker):
    """Chunker that delegates sentence-based splitting to ``chonkie``."""

    @require_python_package(
        import_name="chonkie",
        install_command="pip install chonkie",
        install_link="https://docs.chonkie.ai/python-sdk/getting-started/installation",
    )
    def __init__(self, config: SentenceChunkerConfig):
        """Create the chonkie sentence chunker from ``config``.

        The constructor is first called with the ``tokenizer`` keyword; if that
        raises ``TypeError`` or ``AttributeError`` it is retried with the
        ``tokenizer_or_token_counter`` keyword instead.
        """
        from chonkie import SentenceChunker as ChonkieSentenceChunker

        self.config = config

        def _size_options():
            # Read on every call so the config is consulted inside each attempt.
            return {
                "chunk_size": config.chunk_size,
                "chunk_overlap": config.chunk_overlap,
                "min_sentences_per_chunk": config.min_sentences_per_chunk,
            }

        # Try new API first (v1.4.0+)
        try:
            self.chunker = ChonkieSentenceChunker(
                tokenizer=config.tokenizer_or_token_counter,
                **_size_options(),
            )
        except (TypeError, AttributeError) as e:
            # Fallback to old API (<v1.4.0)
            logger.debug(f"Falling back to old chonkie API: {e}")
            self.chunker = ChonkieSentenceChunker(
                tokenizer_or_token_counter=config.tokenizer_or_token_counter,
                **_size_options(),
            )

        logger.info(f"Initialized SentenceChunker with config: {config}")

    def chunk(self, text: str) -> list[str] | list[Chunk]:
        """Split ``text`` and return one ``Chunk`` per chunk produced by chonkie.

        Each result copies ``text``, ``token_count`` and ``sentences`` from the
        corresponding chonkie chunk.
        """
        chunks = [
            Chunk(text=piece.text, token_count=piece.token_count, sentences=piece.sentences)
            for piece in self.chunker.chunk(text)
        ]

        logger.debug(f"Generated {len(chunks)} chunks from input text")
        return chunks
|