MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from memos.api.product_models import APIADDRequest, APIFeedbackRequest, APISearchRequest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MemCubeView(Protocol):
    """
    A high-level cube view used by AddHandler.
    It may wrap a single logical cube or multiple cubes,
    but exposes unified add_memories / search_memories / feedback_memories
    interfaces.

    This is a structural (duck-typed) interface: any object providing the
    three methods below with compatible signatures satisfies it.
    """

    def add_memories(self, add_req: APIADDRequest) -> list[dict[str, Any]]:
        """
        Process add_req, extract memories and write them into one or more cubes.

        Returns:
            A list of memory dicts, each item should at least contain:
            - memory
            - memory_id
            - memory_type
            - cube_id
        """
        ...

    def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]:
        """
        Process search_req, read memories from one or more cubes and search them.

        Returns:
            A dict holding the search result.
            NOTE(review): the exact keys are not defined by this protocol.
            Memory entries carried inside the dict are expected to contain
            at least the fields below - confirm against the implementations:
            - memory
            - memory_id
            - memory_type
            - cube_id
        """
        ...

    def feedback_memories(self, feedback_req: APIFeedbackRequest) -> dict[str, Any]:
        """
        Process feedback_req, read memories from one or more cubes and apply
        the feedback to them.

        Returns:
            A dict holding the feedback result.
            NOTE(review): the exact keys are not defined by this protocol.
            Memory entries carried inside the dict are expected to contain
            at least the fields below - confirm against the implementations:
            - memory
            - memory_id
            - memory_type
            - cube_id
        """
        ...
|
|
File without changes
|
memos/parsers/base.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
from memos.configs.parser import BaseParserConfig
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseParser(ABC):
    """Base class for all parsers.

    Concrete parsers must implement both ``__init__`` and ``parse``; the
    class cannot be instantiated until both abstract methods are overridden.
    """

    @abstractmethod
    def __init__(self, config: BaseParserConfig):
        """Initialize the parser with the given configuration.

        Args:
            config: Parser configuration object.
        """

    @abstractmethod
    def parse(self, file_path: str) -> str:
        """Parse the file at the given path and return its content as a string.

        Args:
            file_path: Path of the file to parse.

        Returns:
            The content of the file as a string.
        """
|
memos/parsers/factory.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from typing import Any, ClassVar
|
|
2
|
+
|
|
3
|
+
from memos.configs.parser import ParserConfigFactory
|
|
4
|
+
from memos.memos_tools.singleton import singleton_factory
|
|
5
|
+
from memos.parsers.base import BaseParser
|
|
6
|
+
from memos.parsers.markitdown import MarkItDownParser
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ParserFactory(BaseParser):
    """Factory that turns a ``ParserConfigFactory`` into a parser instance."""

    # Registry: backend name -> parser class implementing that backend.
    backend_to_class: ClassVar[dict[str, Any]] = {"markitdown": MarkItDownParser}

    @classmethod
    @singleton_factory()
    def from_config(cls, config_factory: ParserConfigFactory) -> BaseParser:
        """Instantiate the parser selected by ``config_factory.backend``.

        Args:
            config_factory: Carries the backend name (``backend``) and the
                backend-specific configuration (``config``).

        Returns:
            A parser built from ``config_factory.config``.
            NOTE(review): the method is wrapped by ``singleton_factory``;
            presumably equal configurations share one instance - confirm
            against ``memos.memos_tools.singleton``.

        Raises:
            ValueError: If the backend name is not registered.
        """
        backend = config_factory.backend
        registry = cls.backend_to_class
        if backend in registry:
            return registry[backend](config_factory.config)
        raise ValueError(f"Invalid backend: {backend}")
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from memos.configs.parser import MarkItDownParserConfig
|
|
2
|
+
from memos.dependency import require_python_package
|
|
3
|
+
from memos.log import get_logger
|
|
4
|
+
from memos.parsers.base import BaseParser
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MarkItDownParser(BaseParser):
    """MarkItDown Parser class.

    Converts files to Markdown text through the ``markitdown`` package.
    """

    def __init__(self, config: MarkItDownParserConfig):
        """Store the parser configuration.

        Args:
            config: MarkItDown parser configuration.
        """
        self.config = config

    @require_python_package(
        import_name="markitdown",
        install_command="pip install markitdown[all]",
        install_link="https://github.com/microsoft/markitdown",
    )
    def parse(self, file_path: str) -> str:
        """Parse the file at the given path and return its content as a MarkDown string.

        Args:
            file_path: Path of the file to convert.

        Returns:
            The ``text_content`` of the MarkItDown conversion result.
        """
        # The docstring must be the first statement of the function to be
        # picked up as ``__doc__``; the import therefore comes after it.
        # Imported inside the method so the module itself can be imported
        # without ``markitdown`` being installed.
        from markitdown import MarkItDown

        md = MarkItDown(enable_plugins=False)
        result = md.convert(file_path)

        return result.text_content
|
memos/reranker/base.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# memos/reranker/base.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaseReranker(ABC):
    """Abstract interface for memory rerankers."""

    @abstractmethod
    def rerank(
        self,
        query: str,
        graph_results: list[TextualMemoryItem],
        top_k: int,
        search_filter: dict | None = None,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """Return top_k (item, score) sorted by score desc.

        Args:
            query: The query text the candidates are ranked against.
            graph_results: Candidate memory items to rerank.
            top_k: Maximum number of (item, score) pairs to return.
            search_filter: Optional filter dict; its semantics are defined
                by the concrete reranker, not by this interface.
            **kwargs: Implementation-specific extras.

        Returns:
            A list of ``(item, score)`` tuples.
        """
        raise NotImplementedError
|
memos/reranker/concat.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from memos.memories.textual.item import SourceMessage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_encoded_tokens(content: str) -> int:
    """
    Get the size of ``content`` used as its token count.

    NOTE: no tokenizer is applied here - the value is simply the number of
    characters in ``content``.

    Args:
        content: str
    Returns:
        int: ``len(content)``.
    """
    return len(content)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def truncate_data(data: list[str | dict[str, Any] | Any], max_tokens: int) -> list[str]:
|
|
23
|
+
"""
|
|
24
|
+
Truncate data to max tokens.
|
|
25
|
+
Args:
|
|
26
|
+
data: List of strings or dictionaries.
|
|
27
|
+
max_tokens: Maximum number of tokens.
|
|
28
|
+
Returns:
|
|
29
|
+
str: Truncated string.
|
|
30
|
+
"""
|
|
31
|
+
truncated_string = ""
|
|
32
|
+
for item in data:
|
|
33
|
+
if isinstance(item, SourceMessage):
|
|
34
|
+
content = getattr(item, "content", "")
|
|
35
|
+
chat_time = getattr(item, "chat_time", "")
|
|
36
|
+
if not content:
|
|
37
|
+
continue
|
|
38
|
+
truncated_string += f"[{chat_time}]: {content}\n"
|
|
39
|
+
if get_encoded_tokens(truncated_string) > max_tokens:
|
|
40
|
+
break
|
|
41
|
+
return truncated_string
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def process_source(
    items: list[tuple[Any, str | dict[str, Any] | list[Any]]] | None = None,
    recent_num: int = 10,
    max_tokens: int = 2048,
) -> str:
    """
    Concatenate the most recent source entries of the given (memory, source) pairs.

    Args:
        items: List of tuples where each tuple contains (memory, source).
            source can be str, Dict, or List.
        recent_num: Number of recent items to concatenate, taken from the
            end of each list source. Values <= 0 select no entries.
        max_tokens: Token budget forwarded to ``truncate_data``.
    Returns:
        str: Concatenated source. When ``items`` is non-empty the text is
        prefixed with the memory of the last pair followed by a newline.
    """
    if items is None:
        items = []
    concat_data: list[Any] = []
    memory = None
    for memory, source in items:
        if isinstance(source, (list, tuple)):
            # ``seq[-0:]`` would return the whole sequence, so guard the
            # non-positive case explicitly.
            if recent_num > 0:
                concat_data.extend(source[-recent_num:])
        else:
            # A str / dict / single object is one entry; slicing it would
            # either split a string into characters or raise on a dict.
            concat_data.append(source)
    truncated_string = truncate_data(concat_data, max_tokens)
    if memory is not None:
        truncated_string = f"{memory}\n{truncated_string}"
    return truncated_string
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def concat_original_source(
|
|
71
|
+
graph_results: list,
|
|
72
|
+
rerank_source: str | None = None,
|
|
73
|
+
) -> list[str]:
|
|
74
|
+
"""
|
|
75
|
+
Merge memory items with original dialogue.
|
|
76
|
+
Args:
|
|
77
|
+
graph_results (list[TextualMemoryItem]): List of memory items with embeddings.
|
|
78
|
+
merge_field (List[str]): List of fields to merge.
|
|
79
|
+
Returns:
|
|
80
|
+
list[str]: List of memory and concat orginal memory.
|
|
81
|
+
"""
|
|
82
|
+
merge_field = []
|
|
83
|
+
merge_field = ["sources"] if rerank_source is None else rerank_source.split(",")
|
|
84
|
+
documents = []
|
|
85
|
+
for item in graph_results:
|
|
86
|
+
m = item.get("memory") if isinstance(item, dict) else getattr(item, "memory", None)
|
|
87
|
+
|
|
88
|
+
memory = _TAG1.sub("", m) if isinstance(m, str) else m
|
|
89
|
+
|
|
90
|
+
sources = []
|
|
91
|
+
for field in merge_field:
|
|
92
|
+
if isinstance(item, dict):
|
|
93
|
+
metadata = item.get("metadata", {})
|
|
94
|
+
source = metadata.get(field) if isinstance(metadata, dict) else None
|
|
95
|
+
else:
|
|
96
|
+
source = getattr(item.metadata, field, None) if hasattr(item, "metadata") else None
|
|
97
|
+
|
|
98
|
+
if source is None:
|
|
99
|
+
continue
|
|
100
|
+
sources.append((memory, source))
|
|
101
|
+
concat_string = process_source(sources)
|
|
102
|
+
documents.append(concat_string)
|
|
103
|
+
return documents
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# memos/reranker/cosine_local.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from memos.log import get_logger
|
|
7
|
+
from memos.utils import timed
|
|
8
|
+
|
|
9
|
+
from .base import BaseReranker
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import numpy as _np
|
|
17
|
+
|
|
18
|
+
_HAS_NUMPY = True
|
|
19
|
+
except Exception:
|
|
20
|
+
_HAS_NUMPY = False
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _cosine_one_to_many(q: list[float], m: list[list[float]]) -> list[float]:
    """
    Cosine similarity of one query vector against every row of a candidate matrix.

    Args:
        q: Query vector.
        m: Candidate vectors, one per row.
    Returns:
        One similarity per row of ``m``, in row order. Zero norms are
        replaced by / padded with 1e-10 so the division never hits zero.
    """
    if _HAS_NUMPY:
        query_vec = _np.asarray(q, dtype=float)
        cand_mat = _np.asarray(m, dtype=float)
        query_norm = _np.linalg.norm(query_vec) or 1e-10
        cand_norms = _np.linalg.norm(cand_mat, axis=1)
        products = cand_mat @ query_vec
        return (products / (cand_norms * query_norm + 1e-10)).tolist()

    # Pure-Python fallback used when numpy could not be imported.
    def _length(vec):
        return sum(x * x for x in vec) ** 0.5

    def _inner(a, b):
        return sum(x * y for x, y in zip(a, b, strict=False))

    query_norm = _length(q) or 1e-10
    similarities = []
    for candidate in m:
        cand_norm = _length(candidate) or 1e-10
        similarities.append(_inner(q, candidate) / (query_norm * cand_norm))
    return similarities
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class CosineLocalReranker(BaseReranker):
    """Reranker scoring candidates by cosine similarity of stored embeddings."""

    def __init__(
        self,
        level_weights: dict[str, float] | None = None,
        level_field: str = "background",
        **kwargs,
    ):
        """
        Args:
            level_weights: Multiplier per level value; defaults to 1.0 for
                "topic", "concept" and "fact".
            level_field: Name of the metadata attribute read to pick the weight.
            **kwargs: Accepted and ignored.
        """
        self.level_field = level_field
        self.level_weights = level_weights or {"topic": 1.0, "concept": 1.0, "fact": 1.0}

    @timed
    def rerank(
        self,
        query: str,
        graph_results: list,
        top_k: int,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rank ``graph_results`` against ``kwargs["query_embedding"]``.

        Fallbacks: no candidates -> ``[]``; no query embedding -> first
        ``top_k`` items scored 0.0; no candidate has an embedding -> first
        ``top_k`` items scored 0.5. When fewer than ``top_k`` candidates have
        an embedding, the remaining slots are filled with other candidates
        scored -1.0.

        Returns:
            Up to ``top_k`` ``(item, score)`` pairs.
        """
        if not graph_results:
            return []

        query_embedding: list[float] | None = kwargs.get("query_embedding")
        if not query_embedding:
            return [(candidate, 0.0) for candidate in graph_results[:top_k]]

        embedded = []
        for candidate in graph_results:
            if getattr(candidate, "metadata", None) and getattr(
                candidate.metadata, "embedding", None
            ):
                embedded.append(candidate)
        if not embedded:
            return [(candidate, 0.5) for candidate in graph_results[:top_k]]

        similarities = _cosine_one_to_many(
            query_embedding, [candidate.metadata.embedding for candidate in embedded]
        )

        # Scale each similarity by the weight configured for the item's level.
        scores = [
            similarity
            * self.level_weights.get(getattr(candidate.metadata, self.level_field, None), 1.0)
            for similarity, candidate in zip(similarities, embedded, strict=False)
        ]
        ranked = sorted(
            zip(embedded, scores, strict=False),
            key=lambda pair: pair[1],
            reverse=True,
        )

        top_items = ranked[:top_k]
        shortfall = top_k - len(top_items)
        if shortfall > 0:
            # Pad with candidates that were not ranked, marked by score -1.0.
            taken_ids = {candidate.id for candidate, _ in top_items}
            leftovers = [
                (candidate, -1.0) for candidate in graph_results if candidate.id not in taken_ids
            ]
            top_items.extend(leftovers[:shortfall])
        logger.info(f"CosineLocalReranker rerank result: {top_items[:1]}")
        return top_items
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# memos/reranker/factory.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
# Import singleton decorator
|
|
9
|
+
from memos.memos_tools.singleton import singleton_factory
|
|
10
|
+
|
|
11
|
+
from .cosine_local import CosineLocalReranker
|
|
12
|
+
from .http_bge import HTTPBGEReranker
|
|
13
|
+
from .http_bge_strategy import HTTPBGERerankerStrategy
|
|
14
|
+
from .noop import NoopReranker
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from memos.configs.reranker import RerankerConfigFactory
|
|
19
|
+
|
|
20
|
+
from .base import BaseReranker
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RerankerFactory:
    """Factory that builds a reranker from a ``RerankerConfigFactory``."""

    @staticmethod
    @singleton_factory("RerankerFactory")
    def from_config(cfg: RerankerConfigFactory | None) -> BaseReranker | None:
        """
        Build the reranker selected by ``cfg.backend`` (case-insensitive).

        Supported backend names:
            - "http_bge" / "bge"
            - "cosine_local" / "cosine"
            - "noop" / "none" / "disabled"
            - "http_bge_strategy" / "bge_strategy"

        Args:
            cfg: Reranker configuration; a falsy value disables reranking.

        Returns:
            The reranker instance, or None when ``cfg`` is falsy.

        Raises:
            ValueError: If the backend name is not recognised.
        """
        if not cfg:
            return None

        backend = (cfg.backend or "").lower()
        c: dict[str, Any] = cfg.config or {}

        headers_extra = c.get("headers_extra")
        if isinstance(headers_extra, str):
            # Headers may arrive as a JSON string; anything that does not
            # decode to an object is treated as "no extra headers".
            try:
                headers_extra = json.loads(headers_extra)
            except ValueError:  # json.JSONDecodeError is a ValueError
                headers_extra = None
            if not isinstance(headers_extra, dict):
                headers_extra = None

        def http_kwargs() -> dict[str, Any]:
            # Keyword arguments shared by both HTTP BGE backends; numeric
            # limits are clamped to [100, 8000] and [4, 8000] respectively.
            return {
                "reranker_url": c.get("url") or c.get("endpoint") or c.get("reranker_url"),
                "model": c.get("model", "bge-reranker-v2-m3"),
                "timeout": int(c.get("timeout", 10)),
                "max_query_tokens": min(max(c.get("max_query_tokens", 8000), 100), 8000),
                "concate_len": min(max(c.get("concate_len", 1000), 4), 8000),
                "headers_extra": headers_extra,
                "rerank_source": c.get("rerank_source"),
            }

        if backend in {"http_bge", "bge"}:
            return HTTPBGEReranker(**http_kwargs())

        if backend in {"cosine_local", "cosine"}:
            return CosineLocalReranker(
                level_weights=c.get("level_weights"),
                level_field=c.get("level_field", "background"),
            )

        if backend in {"noop", "none", "disabled"}:
            return NoopReranker()

        if backend in {"http_bge_strategy", "bge_strategy"}:
            return HTTPBGERerankerStrategy(
                **http_kwargs(),
                reranker_strategy=c.get("reranker_strategy"),
            )

        raise ValueError(f"Unknown reranker backend: {cfg.backend}")
|