memoryos-2.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
memos/llms/hf_singleton.py
ADDED

@@ -0,0 +1,114 @@

import threading

from typing import ClassVar

from memos.configs.llm import HFLLMConfig
from memos.llms.hf import HFLLM
from memos.log import get_logger


logger = get_logger(__name__)


class HFSingletonLLM(HFLLM):
    """
    Singleton version of HFLLM that prevents multiple loading of the same model.
    This class inherits from HFLLM and adds singleton behavior.
    """

    _instances: ClassVar[dict[str, "HFSingletonLLM"]] = {}
    _lock: ClassVar[threading.Lock] = threading.Lock()

    def __new__(cls, config: HFLLMConfig):
        """
        Singleton pattern implementation.
        Returns the existing instance if one exists for this config, otherwise creates a new one.
        """
        config_key = cls._get_config_key(config)

        if config_key in cls._instances:
            logger.debug(f"Reusing existing HF model: {config.model_name_or_path}")
            return cls._instances[config_key]

        with cls._lock:
            # Double-check pattern to prevent race conditions
            if config_key in cls._instances:
                logger.debug(f"Reusing existing HF model: {config.model_name_or_path}")
                return cls._instances[config_key]

            logger.info(f"Creating new HF model: {config.model_name_or_path}")
            instance = super().__new__(cls)
            cls._instances[config_key] = instance
            return instance

    def __init__(self, config: HFLLMConfig):
        """
        Initialize the singleton HFLLM instance.
        Only initializes if this is a new instance.
        """
        # Check if already initialized
        if hasattr(self, "_initialized"):
            return

        # Call parent constructor
        super().__init__(config)
        self._initialized = True

    @classmethod
    def _get_config_key(cls, config: HFLLMConfig) -> str:
        """
        Generate a unique key for the HF model configuration.

        Args:
            config: The HFLLM configuration

        Returns:
            A unique string key representing the configuration
        """
        # Create a unique key based on model path and key parameters
        key_parts = [config.model_name_or_path]
        return "|".join(key_parts)

    @classmethod
    def get_instance_count(cls) -> int:
        """
        Get the number of unique HF model instances currently managed.

        Returns:
            Number of HF model instances
        """
        return len(cls._instances)

    @classmethod
    def get_instance_info(cls) -> dict[str, str]:
        """
        Get information about all managed HF model instances.

        Returns:
            Dictionary mapping config keys to model paths
        """
        return {key: instance.config.model_name_or_path for key, instance in cls._instances.items()}

    @classmethod
    def clear_all(cls) -> None:
        """
        Clear all HF model instances from memory.
        This should be used carefully as it will force reloading of models.
        """
        with cls._lock:
            cls._instances.clear()
            logger.info("All HF model instances cleared from singleton manager")


# Convenience function to get singleton manager info
def get_hf_singleton_info() -> dict[str, int | dict[str, str]]:
    """
    Get information about the HF singleton manager.

    Returns:
        Dictionary with instance count and info
    """
    return {
        "instance_count": HFSingletonLLM.get_instance_count(),
        "instance_info": HFSingletonLLM.get_instance_info(),
    }
memos/llms/ollama.py
ADDED

@@ -0,0 +1,135 @@

from collections.abc import Generator
from typing import Any

from ollama import Client, Message

from memos.configs.llm import OllamaLLMConfig
from memos.llms.base import BaseLLM
from memos.llms.utils import remove_thinking_tags
from memos.log import get_logger
from memos.types import MessageList


logger = get_logger(__name__)


class OllamaLLM(BaseLLM):
    """Ollama LLM class."""

    def __init__(self, config: OllamaLLMConfig):
        self.config = config
        self.api_base = config.api_base

        # Default model if not specified
        if not self.config.model_name_or_path:
            self.config.model_name_or_path = "llama3.1:latest"

        # Initialize the Ollama client
        self.client = Client(host=self.api_base)

        # Ensure the model exists locally
        self._ensure_model_exists()

    def _list_models(self) -> list[str]:
        """
        List all models available in the Ollama client.

        Returns:
            List of model names.
        """
        local_models = self.client.list()["models"]
        return [model.model for model in local_models]

    def _ensure_model_exists(self):
        """
        Ensure the specified model exists locally. If not, pull it from Ollama.
        """
        try:
            local_models = self._list_models()
            if self.config.model_name_or_path not in local_models:
                logger.warning(
                    f"Model {self.config.model_name_or_path} not found locally. Pulling from Ollama..."
                )
                self.client.pull(self.config.model_name_or_path)
        except Exception as e:
            logger.warning(f"Could not verify model existence: {e}")

    def generate(self, messages: MessageList, **kwargs) -> Any:
        """
        Generate a response from the Ollama LLM.

        Args:
            messages: List of message dicts containing 'role' and 'content'.

        Returns:
            The generated response string, or a list of parsed tool calls if
            the model invoked tools.
        """
        response = self.client.chat(
            model=self.config.model_name_or_path,
            messages=messages,
            options={
                "temperature": kwargs.get("temperature", self.config.temperature),
                "num_predict": kwargs.get("max_tokens", self.config.max_tokens),
                "top_p": kwargs.get("top_p", self.config.top_p),
                "top_k": kwargs.get("top_k", self.config.top_k),
            },
            think=self.config.enable_thinking,
            tools=kwargs.get("tools"),
        )
        logger.info(f"Raw response from Ollama: {response.model_dump_json()}")
        tool_calls = getattr(response.message, "tool_calls", None)
        if isinstance(tool_calls, list) and len(tool_calls) > 0:
            return self.tool_call_parser(tool_calls)

        # Guard against a missing or empty `thinking` field so we never emit "<think>None</think>"
        str_thinking = (
            f"<think>{response.message.thinking}</think>"
            if getattr(response.message, "thinking", None)
            else ""
        )
        str_response = response.message.content or ""
        if self.config.remove_think_prefix:
            return remove_thinking_tags(str_response)
        else:
            return str_thinking + str_response

    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
        if kwargs.get("tools"):
            logger.info("The streaming API does not support tools")
            return

        response = self.client.chat(
            model=kwargs.get("model_name_or_path", self.config.model_name_or_path),
            messages=messages,
            options={
                "temperature": kwargs.get("temperature", self.config.temperature),
                "num_predict": kwargs.get("max_tokens", self.config.max_tokens),
                "top_p": kwargs.get("top_p", self.config.top_p),
                "top_k": kwargs.get("top_k", self.config.top_k),
            },
            think=self.config.enable_thinking,
            stream=True,
        )
        # Stream chunks of text, wrapping any reasoning tokens in <think> tags
        reasoning_started = False
        for chunk in response:
            if getattr(chunk.message, "thinking", None):
                if not reasoning_started and not self.config.remove_think_prefix:
                    yield "<think>"
                    reasoning_started = True
                yield chunk.message.thinking

            if getattr(chunk.message, "content", None):
                if reasoning_started and not self.config.remove_think_prefix:
                    yield "</think>"
                    reasoning_started = False
                yield chunk.message.content

    def tool_call_parser(self, tool_calls: list[Message.ToolCall]) -> list[dict]:
        """Parse tool calls from an Ollama response."""
        return [
            {
                "function_name": tool_call.function.name,
                "arguments": tool_call.function.arguments,
            }
            for tool_call in tool_calls
        ]
memos/llms/openai.py
ADDED

@@ -0,0 +1,222 @@

import json
import time

from collections.abc import Generator

import openai

from openai._types import NOT_GIVEN
from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall

from memos.configs.llm import AzureLLMConfig, OpenAILLMConfig
from memos.llms.base import BaseLLM
from memos.llms.utils import remove_thinking_tags
from memos.log import get_logger
from memos.types import MessageList
from memos.utils import timed_with_status


logger = get_logger(__name__)


class OpenAILLM(BaseLLM):
    """OpenAI LLM class via openai.chat.completions.create."""

    def __init__(self, config: OpenAILLMConfig):
        self.config = config
        self.client = openai.Client(
            api_key=config.api_key, base_url=config.api_base, default_headers=config.default_headers
        )
        logger.info("OpenAI LLM instance initialized")

    @timed_with_status(
        log_prefix="OpenAI LLM",
        log_extra_args=lambda self, messages, **kwargs: {
            "model_name_or_path": kwargs.get("model_name_or_path", self.config.model_name_or_path),
            "messages": messages,
        },
    )
    def generate(self, messages: MessageList, **kwargs) -> str | list[dict]:
        """Generate a response from the OpenAI LLM, optionally overriding generation params.

        Returns the response text, or a list of parsed tool calls if the model invoked tools.
        """
        request_body = {
            "model": kwargs.get("model_name_or_path", self.config.model_name_or_path),
            "messages": messages,
            "temperature": kwargs.get("temperature", self.config.temperature),
            "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
            "top_p": kwargs.get("top_p", self.config.top_p),
            "extra_body": kwargs.get("extra_body", self.config.extra_body),
            "tools": kwargs.get("tools", NOT_GIVEN),
        }
        start_time = time.perf_counter()
        logger.info(f"OpenAI LLM Request body: {request_body}")

        response = self.client.chat.completions.create(**request_body)

        cost_time = time.perf_counter() - start_time
        logger.info(
            f"Request body: {request_body}, Response from OpenAI: {response.model_dump_json()}, Cost time: {cost_time}"
        )

        if not response.choices:
            logger.warning("OpenAI response has no choices")
            return ""

        tool_calls = getattr(response.choices[0].message, "tool_calls", None)
        if isinstance(tool_calls, list) and len(tool_calls) > 0:
            return self.tool_call_parser(tool_calls)
        response_content = response.choices[0].message.content
        reasoning_content = getattr(response.choices[0].message, "reasoning_content", None)
        if isinstance(reasoning_content, str) and reasoning_content:
            reasoning_content = f"<think>{reasoning_content}</think>"
        if self.config.remove_think_prefix:
            return remove_thinking_tags(response_content or "")
        if reasoning_content:
            return reasoning_content + (response_content or "")
        return response_content or ""

    @timed_with_status(
        log_prefix="OpenAI LLM Stream",
        log_extra_args=lambda self, messages, **kwargs: {
            "model_name_or_path": self.config.model_name_or_path
        },
    )
    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
        """Stream a response from the OpenAI LLM with optional reasoning support."""
        if kwargs.get("tools"):
            logger.info("The streaming API does not support tools")
            return

        request_body = {
            "model": self.config.model_name_or_path,
            "messages": messages,
            "stream": True,
            "temperature": kwargs.get("temperature", self.config.temperature),
            "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
            "top_p": kwargs.get("top_p", self.config.top_p),
            "extra_body": kwargs.get("extra_body", self.config.extra_body),
            "tools": kwargs.get("tools", NOT_GIVEN),
        }

        logger.info(f"OpenAI LLM Stream Request body: {request_body}")
        response = self.client.chat.completions.create(**request_body)

        reasoning_started = False

        for chunk in response:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            # Support for custom 'reasoning_content' (present in OpenAI-compatible models like Qwen, DeepSeek)
            if hasattr(delta, "reasoning_content") and delta.reasoning_content:
                if not reasoning_started and not self.config.remove_think_prefix:
                    yield "<think>"
                    reasoning_started = True
                yield delta.reasoning_content
            elif hasattr(delta, "content") and delta.content:
                if reasoning_started and not self.config.remove_think_prefix:
                    yield "</think>"
                    reasoning_started = False
                yield delta.content

        # Ensure we close the <think> block if not already done
        if reasoning_started and not self.config.remove_think_prefix:
            yield "</think>"

    def tool_call_parser(self, tool_calls: list[ChatCompletionMessageToolCall]) -> list[dict]:
        """Parse tool calls from an OpenAI response."""
        return [
            {
                "tool_call_id": tool_call.id,
                "function_name": tool_call.function.name,
                "arguments": json.loads(tool_call.function.arguments),
            }
            for tool_call in tool_calls
        ]


class AzureLLM(BaseLLM):
    """Azure OpenAI LLM class."""

    def __init__(self, config: AzureLLMConfig):
        self.config = config
        self.client = openai.AzureOpenAI(
            azure_endpoint=config.base_url,
            api_version=config.api_version,
            api_key=config.api_key,
        )
        logger.info("Azure LLM instance initialized")

    def generate(self, messages: MessageList, **kwargs) -> str | list[dict]:
        """Generate a response from the Azure OpenAI LLM."""
        response = self.client.chat.completions.create(
            model=self.config.model_name_or_path,
            messages=messages,
            temperature=kwargs.get("temperature", self.config.temperature),
            max_tokens=kwargs.get("max_tokens", self.config.max_tokens),
            top_p=kwargs.get("top_p", self.config.top_p),
            tools=kwargs.get("tools", NOT_GIVEN),
            extra_body=kwargs.get("extra_body", self.config.extra_body),
        )
        logger.info(f"Response from Azure OpenAI: {response.model_dump_json()}")
        if not response.choices:
            logger.warning("Azure OpenAI response has no choices")
            return ""

        if response.choices[0].message.tool_calls:
            return self.tool_call_parser(response.choices[0].message.tool_calls)
        response_content = response.choices[0].message.content
        if self.config.remove_think_prefix:
            return remove_thinking_tags(response_content or "")
        else:
            return response_content or ""

    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
        """Stream a response from the Azure OpenAI LLM with optional reasoning support."""
        if kwargs.get("tools"):
            logger.info("The streaming API does not support tools")
            return

        response = self.client.chat.completions.create(
            model=self.config.model_name_or_path,
            messages=messages,
            stream=True,
            temperature=kwargs.get("temperature", self.config.temperature),
            max_tokens=kwargs.get("max_tokens", self.config.max_tokens),
            top_p=kwargs.get("top_p", self.config.top_p),
            extra_body=kwargs.get("extra_body", self.config.extra_body),
        )

        reasoning_started = False

        for chunk in response:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            # Support for custom 'reasoning_content' (present in OpenAI-compatible models like Qwen, DeepSeek)
            if hasattr(delta, "reasoning_content") and delta.reasoning_content:
                if not reasoning_started and not self.config.remove_think_prefix:
                    yield "<think>"
                    reasoning_started = True
                yield delta.reasoning_content
            elif hasattr(delta, "content") and delta.content:
                if reasoning_started and not self.config.remove_think_prefix:
                    yield "</think>"
                    reasoning_started = False
                yield delta.content

        # Ensure we close the <think> block if not already done
        if reasoning_started and not self.config.remove_think_prefix:
            yield "</think>"

    def tool_call_parser(self, tool_calls: list[ChatCompletionMessageToolCall]) -> list[dict]:
        """Parse tool calls from an Azure OpenAI response."""
        return [
            {
                "tool_call_id": tool_call.id,
                "function_name": tool_call.function.name,
                "arguments": json.loads(tool_call.function.arguments),
            }
            for tool_call in tool_calls
        ]
memos/llms/openai_new.py
ADDED

@@ -0,0 +1,198 @@

import json

from collections.abc import Generator

import openai

from openai._types import NOT_GIVEN
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from openai.types.responses.response_reasoning_item import ResponseReasoningItem

from memos.configs.llm import AzureLLMConfig, OpenAILLMConfig
from memos.llms.base import BaseLLM
from memos.llms.utils import remove_thinking_tags
from memos.log import get_logger
from memos.types import MessageList
from memos.utils import timed


logger = get_logger(__name__)


class OpenAIResponsesLLM(BaseLLM):
    def __init__(self, config: OpenAILLMConfig):
        self.config = config
        self.client = openai.Client(
            api_key=config.api_key, base_url=config.api_base, default_headers=config.default_headers
        )

    @timed(log=True, log_prefix="OpenAI Responses LLM")
    def generate(self, messages: MessageList, **kwargs) -> str | list[dict]:
        response = self.client.responses.create(
            model=kwargs.get("model_name_or_path", self.config.model_name_or_path),
            input=messages,
            temperature=kwargs.get("temperature", self.config.temperature),
            top_p=kwargs.get("top_p", self.config.top_p),
            max_output_tokens=kwargs.get("max_tokens", self.config.max_tokens),
            reasoning={"effort": "low", "summary": "auto"}
            if self.config.enable_thinking
            else NOT_GIVEN,
            tools=kwargs.get("tools", NOT_GIVEN),
            extra_body=kwargs.get("extra_body", self.config.extra_body),
        )
        tool_call_outputs = [
            item for item in response.output if isinstance(item, ResponseFunctionToolCall)
        ]
        if tool_call_outputs:
            return self.tool_call_parser(tool_call_outputs)

        output_text = getattr(response, "output_text", "")
        output_reasoning = [
            item for item in response.output if isinstance(item, ResponseReasoningItem)
        ]
        # Guard against responses without reasoning items (e.g. when thinking is disabled)
        summary = output_reasoning[0].summary if output_reasoning else None

        if self.config.remove_think_prefix:
            return remove_thinking_tags(output_text)
        if summary:
            return f"<think>{summary[0].text}</think>" + output_text
        return output_text

    @timed(log=True, log_prefix="OpenAI Responses LLM")
    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
        if kwargs.get("tools"):
            logger.info("The streaming API does not support tools")
            return

        stream = self.client.responses.create(
            model=kwargs.get("model_name_or_path", self.config.model_name_or_path),
            input=messages,
            temperature=kwargs.get("temperature", self.config.temperature),
            top_p=kwargs.get("top_p", self.config.top_p),
            max_output_tokens=kwargs.get("max_tokens", self.config.max_tokens),
            reasoning={"effort": "low", "summary": "auto"}
            if self.config.enable_thinking
            else NOT_GIVEN,
            extra_body=kwargs.get("extra_body", self.config.extra_body),
            stream=True,
        )

        reasoning_started = False

        for event in stream:
            event_type = getattr(event, "type", "")
            if event_type in (
                "response.reasoning.delta",
                "response.reasoning_summary_text.delta",
            ) and hasattr(event, "delta"):
                if not self.config.remove_think_prefix:
                    if not reasoning_started:
                        yield "<think>"
                        reasoning_started = True
                    yield event.delta
            elif event_type == "response.output_text.delta" and hasattr(event, "delta"):
                if reasoning_started and not self.config.remove_think_prefix:
                    yield "</think>"
                    reasoning_started = False
                yield event.delta

        if reasoning_started and not self.config.remove_think_prefix:
            yield "</think>"

    def tool_call_parser(self, tool_calls: list[ResponseFunctionToolCall]) -> list[dict]:
        """Parse tool calls from an OpenAI Responses API response."""
        return [
            {
                "tool_call_id": tool_call.call_id,
                "function_name": tool_call.name,
                "arguments": json.loads(tool_call.arguments),
            }
            for tool_call in tool_calls
        ]


class AzureResponsesLLM(BaseLLM):
    def __init__(self, config: AzureLLMConfig):
        self.config = config
        self.client = openai.AzureOpenAI(
            azure_endpoint=config.base_url,
            api_version=config.api_version,
            api_key=config.api_key,
        )

    def generate(self, messages: MessageList, **kwargs) -> str:
        response = self.client.responses.create(
            model=self.config.model_name_or_path,
            input=messages,
            temperature=kwargs.get("temperature", self.config.temperature),
            top_p=kwargs.get("top_p", self.config.top_p),
            max_output_tokens=kwargs.get("max_tokens", self.config.max_tokens),
            tools=kwargs.get("tools", NOT_GIVEN),
            extra_body=kwargs.get("extra_body", self.config.extra_body),
            reasoning={"effort": "low", "summary": "auto"}
            if self.config.enable_thinking
            else NOT_GIVEN,
        )

        output_text = getattr(response, "output_text", "")
        output_reasoning = [
            item for item in response.output if isinstance(item, ResponseReasoningItem)
        ]
        # Guard against responses without reasoning items (e.g. when thinking is disabled)
        summary = output_reasoning[0].summary if output_reasoning else None

        if self.config.remove_think_prefix:
            return remove_thinking_tags(output_text)
        if summary:
            return f"<think>{summary[0].text}</think>" + output_text
        return output_text

    def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, None, None]:
        if kwargs.get("tools"):
            logger.info("The streaming API does not support tools")
            return

        stream = self.client.responses.create(
            model=self.config.model_name_or_path,
            input=messages,
            temperature=kwargs.get("temperature", self.config.temperature),
            top_p=kwargs.get("top_p", self.config.top_p),
            max_output_tokens=kwargs.get("max_tokens", self.config.max_tokens),
            extra_body=kwargs.get("extra_body", self.config.extra_body),
            stream=True,
            reasoning={"effort": "low", "summary": "auto"}
            if self.config.enable_thinking
            else NOT_GIVEN,
        )

        reasoning_started = False

        for event in stream:
            event_type = getattr(event, "type", "")
            if event_type in (
                "response.reasoning.delta",
                "response.reasoning_summary_text.delta",
            ) and hasattr(event, "delta"):
                if not self.config.remove_think_prefix:
                    if not reasoning_started:
                        yield "<think>"
                        reasoning_started = True
                    yield event.delta
            elif event_type == "response.output_text.delta" and hasattr(event, "delta"):
                if reasoning_started and not self.config.remove_think_prefix:
                    yield "</think>"
                    reasoning_started = False
                yield event.delta

        if reasoning_started and not self.config.remove_think_prefix:
            yield "</think>"

    def tool_call_parser(self, tool_calls: list[ResponseFunctionToolCall]) -> list[dict]:
        """Parse tool calls from an OpenAI Responses API response."""
        return [
            {
                "tool_call_id": tool_call.call_id,
                "function_name": tool_call.name,
                "arguments": json.loads(tool_call.arguments),
            }
            for tool_call in tool_calls
        ]
memos/llms/qwen.py
ADDED

@@ -0,0 +1,13 @@

from memos.configs.llm import QwenLLMConfig
from memos.llms.openai import OpenAILLM
from memos.log import get_logger


logger = get_logger(__name__)


class QwenLLM(OpenAILLM):
    """Qwen (DashScope) LLM class via OpenAI-compatible API."""

    def __init__(self, config: QwenLLMConfig):
        super().__init__(config)