MemoryOS 2.0.3 (memoryos-2.0.3-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
memos/memories/textual/tree_text_memory/retrieve/reranker.py
@@ -0,0 +1,111 @@
import numpy as np

from memos.embedders.factory import OllamaEmbedder
from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
from memos.memories.textual.item import TextualMemoryItem
from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal


def batch_cosine_similarity(
    query_vec: list[float], candidate_vecs: list[list[float]]
) -> list[float]:
    """
    Compute cosine similarity between a single query vector and multiple candidate vectors using NumPy.

    Args:
        query_vec (list[float]): The query embedding.
        candidate_vecs (list[list[float]]): A list of memory embeddings.

    Returns:
        list[float]: Cosine similarity scores for each candidate.
    """
    query = np.array(query_vec)
    candidates = np.array(candidate_vecs)

    # Normalize query and candidates
    query_norm = np.linalg.norm(query)
    candidates_norm = np.linalg.norm(candidates, axis=1)

    # Compute dot products
    dot_products = np.dot(candidates, query)

    # Avoid division by zero
    eps = 1e-10
    similarities = dot_products / (candidates_norm * query_norm + eps)

    return similarities.tolist()


class MemoryReranker:
    """
    Rank retrieved memory cards by structural priority and contextual similarity.
    """

    def __init__(self, llm: OpenAILLM | OllamaLLM | AzureLLM, embedder: OllamaEmbedder):
        self.llm = llm
        self.embedder = embedder

        # Structural priority weights
        self.level_weights = {
            "topic": 1.0,
            "concept": 1.0,
            "fact": 1.0,
        }

    def rerank(
        self,
        query: str,
        query_embedding: list[float],
        graph_results: list,
        top_k: int,
        parsed_goal: ParsedTaskGoal,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rerank memory items by relevance to task.

        Args:
            query (str): Original task.
            query_embedding(list[float]): embedding of query
            graph_results (list): Combined retrieval results.
            top_k (int): Number of top results to return.
            parsed_goal (dict): Structured task representation.

        Returns:
            list(tuple): Ranked list of memory items with similarity score.
        """
        # Step 1: Filter out items without embeddings
        items_with_embeddings = [item for item in graph_results if item.metadata.embedding]
        embeddings = [item.metadata.embedding for item in items_with_embeddings]

        if not embeddings:
            return [(item, 0.5) for item in graph_results[:top_k]]

        # Step 2: Compute cosine similarities
        similarity_scores = batch_cosine_similarity(query_embedding, embeddings)

        # Step 3: Apply structural weight boost
        def get_weight(item: TextualMemoryItem) -> float:
            level = item.metadata.background
            return self.level_weights.get(level, 1.0)

        weighted_scores = [
            sim * get_weight(item)
            for sim, item in zip(similarity_scores, items_with_embeddings, strict=False)
        ]

        # Step 4: Sort by weighted score
        sorted_items = sorted(
            zip(items_with_embeddings, weighted_scores, strict=False),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Step 5: Return top-k items with fallback
        top_items = sorted_items[:top_k]

        if len(top_items) < top_k:
            selected_items = [item for item, _ in top_items]
            remaining = [(item, -1.0) for item in graph_results if item not in selected_items]
            top_items.extend(remaining[: top_k - len(top_items)])

        return top_items  # list of (item, score)
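For context, a minimal usage sketch of the reranker above, assuming this hunk corresponds to memos/memories/textual/tree_text_memory/retrieve/reranker.py from the file list. DummyMeta and DummyItem are invented stand-ins for real TextualMemoryItem objects (only the metadata.embedding and metadata.background fields the reranker reads are modeled), and the embeddings and query are made up for illustration; rerank() never touches self.llm or self.embedder, so None placeholders suffice here.

from dataclasses import dataclass

from memos.memories.textual.tree_text_memory.retrieve.reranker import MemoryReranker
from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal


@dataclass
class DummyMeta:          # hypothetical stand-in, not a package class
    embedding: list
    background: str


@dataclass
class DummyItem:          # hypothetical stand-in, not a package class
    memory: str
    metadata: DummyMeta


items = [
    DummyItem("likes hiking", DummyMeta([1.0, 0.0], "topic")),
    DummyItem("owns a cat", DummyMeta([0.0, 1.0], "fact")),
    DummyItem("no embedding yet", DummyMeta([], "concept")),
]

reranker = MemoryReranker(llm=None, embedder=None)
ranked = reranker.rerank(
    query="what outdoor activities does the user enjoy?",
    query_embedding=[0.9, 0.1],
    graph_results=items,
    top_k=3,
    parsed_goal=ParsedTaskGoal(),
)
# ranked -> roughly [(hiking item, 0.99), (cat item, 0.11), (no-embedding item, -1.0)]
# The last pair is appended by the Step-5 fallback, since only two items carried embeddings.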
memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py
@@ -0,0 +1,16 @@
from dataclasses import dataclass, field


@dataclass
class ParsedTaskGoal:
    """
    Goal structure for both Fast & LLM.
    """

    memories: list[str] = field(default_factory=list)
    keys: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    rephrased_query: str | None = None
    internet_search: bool = False
    goal_type: str | None = None  # e.g., 'default', 'explanation', etc.
    context: str = ""
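For illustration, a ParsedTaskGoal is plain data (presumably filled in by the task_goal_parser.py listed above) and can be constructed directly; every field value below is invented, not output from the package:

from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal

goal = ParsedTaskGoal(
    memories=["user enjoys weekend hiking"],          # candidate memory snippets
    keys=["hiking", "weekend"],                       # key terms for retrieval
    tags=["hobby"],
    rephrased_query="Which outdoor activities does the user enjoy?",
    internet_search=False,
    goal_type="default",
    context="conversation about weekend plans",
)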
memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
@@ -0,0 +1,472 @@
import json
import re

from pathlib import Path
from typing import Any

import numpy as np

from memos.dependency import require_python_package
from memos.log import get_logger


logger = get_logger(__name__)


def parse_structured_output(content: str) -> dict[str, str | list[str]]:
    """
    Parse structured text containing arbitrary XML-like tags in the format <tag_name>content</tag_name>.

    This function extracts all tagged content and automatically determines whether each tag's content
    should be returned as a string or a list of strings based on its format:

    - If the content consists of multiple non-empty lines, and each line starts with "- ",
      it is interpreted as a list (e.g., a bullet-point list of phrases).
    - Otherwise, the entire content is returned as a single string.

    The function is generic and supports any tag name (e.g., <can_answer>, <reason>, <missing_phrases>).

    Args:
        content (str): Raw text containing one or more <tag_name>...</tag_name> blocks.

    Returns:
        Dict[str, Union[str, List[str]]]: A dictionary where keys are tag names and values are either:
            - a string (for single-line or non-list content)
            - a list of strings (for content formatted as bullet points with "- " prefix)

    Example:
        Input:
            <can_answer>
            true
            </can_answer>
            <missing_phrases>
            - phrase 1
            - phrase 2
            </missing_phrases>

        Output:
            {
                'can_answer': 'true',
                'missing_phrases': ['phrase 1', 'phrase 2']
            }
    """
    result = {}

    # Regex pattern to match any tag with name and content (supports multi-line content via DOTALL)
    # Pattern explanation:
    # <([a-zA-Z_][a-zA-Z0-9_]*)> : Captures valid tag name (letter/underscore + alphanumeric)
    # (.*?)                      : Non-greedy capture of content (including newlines)
    # </\1>                      : Closing tag matching the captured name
    tag_pattern = r"<([a-zA-Z_][a-zA-Z0-9_]*)>(.*?)</\1>"
    matches = re.findall(tag_pattern, content, re.DOTALL)

    for tag_name, raw_content in matches:
        content = raw_content.strip()  # Remove leading/trailing whitespace

        # If content is empty, store as empty string
        if not content:
            result[tag_name] = ""
            continue

        # Split content into lines and filter out empty ones
        lines = [line.strip() for line in content.splitlines() if line.strip()]

        # Check if content is formatted as a bullet list: all non-empty lines start with "- "
        if lines and all(line.startswith("-") for line in lines):
            # Extract the text after the "- " prefix from each line
            items = [line[1:].strip() for line in lines]
            result[tag_name] = items
        else:
            # Treat as plain string (preserve original formatting if multi-line)
            result[tag_name] = content

    return result


def find_project_root(marker=".git"):
    """Find the project root directory by marking the file"""
    current = Path(__file__).resolve()
    while current != current.parent:
        if (current / marker).exists():
            return current
        current = current.parent
    return Path(".")


class StopwordManager:
    _stopwords = None

    @classmethod
    def _load_stopwords(cls):
        """load stopwords for once"""
        if cls._stopwords is not None:
            return cls._stopwords

        stopwords = set()
        stopwords = cls._load_default_stopwords()

        cls._stopwords = stopwords
        return stopwords

    @classmethod
    def _load_default_stopwords(cls):
        """load stop words"""
        chinese_stop_words = {
            "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都", "一", "一个",
            "上", "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好",
            "自己", "这", "那", "他", "她", "它", "我们", "你们", "他们", "这个", "那个",
            "这些", "那些", "怎么", "什么", "为什么", "如何", "哪里", "谁", "几", "多少",
            "这样", "那样", "这么", "那么",
        }
        english_stop_words = {
            "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of",
            "with", "by", "as", "is", "are", "was", "were", "be", "been", "have", "has",
            "had", "do", "does", "did", "will", "would", "could", "should", "may",
            "might", "must", "this", "that", "these", "those", "i", "you", "he", "she",
            "it", "we", "they", "me", "him", "her", "us", "them", "my", "your", "his",
            "its", "our", "their", "mine", "yours", "hers", "ours", "theirs",
        }
        chinese_punctuation = {
            ",", "。", "!", "?", ";", ":", "「", "」", "『", "』", "【", "】", "(", ")",
            "《", "》", "—", "…", "~", "·", "、", "“", "”", "‘", "’", "〈", "〉", "〖",
            "〗", "〝", "〞", "{", "}", "〔", "〕", "¡", "¿",
        }
        english_punctuation = {
            ",", ".", "!", "?", ";", ":", '"', "'", "(", ")", "[", "]", "{", "}", "<",
            ">", "/", "\\", "|", "-", "_", "=", "+", "@", "#", "$", "%", "^", "&", "*",
            "~", "`", "¡", "¿",
        }
        numbers = {
            "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
            "零", "一", "二", "三", "四", "五", "六", "七", "八", "九", "十", "百", "千", "万", "亿",
        }
        whitespace = {" ", "\t", "\n", "\r", "\f", "\v"}

        return (
            chinese_stop_words
            | english_stop_words
            | chinese_punctuation
            | english_punctuation
            | numbers
            | whitespace
        )

    @classmethod
    def get_stopwords(cls):
        if cls._stopwords is None:
            cls._load_stopwords()
        return cls._stopwords

    @classmethod
    def filter_words(cls, words):
        if cls._stopwords is None:
            cls._load_stopwords()
        return [word for word in words if word not in cls._stopwords and word.strip()]

    @classmethod
    def is_stopword(cls, word):
        if cls._stopwords is None:
            cls._load_stopwords()
        return word in cls._stopwords


class FastTokenizer:
    def __init__(self, use_jieba=True, use_stopwords=True):
        self.use_jieba = use_jieba
        self.use_stopwords = use_stopwords
        if self.use_stopwords:
            self.stopword_manager = StopwordManager

    def tokenize_mixed(self, text, **kwargs):
        """fast tokenizer"""
        if self._is_chinese(text):
            return self._tokenize_chinese(text)
        else:
            return self._tokenize_english(text)

    def _is_chinese(self, text):
        """check if chinese"""
        chinese_chars = sum(1 for char in text if "\u4e00" <= char <= "\u9fff")
        return chinese_chars / max(len(text), 1) > 0.3

    @require_python_package(
        import_name="jieba",
        install_command="pip install jieba",
        install_link="https://github.com/fxsjy/jieba",
    )
    def _tokenize_chinese(self, text):
        """split zh jieba"""
        import jieba

        tokens = jieba.lcut(text) if self.use_jieba else list(text)
        tokens = [token.strip() for token in tokens if token.strip()]
        if self.use_stopwords:
            return self.stopword_manager.filter_words(tokens)

        return tokens

    def _tokenize_english(self, text):
        """split zh regex"""
        tokens = re.findall(r"\b[a-zA-Z0-9]+\b", text.lower())
        if self.use_stopwords:
            return self.stopword_manager.filter_words(tokens)
        return tokens


def parse_json_result(response_text):
    try:
        json_start = response_text.find("{")
        response_text = response_text[json_start:]
        response_text = response_text.replace("```", "").strip()
        if not response_text.endswith("}"):
            response_text += "}"
        return json.loads(response_text)
    except json.JSONDecodeError as e:
        logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
        return {}
    except Exception as e:
        logger.error(f"[JSONParse] Unexpected error: {e}")
        return {}


def detect_lang(text):
    try:
        if not text or not isinstance(text, str):
            return "en"
        chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
        chinese_chars = re.findall(chinese_pattern, text)
        if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3:
            return "zh"
        return "en"
    except Exception:
        return "en"


def format_memory_item(memory_data: Any) -> dict[str, Any]:
    memory = memory_data.model_dump()
    memory_id = memory["id"]
    ref_id = f"[{memory_id.split('-')[0]}]"

    memory["ref_id"] = ref_id
    memory["metadata"]["embedding"] = []
    memory["metadata"]["sources"] = []
    memory["metadata"]["usage"] = []
    memory["metadata"]["ref_id"] = ref_id
    memory["metadata"]["id"] = memory_id
    memory["metadata"]["memory"] = memory["memory"]

    return memory


def find_best_unrelated_subgroup(sentences: list, similarity_matrix: list, bar: float = 0.8):
    assert len(sentences) == len(similarity_matrix)

    num_sentence = len(sentences)
    selected_sentences = []
    selected_indices = []
    for i in range(num_sentence):
        can_add = True
        for j in selected_indices:
            if similarity_matrix[i][j] > bar:
                can_add = False
                break
        if can_add:
            selected_sentences.append(i)
            selected_indices.append(i)
    return selected_sentences, selected_indices


def cosine_similarity_matrix(embeddings: list[list[float]]) -> list[list[float]]:
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    x_normalized = embeddings / norms
    similarity_matrix = np.dot(x_normalized, x_normalized.T)
    return similarity_matrix
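A short usage sketch of the helpers in this last hunk, assuming it corresponds to memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py from the file list. All inputs are invented for illustration; jieba is only needed for the Chinese tokenization branch, which this sketch does not call.

from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import (
    FastTokenizer,
    cosine_similarity_matrix,
    detect_lang,
    find_best_unrelated_subgroup,
    parse_structured_output,
)

# Tag-structured LLM output: single-line tags come back as strings, "- " lists as lists.
llm_output = """
<can_answer>
true
</can_answer>
<missing_phrases>
- favorite trail
- hiking frequency
</missing_phrases>
"""
parsed = parse_structured_output(llm_output)
# {'can_answer': 'true', 'missing_phrases': ['favorite trail', 'hiking frequency']}

# Language detection plus stopword-filtered tokenization (English path, regex-based).
tokenizer = FastTokenizer(use_jieba=True, use_stopwords=True)
print(detect_lang("用户喜欢周末去徒步"))                           # 'zh'
print(tokenizer.tokenize_mixed("The user likes weekend hiking"))  # ['user', 'likes', 'weekend', 'hiking']

# Greedy selection of mutually dissimilar sentences via the similarity-matrix helpers.
embs = [[1.0, 0.0], [0.99, 0.14], [0.0, 1.0]]
sim = cosine_similarity_matrix(embs)
sentences = ["hiking", "goes hiking", "owns a cat"]
kept, idx = find_best_unrelated_subgroup(sentences, sim, bar=0.8)
# idx -> [0, 2]; note that both return values hold indices, not the sentence strings.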