MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,1192 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
import difflib
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
9
|
+
|
|
10
|
+
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
|
11
|
+
|
|
12
|
+
from memos.configs.memory import MemFeedbackConfig
|
|
13
|
+
from memos.context.context import ContextThreadPoolExecutor
|
|
14
|
+
from memos.dependency import require_python_package
|
|
15
|
+
from memos.embedders.factory import EmbedderFactory, OllamaEmbedder
|
|
16
|
+
from memos.graph_dbs.factory import GraphStoreFactory, PolarDBGraphDB
|
|
17
|
+
from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM
|
|
18
|
+
from memos.log import get_logger
|
|
19
|
+
from memos.mem_feedback.base import BaseMemFeedback
|
|
20
|
+
from memos.mem_feedback.utils import (
|
|
21
|
+
extract_bracket_content,
|
|
22
|
+
extract_square_brackets_content,
|
|
23
|
+
general_split_into_chunks,
|
|
24
|
+
make_mem_item,
|
|
25
|
+
should_keep_update,
|
|
26
|
+
split_into_chunks,
|
|
27
|
+
)
|
|
28
|
+
from memos.mem_reader.factory import MemReaderFactory
|
|
29
|
+
from memos.mem_reader.read_multi_modal import detect_lang
|
|
30
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
31
|
+
from memos.memories.textual.tree_text_memory.organize.manager import (
|
|
32
|
+
MemoryManager,
|
|
33
|
+
extract_working_binding_ids,
|
|
34
|
+
)
|
|
35
|
+
from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import StopwordManager
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from memos.memories.textual.simple_preference import SimplePreferenceTextMemory
|
|
40
|
+
from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher
|
|
41
|
+
from memos.templates.mem_feedback_prompts import (
|
|
42
|
+
FEEDBACK_ANSWER_PROMPT,
|
|
43
|
+
FEEDBACK_ANSWER_PROMPT_ZH,
|
|
44
|
+
FEEDBACK_JUDGEMENT_PROMPT,
|
|
45
|
+
FEEDBACK_JUDGEMENT_PROMPT_ZH,
|
|
46
|
+
KEYWORDS_REPLACE,
|
|
47
|
+
KEYWORDS_REPLACE_ZH,
|
|
48
|
+
OPERATION_UPDATE_JUDGEMENT,
|
|
49
|
+
OPERATION_UPDATE_JUDGEMENT_ZH,
|
|
50
|
+
UPDATE_FORMER_MEMORIES,
|
|
51
|
+
UPDATE_FORMER_MEMORIES_ZH,
|
|
52
|
+
)
|
|
53
|
+
from memos.types import MessageDict
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Prompt templates bundled per task and language ("en"/"zh").
# The language key is chosen at runtime via detect_lang() on the feedback text.
FEEDBACK_PROMPT_DICT = {
    "if_kw_replace": {"en": KEYWORDS_REPLACE, "zh": KEYWORDS_REPLACE_ZH},
    "judge": {"en": FEEDBACK_JUDGEMENT_PROMPT, "zh": FEEDBACK_JUDGEMENT_PROMPT_ZH},
    "compare": {"en": UPDATE_FORMER_MEMORIES, "zh": UPDATE_FORMER_MEMORIES_ZH},
    "compare_judge": {"en": OPERATION_UPDATE_JUDGEMENT, "zh": OPERATION_UPDATE_JUDGEMENT_ZH},
    "generation": {"en": FEEDBACK_ANSWER_PROMPT, "zh": FEEDBACK_ANSWER_PROMPT_ZH},
}

logger = get_logger(__name__)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class MemFeedback(BaseMemFeedback):
|
|
68
|
+
    def __init__(self, config: MemFeedbackConfig):
        """
        Initialize the MemFeedback with configuration.

        Builds the LLM, embedder, graph store, mem reader and memory manager
        from the supplied config. Search-related collaborators (searcher,
        reranker, pref_mem) are left as None here — presumably injected later
        by the owning service; verify against callers.

        Args:
            config: Configuration object for the MemFeedback
        """
        self.config = config
        self.llm: OpenAILLM | OllamaLLM | AzureLLM = LLMFactory.from_config(config.extractor_llm)
        self.embedder: OllamaEmbedder = EmbedderFactory.from_config(config.embedder)
        self.graph_store: PolarDBGraphDB = GraphStoreFactory.from_config(config.graph_db)
        # Pass graph_store to mem_reader for recall operations (deduplication, conflict detection)
        self.mem_reader = MemReaderFactory.from_config(config.mem_reader, graph_db=self.graph_store)

        self.is_reorganize = config.reorganize
        self.memory_manager: MemoryManager = MemoryManager(
            self.graph_store,
            self.embedder,
            self.llm,
            # Fall back to default per-layer capacities when config.memory_size is falsy.
            memory_size=config.memory_size
            or {
                "WorkingMemory": 20,
                "LongTermMemory": 1500,
                "UserMemory": 480,
            },
            is_reorganize=self.is_reorganize,
        )
        # Class reference (not an instance); used for stopword filtering.
        self.stopword_manager = StopwordManager
        # Lazily-provided collaborators; None until wired up elsewhere.
        self.searcher: Searcher = None
        self.reranker = None
        self.pref_mem: SimplePreferenceTextMemory = None
        self.pref_feedback: bool = False
        # Flag presumably flipped once DB indexes are verified — confirm usage downstream.
        self.DB_IDX_READY = False
|
|
101
|
+
|
|
102
|
+
@require_python_package(
|
|
103
|
+
import_name="jieba",
|
|
104
|
+
install_command="pip install jieba",
|
|
105
|
+
install_link="https://github.com/fxsjy/jieba",
|
|
106
|
+
)
|
|
107
|
+
def _tokenize_chinese(self, text):
|
|
108
|
+
"""split zh jieba"""
|
|
109
|
+
import jieba
|
|
110
|
+
|
|
111
|
+
tokens = jieba.lcut(text)
|
|
112
|
+
tokens = [token.strip() for token in tokens if token.strip()]
|
|
113
|
+
return self.stopword_manager.filter_words(tokens)
|
|
114
|
+
|
|
115
|
+
    @retry(stop=stop_after_attempt(4), wait=wait_random_exponential(multiplier=1, max=10))
    def _embed_once(self, texts):
        # Thin wrapper around the embedder so tenacity can retry transient
        # failures: up to 4 attempts with randomized exponential backoff.
        return self.embedder.embed(texts)
|
|
118
|
+
|
|
119
|
+
@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(multiplier=1, min=4, max=10))
|
|
120
|
+
def _retry_db_operation(self, operation):
|
|
121
|
+
try:
|
|
122
|
+
return operation()
|
|
123
|
+
except Exception as e:
|
|
124
|
+
logger.error(
|
|
125
|
+
f"[0107 Feedback Core: _retry_db_operation] DB operation failed: {e}", exc_info=True
|
|
126
|
+
)
|
|
127
|
+
raise
|
|
128
|
+
|
|
129
|
+
def _batch_embed(self, texts: list[str], embed_bs: int = 5):
|
|
130
|
+
results = []
|
|
131
|
+
dim = self.embedder.config.embedding_dims
|
|
132
|
+
|
|
133
|
+
for i in range(0, len(texts), embed_bs):
|
|
134
|
+
batch = texts[i : i + embed_bs]
|
|
135
|
+
try:
|
|
136
|
+
results.extend(self._embed_once(batch))
|
|
137
|
+
except Exception as e:
|
|
138
|
+
logger.error(
|
|
139
|
+
f"[0107 Feedback Core: process_feedback_core] Embedding batch failed, Cover with all zeros: {len(batch)} entries: {e}"
|
|
140
|
+
)
|
|
141
|
+
results.extend([[0.0] * dim for _ in range(len(batch))])
|
|
142
|
+
return results
|
|
143
|
+
|
|
144
|
+
def _pure_add(self, user_name: str, feedback_content: str, feedback_time: str, info: dict):
|
|
145
|
+
"""
|
|
146
|
+
Directly add new memory
|
|
147
|
+
"""
|
|
148
|
+
scene_data = [[{"role": "user", "content": feedback_content, "chat_time": feedback_time}]]
|
|
149
|
+
memories = self.mem_reader.get_memory(scene_data, type="chat", info=info)
|
|
150
|
+
to_add_memories = [item for scene in memories for item in scene]
|
|
151
|
+
added_ids = self._retry_db_operation(
|
|
152
|
+
lambda: self.memory_manager.add(to_add_memories, user_name=user_name, use_batch=False)
|
|
153
|
+
)
|
|
154
|
+
logger.info(
|
|
155
|
+
f"[0107 Feedback Core: _pure_add] Pure added {len(added_ids)} memories for user {user_name}."
|
|
156
|
+
)
|
|
157
|
+
return {
|
|
158
|
+
"record": {
|
|
159
|
+
"add": [
|
|
160
|
+
{
|
|
161
|
+
"id": _id,
|
|
162
|
+
"text": added_mem.memory,
|
|
163
|
+
"source_doc_id": (
|
|
164
|
+
added_mem.metadata.file_ids[0]
|
|
165
|
+
if hasattr(added_mem.metadata, "file_ids")
|
|
166
|
+
and isinstance(added_mem.metadata.file_ids, list)
|
|
167
|
+
and added_mem.metadata.file_ids
|
|
168
|
+
else None
|
|
169
|
+
),
|
|
170
|
+
}
|
|
171
|
+
for _id, added_mem in zip(added_ids, to_add_memories, strict=False)
|
|
172
|
+
],
|
|
173
|
+
"update": [],
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
def _keyword_replace_judgement(self, feedback_content: str) -> dict | None:
|
|
178
|
+
"""
|
|
179
|
+
Determine whether it is keyword replacement
|
|
180
|
+
"""
|
|
181
|
+
lang = detect_lang(feedback_content)
|
|
182
|
+
template = FEEDBACK_PROMPT_DICT["if_kw_replace"][lang]
|
|
183
|
+
prompt = template.format(
|
|
184
|
+
user_feedback=feedback_content,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
judge_res = self._get_llm_response(prompt, load_type="bracket")
|
|
188
|
+
if judge_res:
|
|
189
|
+
return judge_res
|
|
190
|
+
else:
|
|
191
|
+
logger.warning(
|
|
192
|
+
"[0107 Feedback Core: _feedback_judgement] feedback judgement failed, return []"
|
|
193
|
+
)
|
|
194
|
+
return {}
|
|
195
|
+
|
|
196
|
+
def _feedback_judgement(
|
|
197
|
+
self, chat_history: list[MessageDict], feedback_content: str, feedback_time: str = ""
|
|
198
|
+
) -> dict | None:
|
|
199
|
+
"""
|
|
200
|
+
Generate a judgement for a given feedback.
|
|
201
|
+
"""
|
|
202
|
+
lang = detect_lang(feedback_content)
|
|
203
|
+
template = FEEDBACK_PROMPT_DICT["judge"][lang]
|
|
204
|
+
chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]]
|
|
205
|
+
chat_history_str = "\n".join(chat_history_lis)
|
|
206
|
+
prompt = template.format(
|
|
207
|
+
chat_history=chat_history_str,
|
|
208
|
+
user_feedback=feedback_content,
|
|
209
|
+
feedback_time=feedback_time,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
judge_res = self._get_llm_response(prompt, load_type="square_bracket")
|
|
213
|
+
if judge_res:
|
|
214
|
+
return judge_res
|
|
215
|
+
else:
|
|
216
|
+
logger.warning(
|
|
217
|
+
"[0107 Feedback Core: _feedback_judgement] feedback judgement failed, return []"
|
|
218
|
+
)
|
|
219
|
+
return []
|
|
220
|
+
|
|
221
|
+
def _single_add_operation(
|
|
222
|
+
self,
|
|
223
|
+
old_memory_item: TextualMemoryItem | None,
|
|
224
|
+
new_memory_item: TextualMemoryItem,
|
|
225
|
+
user_id: str,
|
|
226
|
+
user_name: str,
|
|
227
|
+
async_mode: str = "sync",
|
|
228
|
+
) -> dict:
|
|
229
|
+
"""
|
|
230
|
+
Individual addition operations
|
|
231
|
+
"""
|
|
232
|
+
if old_memory_item:
|
|
233
|
+
to_add_memory = old_memory_item.model_copy(deep=True)
|
|
234
|
+
to_add_memory.metadata.key = new_memory_item.metadata.key
|
|
235
|
+
to_add_memory.metadata.tags = new_memory_item.metadata.tags
|
|
236
|
+
to_add_memory.memory = new_memory_item.memory
|
|
237
|
+
to_add_memory.metadata.embedding = new_memory_item.metadata.embedding
|
|
238
|
+
|
|
239
|
+
to_add_memory.metadata.user_id = new_memory_item.metadata.user_id
|
|
240
|
+
to_add_memory.metadata.created_at = to_add_memory.metadata.updated_at = (
|
|
241
|
+
datetime.now().isoformat()
|
|
242
|
+
)
|
|
243
|
+
to_add_memory.metadata.background = new_memory_item.metadata.background
|
|
244
|
+
else:
|
|
245
|
+
to_add_memory = new_memory_item.model_copy(deep=True)
|
|
246
|
+
to_add_memory.metadata.created_at = to_add_memory.metadata.updated_at = (
|
|
247
|
+
datetime.now().isoformat()
|
|
248
|
+
)
|
|
249
|
+
to_add_memory.metadata.background = new_memory_item.metadata.background
|
|
250
|
+
|
|
251
|
+
to_add_memory.id = ""
|
|
252
|
+
added_ids = self._retry_db_operation(
|
|
253
|
+
lambda: self.memory_manager.add([to_add_memory], user_name=user_name, use_batch=False)
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
logger.info(f"[Memory Feedback ADD] memory id: {added_ids!s}")
|
|
257
|
+
return {
|
|
258
|
+
"id": added_ids[0],
|
|
259
|
+
"text": to_add_memory.memory,
|
|
260
|
+
"source_doc_id": (
|
|
261
|
+
to_add_memory.metadata.file_ids[0]
|
|
262
|
+
if hasattr(to_add_memory.metadata, "file_ids")
|
|
263
|
+
and isinstance(to_add_memory.metadata.file_ids, list)
|
|
264
|
+
and to_add_memory.metadata.file_ids
|
|
265
|
+
else None
|
|
266
|
+
),
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
    def _single_update_operation(
        self,
        old_memory_item: TextualMemoryItem,
        new_memory_item: TextualMemoryItem,
        user_id: str,
        user_name: str,
        async_mode: str = "sync",
        operation: dict | None = None,
    ) -> dict:
        """
        Apply a single update, replacing *old_memory_item* with the content of
        *new_memory_item*.

        Preference memories are delegated to ``_single_update_pref``. Working
        memories are updated in place; other memory types get a new node and
        the old one is archived.

        Args:
            old_memory_item: Existing memory being superseded.
            new_memory_item: Item carrying the replacement content.
            user_id: Acting user's id (forwarded to sub-operations).
            user_name: Owner of the graph nodes.
            async_mode: Forwarded to the add path; "sync" by default.
            operation: Optional dict; a non-empty operation["text"] overrides
                the new content and is re-embedded.

        Returns:
            Record dict with the new/updated id, text, source doc, and the
            archived (old) id and its original text.
        """
        # Preference memories live in a separate store and follow their own path.
        if "preference" in old_memory_item.metadata.__dict__:
            logger.info(
                f"[0107 Feedback Core: _single_update_operation] pref_memory: {old_memory_item.id}"
            )
            return self._single_update_pref(
                old_memory_item, new_memory_item, user_id, user_name, operation
            )

        memory_type = old_memory_item.metadata.memory_type
        # First file id when metadata carries a non-empty file_ids list.
        source_doc_id = (
            old_memory_item.metadata.file_ids[0]
            if hasattr(old_memory_item.metadata, "file_ids")
            and isinstance(old_memory_item.metadata.file_ids, list)
            and old_memory_item.metadata.file_ids
            else None
        )
        # An explicit replacement text overrides the new item's content and is
        # re-embedded so the stored vector matches.
        if operation and "text" in operation and operation["text"]:
            new_memory_item.memory = operation["text"]
            new_memory_item.metadata.embedding = self._batch_embed([operation["text"]])[0]

        if memory_type == "WorkingMemory":
            # In-place update; the replaced node id is kept in covered_history.
            fields = {
                "memory": new_memory_item.memory,
                "key": new_memory_item.metadata.key,
                "tags": new_memory_item.metadata.tags,
                "embedding": new_memory_item.metadata.embedding,
                "background": new_memory_item.metadata.background,
                "covered_history": old_memory_item.id,
            }
            self.graph_store.update_node(old_memory_item.id, fields=fields, user_name=user_name)
            item_id = old_memory_item.id
        else:
            # Add a fresh node, link it back to the replaced node, then archive
            # the old node (order matters: link before archive).
            done = self._single_add_operation(
                old_memory_item, new_memory_item, user_id, user_name, async_mode
            )
            item_id = done.get("id")
            self.graph_store.update_node(
                item_id, {"covered_history": old_memory_item.id}, user_name=user_name
            )
            self.graph_store.update_node(
                old_memory_item.id, {"status": "archived"}, user_name=user_name
            )

        logger.info(
            f"[Memory Feedback UPDATE] New Add:{item_id} | Set archived:{old_memory_item.id} | memory_type: {memory_type}"
        )

        return {
            "id": item_id,
            "text": new_memory_item.memory,
            "source_doc_id": source_doc_id,
            "archived_id": old_memory_item.id,
            "origin_memory": old_memory_item.memory,
        }
|
|
335
|
+
|
|
336
|
+
def _single_update_pref(
|
|
337
|
+
self,
|
|
338
|
+
old_memory_item: TextualMemoryItem,
|
|
339
|
+
new_memory_item: TextualMemoryItem,
|
|
340
|
+
user_id: str,
|
|
341
|
+
user_name: str,
|
|
342
|
+
operation: dict,
|
|
343
|
+
):
|
|
344
|
+
"""update preference memory"""
|
|
345
|
+
|
|
346
|
+
feedback_context = new_memory_item.memory
|
|
347
|
+
if operation and "text" in operation and operation["text"]:
|
|
348
|
+
new_memory_item.memory = operation["text"]
|
|
349
|
+
new_memory_item.metadata.embedding = self._batch_embed([operation["text"]])[0]
|
|
350
|
+
|
|
351
|
+
to_add_memory = old_memory_item.model_copy(deep=True)
|
|
352
|
+
to_add_memory.metadata.key = new_memory_item.metadata.key
|
|
353
|
+
to_add_memory.metadata.tags = new_memory_item.metadata.tags
|
|
354
|
+
to_add_memory.memory = new_memory_item.memory
|
|
355
|
+
to_add_memory.metadata.preference = new_memory_item.memory
|
|
356
|
+
to_add_memory.metadata.embedding = new_memory_item.metadata.embedding
|
|
357
|
+
|
|
358
|
+
to_add_memory.metadata.user_id = new_memory_item.metadata.user_id
|
|
359
|
+
to_add_memory.metadata.original_text = old_memory_item.memory
|
|
360
|
+
to_add_memory.metadata.covered_history = old_memory_item.id
|
|
361
|
+
|
|
362
|
+
to_add_memory.metadata.created_at = to_add_memory.metadata.updated_at = (
|
|
363
|
+
datetime.now().isoformat()
|
|
364
|
+
)
|
|
365
|
+
to_add_memory.metadata.context_summary = (
|
|
366
|
+
old_memory_item.metadata.context_summary + " \n" + feedback_context
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
# add new memory
|
|
370
|
+
to_add_memory.id = str(uuid.uuid4())
|
|
371
|
+
added_ids = self._retry_db_operation(lambda: self.pref_mem.add([to_add_memory]))
|
|
372
|
+
# delete
|
|
373
|
+
deleted_id = old_memory_item.id
|
|
374
|
+
collection_name = old_memory_item.metadata.preference_type
|
|
375
|
+
self._retry_db_operation(
|
|
376
|
+
lambda: self.pref_mem.delete_with_collection_name(collection_name, [deleted_id])
|
|
377
|
+
)
|
|
378
|
+
# add archived
|
|
379
|
+
old_memory_item.metadata.status = "archived"
|
|
380
|
+
old_memory_item.metadata.original_text = "archived"
|
|
381
|
+
old_memory_item.metadata.embedding = [0.0] * 1024
|
|
382
|
+
|
|
383
|
+
archived_ids = self._retry_db_operation(lambda: self.pref_mem.add([old_memory_item]))
|
|
384
|
+
|
|
385
|
+
logger.info(
|
|
386
|
+
f"[Memory Feedback UPDATE Pref] New Add:{added_ids!s} | Set archived:{archived_ids!s}"
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
return {
|
|
390
|
+
"id": to_add_memory.id,
|
|
391
|
+
"text": new_memory_item.memory,
|
|
392
|
+
"source_doc_id": "",
|
|
393
|
+
"archived_id": old_memory_item.id,
|
|
394
|
+
"origin_memory": old_memory_item.memory,
|
|
395
|
+
"type": "preference",
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
def _del_working_binding(self, user_name, mem_items: list[TextualMemoryItem]) -> set[str]:
    """Delete the working-memory binding nodes referenced by *mem_items*.

    Args:
        user_name: Cube/user scope passed to the graph store for deletion.
        mem_items: Memory items whose working-binding ids are extracted.

    Returns:
        The set of binding ids that deletion was attempted for (possibly
        empty). Individual deletion failures are logged, not raised.
    """
    bindings_to_delete = extract_working_binding_ids(mem_items)

    logger.info(
        f"[Memory Feedback UPDATE] Extracted {len(bindings_to_delete)} working_binding ids to cleanup: {list(bindings_to_delete)}"
    )

    # Bug fix: the previous code built `list({bindings_to_delete})`, which
    # wraps the whole collection inside another set and raises TypeError
    # for any unhashable collection (e.g. the set of ids this helper
    # receives). Deduplicate the ids themselves instead.
    delete_ids = list(set(bindings_to_delete)) if bindings_to_delete else []

    for mid in delete_ids:
        try:
            self.graph_store.delete_node(mid, user_name=user_name)
            logger.info(
                f"[0107 Feedback Core:_del_working_binding] Delete raw/working mem_ids: {delete_ids} for user_name: {user_name}"
            )
        except Exception as e:
            # Best-effort cleanup: a failed delete must not abort feedback.
            logger.warning(
                f"[0107 Feedback Core:_del_working_binding] TreeTextMemory.delete_hard: failed to delete {mid}: {e}"
            )

    # Return the attempted ids so the declared `set[str]` return type holds
    # (previously the method implicitly returned None).
    return set(delete_ids)
|
|
421
|
+
|
|
422
|
+
def semantics_feedback(
    self,
    user_id: str,
    user_name: str,
    memory_item: TextualMemoryItem,
    current_memories: list[TextualMemoryItem],
    history_str: str,
    chat_history_list: list,
    info: dict,
):
    """Modify memory at the semantic level.

    Compares the feedback memory against candidate memories (retrieving
    candidates first if none were supplied), asks the LLM for ADD/UPDATE
    operations per chunk, sanitizes the operations, then applies them
    concurrently.

    Returns:
        dict: ``{"record": {"add": [...], "update": [...]}}``.

    NOTE: *current_memories* is mutated in place when candidates are
    retrieved here (items are appended to the caller's list).
    """
    lang = detect_lang("".join(memory_item.memory))
    template = FEEDBACK_PROMPT_DICT["compare"][lang]
    if current_memories == []:
        # retrieve: no candidates supplied — recall by the last user turn
        # onward plus the feedback memory text itself, de-duplicated by id.
        last_user_index = max(i for i, d in enumerate(chat_history_list) if d["role"] == "user")
        last_qa = " ".join([item["content"] for item in chat_history_list[last_user_index:]])
        supplementary_retrieved = self._retrieve(last_qa, info=info, user_name=user_name)
        feedback_retrieved = self._retrieve(memory_item.memory, info=info, user_name=user_name)

        ids = []
        for item in feedback_retrieved + supplementary_retrieved:
            if item.id not in ids:
                ids.append(item.id)
                current_memories.append(item)
    # Keep only candidates whose agent/app scope matches the request info.
    include_keys = ["agent_id", "app_id"]
    current_memories = [
        item for item in current_memories if self._info_comparison(item, info, include_keys)
    ]
    operations = []
    if not current_memories:
        # Nothing relevant recalled: fall back to a plain ADD.
        operations = [{"operation": "ADD"}]
        logger.warning(
            "[Feedback Core]: There was no recall of the relevant memory, so it was added directly."
        )
    else:
        # Chunk candidates to bound prompt size, then compare each chunk
        # against the new facts concurrently.
        memory_chunks = split_into_chunks(current_memories, max_tokens_per_chunk=500)

        all_operations = []
        now_time = datetime.now().isoformat()
        with ContextThreadPoolExecutor(max_workers=10) as executor:
            future_to_chunk_idx = {}
            for chunk in memory_chunks:
                chunk_list = []
                for item in chunk:
                    # Preference items render their preference text instead
                    # of the raw memory text.
                    if "preference" in item.metadata.__dict__:
                        chunk_list.append(f"{item.id}: {item.metadata.preference}")
                    else:
                        chunk_list.append(f"{item.id}: {item.memory}")
                current_memories_str = "\n".join(chunk_list)

                prompt = template.format(
                    now_time=now_time,
                    current_memories=current_memories_str,
                    new_facts=memory_item.memory,
                    chat_history=history_str,
                )

                future = executor.submit(self._get_llm_response, prompt, load_type="bracket")
                future_to_chunk_idx[future] = chunk
            for future in concurrent.futures.as_completed(future_to_chunk_idx):
                try:
                    chunk_operations = future.result()
                    # Only accept well-formed {"operations": [...]} replies.
                    if (
                        chunk_operations
                        and "operations" in chunk_operations
                        and isinstance(chunk_operations["operations"], list)
                    ):
                        all_operations.extend(chunk_operations["operations"])
                except Exception as e:
                    logger.error(
                        f"[0107 Feedback Core: semantics_feedback] Operation failed: {e}"
                    )

        # Sanitize ids/dedupe, then drop LLM-hallucinated updates.
        standard_operations = self.standard_operations(all_operations, current_memories)
        operations = self.filter_fault_update(standard_operations)

    logger.info(f"[Feedback Core Operations]: {operations!s}")

    if not operations:
        return {"record": {"add": [], "update": []}}

    add_results = []
    update_results = []
    id_to_item = {item.id: item for item in current_memories}

    # Apply the surviving operations concurrently.
    with ContextThreadPoolExecutor(max_workers=10) as executor:
        future_to_op = {}
        for op in operations:
            event_type = op.get("operation", "").lower()

            if event_type == "add":
                future = executor.submit(
                    self._single_add_operation,
                    None,
                    memory_item,
                    user_id,
                    user_name,
                )
                future_to_op[future] = ("add", op)
            elif event_type == "update":
                future = executor.submit(
                    self._single_update_operation,
                    id_to_item[op["id"]],
                    memory_item,
                    user_id,
                    user_name,
                    operation=op,
                )
                future_to_op[future] = ("update", op)

        for future in concurrent.futures.as_completed(future_to_op):
            result_type, original_op = future_to_op[future]
            try:
                result = future.result()
                if result_type == "add" and result:
                    add_results.append(result)
                elif result_type == "update" and result:
                    update_results.append(result)
            except Exception as e:
                logger.error(
                    f"[0107 Feedback Core: semantics_feedback] Operation failed for {original_op}: {e}",
                    exc_info=True,
                )
    if update_results:
        updated_ids = [item["archived_id"] for item in update_results]
        # NOTE(review): _del_working_binding is declared as
        # (self, user_name, mem_items) but is called here with the id list
        # first — the arguments look swapped, and updated_ids are plain id
        # strings rather than memory items. Confirm against
        # extract_working_binding_ids before changing.
        self._del_working_binding(updated_ids, user_name)

    return {"record": {"add": add_results, "update": update_results}}
|
|
551
|
+
|
|
552
|
+
def _feedback_memory(
    self, user_id: str, user_name: str, feedback_memories: list[TextualMemoryItem], **kwargs
) -> dict:
    """Apply a batch of feedback memories against the previously retrieved memories.

    Looks up the memories the answer was based on (``retrieved_memory_ids``),
    drops any tagged ``mode:fast`` (those are immutable here), then runs
    :meth:`semantics_feedback` for each feedback memory concurrently and
    merges the per-memory add/update records.

    Keyword Args:
        retrieved_memory_ids: Ids of memories recalled for the original answer.
        chat_history: Recent conversation turns (last 4 are used for context).
        feedback_content: Raw user feedback text.
        info: Request metadata forwarded to retrieval/filtering.

    Returns:
        dict: ``{"record": {"add": [...], "update": [...]}}`` aggregated
        across all feedback memories.
    """
    retrieved_memory_ids = kwargs.get("retrieved_memory_ids") or []
    chat_history = kwargs.get("chat_history", [])
    feedback_content = kwargs.get("feedback_content", "")
    info = kwargs.get("info", {})

    # Conversation context shown to the LLM: last 4 turns + the feedback.
    chat_history_lis = [f"""{msg["role"]}: {msg["content"]}""" for msg in chat_history[-4:]]
    history_str = "\n".join(chat_history_lis) + f"\nuser feedback: \n{feedback_content}"

    retrieved_memories = [
        self.graph_store.get_node(_id, user_name=user_name) for _id in retrieved_memory_ids
    ]
    # Fast-mode memories are excluded from modification; log which ones.
    filterd_ids = [
        item["id"] for item in retrieved_memories if "mode:fast" in item["metadata"]["tags"]
    ]
    if filterd_ids:
        logger.warning(
            f"[0107 Feedback Core: _feedback_memory] Since the tags mode is fast, no modifications are made to the following memory {filterd_ids}."
        )

    current_memories = [
        TextualMemoryItem(**item)
        for item in retrieved_memories
        if "mode:fast" not in item["metadata"]["tags"]
    ]

    # Fan out one semantics_feedback call per feedback memory.
    with ContextThreadPoolExecutor(max_workers=3) as ex:
        futures = {
            ex.submit(
                self.semantics_feedback,
                user_id,
                user_name,
                mem,
                current_memories,
                history_str,
                chat_history,
                info,
            ): i
            for i, mem in enumerate(feedback_memories)
        }
        # Preserve submission order in the results despite as_completed.
        results = [None] * len(futures)
        for fut in concurrent.futures.as_completed(futures):
            i = futures[fut]
            try:
                node = fut.result()
                if node:
                    results[i] = node
            except Exception as e:
                logger.error(
                    f"[0107 Feedback Core: _feedback_memory] Error processing memory index {i}: {e}",
                    exc_info=True,
                )
    mem_res = [r for r in results if r]

    # Flatten the per-memory records into one combined record.
    return {
        "record": {
            "add": [element for item in mem_res for element in item["record"]["add"]],
            "update": [element for item in mem_res for element in item["record"]["update"]],
        }
    }
|
|
614
|
+
|
|
615
|
+
def _info_comparison(self, memory: TextualMemoryItem, _info: dict, include_keys: list) -> bool:
|
|
616
|
+
"""Filter the relevant memory items based on info"""
|
|
617
|
+
if not _info and not memory.metadata.info:
|
|
618
|
+
return True
|
|
619
|
+
|
|
620
|
+
record = []
|
|
621
|
+
for key in include_keys:
|
|
622
|
+
info_v = _info.get(key)
|
|
623
|
+
mem_v = memory.metadata.info.get(key, None) if memory.metadata.info else None
|
|
624
|
+
record.append(info_v == mem_v)
|
|
625
|
+
return all(record)
|
|
626
|
+
|
|
627
|
+
def _retrieve(self, query: str, info=None, top_k=20, user_name=None):
|
|
628
|
+
"""Retrieve memory items"""
|
|
629
|
+
retrieved_mems = self.searcher.search(
|
|
630
|
+
query, info=info, user_name=user_name, top_k=top_k, full_recall=True
|
|
631
|
+
)
|
|
632
|
+
retrieved_mems = [item[0] for item in retrieved_mems if float(item[1]) > 0.01]
|
|
633
|
+
|
|
634
|
+
if self.pref_feedback:
|
|
635
|
+
pref_info = {}
|
|
636
|
+
if "user_id" in info:
|
|
637
|
+
pref_info = {"user_id": info["user_id"]}
|
|
638
|
+
retrieved_prefs = self.pref_mem.search(query, top_k, pref_info)
|
|
639
|
+
return retrieved_mems + retrieved_prefs
|
|
640
|
+
else:
|
|
641
|
+
return retrieved_mems
|
|
642
|
+
|
|
643
|
+
def _vec_query(self, new_memories_embedding: list[float], user_name=None):
    """Embedding-based recall across the user and long-term memory scopes.

    Returns the recalled nodes as TextualMemoryItem objects, excluding any
    node tagged ``mode:fast`` (those must not be modified by feedback).
    """
    hit_refs = []
    for scope in ("UserMemory", "LongTermMemory"):
        hit_refs.extend(
            self.graph_store.search_by_embedding(
                new_memories_embedding,
                scope=scope,
                user_name=user_name,
                top_k=10,
                threshold=0.2,
            )
        )

    recalled_nodes = [
        self.graph_store.get_node(ref["id"], user_name=user_name) for ref in hit_refs
    ]

    if not hit_refs:
        logger.info(
            f"[0107 Feedback Core: _vec_query] No similar memories found for embedding query for user {user_name}."
        )

    # Fast-mode memories are excluded from modification; log which ones.
    fast_mode_ids = [
        node["id"] for node in recalled_nodes if "mode:fast" in node["metadata"]["tags"]
    ]
    if fast_mode_ids:
        logger.warning(
            f"[0107 Feedback Core: _vec_query] Since the tags mode is fast, no modifications are made to the following memory {fast_mode_ids}."
        )

    return [
        TextualMemoryItem(**node)
        for node in recalled_nodes
        if "mode:fast" not in node["metadata"]["tags"]
    ]
|
|
685
|
+
|
|
686
|
+
def _get_llm_response(
|
|
687
|
+
self,
|
|
688
|
+
prompt: str,
|
|
689
|
+
dsl: bool = True,
|
|
690
|
+
load_type: Literal["bracket", "square_bracket"] | None = None,
|
|
691
|
+
) -> dict:
|
|
692
|
+
messages = [{"role": "user", "content": prompt}]
|
|
693
|
+
response_text = ""
|
|
694
|
+
try:
|
|
695
|
+
response_text = self.llm.generate(messages, temperature=0.3, timeout=60)
|
|
696
|
+
if not dsl:
|
|
697
|
+
return response_text
|
|
698
|
+
try:
|
|
699
|
+
response_text = response_text.replace("```", "").replace("json", "")
|
|
700
|
+
cleaned_text = re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "", response_text)
|
|
701
|
+
response_json = json.loads(cleaned_text)
|
|
702
|
+
return response_json
|
|
703
|
+
except (json.JSONDecodeError, ValueError) as e:
|
|
704
|
+
if load_type == "bracket":
|
|
705
|
+
response_json = extract_bracket_content(response_text)
|
|
706
|
+
return response_json
|
|
707
|
+
elif load_type == "square_bracket":
|
|
708
|
+
response_json = extract_square_brackets_content(response_text)
|
|
709
|
+
return response_json
|
|
710
|
+
else:
|
|
711
|
+
logger.error(
|
|
712
|
+
f"[Feedback Core LLM Error] Exception during chat generation: {e} | response_text: {response_text}"
|
|
713
|
+
)
|
|
714
|
+
return None
|
|
715
|
+
|
|
716
|
+
except Exception as e:
|
|
717
|
+
logger.error(
|
|
718
|
+
f"[Feedback Core LLM Error] Exception during chat generation: {e} | response_text: {response_text}"
|
|
719
|
+
)
|
|
720
|
+
return None
|
|
721
|
+
|
|
722
|
+
def filter_fault_update(self, operations: list[dict]):
    """To address the randomness of large model outputs, it is necessary to conduct validity evaluation on the texts used for memory override operations.

    UPDATE operations are re-judged by the LLM in chunks; only those marked
    ``UPDATE_APPROVED`` survive. Non-UPDATE operations always pass through.
    Batches with fewer than 5 updates skip judgement entirely (too small to
    be worth an extra LLM round trip).

    Returns:
        The approved UPDATE operations plus all non-UPDATE operations.
    """
    updated_operations = [item for item in operations if item["operation"] == "UPDATE"]
    if len(updated_operations) < 5:
        return operations

    lang = detect_lang("".join(updated_operations[0]["text"]))
    template = FEEDBACK_PROMPT_DICT["compare_judge"][lang]

    all_judge = []
    operations_chunks = general_split_into_chunks(updated_operations)
    # Judge each chunk concurrently; malformed replies are simply skipped.
    with ContextThreadPoolExecutor(max_workers=10) as executor:
        future_to_chunk_idx = {}
        for chunk in operations_chunks:
            raw_operations_str = {"operations": chunk}
            prompt = template.format(raw_operations=str(raw_operations_str))

            future = executor.submit(self._get_llm_response, prompt, load_type="bracket")
            future_to_chunk_idx[future] = chunk
        for future in concurrent.futures.as_completed(future_to_chunk_idx):
            try:
                judge_res = future.result()
                # Only accept well-formed {"operations_judgement": [...]}.
                if (
                    judge_res
                    and "operations_judgement" in judge_res
                    and isinstance(judge_res["operations_judgement"], list)
                ):
                    all_judge.extend(judge_res["operations_judgement"])
            except Exception as e:
                logger.error(f"[0107 Feedback Core: filter_fault_update] Judgement failed: {e}")

    logger.info(f"[0107 Feedback Core: filter_fault_update] LLM judgement: {all_judge}")
    # Map judged ids back to the original operation payloads.
    id2op = {item["id"]: item for item in updated_operations}
    valid_updates = []
    for judge in all_judge:
        valid_update = None
        if judge["judgement"] == "UPDATE_APPROVED":
            valid_update = id2op.get(judge["id"], None)
        if valid_update:
            valid_updates.append(valid_update)

    logger.info(
        f"[0107 Feedback Core: filter_fault_update] {len(updated_operations)} -> {len(valid_updates)}"
    )
    return valid_updates + [item for item in operations if item["operation"] != "UPDATE"]
|
|
767
|
+
|
|
768
|
+
def standard_operations(self, operations, current_memories):
    """
    Regularize the operation design
    1. Map the id to the correct original memory id
    2. If there is an update, skip the memory object of add
    3. If the modified text is too long, skip the update
    """
    right_ids = [item.id for item in current_memories]
    # Lowercased lookup for case-insensitive id recovery.
    right_lower_map = {x.lower(): x for x in right_ids}

    def correct_item(data):
        # Validate a single LLM operation and de-hallucinate its id.
        # Returns the (possibly corrected) operation, or None to drop it.
        try:
            assert "operation" in data
            if data.get("operation", "").lower() == "add":
                return data

            if data.get("operation", "").lower() == "none":
                return None

            assert (
                "id" in data
                and "text" in data
                and "old_memory" in data
                and data["operation"].lower() == "update"
            ), "Invalid operation item"

            # Rule 3: reject updates whose text diverges too much.
            if not should_keep_update(data["text"], data["old_memory"]):
                logger.warning(
                    f"[0107 Feedback Core: correct_item] Due to the excessive proportion of changes, skip update: {data}"
                )
                return None

            # id dehallucination: exact match, then case-insensitive match,
            # then fuzzy match; an unmatched id falls through to implicit
            # None and the operation is dropped.
            original_id = data["id"]
            if original_id in right_ids:
                return data

            lower_id = original_id.lower()
            if lower_id in right_lower_map:
                data["id"] = right_lower_map[lower_id]
                return data

            matches = difflib.get_close_matches(original_id, right_ids, n=1, cutoff=0.8)
            if matches:
                data["id"] = matches[0]
                return data
        except Exception:
            logger.error(
                f"[0107 Feedback Core: standard_operations] Error processing operation item: {data}",
                exc_info=True,
            )
            return None

    dehallu_res = [correct_item(item) for item in operations]
    dehalluded_operations = [item for item in dehallu_res if item]
    logger.info(f"[0107 Feedback Core: dehalluded_operations] {dehalluded_operations}")

    # c add objects: deduplicate ADDs by text; keep all UPDATEs.
    add_texts = []
    llm_operations = []
    for item in dehalluded_operations:
        if item["operation"].lower() == "add" and "text" in item and item["text"]:
            if item["text"] in add_texts:
                continue
            llm_operations.append(item)
            add_texts.append(item["text"])
        elif item["operation"].lower() == "update":
            llm_operations.append(item)
    logger.info(
        f"[0107 Feedback Core: deduplicate add] {len(dehalluded_operations)} -> {len(llm_operations)} memories"
    )

    # Update takes precedence over add: if any UPDATE survived, drop ADDs.
    has_update = any(item.get("operation").lower() == "update" for item in llm_operations)
    if has_update:
        filtered_items = [
            item for item in llm_operations if item.get("operation").lower() == "add"
        ]
        update_items = [
            item for item in llm_operations if item.get("operation").lower() != "add"
        ]
        if filtered_items:
            logger.info(
                f"[0107 Feedback Core: semantics_feedback] Due to have update objects, skip add: {filtered_items}"
            )
        return update_items
    else:
        return llm_operations
|
|
856
|
+
|
|
857
|
+
def _generate_answer(
|
|
858
|
+
self, chat_history: list[MessageDict], feedback_content: str, corrected_answer: bool
|
|
859
|
+
) -> str:
|
|
860
|
+
"""
|
|
861
|
+
Answer generation to facilitate concurrent submission.
|
|
862
|
+
"""
|
|
863
|
+
if not corrected_answer or feedback_content.strip() == "":
|
|
864
|
+
return ""
|
|
865
|
+
lang = detect_lang(feedback_content)
|
|
866
|
+
template = FEEDBACK_PROMPT_DICT["generation"][lang]
|
|
867
|
+
chat_history_str = "\n".join(
|
|
868
|
+
[f"{item['role']}: {item['content']}" for item in chat_history]
|
|
869
|
+
)
|
|
870
|
+
chat_history_str = chat_history_str if chat_history_str else "none"
|
|
871
|
+
prompt = template.format(chat_history=chat_history_str, question=feedback_content)
|
|
872
|
+
|
|
873
|
+
return self._get_llm_response(prompt, dsl=False)
|
|
874
|
+
|
|
875
|
+
def _doc_filter(self, doc_scope: str, memories: list[TextualMemoryItem]):
    """Keep only the memories whose source filename matches *doc_scope*.

    Builds one pseudo memory item per distinct source filename, reranks
    those filenames against the requested scope, and keeps the memories
    attached to any filename scoring above 0.95.
    """
    ids_by_filename = {}
    filename_items = []

    # Index memory ids by the filename of each "file"-typed source.
    for mem in memories:
        for source in mem.metadata.sources:
            if source.type != "file":
                continue
            name = source.original_part["file"]["filename"]
            if name not in ids_by_filename:
                ids_by_filename[name] = []
                # One rerank candidate per distinct filename.
                filename_items.append(make_mem_item(name))
            ids_by_filename[name].append(mem.id)

    ranked = self.reranker.rerank(doc_scope, filename_items, top_k=100)
    inscope_docs = [entry[0].memory for entry in ranked if entry[1] > 0.95]

    inscope_ids = [
        mem_id for doc_name in inscope_docs for mem_id in ids_by_filename[doc_name]
    ]
    logger.info(
        f"[0107 Feedback Core: process_keyword_replace] These docs are in scope : {inscope_docs}, relared memids: {inscope_ids}"
    )
    return [mem for mem in memories if mem.id in inscope_ids]
|
|
903
|
+
|
|
904
|
+
def process_keyword_replace(
    self, user_id: str, user_name: str, kwp_judge: dict | None = None, info: dict | None = None
):
    """
    Memory keyword replace process.

    Recalls every memory containing ``kwp_judge["original"]`` (optionally
    restricted to a document scope), rewrites the keyword to
    ``kwp_judge["target"]`` in text and tags, re-embeds the changed
    memories, and applies the updates concurrently.

    Returns:
        dict: ``{"record": {"add": [], "update": [...]}}``.
    """
    info = info or {}
    kwp_judge = kwp_judge or {}
    doc_scope = kwp_judge.get("doc_scope", "NONE")
    original_word = kwp_judge.get("original")
    target_word = kwp_judge.get("target")
    include_keys = ["agent_id", "app_id"]

    # Robustness: without both words there is nothing to replace (and the
    # tokenizer below would fail on empty input).
    if not original_word or not target_word:
        return {"record": {"add": [], "update": []}}

    mem_info = {key: info[key] for key in info if key in include_keys}
    filter_dict = {f"info.{key}": info[key] for key in mem_info}

    if self.DB_IDX_READY:
        # retrieve via tf-idf keyword index, falling back to full-text.
        lang = detect_lang(original_word)
        queries = (
            self._tokenize_chinese(original_word) if lang == "zh" else original_word.split()
        )

        must_part = f"{' & '.join(queries)}" if len(queries) > 1 else queries[0]
        retrieved_ids = self.graph_store.seach_by_keywords_tfidf(
            [must_part], user_name=user_name, filter=filter_dict
        )
        if len(retrieved_ids) < 1:
            retrieved_ids = self.graph_store.search_by_fulltext(
                queries, top_k=100, user_name=user_name, filter=filter_dict
            )
    else:
        # No index available: LIKE scan for the raw keyword.
        retrieved_ids = self.graph_store.seach_by_keywords_like(
            f"%{original_word}%", user_name=user_name, filter=filter_dict
        )

    mem_data = [
        self.graph_store.get_node(item["id"], user_name=user_name) for item in retrieved_ids
    ]
    retrieved_memories = [TextualMemoryItem(**item) for item in mem_data]
    # Keep only memories whose agent/app scope matches the request info.
    retrieved_memories = [
        item
        for item in retrieved_memories
        if self._info_comparison(item, mem_info, include_keys)
    ]

    if doc_scope != "NONE":
        retrieved_memories = self._doc_filter(doc_scope, retrieved_memories)

    logger.info(
        f"[0107 Feedback Core: process_keyword_replace] Keywords recalled memory for user {user_name}: {len(retrieved_ids)} memories | After filtering: {len(retrieved_memories)} memories."
    )

    if not retrieved_memories:
        return {"record": {"add": [], "update": []}}

    # replace keywords in both memory text and tags.
    pick_index = []
    update_memories = []
    for i, old_mem in enumerate(retrieved_memories):
        if original_word in old_mem.memory:
            mem = old_mem.model_copy(deep=True)
            mem.memory = mem.memory.replace(original_word, target_word)
            if original_word in mem.metadata.tags:
                mem.metadata.tags.remove(original_word)
            if target_word not in mem.metadata.tags:
                mem.metadata.tags.append(target_word)
            pick_index.append(i)
            update_memories.append(mem)
    update_memories_embed = self._batch_embed([mem.memory for mem in update_memories])

    for mem, embed in zip(update_memories, update_memories_embed, strict=False):
        mem.metadata.embedding = embed

    update_results = []
    with ContextThreadPoolExecutor(max_workers=10) as executor:
        future_to_info = {}
        for new_mem, old_idx in zip(update_memories, pick_index, strict=False):
            old_mem = retrieved_memories[old_idx]

            future = executor.submit(
                self._single_update_operation,
                old_mem,
                new_mem,
                user_id,
                user_name,
            )
            future_to_info[future] = old_mem.id

        for future in future_to_info:
            try:
                result = future.result()
                update_results.append(result)
            except Exception as e:
                # Bug fix: future_to_info stores the id string itself, so the
                # previous `future_to_info[future][0]` logged only the first
                # character of the memory id.
                mem_id = future_to_info[future]
                logger.error(
                    f"[Feedback Core DB] Exception during update operation for memory {mem_id}: {e}"
                )

    return {"record": {"add": [], "update": update_results}}
|
|
1003
|
+
|
|
1004
|
+
def process_feedback_core(
    self,
    user_id: str,
    user_name: str,
    chat_history: list[MessageDict],
    feedback_content: str,
    info: dict | None = None,
    **kwargs,
) -> dict:
    """
    Core feedback processing: judgment, memory extraction, addition/update. Return record.

    Flow: keyword-replace shortcut -> pure add (no chat context or
    irrelevant feedback) -> LLM judgement -> build feedback memory items ->
    apply via _feedback_memory. Any exception yields an empty record.
    """

    def check_validity(item):
        # A judgement entry is usable only when marked valid and carrying
        # non-blank corrected text plus key/tags metadata.
        return (
            "validity" in item
            and item["validity"].lower() == "true"
            and "corrected_info" in item
            and item["corrected_info"].strip()
            and "key" in item
            and "tags" in item
        )

    if feedback_content.strip() == "":
        return {"record": {"add": [], "update": []}}
    try:
        feedback_time = kwargs.get("feedback_time") or datetime.now().isoformat()
        session_id = kwargs.get("session_id")
        # Ensure identity fields always ride along in info (mutates caller's
        # dict when one was supplied).
        if not info:
            info = {"user_id": user_id, "user_name": user_name, "session_id": session_id}
        else:
            info.update({"user_id": user_id, "user_name": user_name, "session_id": session_id})

        logger.info(
            f"[0107 Feedback Core: process_feedback_core] Starting memory feedback process for user {user_name}"
        )
        # feedback keywords update: "rename X to Y" style feedback short-
        # circuits into a bulk keyword replacement.
        kwp_judge = self._keyword_replace_judgement(feedback_content)
        if (
            kwp_judge
            and kwp_judge["if_keyword_replace"].lower() == "true"
            and kwp_judge.get("original", "NONE") != "NONE"
            and kwp_judge.get("target", "NONE") != "NONE"
        ):
            return self.process_keyword_replace(
                user_id, user_name, kwp_judge=kwp_judge, info=info
            )

        # llm update memory
        if not chat_history:
            # No conversation context: store the feedback verbatim.
            return self._pure_add(user_name, feedback_content, feedback_time, info)
        else:
            raw_judge = self._feedback_judgement(
                chat_history, feedback_content, feedback_time=feedback_time
            )
            valid_feedback = (
                [item for item in raw_judge if check_validity(item)] if raw_judge else []
            )
            # Feedback judged irrelevant to the conversation is still kept
            # as a plain memory rather than dropped.
            if (
                raw_judge
                and raw_judge[0]["validity"].lower() == "false"
                and raw_judge[0]["user_attitude"].lower() == "irrelevant"
            ):
                return self._pure_add(user_name, feedback_content, feedback_time, info)

            if not valid_feedback:
                logger.warning(
                    f"[0107 Feedback Core: process_feedback_core] No valid judgements for user {user_name}: {raw_judge}."
                )
                return {"record": {"add": [], "update": []}}

            feedback_memories = []

            # Embed all corrected texts in one batch call.
            corrected_infos = [item["corrected_info"] for item in valid_feedback]
            feedback_memories_embeddings = self._batch_embed(corrected_infos)

            for item, embedding in zip(
                valid_feedback, feedback_memories_embeddings, strict=False
            ):
                value = item["corrected_info"]
                key = item["key"]
                tags = item["tags"]
                # Preserve the conversation + feedback as provenance.
                background = (
                    "[Feedback update background]: "
                    + str(chat_history)
                    + "\nUser feedback: "
                    + str(feedback_content)
                )
                mem_item = make_mem_item(
                    value,
                    user_id=user_id,
                    user_name=user_name,
                    session_id=session_id,
                    tags=tags,
                    key=key,
                    embedding=embedding,
                    sources=[{"type": "chat"}],
                    background=background,
                    type="fine",
                    info=info,
                )
                feedback_memories.append(mem_item)

            mem_record = self._feedback_memory(
                user_id,
                user_name,
                feedback_memories,
                chat_history=chat_history,
                feedback_content=feedback_content,
                info=info,
                **kwargs,
            )
            add_memories = mem_record["record"]["add"]
            update_memories = mem_record["record"]["update"]
            logger.info(
                f"[0107 Feedback Core: process_feedback_core] Processed {len(feedback_memories)} feedback | add {len(add_memories)} memories | update {len(update_memories)} memories for user {user_name}."
            )
            return mem_record

    except Exception as e:
        # Broad boundary catch: feedback must never crash the caller.
        logger.error(
            f"[0107 Feedback Core: process_feedback_core] Error for user {user_name}: {e}"
        )
        return {"record": {"add": [], "update": []}}
|
|
1128
|
+
|
|
1129
|
+
def process_feedback(
    self,
    user_id: str,
    user_name: str,
    chat_history: list[MessageDict],
    feedback_content: str,
    info: dict[str, Any] | None = None,
    **kwargs,
):
    """
    Generate a corrected answer and update feedback memories concurrently.

    Runs ``self._generate_answer`` and ``self.process_feedback_core`` in
    parallel on a two-worker executor with a shared 30-second budget.

    Args:
        user_id: Identifier of the user the feedback belongs to.
        user_name: cube_ids
        chat_history: List of chat messages
        feedback_content: Feedback content from user
        info: Optional extra metadata forwarded to the core processor.
        **kwargs: Additional arguments including async_mode (also reads
            ``corrected_answer`` and ``task_id``).

    Returns:
        Dict with answer and/or memory operation records:
        ``{"answer": str, "record": {"add": [...], "update": [...]}}``.
        On timeout, cancellation, or any other failure, an empty answer and
        empty record lists are returned instead of raising.
    """
    corrected_answer = kwargs.get("corrected_answer", False)

    with ContextThreadPoolExecutor(max_workers=2) as ex:
        answer_future = ex.submit(
            self._generate_answer,
            chat_history,
            feedback_content,
            corrected_answer=corrected_answer,
        )
        core_future = ex.submit(
            self.process_feedback_core,
            user_id,
            user_name,
            chat_history,
            feedback_content,
            info,
            **kwargs,
        )
        # Shared 30s budget for both tasks; then try to cancel stragglers.
        # cancel() only succeeds for futures that have not started running.
        _done, pending = concurrent.futures.wait([answer_future, core_future], timeout=30)
        for fut in pending:
            fut.cancel()
        try:
            # timeout=0: wait() already consumed the time budget. A future
            # that is still running raises TimeoutError here instead of
            # blocking result() forever (a bare result() would defeat the
            # 30s limit); a successfully cancelled future raises
            # CancelledError.
            answer = answer_future.result(timeout=0)
            record = core_future.result(timeout=0)
            task_id = kwargs.get("task_id", "default")

            logger.info(
                f"[Feedback Core MemFeedback process] Feedback Completed : user {user_name} | task_id {task_id} | record {record}."
            )

            return {"answer": answer, "record": record["record"]}
        except (concurrent.futures.TimeoutError, concurrent.futures.CancelledError):
            # CancelledError must be caught explicitly: since Python 3.8 it
            # derives from BaseException and would escape `except Exception`,
            # crashing the caller instead of returning the empty record.
            logger.error(
                f"[Feedback Core MemFeedback process] Timeout in sync mode for {user_name}",
                exc_info=True,
            )
            return {"answer": "", "record": {"add": [], "update": []}}
        except Exception as e:
            logger.error(
                f"[Feedback Core MemFeedback process] Error in concurrent tasks for {user_name}: {e}",
                exc_info=True,
            )
            return {"answer": "", "record": {"add": [], "update": []}}