MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Evaluation Analyzer for Bad Cases
|
|
3
|
+
|
|
4
|
+
This module provides the EvalAnalyzer class that extracts bad cases from evaluation results
|
|
5
|
+
and analyzes whether memories contain sufficient information to answer golden answers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from openai import OpenAI
|
|
16
|
+
|
|
17
|
+
from memos.log import get_logger
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Absolute path of this module file.
FILE_PATH = Path(__file__).absolute()
# Five directory levels above this file. main() builds its evaluation result
# paths relative to this directory.
BASE_DIR = FILE_PATH.parent.parent.parent.parent.parent  # Go up to project root
# Import-time side effect: BASE_DIR is placed first on the module search path.
sys.path.insert(0, str(BASE_DIR))  # Enable execution from any working directory

# Module-level logger shared by EvalAnalyzer's methods.
logger = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class EvalAnalyzer:
    """
    Evaluation Analyzer class for extracting bad cases from evaluation results.

    A "bad case" is a judged result that has at least one LLM judgment and whose
    judgments are all falsy. Each bad case is paired with the search context that
    was recorded for the same user and query.

    The constructor also builds an OpenAI client and stores the model name; none
    of the methods visible in this class call the client.
    """

    def __init__(
        self,
        openai_api_key: str | None = None,
        openai_base_url: str | None = None,
        openai_model: str = "gpt-4o-mini",
        output_dir: str = "./tmp/eval_analyzer",
    ):
        """
        Initialize the EvalAnalyzer.

        Args:
            openai_api_key: OpenAI API key. Falls back to the
                MEMSCHEDULER_OPENAI_API_KEY environment variable when falsy.
            openai_base_url: OpenAI base URL. Falls back to the
                MEMSCHEDULER_OPENAI_BASE_URL environment variable when falsy.
            openai_model: OpenAI model to use.
            output_dir: Output directory for results. Created (with parents)
                if it does not exist.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Initialize OpenAI client
        self.openai_client = OpenAI(
            api_key=openai_api_key or os.getenv("MEMSCHEDULER_OPENAI_API_KEY"),
            base_url=openai_base_url or os.getenv("MEMSCHEDULER_OPENAI_BASE_URL"),
        )
        # NOTE(review): because the parameter default is the non-empty string
        # "gpt-4o-mini", the environment fallback below is only reached when a
        # caller explicitly passes a falsy value (e.g. "" or None).
        self.openai_model = openai_model or os.getenv(
            "MEMSCHEDULER_OPENAI_DEFAULT_MODEL", "gpt-4o-mini"
        )

        logger.info(f"EvalAnalyzer initialized with model: {self.openai_model}")

    def load_json_file(self, filepath: str) -> Any:
        """
        Load a UTF-8 encoded JSON file.

        Args:
            filepath: Path to the JSON file

        Returns:
            The decoded JSON value, or None when the file does not exist or its
            content is not valid JSON (the failure is logged). Other I/O errors
            propagate to the caller.
        """
        try:
            with open(filepath, encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            logger.error(f"File not found: {filepath}")
            return None
        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error in {filepath}: {e}")
            return None

    def extract_bad_cases(self, judged_file: str, search_results_file: str) -> list[dict[str, Any]]:
        """
        Extract bad cases from judged results and corresponding search results.

        Both files are expected to decode to a mapping of user id -> list of
        record dicts (this is how the data is traversed below).

        Args:
            judged_file: Path to the judged results JSON file
            search_results_file: Path to the search results JSON file

        Returns:
            List of bad cases with their memories. Empty when either file is
            missing, invalid, or decodes to an empty value. Results that carry
            no LLM judgments at all are skipped, not reported as bad cases.
        """
        logger.info(f"Loading judged results from: {judged_file}")
        judged_data = self.load_json_file(judged_file)
        if not judged_data:
            return []

        logger.info(f"Loading search results from: {search_results_file}")
        search_data = self.load_json_file(search_results_file)
        if not search_data:
            return []

        bad_cases: list[dict[str, Any]] = []
        unjudged_count = 0

        # Process each user's data
        for user_id, user_judged_results in judged_data.items():
            # Map each query to its search context; a later duplicate query
            # overwrites an earlier one.
            search_context_map = {
                search_result.get("query", ""): search_result.get("context", "")
                for search_result in search_data.get(user_id, [])
            }

            # Process each question for this user
            for result in user_judged_results:
                # "or {}" also covers an explicit JSON null for the key.
                judgments = result.get("llm_judgments") or {}

                # all() over an empty iterable is True, so a result without any
                # judgment must be excluded explicitly; otherwise it would be
                # misreported as a case where every judge said "wrong".
                if not judgments:
                    unjudged_count += 1
                    continue

                # Not a bad case as soon as one judgment is truthy.
                if any(judgments.values()):
                    continue

                question = result.get("question", "")

                bad_cases.append(
                    {
                        "user_id": user_id,
                        "query": question,
                        "answer": result.get("answer", ""),
                        "golden_answer": result.get("golden_answer", ""),
                        # Search context recorded for this question; "" when
                        # no search result matches the question text.
                        "memories": search_context_map.get(question, ""),
                        "category": result.get("category", 0),
                        "nlp_metrics": result.get("nlp_metrics", {}),
                        "response_duration_ms": result.get("response_duration_ms", 0),
                        "search_duration_ms": result.get("search_duration_ms", 0),
                        "total_duration_ms": result.get("total_duration_ms", 0),
                    }
                )

        if unjudged_count:
            logger.warning(f"Skipped {unjudged_count} results without LLM judgments")

        logger.info(f"Extracted {len(bad_cases)} bad cases")
        return bad_cases
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def main(version_name="ct-1111"):
    """
    Run a bad-case extraction smoke test against one stored evaluation run.

    The judged and search result files are looked up under
    ``BASE_DIR/evaluation/results/locomo/memos-api-<version_name>``. When both
    exist, bad cases are extracted, up to three samples are printed, and the
    full result is written to ``bad_cases_extraction_only.json`` in the
    analyzer's output directory. Any exception raised during extraction or
    saving is printed with its traceback instead of being propagated.

    Args:
        version_name: Suffix of the evaluation result directory to read.
    """
    import json
    import traceback

    print("=== EvalAnalyzer Simple Test ===")

    # Build the analyzer first; this also creates the output directory.
    analyzer = EvalAnalyzer(output_dir="./tmp/eval_analyzer")
    print("Analyzer initialized")

    # Locate the two input files of the selected evaluation run.
    eval_result_dir = f"{BASE_DIR}/evaluation/results/locomo/memos-api-{version_name}"
    judged_file = os.path.join(eval_result_dir, "memos-api_locomo_judged.json")
    search_results_file = os.path.join(eval_result_dir, "memos-api_locomo_search_results.json")

    print("Testing with files:")
    print(f" Judged file: {judged_file}")
    print(f" Search results file: {search_results_file}")

    # Bail out early when an input is missing.
    judged_present = os.path.exists(judged_file)
    if not judged_present:
        print(f"❌ Judged file not found: {judged_file}")
        return

    search_present = os.path.exists(search_results_file)
    if not search_present:
        print(f"❌ Search results file not found: {search_results_file}")
        return

    print("✅ Both files exist")

    # Extraction only; no LLM analysis is performed here.
    try:
        print("\n=== Testing Bad Case Extraction ===")
        bad_cases = analyzer.extract_bad_cases(judged_file, search_results_file)
        total = len(bad_cases)

        print(f"✅ Successfully extracted {total} bad cases")

        if bad_cases:
            print("\n=== Sample Bad Cases ===")
            # Preview at most the first three cases.
            for number, case in enumerate(bad_cases[:3], start=1):
                print(f"\nBad Case {number}:")
                print(f" User ID: {case['user_id']}")
                print(f" Query: {case['query'][:100]}...")
                print(f" Golden Answer: {case['golden_answer']}...")
                print(f" Answer: {case['answer']}...")
                memories = case["memories"]
                print(f" Has Memories: {len(memories) > 0}")
                print(f" Memory Length: {len(memories)} chars")

        # Persist the raw extraction result together with its provenance.
        metadata = {
            "eval_result_dir": eval_result_dir,
            "judged_file": judged_file,
            "search_results_file": search_results_file,
            "extraction_only": True,
        }
        basic_results = {
            "bad_cases_count": total,
            "bad_cases": bad_cases,
            "metadata": metadata,
        }

        output_file = analyzer.output_dir / "bad_cases_extraction_only.json"
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(basic_results, f, indent=2, ensure_ascii=False)

        print(f"\n✅ Basic extraction results saved to: {output_file}")

    except Exception as e:
        print(f"❌ Error during extraction: {e}")
        traceback.print_exc()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# Script entry point: run the extraction smoke test for the "ct-1118" run,
# overriding main()'s default version_name.
if __name__ == "__main__":
    main(version_name="ct-1118")
|