MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
|
|
5
|
+
from memos import log
|
|
6
|
+
from memos.configs.mem_reader import StrategyStructMemReaderConfig
|
|
7
|
+
from memos.configs.parser import ParserConfigFactory
|
|
8
|
+
from memos.mem_reader.read_multi_modal import detect_lang
|
|
9
|
+
from memos.mem_reader.simple_struct import SimpleStructMemReader
|
|
10
|
+
from memos.parsers.factory import ParserFactory
|
|
11
|
+
from memos.templates.mem_reader_prompts import (
|
|
12
|
+
CUSTOM_TAGS_INSTRUCTION,
|
|
13
|
+
CUSTOM_TAGS_INSTRUCTION_ZH,
|
|
14
|
+
SIMPLE_STRUCT_DOC_READER_PROMPT,
|
|
15
|
+
SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
|
|
16
|
+
SIMPLE_STRUCT_MEM_READER_EXAMPLE,
|
|
17
|
+
SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
|
|
18
|
+
)
|
|
19
|
+
from memos.templates.mem_reader_strategy_prompts import (
|
|
20
|
+
STRATEGY_STRUCT_MEM_READER_PROMPT,
|
|
21
|
+
STRATEGY_STRUCT_MEM_READER_PROMPT_ZH,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Module-level logger, named after this module.
logger = log.get_logger(__name__)
# Prompt lookup table: scene kind -> language code ("en" / "zh") -> template text.
# The "chat" entry additionally carries "<lang>_example" values; _get_llm_response
# looks these up and strips that text from the prompt when
# config.remove_prompt_example is set.
STRATEGY_PROMPT_DICT = {
    "chat": {
        "en": STRATEGY_STRUCT_MEM_READER_PROMPT,
        "zh": STRATEGY_STRUCT_MEM_READER_PROMPT_ZH,
        "en_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE,
        "zh_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
    },
    # Document prompts; not referenced by the methods visible in this module.
    "doc": {"en": SIMPLE_STRUCT_DOC_READER_PROMPT, "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH},
    # Instruction snippet containing a "{custom_tags}" placeholder.
    "custom_tags": {"en": CUSTOM_TAGS_INSTRUCTION, "zh": CUSTOM_TAGS_INSTRUCTION_ZH},
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class StrategyStructMemReader(SimpleStructMemReader, ABC):
    """MemReader that chunks chat sessions with a configurable strategy.

    Builds on ``SimpleStructMemReader``: the LLM prompt comes from
    ``STRATEGY_PROMPT_DICT`` and chat sessions are split into chunks according
    to ``config.chat_chunker["config"]`` (either by accumulated content length
    or by a fixed-size sliding window of messages).
    """

    def __init__(self, config: StrategyStructMemReaderConfig):
        super().__init__(config)
        # Only the nested "config" mapping of the chunker settings is kept.
        self.chat_chunker = config.chat_chunker["config"]

    def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict:
        """Ask the LLM to extract memories from ``mem_str``.

        Args:
            mem_str: Conversation text substituted for ``${conversation}``.
            custom_tags: Optional tags; when truthy, the custom-tags
                instruction is rendered with ``str(custom_tags)`` and
                substituted for ``${custom_tags_prompt}``.

        Returns:
            The parsed JSON response. If generation or parsing raises, a
            fallback dict with a single ``UserMemory`` entry holding the raw
            ``mem_str`` is returned instead.
        """
        lang = detect_lang(mem_str)
        chat_prompts = STRATEGY_PROMPT_DICT["chat"]
        template = chat_prompts[lang]
        examples = chat_prompts[f"{lang}_example"]
        prompt = template.replace("${conversation}", mem_str)

        custom_tags_prompt = (
            STRATEGY_PROMPT_DICT["custom_tags"][lang].replace("{custom_tags}", str(custom_tags))
            if custom_tags
            else ""
        )
        prompt = prompt.replace("${custom_tags_prompt}", custom_tags_prompt)

        # TODO: marked "unused" in the original source.
        if self.config.remove_prompt_example:
            prompt = prompt.replace(examples, "")

        messages = [{"role": "user", "content": prompt}]
        try:
            response_text = self.llm.generate(messages)
            response_json = self.parse_json_result(response_text)
        except Exception as e:
            # Best effort: keep the raw text as one memory rather than losing it.
            logger.error(f"[LLM] Exception during chat generation: {e}")
            response_json = {
                "memory list": [
                    {
                        "key": mem_str[:10],
                        "memory_type": "UserMemory",
                        "value": mem_str,
                        "tags": [],
                    }
                ],
                "summary": mem_str,
            }
        return response_json

    def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
        """Get raw information from scene_data.

        For ``type == "chat"`` every session in ``scene_data`` is split into
        chunks of messages. For ``type == "doc"`` every item that is an
        existing path is parsed with the markitdown parser; anything else is
        treated as plain text.

        Args:
            scene_data: List of chat sessions (each a sequence of messages) or
                list of document paths / raw texts.
            type: Type of scene data: ['doc', 'chat'].

        Returns:
            For chat: a list of message lists (one per chunk).
            For doc: a list of ``{"file": ..., "text": ...}`` dicts.
            For any other ``type``: an empty list.
            Note: the elements are lists or dicts, not ``str``, despite the
            annotation.
        """
        if type == "chat":
            if self.chat_chunker["chunk_type"] == "content_length":
                return self._chunk_by_content_length(
                    scene_data, self.chat_chunker["chunk_length"]
                )
            return self._chunk_by_session_window(
                scene_data,
                self.chat_chunker["chunk_session"],
                self.chat_chunker["chunk_overlap"],
            )
        if type == "doc":
            return self._parse_doc_items(scene_data)
        return []

    @staticmethod
    def _content_length(item) -> int:
        """Return the length of a message's "content" (dicts) or of ``str(item)``."""
        if isinstance(item, dict):
            # ``or ""`` guards against an explicit ``"content": None``.
            return len(item.get("content") or "")
        return len(str(item))

    def _chunk_by_content_length(self, scene_data: list, max_length: int) -> list:
        """Group each session's messages until ``max_length`` content chars is exceeded.

        When a message would push the running total over the limit, a new
        chunk is started that repeats the previous message as overlap.
        """
        chunks = []
        for items in scene_data:
            if not items:
                continue

            current = []
            current_length = 0
            chunks.append(current)

            for item in items:
                item_length = self._content_length(item)
                # The first message of a chunk is always accepted, whatever its size.
                if current and current_length + item_length > max_length:
                    overlap_item = current[-1]
                    current = [overlap_item, item]
                    chunks.append(current)
                    current_length = self._content_length(overlap_item) + item_length
                else:
                    current.append(item)
                    current_length += item_length
        return chunks

    @staticmethod
    def _chunk_by_session_window(scene_data: list, cut_size: int, cut_overlap: int) -> list:
        """Split each session into windows of ``cut_size`` messages sharing ``cut_overlap``."""
        # A non-positive step (overlap >= size) used to raise ValueError in
        # range() or silently drop the whole session; always advance by >= 1.
        step = max(1, cut_size - cut_overlap)
        chunks = []
        for items in scene_data:
            # Skip empty sessions, consistent with content-length chunking.
            if not items:
                continue
            end = len(items) - cut_overlap
            if end <= 0:
                # Session is no longer than the overlap: keep it as one chunk.
                chunks.append(items[:])
            else:
                chunks.extend(items[i : i + cut_size] for i in range(0, end, step))
        return chunks

    @staticmethod
    def _parse_doc_items(scene_data: list) -> list:
        """Parse document paths with markitdown; pass other items through as text."""
        parser_config = ParserConfigFactory.model_validate(
            {
                "backend": "markitdown",
                "config": {},
            }
        )
        parser = ParserFactory.from_config(parser_config)

        results = []
        for item in scene_data:
            try:
                if os.path.exists(item):
                    try:
                        parsed_text = parser.parse(item)
                        results.append({"file": item, "text": parsed_text})
                    except Exception as e:
                        logger.error(f"[SceneParser] Error parsing {item}: {e}")
                        continue
                else:
                    results.append({"file": "pure_text", "text": item})
            except Exception as e:
                # e.g. os.path.exists() rejecting a non-path item; report it
                # through the logger like every other error in this class.
                logger.error(f"[SceneParser] Error parsing file {item}: {e!s}")
        return results
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from memos import log
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
logger = log.get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
try:
    import tiktoken

    try:
        _ENC = tiktoken.encoding_for_model("gpt-4o-mini")
    except Exception:
        # Could not resolve the model-specific encoding; use a generic one.
        _ENC = tiktoken.get_encoding("cl100k_base")

    def count_tokens_text(s: str) -> int:
        """Return the number of tokens in ``s`` (``None``/empty counts as 0).

        Special-token markers inside ``s`` are encoded as ordinary text
        (``disallowed_special=()``) instead of raising.
        """
        return len(_ENC.encode(s or "", disallowed_special=()))
except Exception:
    # tiktoken (or its encoding data) is unavailable.
    # Heuristic fallback: zh chars ~1 token, others ~1 token per ~4 chars
    def count_tokens_text(s: str) -> int:
        """Estimate the number of tokens in ``s`` without a tokenizer.

        Each character in U+4E00..U+9FFF counts as one token; the remaining
        characters count as one token per four characters, with a minimum of
        one token when any such character is present.
        """
        if not s:
            return 0
        zh = len(re.findall(r"[\u4e00-\u9fff]", s))
        rest = len(s) - zh
        if rest == 0:
            # Pure CJK text: do not add the "at least one token" floor for
            # non-CJK characters that are not there.
            return zh
        return zh + max(1, rest // 4)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def derive_key(text: str, max_len: int = 80) -> str:
    """Derive a default key for ``text`` (used when no LLM-generated key exists).

    The key is the first non-empty sentence or line of ``text``, truncated to
    at most ``max_len`` characters and stripped of surrounding whitespace.
    Sentences end at ``。``, ``！``, ``？``, ``!``, ``?`` or a newline.

    Args:
        text: Source text; empty or ``None`` yields ``""``.
        max_len: Maximum number of characters (not words) in the key.

    Returns:
        The derived key, or ``""`` when ``text`` has no usable content.
    """
    if not text:
        return ""
    # Full-width and ASCII terminators; the original class listed "!?" twice,
    # which left the full-width forms unmatched.
    for segment in re.split(r"[。！？!?]\s*|\n", text.strip()):
        segment = segment.strip()
        # Skip empty leading pieces (e.g. text starting with "?" or a blank
        # line) so they do not produce an empty key.
        if segment:
            return segment[:max_len].strip()
    return ""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def parse_json_result(response_text: str) -> dict:
    """Best-effort extraction of a JSON object from an LLM response.

    The text inside the first Markdown code fence is used when one is present
    (otherwise stray fence markers are removed), and parsing starts at the
    first ``{``. Progressively more lenient attempts are then made:

    1. parse the text as-is;
    2. parse up to the last ``}`` / ``]`` (drops trailing prose);
    3. append closers for brackets that were left open (truncated output);
    4. if the failure is an invalid backslash escape, escape the stray
       backslashes and retry.

    Args:
        response_text: Raw model output; ``None`` or empty is accepted.

    Returns:
        The decoded object, or ``{}`` when no attempt succeeds (a warning is
        logged in that case). This function does not raise on malformed JSON.
    """
    s = (response_text or "").strip()

    m = re.search(r"```(?:json)?\s*([\s\S]*?)```", s, flags=re.I)
    s = (m.group(1) if m else s.replace("```", "")).strip()

    i = s.find("{")
    if i == -1:
        return {}
    s = s[i:].strip()

    try:
        return json.loads(s)
    except json.JSONDecodeError:
        pass

    j = max(s.rfind("}"), s.rfind("]"))
    if j != -1:
        try:
            return json.loads(s[: j + 1])
        except json.JSONDecodeError:
            pass

    closed = _close_open_brackets(s)
    try:
        return json.loads(closed)
    except json.JSONDecodeError as exc:
        error = exc

    if "Invalid \\escape" in str(error):
        # Repair only the stray backslashes and guard the retry, so a second
        # failure still ends in the documented ``{}`` instead of an exception.
        try:
            return json.loads(_escape_stray_backslashes(closed))
        except json.JSONDecodeError as exc:
            error = exc

    logger.warning(
        "[JSONParse] Failed to decode JSON: %s\nTail: Raw %s json: %s",
        error,
        response_text,
        s,
    )
    return {}


def _close_open_brackets(t: str) -> str:
    """Append the closers for ``{`` / ``[`` left open in ``t``, innermost first.

    Brackets inside JSON string literals are ignored.
    """
    pending = []
    in_string = False
    escaped = False
    for ch in t:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]" and pending and pending[-1] == ch:
            pending.pop()
    return t + "".join(reversed(pending))


def _escape_stray_backslashes(t: str) -> str:
    """Double every backslash in ``t`` that does not start a valid JSON escape."""
    return re.sub(
        r'\\(["\\/bfnrt]|u[0-9a-fA-F]{4})?',
        lambda m: m.group(0) if m.group(1) else "\\\\",
        t,
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def parse_rewritten_response(text: str) -> tuple[bool, dict[int, dict]]:
    """Parse index-keyed JSON from hallucination filter response.

    Expected shape: { "0": {"need_rewrite": bool, "rewritten": str, "reason": str}, ... }

    The JSON may be wrapped in a Markdown code fence. If the text does not
    parse as a whole, the span from the first ``{`` to the last ``}`` is tried
    so that prose around the object does not cause a failure.

    Entries are dropped when the key is not an integer string, the value is
    not an object, ``need_rewrite`` is not a bool, or ``rewritten`` /
    ``reason`` (both defaulting to ``""``) are not strings.

    Returns (success, parsed_dict) with int keys; ``success`` is True only
    when at least one entry survived validation.
    """
    if not isinstance(text, str):
        return False, {}

    m = re.search(r"```(?:json)?\s*([\s\S]*?)```", text, flags=re.I)
    s = (m.group(1) if m else text).strip()

    data = None
    try:
        data = json.loads(s)
    except (ValueError, RecursionError):
        start, end = s.find("{"), s.rfind("}")
        if 0 <= start < end:
            try:
                data = json.loads(s[start : end + 1])
            except (ValueError, RecursionError):
                return False, {}

    if not isinstance(data, dict):
        return False, {}

    result: dict[int, dict] = {}
    for k, v in data.items():
        # JSON object keys are always strings, so only ValueError can occur.
        try:
            idx = int(k)
        except ValueError:
            continue
        if not isinstance(v, dict):
            continue
        need_rewrite = v.get("need_rewrite")
        rewritten = v.get("rewritten", "")
        reason = v.get("reason", "")
        if (
            isinstance(need_rewrite, bool)
            and isinstance(rewritten, str)
            and isinstance(reason, str)
        ):
            result[idx] = {
                "need_rewrite": need_rewrite,
                "rewritten": rewritten,
                "reason": reason,
            }

    return (len(result) > 0), result
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def parse_keep_filter_response(text: str) -> tuple[bool, dict[int, dict]]:
    """Parse index-keyed JSON from keep filter response.

    Expected shape: { "0": {"keep": bool, "reason": str}, ... }

    The JSON may be wrapped in a Markdown code fence. If the text does not
    parse as a whole, the span from the first ``{`` to the last ``}`` is tried
    so that prose around the object does not cause a failure.

    Entries are dropped when the key is not an integer string, the value is
    not an object, or ``keep`` is not a bool. ``reason`` defaults to ``""``
    and is passed through without type validation.

    Returns (success, parsed_dict) with int keys; ``success`` is True only
    when at least one entry survived validation.
    """
    if not isinstance(text, str):
        return False, {}

    m = re.search(r"```(?:json)?\s*([\s\S]*?)```", text, flags=re.I)
    s = (m.group(1) if m else text).strip()

    data = None
    try:
        data = json.loads(s)
    except (ValueError, RecursionError):
        start, end = s.find("{"), s.rfind("}")
        if 0 <= start < end:
            try:
                data = json.loads(s[start : end + 1])
            except (ValueError, RecursionError):
                return False, {}

    if not isinstance(data, dict):
        return False, {}

    result: dict[int, dict] = {}
    for k, v in data.items():
        # JSON object keys are always strings, so only ValueError can occur.
        try:
            idx = int(k)
        except ValueError:
            continue
        if not isinstance(v, dict):
            continue
        keep = v.get("keep")
        reason = v.get("reason", "")
        if isinstance(keep, bool):
            result[idx] = {
                "keep": keep,
                "reason": reason,
            }
    return (len(result) > 0), result
|
|
File without changes
|
|
File without changes
|