MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from memos import log
|
|
2
|
+
from memos.embedders.factory import OllamaEmbedder
|
|
3
|
+
from memos.graph_dbs.factory import PolarDBGraphDB
|
|
4
|
+
from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
|
|
5
|
+
from memos.mem_feedback.feedback import MemFeedback
|
|
6
|
+
from memos.mem_reader.simple_struct import SimpleStructMemReader
|
|
7
|
+
from memos.memories.textual.simple_preference import SimplePreferenceTextMemory
|
|
8
|
+
from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager
|
|
9
|
+
from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import StopwordManager
|
|
10
|
+
from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher
|
|
11
|
+
from memos.reranker.base import BaseReranker
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
logger = log.get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SimpleMemFeedback(MemFeedback):
    """
    MemFeedback variant wired together from already-constructed collaborators.

    Each constructor argument is stored unchanged on the instance under an
    attribute of the same name; nothing is built, copied or validated here.
    """

    def __init__(
        self,
        llm: OpenAILLM | OllamaLLM | AzureLLM,
        embedder: OllamaEmbedder,
        graph_store: PolarDBGraphDB,
        memory_manager: MemoryManager,
        mem_reader: SimpleStructMemReader,
        searcher: Searcher,
        reranker: BaseReranker,
        pref_mem: SimplePreferenceTextMemory,
        pref_feedback: bool = False,
    ):
        # NOTE(review): the base-class __init__ is not invoked here; presumably
        # intentional so the collaborators can be injected directly - confirm.

        # Model-side collaborators.
        self.llm = llm
        self.embedder = embedder
        self.reranker = reranker

        # Storage and retrieval collaborators.
        self.graph_store = graph_store
        self.memory_manager = memory_manager
        self.mem_reader = mem_reader
        self.searcher = searcher
        self.pref_mem = pref_mem

        # The StopwordManager class itself is stored, not an instance of it.
        self.stopword_manager = StopwordManager

        # Flags: DB_IDX_READY always starts out False; pref_feedback is the
        # caller-supplied switch (defaults to False).
        self.DB_IDX_READY = False
        self.pref_feedback = pref_feedback
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def estimate_tokens(text: str) -> int:
    """
    Return a rough token count for ``text``.

    The estimate is a weighted sum, truncated to an int:
      * 1.5  per character in the CJK range U+4E00..U+9FFF,
      * 1.33 per whitespace-separated word that has at least one alphabetic
        character and no character from that CJK range,
      * 0.5  per character outside that CJK range (this includes the letters
        of the words counted above, as well as whitespace and punctuation).

    Empty or otherwise falsy input yields 0; any other input yields at least 1.
    """
    if not text:
        return 0

    def in_cjk_block(ch: str) -> bool:
        return "\u4e00" <= ch <= "\u9fff"

    cjk_count = sum(1 for ch in text if in_cjk_block(ch))

    latin_words = sum(
        1
        for token in text.split()
        if not any(in_cjk_block(ch) for ch in token) and any(ch.isalpha() for ch in token)
    )

    remaining_chars = len(text) - cjk_count

    weighted_total = cjk_count * 1.5 + latin_words * 1.33 + remaining_chars * 0.5
    return max(1, int(weighted_total))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def should_keep_update(new_text: str, old_text: str) -> bool:
    """
    Decide whether the change from ``old_text`` to ``new_text`` is small enough to keep.

    The change ratio is ``1 - J``, where ``J`` is the Jaccard similarity of the
    two texts' sets of distinct characters (character order and repetition are
    ignored).

    Rules:
    1. If the change ratio is exactly 0 (both texts use the same set of
       characters, including the case where both are empty) => returns False
    2. If old_text is estimated at fewer than 200 tokens => returns True only
       when the change ratio is below 0.7
    3. Otherwise => returns True only when the change ratio is below 0.2
    """
    old_len = estimate_tokens(old_text)

    old_chars = set(old_text)
    new_chars = set(new_text)

    if not old_chars and not new_chars:
        # Two empty texts count as identical (similarity 1.0).
        similarity = 1.0
    else:
        # At least one set is non-empty here, so the union is never empty.
        similarity = len(old_chars & new_chars) / len(old_chars | new_chars)

    change_ratio = 1 - similarity

    # Nothing changed at the character-set level: there is no update to keep.
    if change_ratio == float(0):
        return False

    # Shorter originals tolerate a proportionally larger rewrite.
    max_change = 0.7 if old_len < 200 else 0.2
    return change_ratio < max_change
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def general_split_into_chunks(items: list[dict], max_tokens_per_chunk: int = 500):
    """
    Pack ``items`` into consecutive chunks bounded by an estimated token budget.

    The cost of an item is ``estimate_tokens(str(item))``. Items keep their
    input order. An item whose own cost exceeds ``max_tokens_per_chunk`` is
    emitted as a chunk of its own (after closing the chunk being filled).

    Returns:
        A list of chunks, each chunk being a non-empty list of items.
    """
    batches = []
    pending = []
    pending_tokens = 0

    for entry in items:
        cost = estimate_tokens(str(entry))
        too_big = cost > max_tokens_per_chunk

        # Common case: the entry fits into the chunk currently being filled.
        if not too_big and pending_tokens + cost <= max_tokens_per_chunk:
            pending.append(entry)
            pending_tokens += cost
            continue

        # Otherwise close the chunk in progress, if it holds anything.
        if pending:
            batches.append(pending)

        if too_big:
            # Oversized entries stand alone; start over with an empty chunk.
            batches.append([entry])
            pending, pending_tokens = [], 0
        else:
            # The entry opens the next chunk.
            pending, pending_tokens = [entry], cost

    if pending:
        batches.append(pending)

    return batches
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def split_into_chunks(memories: list[TextualMemoryItem], max_tokens_per_chunk: int = 500):
    """
    Group memory items into consecutive chunks bounded by an estimated token budget.

    Each item is costed as ``estimate_tokens(f"{item.id}: {item.memory}")``.
    Input order is preserved. An item whose own cost exceeds
    ``max_tokens_per_chunk`` becomes a single-item chunk, emitted after the
    chunk that was being filled.

    Returns:
        A list of chunks, each chunk being a non-empty list of memory items.
    """
    groups = []
    open_group = []
    open_tokens = 0

    for memory in memories:
        weight = estimate_tokens(f"{memory.id}: {memory.memory}")
        oversized = weight > max_tokens_per_chunk

        if not oversized and open_tokens + weight <= max_tokens_per_chunk:
            # Still room in the chunk being filled.
            open_group.append(memory)
            open_tokens += weight
            continue

        # No room (or the item is oversized): close the current chunk first.
        if open_group:
            groups.append(open_group)

        if oversized:
            groups.append([memory])
            open_group, open_tokens = [], 0
        else:
            open_group, open_tokens = [memory], weight

    if open_group:
        groups.append(open_group)

    return groups
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def make_mem_item(text: str, **kwargs) -> TextualMemoryItem:
    """
    Build a minimal TextualMemoryItem.

    Args:
        text: Stored as the item's ``memory``.
        **kwargs: Optional metadata. Recognised keys are ``info``, ``tags``,
            ``key``, ``embedding``, ``sources``, ``user_name``, ``background``
            and ``type``; other keys are ignored. ``user_id`` and
            ``session_id`` are taken out of ``info`` (defaulting to "") and
            passed as their own metadata fields; whatever remains of ``info``
            is passed through as the metadata ``info``.

    Returns:
        An item whose metadata has memory_type "LongTermMemory", status
        "activated", an empty usage list and confidence 0.99.
    """
    # Work on a copy so popping keys never mutates the caller's dict. ``or {}``
    # also covers an explicit ``info=None``, which previously raised
    # AttributeError on ``None.copy()``.
    info_ = dict(kwargs.get("info") or {})
    user_id = info_.pop("user_id", "")
    session_id = info_.pop("session_id", "")

    return TextualMemoryItem(
        memory=text,
        metadata=TreeNodeTextualMemoryMetadata(
            user_id=user_id,
            session_id=session_id,
            memory_type="LongTermMemory",
            status="activated",
            tags=kwargs.get("tags", []),
            key=kwargs.get("key", ""),
            embedding=kwargs.get("embedding", []),
            usage=[],
            sources=kwargs.get("sources", []),
            user_name=kwargs.get("user_name", ""),
            background=kwargs.get("background", ""),
            confidence=0.99,
            type=kwargs.get("type", ""),
            info=info_,
        ),
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def extract_bracket_content(text):
    """
    Extract and parse JSON content enclosed in curly braces {} from text.

    Candidates are tried in this order, and the first one that parses wins:
      1. the greedy span from the first "{" to the last "}";
      2. the last non-greedy "{...}" span;
      3. the same two spans after collapsing doubled braces ("{{" -> "{",
         "}}" -> "}");
      4. as a last resort, every "{" in the text from left to right, decoding
         the JSON object that starts there and ignoring whatever follows it.
         This recovers a nested object that is followed by unrelated brace
         text, which the regex spans above cannot isolate.

    Args:
        text: String that is expected to contain a JSON object.

    Returns:
        The parsed JSON object.

    Raises:
        ValueError: If the text has no "{...}" span or no candidate parses.
    """
    # Strategy 1: Greedy match to capture the outermost complete brace pair
    greedy_match = re.search(r"\{.*\}", text, re.DOTALL)
    if greedy_match is None:
        error_msg = f"No curly brace content found in text: {text}"
        raise ValueError(error_msg)

    # Strategy 2: Non-greedy match to find all brace pairs, use the last one.
    # A greedy match guarantees at least one non-greedy match, so no
    # emptiness check is needed here.
    candidates = [greedy_match.group(0), re.findall(r"\{.*?\}", text, re.DOTALL)[-1]]

    for content in candidates:
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            continue

    # Retry with doubled braces collapsed to single ones.
    for content in candidates:
        try:
            return json.loads(content.replace("{{", "{").replace("}}", "}"))
        except json.JSONDecodeError:
            continue

    # Last resort: decode the first complete JSON object starting at any "{";
    # raw_decode tolerates trailing text after the object.
    decoder = json.JSONDecoder()
    for opening in re.finditer(r"\{", text):
        try:
            parsed_data, _ = decoder.raw_decode(text[opening.start():])
        except json.JSONDecodeError:
            continue
        return parsed_data

    error_msg = f"Failed to parse JSON content from curly braces. Text preview: {text}"
    raise ValueError(error_msg)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def extract_square_brackets_content(text):
    """
    Extract and parse JSON content enclosed in square brackets [] from text.

    Candidates are tried in this order, and the first one that parses wins:
      1. the greedy span from the first "[" to the last "]";
      2. the last non-greedy "[...]" span;
      3. the same two spans after collapsing doubled curly braces ("{{" -> "{",
         "}}" -> "}");
      4. as a last resort, every "[" in the text from left to right, decoding
         the JSON array that starts there and ignoring whatever follows it.
         This recovers a nested array that is followed by unrelated bracket
         text, which the regex spans above cannot isolate.

    Args:
        text: String that is expected to contain a JSON array.

    Returns:
        The parsed JSON array.

    Raises:
        ValueError: If the text has no "[...]" span or no candidate parses.
    """
    # Strategy 1: Greedy match to capture the outermost complete bracket pair
    greedy_match = re.search(r"\[.*\]", text, re.DOTALL)
    if greedy_match is None:
        error_msg = f"No square bracket content found in text: {text}"
        raise ValueError(error_msg)

    # Strategy 2: Non-greedy match to find all bracket pairs, use the last one.
    # A greedy match guarantees at least one non-greedy match, so no
    # emptiness check is needed here.
    candidates = [greedy_match.group(0), re.findall(r"\[.*?\]", text, re.DOTALL)[-1]]

    for content in candidates:
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            continue

    # Retry with doubled curly braces collapsed to single ones.
    for content in candidates:
        try:
            return json.loads(content.replace("{{", "{").replace("}}", "}"))
        except json.JSONDecodeError:
            continue

    # Last resort: decode the first complete JSON array starting at any "[";
    # raw_decode tolerates trailing text after the array.
    decoder = json.JSONDecoder()
    for opening in re.finditer(r"\[", text):
        try:
            parsed_data, _ = decoder.raw_decode(text[opening.start():])
        except json.JSONDecodeError:
            continue
        return parsed_data

    error_msg = f"Failed to parse JSON content from square brackets. Text preview: {text}"
    raise ValueError(error_msg)
|