MemoryOS 2.0.3 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0

memos/mem_scheduler/task_schedule_modules/redis_queue.py
@@ -0,0 +1,1385 @@
+"""
+Redis Queue implementation for ScheduleMessageItem objects.
+
+This module provides a Redis-based queue implementation that can replace
+the local memos_message_queue functionality in BaseScheduler.
+"""
+
+import os
+import re
+import threading
+import time
+
+from collections import deque
+from collections.abc import Callable
+from uuid import uuid4
+
+from memos.context.context import ContextThread
+from memos.log import get_logger
+from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
+from memos.mem_scheduler.schemas.task_schemas import (
+    DEFAULT_STREAM_INACTIVITY_DELETE_SECONDS,
+    DEFAULT_STREAM_KEY_PREFIX,
+    DEFAULT_STREAM_KEYS_REFRESH_INTERVAL_SEC,
+    DEFAULT_STREAM_RECENT_ACTIVE_SECONDS,
+)
+from memos.mem_scheduler.task_schedule_modules.orchestrator import SchedulerOrchestrator
+from memos.mem_scheduler.utils.status_tracker import TaskStatusTracker
+from memos.mem_scheduler.webservice_modules.redis_service import RedisSchedulerModule
+
+
+logger = get_logger(__name__)
+
+
+class SchedulerRedisQueue(RedisSchedulerModule):
+    """
+    Redis-based queue for storing and processing ScheduleMessageItem objects.
+
+    This class provides a Redis Stream-based implementation that can replace
+    the local memos_message_queue functionality, offering better scalability
+    and persistence for message processing.
+
+    Inherits from RedisSchedulerModule to leverage existing Redis connection
+    and initialization functionality.
+    """
+
+    def __init__(
+        self,
+        stream_key_prefix: str = os.getenv(
+            "MEMSCHEDULER_REDIS_STREAM_KEY_PREFIX",
+            DEFAULT_STREAM_KEY_PREFIX,
+        ),
+        orchestrator: SchedulerOrchestrator | None = None,
+        consumer_group: str = "scheduler_group",
+        consumer_name: str | None = "scheduler_consumer",
+        max_len: int | None = None,
+        auto_delete_acked: bool = True,  # Whether to automatically delete acknowledged messages
+        status_tracker: TaskStatusTracker | None = None,
+    ):
+        """
+        Initialize the Redis queue.
+
+        Args:
+            stream_key_prefix: Prefix used to build per-user/cube/label Redis stream keys
+            orchestrator: SchedulerOrchestrator providing per-stream quotas and idle thresholds (created if None)
+            consumer_group: Name of the consumer group
+            consumer_name: Base name of the consumer (a unique suffix is appended)
+            max_len: Maximum length of the stream (for memory management)
+            auto_delete_acked: Whether to automatically delete acknowledged messages from stream
+            status_tracker: Optional TaskStatusTracker attached to the queue
+        """
+        super().__init__()
+        # Stream configuration
+        self.stream_key_prefix = stream_key_prefix
+        # Precompile regex for prefix filtering to reduce repeated compilation overhead
+        self.stream_prefix_regex_pattern = re.compile(f"^{re.escape(self.stream_key_prefix)}:")
+        self.consumer_group = consumer_group
+        self.consumer_name = f"{consumer_name}_{uuid4().hex[:8]}"
+        self.max_len = max_len
+        self.auto_delete_acked = auto_delete_acked  # Whether to delete acknowledged messages
+        self.status_tracker = status_tracker
+
+        # Consumer state
+        self._is_listening = False
+        self._message_handler: Callable[[ScheduleMessageItem], None] | None = None
+        self.supports_xautoclaim = False
+
+        # Connection state
+        self._is_connected = False
+
+        # Task tracking for mem_scheduler_wait compatibility
+        self._unfinished_tasks = 0
+
+        # Broker flush threshold and async refill control
+        self.task_broker_flush_bar = 10
+        self._refill_lock = threading.Lock()
+        self._refill_thread: ContextThread | None = None
+
+        # Track empty streams first-seen time to avoid zombie keys
+        self._empty_stream_seen_times: dict[str, float] = {}
+        self._empty_stream_seen_lock = threading.Lock()
+
+        logger.info(
+            f"[REDIS_QUEUE] Initialized with stream_prefix='{self.stream_key_prefix}', "
+            f"consumer_group='{self.consumer_group}', consumer_name='{self.consumer_name}'"
+        )
+
+        # Auto-initialize Redis connection
+        if self.auto_initialize_redis():
+            self._is_connected = True
+            self._check_xautoclaim_support()
+
+        self.seen_streams = set()
+
+        # Task Orchestrator
+        self.message_pack_cache = deque()
+
+        self.orchestrator = SchedulerOrchestrator() if orchestrator is None else orchestrator
+
+        # Cached stream keys and refresh control
+        self._stream_keys_cache: list[str] = []
+        self._stream_keys_last_refresh: float = 0.0
+        self._stream_keys_refresh_interval_sec: float = DEFAULT_STREAM_KEYS_REFRESH_INTERVAL_SEC
+        self._stream_keys_lock = threading.Lock()
+        self._stream_keys_refresh_thread: ContextThread | None = None
+        self._stream_keys_refresh_stop_event = threading.Event()
+        self._initial_scan_max_keys = int(
+            os.getenv("MEMSCHEDULER_REDIS_INITIAL_SCAN_MAX_KEYS", "1000") or 1000
+        )
+        self._initial_scan_time_limit_sec = float(
+            os.getenv("MEMSCHEDULER_REDIS_INITIAL_SCAN_TIME_LIMIT_SEC", "1.0") or 1.0
+        )
+
+        # Pipeline chunk size for XREVRANGE pipelined calls
+        self._pipeline_chunk_size = int(
+            os.getenv("MEMSCHEDULER_REDIS_PIPELINE_CHUNK_SIZE", "200") or 200
+        )
+
+        # Start background stream keys refresher if connected
+        if self._is_connected:
+            try:
+                self._refresh_stream_keys(
+                    max_keys=self._initial_scan_max_keys,
+                    time_limit_sec=self._initial_scan_time_limit_sec,
+                )
+            except Exception as e:
+                logger.debug(f"Initial stream keys refresh failed: {e}")
+            self._start_stream_keys_refresh_thread()
+
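For orientation, a minimal construction sketch (illustrative only, not part of the packaged file; the explicit values shown are assumptions standing in for the environment-driven defaults above):

# Illustrative sketch: constructing the queue with explicit settings.
from memos.mem_scheduler.task_schedule_modules.redis_queue import SchedulerRedisQueue

queue = SchedulerRedisQueue(
    stream_key_prefix="scheduler:streams",  # assumed prefix; the packaged default is DEFAULT_STREAM_KEY_PREFIX
    consumer_group="scheduler_group",
    consumer_name="scheduler_consumer",     # a uuid4 suffix is appended internally
    max_len=10_000,                         # approximate XADD trim length; None disables trimming
    auto_delete_acked=True,                 # XDEL entries once they are acknowledged
)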
+    def _check_xautoclaim_support(self):
+        """Check if the Redis server supports xautoclaim (v6.2+)."""
+        if not self._redis_conn:
+            return
+
+        try:
+            info = self._redis_conn.info("server")
+            version_str = info.get("redis_version", "0.0.0")
+            # Simple version parsing
+            parts = [int(p) for p in version_str.split(".") if p.isdigit()]
+            while len(parts) < 3:
+                parts.append(0)
+
+            major, minor, _ = parts[:3]
+            if major > 6 or (major == 6 and minor >= 2):
+                self.supports_xautoclaim = True
+            else:
+                self.supports_xautoclaim = False
+
+            logger.info(
+                f"[REDIS_QUEUE] Redis version {version_str}. "
+                f"Supports xautoclaim: {self.supports_xautoclaim}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to check Redis version: {e}")
+            self.supports_xautoclaim = False
+
+    def get_stream_key(self, user_id: str, mem_cube_id: str, task_label: str) -> str:
+        stream_key = f"{self.stream_key_prefix}:{user_id}:{mem_cube_id}:{task_label}"
+        return stream_key
+
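A worked example of the key layout produced by get_stream_key (values are illustrative, not from the package):

# Illustrative only: how stream keys are composed and later split back into a task label.
prefix = "scheduler:streams"                      # assumed stream_key_prefix
stream_key = f"{prefix}:user_1:cube_a:mem_read"   # get_stream_key("user_1", "cube_a", "mem_read")
task_label = stream_key.rsplit(":", 1)[1]         # -> "mem_read", as done in task_broker() and get()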
+    # --- Stream keys refresh background thread ---
+    def _refresh_stream_keys(
+        self,
+        stream_key_prefix: str | None = None,
+        max_keys: int | None = None,
+        time_limit_sec: float | None = None,
+    ) -> list[str]:
+        """Scan Redis and refresh cached stream keys for the queue prefix."""
+        if not self._redis_conn:
+            return []
+
+        if stream_key_prefix is None:
+            stream_key_prefix = self.stream_key_prefix
+
+        try:
+            candidate_keys = self._scan_candidate_stream_keys(
+                stream_key_prefix=stream_key_prefix,
+                max_keys=max_keys,
+                time_limit_sec=time_limit_sec,
+            )
+            chunked_results = self._pipeline_last_entries(candidate_keys)
+            # Only process successful chunks to maintain 1:1 key-result mapping
+            processed_keys: list[str] = []
+            last_entries_results: list[list[tuple[str, dict]]] = []
+
+            total_key_count = 0
+            for chunk_keys, chunk_res, success in chunked_results:
+                if success:
+                    processed_keys.extend(chunk_keys)
+                    last_entries_results.extend(chunk_res)
+                    total_key_count += len(chunk_keys)
+
+            # Abort refresh if any chunk failed, indicated by processed count mismatch
+            if len(candidate_keys) != total_key_count:
+                logger.error(
+                    f"[REDIS_QUEUE] Last entries processed mismatch: "
+                    f"candidates={len(candidate_keys)}, processed={len(processed_keys)}; aborting refresh"
+                )
+                return []
+
+            now_sec = time.time()
+            keys_to_delete = self._collect_inactive_keys(
+                candidate_keys=processed_keys,
+                last_entries_results=last_entries_results,
+                inactivity_seconds=DEFAULT_STREAM_INACTIVITY_DELETE_SECONDS,
+                now_sec=now_sec,
+            )
+            active_stream_keys = self._filter_active_keys(
+                candidate_keys=processed_keys,
+                last_entries_results=last_entries_results,
+                recent_seconds=DEFAULT_STREAM_RECENT_ACTIVE_SECONDS,
+                now_sec=now_sec,
+            )
+
+            # Ensure consumer groups for newly discovered active streams
+            with self._stream_keys_lock:
+                # Identify keys we haven't seen yet
+                new_streams = [k for k in active_stream_keys if k not in self.seen_streams]
+
+            # Create groups outside the lock to avoid blocking
+            for key in new_streams:
+                self._ensure_consumer_group(key)
+
+            if new_streams:
+                with self._stream_keys_lock:
+                    self.seen_streams.update(new_streams)
+
+            deleted_count = self._delete_streams(keys_to_delete)
+            self._update_stream_cache_with_log(
+                stream_key_prefix=stream_key_prefix,
+                candidate_keys=processed_keys,
+                active_stream_keys=active_stream_keys,
+                deleted_count=deleted_count,
+                active_threshold_sec=DEFAULT_STREAM_RECENT_ACTIVE_SECONDS,
+            )
+            return active_stream_keys
+        except Exception as e:
+            logger.warning(f"Failed to refresh stream keys: {e}")
+            return []
+
+    def _stream_keys_refresh_loop(self) -> None:
+        """Background loop to periodically refresh Redis stream keys cache."""
+        # Seed cache immediately
+        self._refresh_stream_keys()
+        logger.debug(
+            f"Stream keys refresher started with interval={self._stream_keys_refresh_interval_sec}s"
+        )
+        while not self._stream_keys_refresh_stop_event.is_set():
+            try:
+                self._refresh_stream_keys()
+            except Exception as e:
+                logger.warning(f"Stream keys refresh iteration failed: {e}")
+            # Wait with ability to be interrupted
+            self._stream_keys_refresh_stop_event.wait(self._stream_keys_refresh_interval_sec)
+
+        logger.debug("Stream keys refresher stopped")
+
+    def _start_stream_keys_refresh_thread(self) -> None:
+        if self._stream_keys_refresh_thread and self._stream_keys_refresh_thread.is_alive():
+            return
+        self._stream_keys_refresh_stop_event.clear()
+        self._stream_keys_refresh_thread = ContextThread(
+            target=self._stream_keys_refresh_loop,
+            name="redis-stream-keys-refresher",
+            daemon=True,
+        )
+        self._stream_keys_refresh_thread.start()
+
+    def _stop_stream_keys_refresh_thread(self) -> None:
+        try:
+            self._stream_keys_refresh_stop_event.set()
+            if self._stream_keys_refresh_thread and self._stream_keys_refresh_thread.is_alive():
+                self._stream_keys_refresh_thread.join(timeout=2.0)
+        except Exception as e:
+            logger.debug(f"Stopping stream keys refresh thread encountered: {e}")
+
+    def task_broker(
+        self,
+        consume_batch_size: int,
+    ) -> list[list[ScheduleMessageItem]]:
+        stream_keys = self.get_stream_keys(stream_key_prefix=self.stream_key_prefix)
+        if not stream_keys:
+            return []
+
+        # Determine per-stream quotas for this cycle
+        stream_quotas = self.orchestrator.get_stream_quotas(
+            stream_keys=stream_keys, consume_batch_size=consume_batch_size
+        )
+
+        # Step A: batch-read new messages across streams (non-blocking)
+        new_messages_map: dict[str, list[tuple[str, list[tuple[str, dict]]]]] = (
+            self._read_new_messages_batch(stream_keys=stream_keys, stream_quotas=stream_quotas)
+        )
+
+        # Step B: compute pending needs per stream
+        claims_spec: list[tuple[str, int, str]] = []
+        for stream_key in stream_keys:
+            need_pending_count = self._compute_pending_need(
+                new_messages=new_messages_map.get(stream_key),
+                batch_size=stream_quotas[stream_key],
+            )
+            if need_pending_count:
+                # Derive task label from stream key suffix
+                task_label = stream_key.rsplit(":", 1)[1]
+                claims_spec.append((stream_key, need_pending_count, task_label))
+
+        # Step C: batch claim pending messages across streams
+        claimed_messages: list[tuple[str, list[tuple[str, dict]]]] = []
+        if claims_spec:
+            claimed_messages = self._batch_claim_pending_messages(claims_spec=claims_spec)
+
+        # Step D: assemble and convert to ScheduleMessageItem
+        messages: list[tuple[str, list[tuple[str, dict]]]] = []
+        for stream_key in stream_keys:
+            nm = new_messages_map.get(stream_key)
+            if nm:
+                messages.extend(nm)
+
+        if claimed_messages:
+            messages.extend(claimed_messages)
+
+        cache: list[ScheduleMessageItem] = self._convert_messages(messages)
+
+        # pack messages
+        packed: list[list[ScheduleMessageItem]] = []
+        for i in range(0, len(cache), consume_batch_size):
+            packed.append(cache[i : i + consume_batch_size])
+        # return packed list without overwriting existing cache
+        return packed
+
+    def _async_refill_cache(self, batch_size: int) -> None:
+        """Background thread to refill message cache without blocking get_messages."""
+        try:
+            logger.debug(f"Starting async cache refill with batch_size={batch_size}")
+            new_packs = self.task_broker(consume_batch_size=batch_size)
+            logger.debug(f"task_broker returned {len(new_packs)} packs")
+            with self._refill_lock:
+                for pack in new_packs:
+                    if pack:  # Only add non-empty packs
+                        self.message_pack_cache.append(pack)
+                        logger.debug(f"Added pack with {len(pack)} messages to cache")
+            logger.debug(f"Cache refill complete, cache size now: {len(self.message_pack_cache)}")
+        except Exception as e:
+            logger.warning(f"Async cache refill failed: {e}", exc_info=True)
+
+    def get_messages(self, batch_size: int) -> list[ScheduleMessageItem]:
+        if self.message_pack_cache:
+            # Trigger async refill if below threshold (non-blocking)
+            if len(self.message_pack_cache) < self.task_broker_flush_bar and (
+                self._refill_thread is None or not self._refill_thread.is_alive()
+            ):
+                logger.debug(
+                    f"Triggering async cache refill: cache size {len(self.message_pack_cache)} < {self.task_broker_flush_bar}"
+                )
+                self._refill_thread = ContextThread(
+                    target=self._async_refill_cache, args=(batch_size,), name="redis-cache-refill"
+                )
+                self._refill_thread.start()
+            else:
+                logger.debug(f"The size of message_pack_cache is {len(self.message_pack_cache)}")
+        else:
+            new_packs = self.task_broker(consume_batch_size=batch_size)
+            for pack in new_packs:
+                if pack:  # Only add non-empty packs
+                    self.message_pack_cache.append(pack)
+        if len(self.message_pack_cache) == 0:
+            return []
+        else:
+            return self.message_pack_cache.popleft()
+
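A hedged consumer-loop sketch built from get_messages above and ack_message below (illustrative only; handle_message stands in for whatever the scheduler dispatcher does with each item):

# Illustrative consumer loop, not part of the packaged module.
import time

def drain_forever(queue, handle_message, batch_size: int = 16) -> None:
    while True:
        batch = queue.get_messages(batch_size=batch_size)  # one cached pack, possibly empty
        if not batch:
            time.sleep(0.5)  # assumed idle backoff
            continue
        for item in batch:
            handle_message(item)
            # Acknowledge (and, by default, delete) the entry so it is not re-claimed later.
            queue.ack_message(
                user_id=item.user_id,
                mem_cube_id=item.mem_cube_id,
                task_label=item.label,
                redis_message_id=item.redis_message_id,
                message=item,
            )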
+    def _ensure_consumer_group(self, stream_key) -> None:
+        """Ensure the consumer group exists for the stream."""
+        if not self._redis_conn:
+            return
+
+        try:
+            self._redis_conn.xgroup_create(stream_key, self.consumer_group, id="0", mkstream=True)
+            logger.debug(
+                f"Created consumer group '{self.consumer_group}' for stream '{stream_key}'"
+            )
+        except Exception as e:
+            # Check if it's a "consumer group already exists" error
+            error_msg = str(e).lower()
+            if not ("busygroup" in error_msg or "already exists" in error_msg):
+                logger.error(f"Error creating consumer group: {e}", exc_info=True)
+
+    # Pending lock methods removed as they are unnecessary with idle-threshold claiming
+
+    def put(
+        self, message: ScheduleMessageItem, block: bool = True, timeout: float | None = None
+    ) -> None:
+        """
+        Add a message to the Redis queue (Queue-compatible interface).
+
+        Args:
+            message: ScheduleMessageItem to add to the queue
+            block: Ignored for Redis implementation (always non-blocking)
+            timeout: Ignored for Redis implementation
+
+        Raises:
+            ConnectionError: If not connected to Redis
+            TypeError: If message is not a ScheduleMessageItem
+        """
+        if not self._redis_conn:
+            raise ConnectionError("Not connected to Redis. Redis connection not available.")
+
+        if not isinstance(message, ScheduleMessageItem):
+            raise TypeError(f"Expected ScheduleMessageItem, got {type(message)}")
+
+        try:
+            stream_key = self.get_stream_key(
+                user_id=message.user_id, mem_cube_id=message.mem_cube_id, task_label=message.label
+            )
+
+            # Update stream keys cache with newly observed stream key
+            with self._stream_keys_lock:
+                if stream_key not in self.seen_streams:
+                    self.seen_streams.add(stream_key)
+                    self._ensure_consumer_group(stream_key=stream_key)
+
+                if stream_key not in self._stream_keys_cache:
+                    self._stream_keys_cache.append(stream_key)
+                    self._stream_keys_last_refresh = time.time()
+
+            message.stream_key = stream_key
+
+            # Convert message to dictionary for Redis storage
+            message_data = message.to_dict()
+
+            # Add to Redis stream with automatic trimming
+            message_id = self._redis_conn.xadd(
+                stream_key, message_data, maxlen=self.max_len, approximate=True
+            )
+
+            logger.info(
+                f"Added message {message_id} to Redis stream: {message.label} - {message.content[:100]}..."
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to add message to Redis queue: {e}")
+            raise
+
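A producer-side sketch for put (illustrative only; the ScheduleMessageItem field names are inferred from the attributes this module reads, namely user_id, mem_cube_id, label, and content, and are an assumption about the schema's constructor):

# Illustrative only: enqueueing one item.
from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
from memos.mem_scheduler.task_schedule_modules.redis_queue import SchedulerRedisQueue

queue = SchedulerRedisQueue()        # defaults; see the construction sketch earlier
item = ScheduleMessageItem(          # field names assumed from usage in this module
    user_id="user_1",
    mem_cube_id="cube_a",
    label="mem_read",
    content="example payload",
)
queue.put(item)  # XADDs to "<prefix>:user_1:cube_a:mem_read" and caches the stream key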
+    def ack_message(
+        self,
+        user_id: str,
+        mem_cube_id: str,
+        task_label: str,
+        redis_message_id,
+        message: ScheduleMessageItem | None,
+    ) -> None:
+        if message and hasattr(message, "stream_key") and message.stream_key:
+            stream_key = message.stream_key
+        else:
+            stream_key = self.get_stream_key(
+                user_id=user_id, mem_cube_id=mem_cube_id, task_label=task_label
+            )
+        # No-op if not connected or message doesn't come from Redis
+        if not self._redis_conn:
+            logger.debug(
+                f"Skip ack: Redis not connected for stream '{stream_key}', msg_id='{redis_message_id}'"
+            )
+            return
+        if not redis_message_id:
+            logger.debug(
+                f"Skip ack: Empty redis_message_id for stream '{stream_key}', user_id='{user_id}', label='{task_label}'"
+            )
+            return
+
+        try:
+            self._redis_conn.xack(stream_key, self.consumer_group, redis_message_id)
+        except Exception as e:
+            logger.warning(
+                f"xack failed for stream '{stream_key}', msg_id='{redis_message_id}': {e}"
+            )
+        if self.auto_delete_acked:
+            # Optionally delete the message from the stream to keep it clean
+            try:
+                self._redis_conn.xdel(stream_key, redis_message_id)
+                logger.info(f"Successfully deleted acknowledged message {redis_message_id}")
+            except Exception as e:
+                logger.warning(f"Failed to delete acknowledged message {redis_message_id}: {e}")
+
+    def get(
+        self,
+        stream_key: str,
+        block: bool = True,
+        timeout: float | None = None,
+        batch_size: int | None = 1,
+    ) -> list[ScheduleMessageItem]:
+        if not self._redis_conn:
+            raise ConnectionError("Not connected to Redis. Redis connection not available.")
+
+        redis_timeout = self._compute_redis_timeout(block=block, timeout=timeout)
+
+        # Step 1: read new messages first
+        new_messages = self._read_new_messages(
+            stream_key=stream_key, batch_size=batch_size, redis_timeout=redis_timeout
+        )
+
+        # Step 2: determine how many pending messages we need
+        need_pending_count = self._compute_pending_need(
+            new_messages=new_messages, batch_size=batch_size
+        )
+
+        # Step 3: claim eligible pending messages
+        pending_messages: list[tuple[str, list[tuple[str, dict]]]] = []
+        if need_pending_count:
+            task_label = stream_key.rsplit(":", 1)[1]
+            pending_messages = self._claim_pending_messages(
+                stream_key=stream_key,
+                need_pending_count=need_pending_count,
+                task_label=task_label,
+            )
+
+        # Step 4: assemble and convert to ScheduleMessageItem
+        messages = []
+        if new_messages:
+            messages.extend(new_messages)
+        if pending_messages:
+            messages.extend(pending_messages)
+
+        result_messages = self._convert_messages(messages)
+
+        if not result_messages:
+            if not block:
+                return []
+            else:
+                from queue import Empty
+
+                raise Empty("No messages available in Redis queue")
+
+        return result_messages
+
+    def _compute_redis_timeout(self, block: bool, timeout: float | None) -> int | None:
+        """Compute Redis block timeout in milliseconds for xreadgroup."""
+        if block and timeout is not None:
+            return int(timeout * 1000)
+        return None
+
+    def _read_new_messages(
+        self, stream_key: str, batch_size: int | None, redis_timeout: int | None
+    ) -> list[tuple[str, list[tuple[str, dict]]]]:
+        """Read new messages for the consumer group, handling missing group/stream."""
+        try:
+            return self._redis_conn.xreadgroup(
+                self.consumer_group,
+                self.consumer_name,
+                {stream_key: ">"},
+                count=batch_size,
+                block=redis_timeout,
+            )
+        except Exception as read_err:
+            err_msg = str(read_err).lower()
+            if "nogroup" in err_msg or "no such key" in err_msg:
+                logger.warning(
+                    f"Consumer group or stream missing for '{stream_key}/{self.consumer_group}'. Attempting to create and retry (new)."
+                )
+                self._ensure_consumer_group(stream_key=stream_key)
+                return self._redis_conn.xreadgroup(
+                    self.consumer_group,
+                    self.consumer_name,
+                    {stream_key: ">"},
+                    count=batch_size,
+                    block=redis_timeout,
+                )
+            logger.error(f"{read_err}", stack_info=True)
+            raise
+
+    def _read_new_messages_batch(
+        self, stream_keys: list[str], stream_quotas: dict[str, int]
+    ) -> dict[str, list[tuple[str, list[tuple[str, dict]]]]]:
+        """Batch-read new messages (non-blocking) across multiple streams.
+
+        Uses a Redis pipeline to reduce round trips while honoring per-stream quotas.
+
+        Args:
+            stream_keys: List of stream keys to read from.
+            stream_quotas: Per-stream message upper bounds.
+
+        Returns:
+            Mapping from stream key to xreadgroup-style result list.
+        """
+        if not self._redis_conn or not stream_keys:
+            return {}
+
+        # Pre-ensure consumer groups to avoid NOGROUP during batch reads
+        # (Optimization: rely on put() and _refresh_stream_keys() to ensure groups)
+        pipe = self._redis_conn.pipeline(transaction=False)
+        for stream_key in stream_keys:
+            pipe.xreadgroup(
+                self.consumer_group,
+                self.consumer_name,
+                {stream_key: ">"},
+                count=stream_quotas.get(stream_key),
+                block=None,
+            )
+
+        try:
+            res_list = pipe.execute()
+        except Exception as e:
+            err_msg = str(e).lower()
+            if "nogroup" in err_msg or "no such key" in err_msg:
+                # Fallback to sequential non-blocking reads
+                res_list = []
+                for stream_key in stream_keys:
+                    try:
+                        self._ensure_consumer_group(stream_key=stream_key)
+                        res = self._redis_conn.xreadgroup(
+                            self.consumer_group,
+                            self.consumer_name,
+                            {stream_key: ">"},
+                            count=stream_quotas.get(stream_key),
+                            block=None,
+                        )
+                        res_list.append(res)
+                    except Exception:
+                        res_list.append([])
+            else:
+                logger.error(f"Pipeline xreadgroup failed: {e}")
+                res_list = []
+
+        out: dict[str, list[tuple[str, list[tuple[str, dict]]]]] = {}
+        for stream_key, res in zip(stream_keys, res_list, strict=False):
+            out[stream_key] = res or []
+        return out
+
+    def _compute_pending_need(
+        self, new_messages: list[tuple[str, list[tuple[str, dict]]]] | None, batch_size: int | None
+    ) -> int:
+        """Compute how many pending messages are needed to fill the batch."""
+        if batch_size is None:
+            return 1 if not new_messages else 0
+        new_count = sum(len(sm) for _s, sm in new_messages) if new_messages else 0
+        need_pending = max(0, batch_size - new_count)
+        return need_pending if need_pending > 0 else 0
+
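A worked instance of the top-up arithmetic above (values are illustrative):

# Illustrative only: 3 freshly read entries against a batch of 8 leaves 5 pending entries to claim.
new = [("stream", [("1-0", {}), ("2-0", {}), ("3-0", {})])]
assert max(0, 8 - sum(len(sm) for _s, sm in new)) == 5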
+    def _parse_pending_entry(self, entry) -> tuple[str, int]:
+        """Extract message_id and idle_time from a pending entry (dict, tuple, or object)."""
+        if isinstance(entry, dict):
+            return entry.get("message_id"), entry.get("time_since_delivered")
+        elif isinstance(entry, tuple | list):
+            return entry[0], entry[2]
+        else:
+            # Assume object (redis-py 5.x+ PendingMessage)
+            return getattr(entry, "message_id", None), getattr(entry, "time_since_delivered", 0)
+
+    def _manual_xautoclaim(
+        self, stream_key: str, min_idle_time: int, count: int
+    ) -> tuple[str, list[tuple[str, dict]], list[str]]:
+        """
+        Simulate xautoclaim using xpending and xclaim for compatibility with older Redis versions.
+        """
+        # 1. Get pending entries (fetch slightly more to increase chance of finding idle ones)
+        fetch_count = count * 3
+        pending_entries = self._redis_conn.xpending_range(
+            stream_key, self.consumer_group, "-", "+", fetch_count
+        )
+
+        if not pending_entries:
+            return "0-0", [], []
+
+        claim_ids = []
+        for entry in pending_entries:
+            # entry structure depends on redis-py version/decoding
+            # Assuming list of dicts: {'message_id': '...', 'time_since_delivered': ms, ...}
+            # or list of tuples
+            msg_id, idle_time = self._parse_pending_entry(entry)
+
+            if idle_time >= min_idle_time:
+                claim_ids.append(msg_id)
+                if len(claim_ids) >= count:
+                    break
+
+        if not claim_ids:
+            return "0-0", [], []
+
+        # 2. Claim messages
+        claimed_messages = self._redis_conn.xclaim(
+            stream_key, self.consumer_group, self.consumer_name, min_idle_time, claim_ids
+        )
+
+        return "0-0", claimed_messages, []
+
+    def _claim_pending_messages(
+        self, stream_key: str, need_pending_count: int, task_label: str
+    ) -> list[tuple[str, list[tuple[str, dict]]]]:
+        """Claim pending messages exceeding idle threshold, with group existence handling."""
+        min_idle = self.orchestrator.get_task_idle_min(task_label=task_label)
+
+        # Use native xautoclaim if supported (Redis 6.2+)
+        if self.supports_xautoclaim:
+            try:
+                claimed_result = self._redis_conn.xautoclaim(
+                    name=stream_key,
+                    groupname=self.consumer_group,
+                    consumername=self.consumer_name,
+                    min_idle_time=min_idle,
+                    start_id="0-0",
+                    count=need_pending_count,
+                    justid=False,
+                )
+                if len(claimed_result) == 2:
+                    next_id, claimed = claimed_result
+                    deleted_ids = []
+                elif len(claimed_result) == 3:
+                    next_id, claimed, deleted_ids = claimed_result
+                else:
+                    raise ValueError(
+                        f"Unexpected xautoclaim response length: {len(claimed_result)}"
+                    )
+
+                return [(stream_key, claimed)] if claimed else []
+            except Exception as read_err:
+                err_msg = str(read_err).lower()
+                if "nogroup" in err_msg or "no such key" in err_msg:
+                    logger.warning(
+                        f"Consumer group or stream missing for '{stream_key}/{self.consumer_group}'. Attempting to create and retry (xautoclaim)."
+                    )
+                    self._ensure_consumer_group(stream_key=stream_key)
+                    claimed_result = self._redis_conn.xautoclaim(
+                        name=stream_key,
+                        groupname=self.consumer_group,
+                        consumername=self.consumer_name,
+                        min_idle_time=min_idle,
+                        start_id="0-0",
+                        count=need_pending_count,
+                        justid=False,
+                    )
+                    if len(claimed_result) == 2:
+                        next_id, claimed = claimed_result
+                        deleted_ids = []
+                    elif len(claimed_result) == 3:
+                        next_id, claimed, deleted_ids = claimed_result
+                    else:
+                        raise ValueError(
+                            f"Unexpected xautoclaim response length: {len(claimed_result)}"
+                        ) from read_err
+
+                    return [(stream_key, claimed)] if claimed else []
+                return []
+
+        # Fallback to manual xautoclaim for older Redis versions
+        try:
+            _next, claimed, _deleted = self._manual_xautoclaim(
+                stream_key, min_idle, need_pending_count
+            )
+            return [(stream_key, claimed)] if claimed else []
+        except Exception as read_err:
+            err_msg = str(read_err).lower()
+            if "nogroup" in err_msg or "no such key" in err_msg:
+                logger.warning(
+                    f"Consumer group or stream missing for '{stream_key}/{self.consumer_group}'. Attempting to create and retry (manual xautoclaim)."
+                )
+                self._ensure_consumer_group(stream_key=stream_key)
+                try:
+                    _next, claimed, _deleted = self._manual_xautoclaim(
+                        stream_key, min_idle, need_pending_count
+                    )
+                    return [(stream_key, claimed)] if claimed else []
+                except Exception:
+                    return []
+            return []
+
+    def _batch_claim_native(
+        self, claims_spec: list[tuple[str, int, str]]
+    ) -> list[tuple[str, list[tuple[str, dict]]]]:
+        """Batch-claim pending messages using Redis xautoclaim pipeline (Redis 6.2+)."""
+        pipe = self._redis_conn.pipeline(transaction=False)
+        for stream_key, need_count, label in claims_spec:
+            pipe.xautoclaim(
+                name=stream_key,
+                groupname=self.consumer_group,
+                consumername=self.consumer_name,
+                min_idle_time=self.orchestrator.get_task_idle_min(task_label=label),
+                start_id="0-0",
+                count=need_count,
+                justid=False,
+            )
+
+        try:
+            results = pipe.execute(raise_on_error=False)
+        except Exception as e:
+            logger.error(f"Pipeline execution critical failure: {e}")
+            results = [e] * len(claims_spec)
+
+        final_results = []
+        for i, res in enumerate(results):
+            if isinstance(res, Exception):
+                err_msg = str(res).lower()
+                if "nogroup" in err_msg or "no such key" in err_msg:
+                    stream_key, need_count, label = claims_spec[i]
+                    try:
+                        self._ensure_consumer_group(stream_key=stream_key)
+                        retry_res = self._redis_conn.xautoclaim(
+                            name=stream_key,
+                            groupname=self.consumer_group,
+                            consumername=self.consumer_name,
+                            min_idle_time=self.orchestrator.get_task_idle_min(task_label=label),
+                            start_id="0-0",
+                            count=need_count,
+                            justid=False,
+                        )
+                        final_results.append(retry_res)
+                    except Exception as retry_err:
+                        logger.warning(f"Retry xautoclaim failed for {stream_key}: {retry_err}")
+                        final_results.append(None)
+                else:
+                    final_results.append(None)
+            else:
+                final_results.append(res)
+
+        claimed_pairs = []
+        for (stream_key, _, _), claimed_result in zip(claims_spec, final_results, strict=False):
+            try:
+                if not claimed_result:
+                    continue
+                if len(claimed_result) == 2:
+                    _next_id, claimed = claimed_result
+                elif len(claimed_result) == 3:
+                    _next_id, claimed, _deleted_ids = claimed_result
+                else:
+                    raise ValueError(
+                        f"Unexpected xautoclaim response length: {len(claimed_result)} for '{stream_key}'"
+                    )
+                if claimed:
+                    claimed_pairs.append((stream_key, claimed))
+            except Exception as parse_err:
+                logger.warning(f"Failed to parse xautoclaim result for '{stream_key}': {parse_err}")
+
+        return claimed_pairs
+
+    def _batch_claim_manual(
+        self, claims_spec: list[tuple[str, int, str]]
+    ) -> list[tuple[str, list[tuple[str, dict]]]]:
+        """Batch-claim pending messages using 2-phase pipeline (Redis < 6.2)."""
+        # Phase 1: Fetch pending messages for all streams
+        pending_pipe = self._redis_conn.pipeline(transaction=False)
+        for stream_key, need_count, _label in claims_spec:
+            fetch_count = need_count * 3
+            pending_pipe.xpending_range(stream_key, self.consumer_group, "-", "+", fetch_count)
+
+        try:
+            pending_results = pending_pipe.execute(raise_on_error=False)
+        except Exception as e:
+            logger.error(f"Pending fetch pipeline failed: {e}")
+            return []
+
+        # Phase 2: Filter and prepare claim pipeline
+        claim_pipe = self._redis_conn.pipeline(transaction=False)
+        streams_to_claim_indices = []
+        claimed_pairs: list[tuple[str, list[tuple[str, dict]]]] = []
+
+        for i, (stream_key, need_count, label) in enumerate(claims_spec):
+            pending_res = pending_results[i]
+            min_idle = self.orchestrator.get_task_idle_min(task_label=label)
+
+            if isinstance(pending_res, Exception):
+                err_msg = str(pending_res).lower()
+                if "nogroup" in err_msg or "no such key" in err_msg:
+                    try:
+                        self._ensure_consumer_group(stream_key)
+                        _next, claimed, _ = self._manual_xautoclaim(
+                            stream_key, min_idle, need_count
+                        )
+                        if claimed:
+                            claimed_pairs.append((stream_key, claimed))
+                    except Exception as retry_err:
+                        logger.warning(f"Retry manual claim failed for {stream_key}: {retry_err}")
+                continue
+
+            if not pending_res:
+                continue
+
+            claim_ids = []
+            for entry in pending_res:
+                msg_id, idle_time = self._parse_pending_entry(entry)
+                if idle_time >= min_idle:
+                    claim_ids.append(msg_id)
+                    if len(claim_ids) >= need_count:
+                        break
+
+            if claim_ids:
+                claim_pipe.xclaim(
+                    stream_key,
+                    self.consumer_group,
+                    self.consumer_name,
+                    min_idle,
+                    claim_ids,
+                )
+                streams_to_claim_indices.append(i)
+
+        if streams_to_claim_indices:
+            try:
+                claim_results = claim_pipe.execute(raise_on_error=False)
+                for idx_in_results, original_idx in enumerate(streams_to_claim_indices):
+                    res = claim_results[idx_in_results]
+                    stream_key = claims_spec[original_idx][0]
+                    if isinstance(res, list) and res:
+                        claimed_pairs.append((stream_key, res))
+            except Exception as e:
+                logger.error(f"Claim pipeline failed: {e}")
+
+        return claimed_pairs
+
|
923
|
+
def _batch_claim_pending_messages(
|
|
924
|
+
self, claims_spec: list[tuple[str, int, str]]
|
|
925
|
+
) -> list[tuple[str, list[tuple[str, dict]]]]:
|
|
926
|
+
"""Batch-claim pending messages across multiple streams.
|
|
927
|
+
|
|
928
|
+
Args:
|
|
929
|
+
claims_spec: List of tuples (stream_key, need_pending_count, task_label)
|
|
930
|
+
|
|
931
|
+
Returns:
|
|
932
|
+
A list of (stream_key, claimed_entries) pairs for all successful claims.
|
|
933
|
+
"""
|
|
934
|
+
if not self._redis_conn or not claims_spec:
|
|
935
|
+
return []
|
|
936
|
+
|
|
937
|
+
if self.supports_xautoclaim:
|
|
938
|
+
return self._batch_claim_native(claims_spec)
|
|
939
|
+
|
|
940
|
+
return self._batch_claim_manual(claims_spec)

    def _convert_messages(
        self, messages: list[tuple[str, list[tuple[str, dict]]]]
    ) -> list[ScheduleMessageItem]:
        """Convert raw Redis messages into ScheduleMessageItem with metadata."""
        result: list[ScheduleMessageItem] = []
        for _stream, stream_messages in messages or []:
            for message_id, fields in stream_messages:
                try:
                    message = ScheduleMessageItem.from_dict(fields)
                    message.stream_key = _stream
                    message.redis_message_id = message_id
                    result.append(message)
                except Exception as e:
                    logger.error(f"Failed to parse message {message_id}: {e}", stack_info=True)
        return result

    def qsize(self) -> dict:
        """
        Get the current size of the Redis queue (Queue-compatible interface).

        This method scans for all streams matching the `stream_key_prefix`
        and sums up their lengths to get the total queue size.

        Returns:
            A dict mapping each matching stream key to its length, plus a
            "total_size" entry holding the sum across all streams.
        """
        if not self._redis_conn:
            return {}

        total_size = 0
        try:
            qsize_stats = {}
            # Use filtered stream keys to avoid WRONGTYPE on non-stream keys
            for stream_key in self.get_stream_keys():
                stream_qsize = self._redis_conn.xlen(stream_key)
                qsize_stats[stream_key] = stream_qsize
                total_size += stream_qsize
            qsize_stats["total_size"] = total_size
            return qsize_stats

        except Exception as e:
            logger.error(f"Failed to get Redis queue size: {e}", stack_info=True)
            return {}
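A minimal sketch of the shape `qsize()` returns, assuming two hypothetical per-user streams; the key names and counts are illustrative only.

```python
queue = ...  # hypothetical connected instance of this Redis queue class

stats = queue.qsize()
# Expected shape under this sketch's assumptions:
# {
#     "memos:scheduler:user_1": 12,
#     "memos:scheduler:user_2": 3,
#     "total_size": 15,
# }
print(stats.get("total_size", 0))
```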

    def show_task_status(self, stream_key_prefix: str | None = None) -> dict[str, dict[str, int]]:
        """Summarize remaining messages per stream key and print a best-effort report."""
        effective_prefix = (
            stream_key_prefix if stream_key_prefix is not None else self.stream_key_prefix
        )
        stream_keys = self.get_stream_keys(stream_key_prefix=effective_prefix)
        if not stream_keys:
            logger.info(f"No Redis streams found for the configured prefix: {effective_prefix}")
            return {}

        grouped: dict[str, dict[str, int]] = {}

        for sk in stream_keys:
            uid = sk
            if uid not in grouped:
                grouped[uid] = {"remaining": 0}

            # Remaining count via XLEN
            remaining_count = 0
            try:
                remaining_count = int(self._redis_conn.xlen(sk))
            except Exception as e:
                logger.debug(f"XLEN failed for '{sk}': {e}")

            grouped[uid]["remaining"] += remaining_count

        # Pretty-print summary
        try:
            total_remaining = sum(v.get("remaining", 0) for v in grouped.values())
            header = f"Task Queue Status by user_id | remaining={total_remaining}"
            print(header)
            for uid in sorted(grouped.keys()):
                counts = grouped[uid]
                print(f"- {uid}: remaining={counts.get('remaining', 0)}")
        except Exception:
            # Printing is best-effort; return grouped regardless
            pass

        return grouped

    def get_stream_keys(self, stream_key_prefix: str | None = None) -> list[str]:
        """
        Return cached Redis stream keys maintained by background refresher.

        The cache is updated periodically by a background thread and also
        appended immediately on new stream creation via `put`.

        Before returning, validate that all cached keys match the given
        `stream_key_prefix` (or the queue's configured prefix if None).
        If any key does not match, log an error.
        """
        effective_prefix = stream_key_prefix or self.stream_key_prefix
        with self._stream_keys_lock:
            cache_snapshot = list(self._stream_keys_cache)

        # Validate that cached keys conform to the expected prefix
        escaped_prefix = re.escape(effective_prefix)
        regex_pattern = f"^{escaped_prefix}:"
        for key in cache_snapshot:
            if not re.match(regex_pattern, key):
                logger.error(
                    f"[REDIS_QUEUE] Cached stream key '{key}' does not match prefix '{effective_prefix}:'"
                )

        return cache_snapshot
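A standalone sketch of the prefix check used above, with made-up key names to show which ones pass the `^<escaped prefix>:` pattern.

```python
import re

# Illustrative prefix validation, mirroring the check in get_stream_keys().
prefix = "memos:scheduler"          # hypothetical configured prefix
pattern = f"^{re.escape(prefix)}:"

keys = [
    "memos:scheduler:user_1",       # matches: starts with "memos:scheduler:"
    "memos:scheduler",              # no trailing colon, so it does not match
    "other:scheduler:user_1",       # different prefix, does not match
]
for key in keys:
    print(key, bool(re.match(pattern, key)))
```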

    def size(self) -> int:
        """
        Get the current size of the Redis queue (total message count from qsize dict).

        Returns:
            Total number of messages across all streams
        """
        qsize_result = self.qsize()
        return qsize_result.get("total_size", 0)

    def empty(self) -> bool:
        """
        Check if the Redis queue is empty (Queue-compatible interface).

        Returns:
            True if the queue is empty, False otherwise
        """
        return self.size() == 0

    def full(self) -> bool:
        """Return True when `max_len` is set and the current size has reached it."""
        if self.max_len is None:
            return False
        return self.size() >= self.max_len

    def join(self) -> None:
        """
        Block until all items in the queue have been gotten and processed (Queue-compatible interface).

        For Redis streams, this would require tracking pending messages,
        which is complex. For now, this is a no-op.
        """

    def clear(self, stream_key=None) -> None:
        """Clear messages: the given stream if `stream_key` is provided, otherwise all cached streams."""
        if not self._is_connected or not self._redis_conn:
            return

        try:
            if stream_key is not None:
                self._redis_conn.delete(stream_key)
                logger.info(f"Cleared Redis stream: {stream_key}")
            else:
                stream_keys = self.get_stream_keys()

                for stream_key in stream_keys:
                    # Delete the entire stream
                    self._redis_conn.delete(stream_key)
                    logger.info(f"Cleared Redis stream: {stream_key}")

        except Exception as e:
            logger.error(f"Failed to clear Redis queue: {e}")

    def start_listening(
        self,
        handler: Callable[[ScheduleMessageItem], None],
        batch_size: int = 10,
        poll_interval: float = 0.1,
    ) -> None:
        """
        Start listening for messages and process them with the provided handler.

        Args:
            handler: Function to call for each received message
            batch_size: Number of messages to process in each batch
            poll_interval: Interval between polling attempts in seconds
        """
        if not self._is_connected:
            raise ConnectionError("Not connected to Redis. Call connect() first.")

        self._message_handler = handler
        self._is_listening = True

        logger.info(f"Started listening on Redis stream: {self.stream_key_prefix}")

        try:
            while self._is_listening:
                messages = self.get_messages(batch_size=batch_size)

                for message in messages:
                    try:
                        self._message_handler(message)
                    except Exception as e:
                        logger.error(f"Error processing message {message.item_id}: {e}")

                # Small sleep to prevent excessive CPU usage
                if not messages:
                    time.sleep(poll_interval)

        except KeyboardInterrupt:
            logger.info("Received interrupt signal, stopping listener")
        except Exception as e:
            logger.error(f"Error in message listener: {e}")
        finally:
            self._is_listening = False
            logger.info("Stopped listening for messages")

    def stop_listening(self) -> None:
        """Stop the message listener."""
        self._is_listening = False
        logger.info("Requested stop for message listener")

    def connect(self) -> None:
        """Establish connection to Redis and set up the queue."""
        if self._redis_conn is not None:
            try:
                # Test the connection
                self._redis_conn.ping()
                self._is_connected = True
                self._check_xautoclaim_support()
                logger.debug("Redis connection established successfully")
                # Start stream keys refresher when connected
                self._start_stream_keys_refresh_thread()
            except Exception as e:
                logger.error(f"Failed to connect to Redis: {e}")
                self._is_connected = False
        else:
            logger.error("Redis connection not initialized")
            self._is_connected = False

    def disconnect(self) -> None:
        """Disconnect from Redis and clean up resources."""
        self._is_connected = False
        # Stop background refresher
        self._stop_stream_keys_refresh_thread()
        if self._is_listening:
            self.stop_listening()
        logger.debug("Disconnected from Redis")

    def __enter__(self):
        """Context manager entry."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.stop_listening()
        self.disconnect()

    def __del__(self):
        """Cleanup when object is destroyed."""
        self._stop_stream_keys_refresh_thread()
        if self._is_connected:
            self.disconnect()

    @property
    def unfinished_tasks(self) -> int:
        """Number of messages still in the queue (mirrors `size()`)."""
        return self.size()
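The context-manager protocol above pairs naturally with the Queue-compatible helpers. The sketch below assumes a hypothetical queue instance and simply strings the calls together.

```python
queue = ...  # hypothetical instance of this Redis queue class

with queue:                      # __enter__ calls connect()
    print(queue.qsize())         # per-stream lengths plus "total_size"
    print(queue.size())          # total message count as an int
    print(queue.empty())         # True when size() == 0
    print(queue.full())          # only meaningful when max_len is set
# __exit__ stops any listener and disconnects
```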

    def _scan_candidate_stream_keys(
        self,
        stream_key_prefix: str,
        max_keys: int | None = None,
        time_limit_sec: float | None = None,
        count_hint: int = 200,
    ) -> list[str]:
        """Return stream keys matching the given prefix via SCAN with optional limits.

        Uses a cursor-based SCAN to collect keys matching the prefix, honoring
        optional `max_keys` and `time_limit_sec` constraints. Filters results
        with a precompiled regex when scanning the configured prefix.
        """
        redis_pattern = f"{stream_key_prefix}:*"
        collected = []
        cursor = 0
        start_ts = time.time() if time_limit_sec else None
        while True:
            if (
                start_ts is not None
                and time_limit_sec is not None
                and (time.time() - start_ts) > time_limit_sec
            ):
                break
            cursor, keys = self._redis_conn.scan(
                cursor=cursor, match=redis_pattern, count=count_hint
            )
            collected.extend(keys)
            if max_keys is not None and len(collected) >= max_keys:
                break
            if cursor == 0 or cursor == "0":
                break

        if stream_key_prefix == self.stream_key_prefix:
            pattern = self.stream_prefix_regex_pattern
        else:
            escaped_prefix = re.escape(stream_key_prefix)
            pattern = re.compile(f"^{escaped_prefix}:")
        return [key for key in collected if pattern.match(key)]
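For reference, the same cursor-based SCAN loop can be expressed with redis-py's `scan_iter`, which drives the cursor internally. This sketch uses a made-up prefix and a plain `redis.Redis` client rather than the queue's own connection and limits.

```python
import redis

r = redis.Redis(decode_responses=True)   # hypothetical standalone client
prefix = "memos:scheduler"               # hypothetical prefix

# MATCH is only a hint for SCAN, so a final startswith check keeps the
# result set exact, just as the regex filter does above.
candidates = [
    key
    for key in r.scan_iter(match=f"{prefix}:*", count=200)
    if key.startswith(f"{prefix}:")
]
print(candidates)
```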

    def _pipeline_last_entries(
        self, candidate_keys: list[str]
    ) -> list[tuple[list[str], list[list[tuple[str, dict]]], bool]]:
        """Fetch last entries for keys using pipelined XREVRANGE COUNT 1, per-chunk success.

        Returns a list of tuples: (chunk_keys, chunk_results, success_bool).
        Only successful chunks should be processed by the caller to preserve
        a 1:1 mapping between keys and results.
        """
        if not candidate_keys:
            return []

        results_chunks: list[tuple[list[str], list[list[tuple[str, dict]]], bool]] = []
        chunk_size = max(1, int(self._pipeline_chunk_size))

        for start in range(0, len(candidate_keys), chunk_size):
            chunk_keys = candidate_keys[start : start + chunk_size]
            try:
                pipe = self._redis_conn.pipeline(transaction=False)
                for key in chunk_keys:
                    pipe.xrevrange(key, count=1)
                chunk_res = pipe.execute()
                results_chunks.append((chunk_keys, chunk_res, True))
            except Exception as e:
                logger.warning(
                    f"[REDIS_QUEUE] Pipeline execute failed for last entries chunk: "
                    f"offset={start}, size={len(chunk_keys)}, error={e}"
                )
                results_chunks.append((chunk_keys, [], False))

        return results_chunks

    def _parse_last_ms_from_entries(self, entries: list[tuple[str, dict]]) -> int | None:
        """Parse millisecond timestamp from the last entry ID."""
        if not entries:
            return None
        try:
            last_id = entries[0][0]
            return int(str(last_id).split("-")[0])
        except Exception:
            return None
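Redis stream IDs have the form `<milliseconds>-<sequence>`, so the timestamp parse above is just a string split. A quick standalone check with a made-up ID:

```python
# Stream IDs look like "1700000000000-0": millisecond timestamp, then sequence.
last_id = "1700000000000-0"           # hypothetical ID from XREVRANGE ... COUNT 1
last_ms = int(last_id.split("-")[0])  # 1700000000000
last_sec = last_ms / 1000.0           # seconds since epoch, used by the activity checks
print(last_ms, last_sec)
```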

    def _collect_inactive_keys(
        self,
        candidate_keys: list[str],
        last_entries_results: list[list[tuple[str, dict]]],
        inactivity_seconds: float,
        now_sec: float | None = None,
    ) -> list[str]:
        """Collect keys whose last entry time is older than inactivity threshold."""
        keys_to_delete: list[str] = []
        now = time.time() if now_sec is None else now_sec
        for key, entries in zip(candidate_keys, last_entries_results or [], strict=False):
            last_ms = self._parse_last_ms_from_entries(entries)
            if last_ms is None:
                # Empty stream (no entries). Track first-seen time and delete if past threshold
                with self._empty_stream_seen_lock:
                    first_seen = self._empty_stream_seen_times.get(key)
                    if first_seen is None:
                        # Record when we first observed this empty stream
                        self._empty_stream_seen_times[key] = now
                    else:
                        if (now - first_seen) > inactivity_seconds:
                            keys_to_delete.append(key)
                continue
            # Stream has entries; clear any empty-tracking state
            with self._empty_stream_seen_lock:
                if key in self._empty_stream_seen_times:
                    self._empty_stream_seen_times.pop(key, None)
            if (now - (last_ms / 1000.0)) > inactivity_seconds:
                keys_to_delete.append(key)
        return keys_to_delete

    def _filter_active_keys(
        self,
        candidate_keys: list[str],
        last_entries_results: list[list[tuple[str, dict]]],
        recent_seconds: float,
        now_sec: float | None = None,
    ) -> list[str]:
        """Return keys whose last entry time is within the recent window."""
        active: list[str] = []
        now = time.time() if now_sec is None else now_sec
        for key, entries in zip(candidate_keys, last_entries_results or [], strict=False):
            last_ms = self._parse_last_ms_from_entries(entries)
            if last_ms is None:
                continue
            # Stream has entries; clear any empty-tracking state
            with self._empty_stream_seen_lock:
                if key in self._empty_stream_seen_times:
                    self._empty_stream_seen_times.pop(key, None)
            # Active if last message is no older than recent_seconds
            if (now - (last_ms / 1000.0)) <= recent_seconds:
                active.append(key)
        return active
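The inactivity and recency checks are the same age comparison against different thresholds. A small worked example with fabricated timestamps makes the arithmetic explicit; the threshold values are illustrative, not the package's defaults.

```python
# Worked example of the activity math (all numbers are made up).
now = 1_700_000_600.0            # current time, seconds since epoch
last_ms = 1_700_000_000_000      # last entry ID timestamp, milliseconds
inactivity_seconds = 300.0       # hypothetical cleanup threshold
recent_seconds = 900.0           # hypothetical "still active" window

age = now - (last_ms / 1000.0)   # 600.0 seconds since the last entry
is_inactive = age > inactivity_seconds  # True: older than the 5-minute threshold
is_active = age <= recent_seconds       # True: still within the 15-minute window
print(age, is_inactive, is_active)
```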

    def _delete_streams(self, keys_to_delete: list[str]) -> int:
        """Delete the given stream keys in batch, return deleted count."""
        if not keys_to_delete:
            return 0
        deleted_count = 0
        try:
            del_pipe = self._redis_conn.pipeline(transaction=False)
            for key in keys_to_delete:
                del_pipe.delete(key)
            del_pipe.execute()
            deleted_count = len(keys_to_delete)
            # Clean up empty-tracking state and seen_streams for deleted keys
            with self._empty_stream_seen_lock:
                for key in keys_to_delete:
                    self._empty_stream_seen_times.pop(key, None)

            with self._stream_keys_lock:
                for key in keys_to_delete:
                    self.seen_streams.discard(key)
        except Exception:
            # Pipeline failed; fall back to best-effort per-key deletion
            for key in keys_to_delete:
                try:
                    self._redis_conn.delete(key)
                    deleted_count += 1
                    with self._empty_stream_seen_lock:
                        self._empty_stream_seen_times.pop(key, None)
                    with self._stream_keys_lock:
                        self.seen_streams.discard(key)
                except Exception:
                    pass
        return deleted_count

    def _update_stream_cache_with_log(
        self,
        stream_key_prefix: str,
        candidate_keys: list[str],
        active_stream_keys: list[str],
        deleted_count: int,
        active_threshold_sec: float,
    ) -> None:
        """Update cache and emit an info log summarizing refresh statistics."""
        if stream_key_prefix != self.stream_key_prefix:
            return
        with self._stream_keys_lock:
            self._stream_keys_cache = active_stream_keys
            self._stream_keys_last_refresh = time.time()
            cache_count = len(self._stream_keys_cache)
        logger.info(
            f"Refreshed stream keys cache: {cache_count} active keys, "
            f"{deleted_count} deleted, {len(candidate_keys)} candidates examined."
        )
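Putting the private helpers together, the background refresh cycle presumably runs a sequence like the one below. This is only a sketch of the flow implied by the method signatures above; the threshold values are illustrative and it is not the package's actual refresher thread.

```python
queue = ...  # hypothetical instance of this Redis queue class
prefix = queue.stream_key_prefix

# 1. Discover candidate stream keys under the configured prefix.
candidates = queue._scan_candidate_stream_keys(prefix, max_keys=1000, time_limit_sec=2.0)

# 2. Fetch each stream's last entry in chunked pipelines, then classify.
active_keys: list[str] = []
inactive_keys: list[str] = []
for chunk_keys, chunk_results, ok in queue._pipeline_last_entries(candidates):
    if not ok:
        continue  # skip failed chunks to keep keys and results aligned
    active_keys += queue._filter_active_keys(chunk_keys, chunk_results, recent_seconds=900.0)
    inactive_keys += queue._collect_inactive_keys(chunk_keys, chunk_results, inactivity_seconds=3600.0)

# 3. Drop stale streams and refresh the cached key list with a summary log.
deleted = queue._delete_streams(inactive_keys)
queue._update_stream_cache_with_log(prefix, candidates, active_keys, deleted, active_threshold_sec=900.0)
```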