open-swarm 0.1.1743070217__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_swarm-0.1.1743070217.dist-info/METADATA +258 -0
- open_swarm-0.1.1743070217.dist-info/RECORD +89 -0
- open_swarm-0.1.1743070217.dist-info/WHEEL +5 -0
- open_swarm-0.1.1743070217.dist-info/entry_points.txt +3 -0
- open_swarm-0.1.1743070217.dist-info/licenses/LICENSE +21 -0
- open_swarm-0.1.1743070217.dist-info/top_level.txt +1 -0
- swarm/__init__.py +3 -0
- swarm/agent/__init__.py +7 -0
- swarm/agent/agent.py +49 -0
- swarm/apps.py +53 -0
- swarm/auth.py +56 -0
- swarm/consumers.py +141 -0
- swarm/core.py +326 -0
- swarm/extensions/__init__.py +1 -0
- swarm/extensions/blueprint/__init__.py +36 -0
- swarm/extensions/blueprint/agent_utils.py +45 -0
- swarm/extensions/blueprint/blueprint_base.py +562 -0
- swarm/extensions/blueprint/blueprint_discovery.py +112 -0
- swarm/extensions/blueprint/blueprint_utils.py +17 -0
- swarm/extensions/blueprint/common_utils.py +12 -0
- swarm/extensions/blueprint/django_utils.py +203 -0
- swarm/extensions/blueprint/interactive_mode.py +102 -0
- swarm/extensions/blueprint/modes/rest_mode.py +37 -0
- swarm/extensions/blueprint/output_utils.py +95 -0
- swarm/extensions/blueprint/spinner.py +91 -0
- swarm/extensions/cli/__init__.py +0 -0
- swarm/extensions/cli/blueprint_runner.py +251 -0
- swarm/extensions/cli/cli_args.py +88 -0
- swarm/extensions/cli/commands/__init__.py +0 -0
- swarm/extensions/cli/commands/blueprint_management.py +31 -0
- swarm/extensions/cli/commands/config_management.py +15 -0
- swarm/extensions/cli/commands/edit_config.py +77 -0
- swarm/extensions/cli/commands/list_blueprints.py +22 -0
- swarm/extensions/cli/commands/validate_env.py +57 -0
- swarm/extensions/cli/commands/validate_envvars.py +39 -0
- swarm/extensions/cli/interactive_shell.py +41 -0
- swarm/extensions/cli/main.py +36 -0
- swarm/extensions/cli/selection.py +43 -0
- swarm/extensions/cli/utils/discover_commands.py +32 -0
- swarm/extensions/cli/utils/env_setup.py +15 -0
- swarm/extensions/cli/utils.py +105 -0
- swarm/extensions/config/__init__.py +6 -0
- swarm/extensions/config/config_loader.py +208 -0
- swarm/extensions/config/config_manager.py +258 -0
- swarm/extensions/config/server_config.py +49 -0
- swarm/extensions/config/setup_wizard.py +103 -0
- swarm/extensions/config/utils/__init__.py +0 -0
- swarm/extensions/config/utils/logger.py +36 -0
- swarm/extensions/launchers/__init__.py +1 -0
- swarm/extensions/launchers/build_launchers.py +14 -0
- swarm/extensions/launchers/build_swarm_wrapper.py +12 -0
- swarm/extensions/launchers/swarm_api.py +68 -0
- swarm/extensions/launchers/swarm_cli.py +304 -0
- swarm/extensions/launchers/swarm_wrapper.py +29 -0
- swarm/extensions/mcp/__init__.py +1 -0
- swarm/extensions/mcp/cache_utils.py +36 -0
- swarm/extensions/mcp/mcp_client.py +341 -0
- swarm/extensions/mcp/mcp_constants.py +7 -0
- swarm/extensions/mcp/mcp_tool_provider.py +110 -0
- swarm/llm/chat_completion.py +195 -0
- swarm/messages.py +132 -0
- swarm/migrations/0010_initial_chat_models.py +51 -0
- swarm/migrations/__init__.py +0 -0
- swarm/models.py +45 -0
- swarm/repl/__init__.py +1 -0
- swarm/repl/repl.py +87 -0
- swarm/serializers.py +12 -0
- swarm/settings.py +189 -0
- swarm/tool_executor.py +239 -0
- swarm/types.py +126 -0
- swarm/urls.py +89 -0
- swarm/util.py +124 -0
- swarm/utils/color_utils.py +40 -0
- swarm/utils/context_utils.py +272 -0
- swarm/utils/general_utils.py +162 -0
- swarm/utils/logger.py +61 -0
- swarm/utils/logger_setup.py +25 -0
- swarm/utils/message_sequence.py +173 -0
- swarm/utils/message_utils.py +95 -0
- swarm/utils/redact.py +68 -0
- swarm/views/__init__.py +41 -0
- swarm/views/api_views.py +46 -0
- swarm/views/chat_views.py +76 -0
- swarm/views/core_views.py +118 -0
- swarm/views/message_views.py +40 -0
- swarm/views/model_views.py +135 -0
- swarm/views/utils.py +457 -0
- swarm/views/web_views.py +149 -0
- swarm/wsgi.py +16 -0
@@ -0,0 +1,272 @@
|
|
1
|
+
# --- src/swarm/utils/context_utils.py ---
|
2
|
+
"""
|
3
|
+
Utilities for managing context in message histories, including token counting
|
4
|
+
and truncation strategies.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
import json
|
10
|
+
from typing import List, Dict, Any
|
11
|
+
|
12
|
+
try:
|
13
|
+
import tiktoken
|
14
|
+
except ImportError:
|
15
|
+
tiktoken = None
|
16
|
+
logging.warning("tiktoken not found. Falling back to approximate token counting (word count).")
|
17
|
+
|
18
|
+
logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
# --- Helper to check message validity ---
|
21
|
+
def _is_valid_message(msg: Any) -> bool:
|
22
|
+
if not isinstance(msg, dict): return False
|
23
|
+
role = msg.get("role")
|
24
|
+
if not role or not isinstance(role, str): logger.warning(f"Skipping msg missing role: {str(msg)[:150]}"); return False
|
25
|
+
content = msg.get("content"); tool_calls = msg.get("tool_calls"); tool_call_id = msg.get("tool_call_id")
|
26
|
+
if role == "system": is_valid = content is not None
|
27
|
+
elif role == "user": is_valid = content is not None
|
28
|
+
elif role == "assistant": is_valid = content is not None or (isinstance(tool_calls, list) and len(tool_calls) > 0)
|
29
|
+
elif role == "tool": is_valid = content is not None and tool_call_id is not None
|
30
|
+
else: is_valid = False
|
31
|
+
if not is_valid: logger.warning(f"Skipping msg failing validity check for role '{role}': {str(msg)[:150]}")
|
32
|
+
return is_valid
|
33
|
+
# --- End Helper ---
|
34
|
+
|
35
|
+
def get_token_count(text: Any, model: str) -> int:
    """Estimate how many tokens *text* occupies for *model*.

    Args:
        text: A string, a message dict, a list, or any other object.
            Dicts are reduced to the keys ``role``, ``content``, ``name``,
            ``tool_calls`` and ``tool_call_id`` and serialized as compact
            JSON; lists are serialized as compact JSON; anything else is
            converted with ``str()`` (``None`` becomes the empty string).
        model: Model name passed to ``tiktoken.encoding_for_model``.

    Returns:
        0 for empty input. Otherwise the tiktoken token count, falling back
        to the ``cl100k_base`` encoding when the model is unknown, and to a
        whitespace word count plus 5 when tiktoken is missing or fails.
    """

    def _approximate(serialized: str) -> int:
        # Word-count fallback with a fixed +5 allowance.
        return len(serialized.split()) + 5

    processed_text = ""
    try:
        if isinstance(text, str):
            processed_text = text
        elif isinstance(text, dict):
            temp_dict = {
                k: v for k, v in text.items()
                if k in ["role", "content", "name", "tool_calls", "tool_call_id"]
            }
            if temp_dict.get("content") is not None:
                temp_dict["content"] = str(temp_dict["content"])
            processed_text = json.dumps(temp_dict, separators=(',', ':'), default=str)
        elif isinstance(text, list):
            processed_text = json.dumps(text, separators=(',', ':'), default=str)
        else:
            processed_text = str(text) if text is not None else ""
    except Exception as e:
        logger.error(f"Error preprocessing token count: {e}.")
        processed_text = str(text) if text else ""

    if not processed_text:
        return 0

    if tiktoken:
        try:
            return len(tiktoken.encoding_for_model(model).encode(processed_text))
        except KeyError:
            # Unknown model name: retry with the generic cl100k_base encoding.
            try:
                return len(tiktoken.get_encoding("cl100k_base").encode(processed_text))
            except Exception as e:
                logger.error(f"tiktoken failed: {e}. Word count.")
                return _approximate(processed_text)
        except Exception as e:
            logger.error(f"tiktoken error: {e}. Word count.")
            return _approximate(processed_text)

    return _approximate(processed_text)
|
54
|
+
|
55
|
+
# --- Truncation Strategies (v5.1 logic base + multi-tool deferral) ---
|
56
|
+
def _truncate_sophisticated(messages: List[Dict[str, Any]], model: str, max_tokens: int, max_messages: int) -> List[Dict[str, Any]]:
    """Truncate history while trying to keep assistant/tool messages paired.

    The first valid system message is always kept (later system messages are
    dropped). Non-system messages are then walked from newest to oldest and
    kept while both the message budget and the token budget allow:

      * Case 1 - tool message: look back (up to 10 positions) for the
        assistant message whose ``tool_calls`` contains this ``tool_call_id``.
        If that assistant has exactly one tool call, keep both as a pair when
        the pair fits; if it has several, leave the decision to Case 2.
      * Case 2 - assistant message with tool calls: look forward for its tool
        responses and keep the whole group when all were found and it fits;
        otherwise keep the assistant message alone if that fits.
      * Case 3 - any other message: keep it if it fits, otherwise stop.

    Args:
        messages: Raw message dicts; invalid ones are filtered out.
        model: Model name used for token counting.
        max_tokens: Total token budget, system message included.
        max_messages: Total message budget, system message included.

    Returns:
        ``system message + kept messages`` in chronological order, or ``[]``
        when the system message alone exceeds either limit.
    """
    # NOTE(review): the nesting of this function was reconstructed from a
    # listing without indentation - confirm against the canonical source,
    # in particular the position of the inner-search `break` in Case 1.

    # --- Partition into the first system message and the rest -------------
    system_msgs = []
    non_system_msgs = []
    system_found = False
    valid_messages = [msg for msg in messages if _is_valid_message(msg)]
    if len(valid_messages) != len(messages):
        logger.info(f"Filtered {len(messages) - len(valid_messages)} invalid msgs.")
    for msg in valid_messages:
        if msg.get("role") == "system" and not system_found:
            system_msgs.append(msg)
            system_found = True
        elif msg.get("role") != "system":
            non_system_msgs.append(msg)

    # --- Budgets left over after the system message -----------------------
    try:
        system_tokens = sum(get_token_count(msg, model) for msg in system_msgs)
    except Exception as e:
        logger.error(f"Error calc system tokens: {e}.")
        system_tokens = 0
    target_msg_count = max(0, max_messages - len(system_msgs))
    target_token_count = max(0, max_tokens - system_tokens)
    if len(system_msgs) > max_messages or system_tokens > max_tokens:
        logger.warning("System msgs exceed limits.")
        return []
    if not non_system_msgs:
        logger.info("No valid non-system msgs.")
        return system_msgs

    try:
        msg_tokens = [(msg, get_token_count(msg, model)) for msg in non_system_msgs]
    except Exception as e:
        logger.critical(f"Error preparing msg_tokens: {e}", exc_info=True)
        return system_msgs

    current_total_tokens = sum(t for _, t in msg_tokens)
    if len(non_system_msgs) <= target_msg_count and current_total_tokens <= target_token_count:
        logger.info("History fits.")
        return system_msgs + non_system_msgs

    logger.info(f"Sophisticated truncation. Target: {target_msg_count} msgs, {target_token_count} tokens.")
    truncated = []
    total_tokens = 0
    kept_indices = set()
    i = len(msg_tokens) - 1

    def _is_bad_cost(value: Any) -> bool:
        # Explicit check instead of `assert`, which disappears under `python -O`.
        return not isinstance(value, (int, float)) or value < 0

    # --- Walk newest -> oldest --------------------------------------------
    while i >= 0:
        if i in kept_indices:
            logger.debug(f" [Loop Skip] Idx {i} already kept.")
            i -= 1
            continue
        if len(truncated) >= target_msg_count:
            logger.debug(f" [Loop Stop] Msg limit reached.")
            break

        msg, tokens = msg_tokens[i]
        if _is_bad_cost(tokens):
            # Treat an unusable count as very expensive so the message is unlikely to be kept.
            tokens = 9999
            logger.warning(f"Bad tokens at {i}")

        current_role = msg.get("role")
        logger.debug(f" [Loop Eval] Idx={i}, Role={current_role}, Tokens={tokens}. Kept: Msgs={len(truncated)}, Tokens={total_tokens}")

        # The former "Pre-Check Skip" branch required
        # len(truncated) + 1 > target_msg_count, which the message-limit
        # check above already rules out; it could never run and was removed.

        action_taken_for_i = False

        # Case 1: Tool message
        if current_role == "tool" and "tool_call_id" in msg:
            tool_call_id = msg["tool_call_id"]
            logger.debug(f" -> Case 1: Tool Msg (ID: {tool_call_id})")
            assistant_idx = i - 1
            pair_found = False
            search_depth = 0
            max_search_depth = 10
            while assistant_idx >= 0 and search_depth < max_search_depth:
                if assistant_idx in kept_indices:
                    assistant_idx -= 1
                    search_depth += 1
                    continue
                prev_msg, prev_tokens = msg_tokens[assistant_idx]
                if _is_bad_cost(prev_tokens):
                    prev_tokens = 9999
                if prev_msg.get("role") == "assistant" and isinstance(prev_msg.get("tool_calls"), list):
                    assistant_tool_calls = prev_msg.get("tool_calls", [])
                    # Pair only when this tool call ID is present AND the
                    # assistant issued exactly one tool call.
                    has_this_call = any(tc.get("id") == tool_call_id for tc in assistant_tool_calls if isinstance(tc, dict))
                    is_single_call_assistant = len(assistant_tool_calls) == 1

                    if has_this_call:
                        pair_found = True
                        if not is_single_call_assistant:
                            # Do not pair here; Case 2 handles the whole block later.
                            logger.debug(f" Found assistant pair at {assistant_idx}, but it has multiple tool calls ({len(assistant_tool_calls)}). Deferring to Case 2.")
                        else:
                            pair_total_tokens = tokens + prev_tokens
                            pair_msg_count = 2
                            logger.debug(f" Found single-call assistant pair at {assistant_idx}. Pair cost={pair_total_tokens}, Pair msgs={pair_msg_count}")
                            check_token_fits = (total_tokens + pair_total_tokens <= target_token_count)
                            check_msg_fits = (len(truncated) + pair_msg_count <= target_msg_count)
                            logger.debug(f" Budget Check: (CurrentTokens={total_tokens} + PairTokens={pair_total_tokens} <= TargetTokens={target_token_count}) -> {check_token_fits}")
                            logger.debug(f" Budget Check: (CurrentMsgs={len(truncated)} + PairMsgs={pair_msg_count} <= TargetMsgs={target_msg_count}) -> {check_msg_fits}")
                            if check_token_fits and check_msg_fits:
                                logger.info(f" Action: KEEPING Pair T(idx {i})+A(idx {assistant_idx})")
                                # Prepend so the result stays chronological: assistant first, then its tool reply.
                                truncated.insert(0, prev_msg)
                                truncated.insert(1, msg)
                                total_tokens += pair_total_tokens
                                kept_indices.add(i)
                                kept_indices.add(assistant_idx)
                                i -= 1
                                action_taken_for_i = True
                            else:
                                logger.debug(" Pair doesn't fit budget.")
                        break  # Stop inner search (found the relevant assistant)
                assistant_idx -= 1
                search_depth += 1
            if not pair_found:
                logger.debug(f" -> Case 1 Result: Pair not found.")
            elif not action_taken_for_i:
                logger.debug(f" -> Case 1 Result: Pair found but deferred or didn't fit.")

        # Case 2: Assistant message with tool calls
        elif current_role == "assistant" and isinstance(msg.get("tool_calls"), list) and msg["tool_calls"]:
            logger.debug(f" -> Case 2: Assistant w/ Tools at index {i}")
            assistant_tokens = tokens
            expected_tool_ids = {tc.get("id") for tc in msg.get("tool_calls") if isinstance(tc, dict)}
            found_tools = []
            found_indices = []
            found_tokens = 0
            j = i + 1
            while j < len(non_system_msgs):
                if j in kept_indices:
                    j += 1
                    continue
                tool_msg, tool_tokens_fwd = msg_tokens[j]
                if _is_bad_cost(tool_tokens_fwd):
                    tool_tokens_fwd = 9999
                tool_msg_call_id = tool_msg.get("tool_call_id")
                if tool_msg.get("role") == "tool" and tool_msg_call_id in expected_tool_ids:
                    found_tools.append(tool_msg)
                    found_indices.append(j)
                    found_tokens += tool_tokens_fwd
                elif tool_msg.get("role") != "tool":
                    break  # Stop search on non-tool
                j += 1
            pair_total_tokens = assistant_tokens + found_tokens
            pair_msg_count = 1 + len(found_tools)
            logger.debug(f" Found {len(found_tools)} tools for {len(expected_tool_ids)} calls. Pair Cost={pair_total_tokens}, Pair Len={pair_msg_count}.")
            all_tools_found = (len(found_indices) == len(expected_tool_ids))
            if not all_tools_found:
                logger.debug(" Did not find all expected tools for this assistant call.")

            check_token_fits = (total_tokens + pair_total_tokens <= target_token_count)
            check_msg_fits = (len(truncated) + pair_msg_count <= target_msg_count)
            logger.debug(f" Budget Check: (CurrentTokens={total_tokens} + PairTokens={pair_total_tokens} <= TargetTokens={target_token_count}) -> {check_token_fits}")
            logger.debug(f" Budget Check: (CurrentMsgs={len(truncated)} + PairMsgs={pair_msg_count} <= TargetMsgs={target_msg_count}) -> {check_msg_fits}")

            if all_tools_found and check_token_fits and check_msg_fits:
                logger.info(f" -> Action: KEEPING Pair A(idx {i})+Tools({found_indices})")
                truncated.insert(0, msg)
                kept_indices.add(i)
                # Tool replies go right after the assistant, in original order.
                insert_idx = 1
                sorted_tools = sorted(zip(found_indices, found_tools), key=lambda x: x[0])
                for tool_idx, tool_item in sorted_tools:
                    if tool_idx not in kept_indices:
                        truncated.insert(insert_idx, tool_item)
                        kept_indices.add(tool_idx)
                        insert_idx += 1
                    else:
                        logger.error(f" Consistency Error! Tool index {tool_idx} already kept.")
                total_tokens += pair_total_tokens
                i -= 1
                action_taken_for_i = True
            else:
                logger.debug(f" Pair doesn't fit or not all tools found.")
                single_token_fits = total_tokens + tokens <= target_token_count
                single_msg_fits = len(truncated) + 1 <= target_msg_count
                if single_token_fits and single_msg_fits:
                    # NOTE(review): this keeps an assistant message whose tool
                    # replies were dropped - confirm downstream consumers accept that.
                    logger.info(f" -> Action: KEEPING SINGLE Assistant {i} (pair failed/incomplete).")
                    truncated.insert(0, msg)
                    total_tokens += tokens
                    kept_indices.add(i)
                    i -= 1
                    action_taken_for_i = True
                else:
                    logger.debug(f" Cannot keep single assistant {i} either (Tokens fit: {single_token_fits}, Msgs fit: {single_msg_fits}).")

        # Case 3: Regular message (User or Assistant w/o tool calls)
        else:
            logger.debug(f" -> Case 3: Regular Message at index {i}")
            single_token_fits = total_tokens + tokens <= target_token_count
            single_msg_fits = len(truncated) + 1 <= target_msg_count
            if single_token_fits and single_msg_fits:
                logger.info(f" -> Action: KEEPING SINGLE message {i}")
                truncated.insert(0, msg)
                total_tokens += tokens
                kept_indices.add(i)
                i -= 1
                action_taken_for_i = True
            else:
                logger.info(f" -> Action: SKIPPING message {i} (Tokens fit: {single_token_fits}, Msgs fit: {single_msg_fits}). Stopping.")
                break

        # Make sure index 'i' decreases if no action modified it and loop didn't break
        if not action_taken_for_i:
            logger.debug(f" [Loop Default Decrement] No action/break for index {i}.")
            i -= 1

    final_messages = system_msgs + truncated
    try:
        final_token_check = sum(get_token_count(m, model) for m in final_messages)
    except Exception as e:
        logger.error(f"Error final token check: {e}.")
        final_token_check = -1
    logger.info(f"Sophisticated truncation result: {len(final_messages)} msgs ({len(system_msgs)} sys, {len(truncated)} non-sys), ~{final_token_check} tokens.")
    return final_messages
|
204
|
+
|
205
|
+
|
206
|
+
def _truncate_simple(messages: List[Dict[str, Any]], model: str, max_tokens: int, max_messages: int) -> List[Dict[str, Any]]:
    """Keep the first system message plus the most recent messages that fit.

    Non-system messages are taken from newest to oldest until adding the next
    one would exceed either the message budget or the token budget; the walk
    stops at the first message that does not fit. No attempt is made to keep
    assistant/tool messages together.

    Args:
        messages: Raw message dicts; invalid ones are filtered out.
        model: Model name used for token counting.
        max_tokens: Total token budget, system message included.
        max_messages: Total message budget, system message included.

    Returns:
        The kept messages in chronological order, or ``[]`` when the system
        message alone exceeds either limit.
    """
    system_msgs = []
    non_system_msgs = []
    system_found = False
    valid_messages = [msg for msg in messages if _is_valid_message(msg)]
    if len(valid_messages) != len(messages):
        logger.info(f"Simple Mode: Filtered {len(messages) - len(valid_messages)} invalid msgs.")
    for msg in valid_messages:
        if msg.get("role") == "system" and not system_found:
            system_msgs.append(msg)
            system_found = True
        elif msg.get("role") != "system":
            non_system_msgs.append(msg)

    try:
        system_tokens = sum(get_token_count(msg, model) for msg in system_msgs)
    except Exception as e:
        logger.error(f"Simple Mode: Error calc system tokens: {e}.")
        system_tokens = 0
    target_msg_count = max(0, max_messages - len(system_msgs))
    target_token_count = max(0, max_tokens - system_tokens)
    if len(system_msgs) > max_messages or system_tokens > max_tokens:
        logger.warning("Simple Mode: System msgs exceed limits.")
        return []
    if not non_system_msgs:
        logger.info("Simple Mode: No valid non-system messages.")
        return system_msgs

    result_non_system = []
    current_tokens = 0
    current_msg_count = 0
    for msg_index in range(len(non_system_msgs) - 1, -1, -1):
        msg = non_system_msgs[msg_index]
        try:
            msg_tokens = get_token_count(msg, model)
            # Explicit validation: an `assert` here would vanish under `python -O`.
            if not isinstance(msg_tokens, (int, float)) or msg_tokens < 0:
                raise ValueError(f"invalid token count {msg_tokens!r}")
        except Exception as e:
            logger.error(f"Simple Mode: Error token count msg idx {msg_index}: {e}. High cost.")
            msg_tokens = 9999  # Pessimistic cost so an uncountable message is unlikely to fit.
        if current_msg_count + 1 <= target_msg_count and current_tokens + msg_tokens <= target_token_count:
            result_non_system.append(msg)
            current_tokens += msg_tokens
            current_msg_count += 1
        else:
            break

    # Collected newest-first; restore chronological order.
    final_result = system_msgs + list(reversed(result_non_system))
    try:
        final_token_check = sum(get_token_count(m, model) for m in final_result)
    except Exception as e:
        logger.error(f"Simple Mode: Error final token check: {e}.")
        final_token_check = -1
    logger.info(f"Simple truncation result: {len(final_result)} messages ({len(system_msgs)} sys), ~{final_token_check} tokens.")
    return final_result
|
231
|
+
|
232
|
+
|
233
|
+
def truncate_message_history(messages: List[Dict[str, Any]], model: str, max_tokens: int, max_messages: int) -> List[Dict[str, Any]]:
    """Truncate *messages* to fit the token and message budgets.

    The strategy is chosen by the ``SWARM_TRUNCATION_MODE`` environment
    variable: ``"pairs"`` (default) uses the pair-preserving strategy; any
    other value uses simple most-recent truncation (values other than
    ``"simple"`` are logged as unknown). If the chosen strategy raises, simple
    truncation is tried; if that raises as well, the first system message
    plus the last N valid non-system messages are returned.

    Args:
        messages: Message dicts to truncate.
        model: Model name used for token counting.
        max_tokens: Total token budget.
        max_messages: Total message budget.

    Returns:
        The truncated message list; ``[]`` for empty or non-list input, or
        when nothing could be kept.
    """
    if not isinstance(messages, list) or not messages:
        logger.debug("Truncate called with empty/invalid list.")
        return []

    truncation_mode = os.getenv("SWARM_TRUNCATION_MODE", "pairs").lower()
    mode_name = "Sophisticated (Pair-Preserving)" if truncation_mode == "pairs" else "Simple (Recent Only)"
    logger.info(f"--- Starting Truncation --- Mode: {mode_name}, Max Tokens: {max_tokens}, Max Messages: {max_messages}, Input Msgs: {len(messages)}")

    result = []
    try:
        if truncation_mode == "pairs":
            result = _truncate_sophisticated(messages, model, max_tokens, max_messages)
        else:
            if truncation_mode != "simple":
                logger.warning(f"Unknown SWARM_TRUNCATION_MODE '{truncation_mode}'. Defaulting 'simple'.")
            result = _truncate_simple(messages, model, max_tokens, max_messages)
    except Exception as e:
        logger.error(f"!!! Critical error during primary truncation ({mode_name}): {e}", exc_info=True)
        try:
            logger.warning("Attempting fallback to simple truncation.")
            result = _truncate_simple(messages, model, max_tokens, max_messages)
        except Exception as fallback_e:
            logger.error(f"!!! Fallback simple truncation also failed: {fallback_e}", exc_info=True)
            logger.warning("Returning raw last N messages as final fallback.")
            try:
                system_msg_fallback = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"][:1]
                valid_non_system_fallback = [m for m in messages if _is_valid_message(m) and m.get("role") != "system"]
                keep_count = max(0, max_messages - len(system_msg_fallback))
                # `seq[-0:]` is the WHOLE sequence, so a zero budget must be
                # special-cased to keep no non-system messages at all.
                recent_fallback = valid_non_system_fallback[-keep_count:] if keep_count else []
                result = system_msg_fallback + recent_fallback
            except Exception as final_fallback_e:
                logger.critical(f"!!! Final fallback failed: {final_fallback_e}.", exc_info=True)
                result = []

    # Explain an empty result when the input did contain valid messages.
    initial_valid_message_count = sum(1 for m in messages if _is_valid_message(m))
    if initial_valid_message_count > 0 and not result:
        system_msgs_in_input = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"][:1]
        if system_msgs_in_input:
            try:
                system_tokens_in_input = get_token_count(system_msgs_in_input[0], model)
                if len(system_msgs_in_input) > max_messages or system_tokens_in_input > max_tokens:
                    logger.warning("Truncation empty list, likely due to system message exceeding limits.")
                    return []
            except Exception as diag_e:
                # Diagnostics only: a counting failure must not mask the empty result.
                logger.debug(f"Could not diagnose empty truncation result: {diag_e}")
        logger.warning("Truncation resulted empty list unexpectedly.")
        return []

    logger.info(f"--- Finished Truncation --- Result Msgs: {len(result)}")
    return result
|
@@ -0,0 +1,162 @@
|
|
1
|
+
"""
|
2
|
+
General utility functions for the Swarm framework.
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
import logging
|
6
|
+
import jmespath
|
7
|
+
import json
|
8
|
+
import datetime
|
9
|
+
from typing import Optional, List, Dict, Any
|
10
|
+
|
11
|
+
from swarm.utils.logger_setup import setup_logger
|
12
|
+
|
13
|
+
# Initialize logger for this module
|
14
|
+
logger = setup_logger(__name__)
|
15
|
+
|
16
|
+
# Define default JMESPath expressions here - split for individual processing
|
17
|
+
DEFAULT_CHAT_ID_PATHS_LIST = [
|
18
|
+
"metadata.channelInfo.channelId",
|
19
|
+
"metadata.userInfo.userId",
|
20
|
+
"`json_parse(messages[-1].tool_calls[-1].function.arguments).chat_id`" # This path requires custom handling or a registered json_parse function
|
21
|
+
]
|
22
|
+
|
23
|
+
def find_project_root(current_path: str, marker: str = ".git") -> str:
    """Walk upward from *current_path* until a directory containing *marker* is found.

    Args:
        current_path: File or directory path at which the search starts.
        marker: Name of the file or directory that identifies the root.

    Returns:
        The absolute path of the nearest ancestor (or the start path itself)
        that contains *marker*.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    start_path = os.path.abspath(current_path)
    candidate = start_path
    while True:
        if os.path.exists(os.path.join(candidate, marker)):
            return candidate
        parent_path = os.path.dirname(candidate)
        if parent_path == candidate:
            # dirname() is a fixed point at the filesystem root: nothing left to check.
            break
        candidate = parent_path

    # Report where the search STARTED, not the root the loop ended on.
    logger.warning(f"Project root marker '{marker}' not found starting from {start_path}.")
    raise FileNotFoundError(f"Project root with marker '{marker}' not found.")
|
35
|
+
|
36
|
+
def color_text(text: str, color: str = "white") -> str:
    """Wrap *text* in ANSI escape codes for *color*.

    Args:
        text: The string to colorize.
        color: One of ``red``, ``green``, ``yellow``, ``blue``, ``magenta``,
            ``cyan`` or ``white``. Matching ignores case and surrounding
            whitespace.

    Returns:
        ``<color code> + text + <reset code>``. For an unrecognized color no
        color code is emitted, but the reset code is still appended.
    """
    colors = {
        "red": "\033[91m",
        "green": "\033[92m",
        "yellow": "\033[93m",
        "blue": "\033[94m",
        "magenta": "\033[95m",
        "cyan": "\033[96m",
        "white": "\033[97m",
    }
    reset = "\033[0m"
    # Normalize only real strings so other hashable values behave as before.
    lookup_key = color.strip().lower() if isinstance(color, str) else color
    return colors.get(lookup_key, "") + text + reset
|
41
|
+
|
42
|
+
def _first_string_id(mapping: dict) -> str:
    """Return the first non-blank string stored under a known ID key, else ""."""
    for key in ("conversation_id", "chat_id", "channelId", "sessionId", "id"):
        candidate = mapping.get(key)
        if candidate and isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned:
                return cleaned
    return ""


def _search_and_process_jmespath(expression: str, payload: dict) -> str:
    """Evaluate one JMESPath expression against *payload* and return a chat ID.

    Expressions that mention ``json_parse``,
    ``messages[-1].tool_calls[-1].function.arguments`` and ``.chat_id`` are
    handled manually: the arguments string is fetched with a plain JMESPath
    search, decoded as JSON, and its ``chat_id`` entry is used.

    The extracted value is then converted to an ID:
      * non-empty string: returned stripped; if it looks like a JSON object,
        the first known ID key inside it is returned instead;
      * dict: the first known ID key (``conversation_id``, ``chat_id``,
        ``channelId``, ``sessionId``, ``id``) holding a non-blank string;
      * int / float / bool: ``str(value)``;
      * anything else, or nothing found: ``""``.

    Args:
        expression: JMESPath expression to evaluate.
        payload: Request payload to search.

    Returns:
        The chat ID, or ``""`` when none was found or evaluation failed.
        Errors are logged rather than raised.
    """
    chat_id = ""
    try:
        # Handle the specific case of json_parse manually for now
        if 'json_parse' in expression and 'messages[-1].tool_calls[-1].function.arguments' in expression and '.chat_id' in expression:
            logger.debug(f"Attempting manual handling for json_parse expression: {expression}")
            try:
                # Extract the arguments string first using a simpler path
                args_str = jmespath.search('messages[-1].tool_calls[-1].function.arguments', payload)
                if not isinstance(args_str, str):
                    logger.debug("Arguments for json_parse path not found or not a string.")
                    return ""
                args_dict = json.loads(args_str)
                if not isinstance(args_dict, dict):
                    # Valid JSON that is not an object (list, number, ...) has no
                    # `chat_id`; treat as "not found" instead of raising AttributeError.
                    logger.debug("Arguments for json_parse path did not decode to a JSON object.")
                    return ""
                extracted_value = args_dict.get('chat_id')
            except (json.JSONDecodeError, jmespath.exceptions.JMESPathError, IndexError, TypeError, KeyError) as e:
                logger.debug(f"Manual handling of json_parse failed: {e}")
                return ""
        else:
            # Evaluate standard JMESPath expression
            extracted_value = jmespath.search(expression, payload)

        # Process the extracted value (whether from standard path or manual json_parse)
        if extracted_value is None:
            return ""

        if isinstance(extracted_value, str):
            stripped_value = extracted_value.strip()
            if not stripped_value:
                return ""

            # A result containing backticks or `json_parse` is the unevaluated
            # expression text itself, i.e. a failed custom-function lookup.
            if '`' in stripped_value or 'json_parse' in stripped_value:
                logger.debug(f"JMESPath expression '{expression}' likely returned literal or unevaluated function string: '{stripped_value}'. Treating as not found.")
                return ""

            # Attempt to parse if it looks like JSON, otherwise treat as plain ID
            try:
                if stripped_value.startswith('{') and stripped_value.endswith('}'):
                    parsed_json = json.loads(stripped_value)
                    if isinstance(parsed_json, dict):
                        return _first_string_id(parsed_json)
                    return ""  # Parsed, but not dict
                chat_id = stripped_value  # Treat as plain ID
            except json.JSONDecodeError:
                # Looked like JSON but is not: use the raw string as the ID.
                chat_id = stripped_value
            except Exception as e:
                logger.error(f"Unexpected error processing potential JSON string from '{expression}': {e}")
                return ""
        elif isinstance(extracted_value, dict):
            return _first_string_id(extracted_value)
        elif isinstance(extracted_value, (int, float, bool)):
            return str(extracted_value)  # Convert simple types
        else:
            logger.warning(f"Extracted value via '{expression}' is of unsupported type: {type(extracted_value)}. Returning empty string.")
            return ""

    except jmespath.exceptions.JMESPathError as jmes_err:
        logger.debug(f"JMESPath expression '{expression}' failed: {jmes_err}")
        return ""
    except Exception as e:
        logger.error(f"Unexpected error evaluating JMESPath '{expression}': {e}", exc_info=True)
        return ""

    return str(chat_id) if chat_id is not None else ""
|
120
|
+
|
121
|
+
|
122
|
+
def extract_chat_id(payload: dict) -> str:
    """
    Resolve a chat ID from *payload* using JMESPath expressions.

    Expressions come from the STATEFUL_CHAT_ID_PATH environment variable
    (several may be given, separated by '||'); when it is unset or blank,
    the module's hardcoded default paths are used. Expressions are tried in
    order and the first non-empty ID wins; "" is returned if none matches.
    """
    env_value = os.getenv("STATEFUL_CHAT_ID_PATH", "").strip()

    if env_value:
        candidates: List[str] = [piece.strip() for piece in env_value.split('||') if piece.strip()]
        source = "environment variable"
        logger.debug(f"Using chat ID paths from {source}: {candidates}")
    else:
        candidates = DEFAULT_CHAT_ID_PATHS_LIST
        source = "hardcoded defaults"
        logger.debug(f"STATEFUL_CHAT_ID_PATH not set, using {source}: {candidates}")

    if not candidates:
        logger.warning(f"No chat ID JMESPath expressions found from {source}.")
        return ""

    for expression in candidates:
        logger.debug(f"Trying JMESPath expression: {expression}")
        resolved = _search_and_process_jmespath(expression, payload)
        if resolved:
            # First non-empty result wins.
            return resolved

    logger.debug("No chat ID found after trying all expressions.")
    return ""
|
153
|
+
|
154
|
+
def serialize_datetime(obj):
    """JSON ``default`` hook that renders date/time objects as ISO-8601 strings.

    Args:
        obj: The object ``json.dumps`` could not serialize on its own.

    Returns:
        ``obj.isoformat()`` for ``datetime.datetime``, ``datetime.date`` and
        ``datetime.time`` instances; *obj* unchanged when it is a string.

    Raises:
        TypeError: For any other type, as ``json.dumps`` expects of a
            ``default`` hook.
    """
    # datetime.datetime subclasses datetime.date, so it is covered here too.
    if isinstance(obj, (datetime.date, datetime.time)):
        return obj.isoformat()
    if isinstance(obj, str):
        return obj
    raise TypeError(f"Type {type(obj)} not serializable")
|
158
|
+
|
159
|
+
def custom_json_dumps(obj, **kwargs):
    """Call ``json.dumps`` with ``serialize_datetime`` as the default hook.

    Keyword arguments are forwarded to ``json.dumps``; an explicit
    ``default=`` passed by the caller overrides the built-in hook.
    """
    options = {'default': serialize_datetime, **kwargs}
    return json.dumps(obj, **options)
|
162
|
+
|
swarm/utils/logger.py
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
import logging
|
2
|
+
from logging.handlers import RotatingFileHandler
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
# Verbose-logging switch read by setup_logger(): True selects DEBUG level,
# False selects INFO. It must be defined, otherwise setup_logger() raises
# NameError on its first call.
DEBUG = False

# Fallback for when Django settings are not configured
DEFAULT_LOGS_DIR = Path.cwd() / "logs"
# Created at import time so the file handler can open its log file.
DEFAULT_LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
10
|
+
|
11
|
+
|
12
|
+
def setup_logger(name: str) -> logging.Logger:
    """
    Sets up a logger with a console handler and a rotating file handler.

    The log file is ``<LOGS_DIR>/<name>.log``, where ``LOGS_DIR`` comes from
    Django settings when Django is importable and configured, and otherwise
    from ``DEFAULT_LOGS_DIR``. Files rotate at 5 MB with 5 backups kept.
    Handlers are attached only once per logger name.

    Args:
        name (str): Name of the logger.

    Returns:
        logging.Logger: Configured logger instance.
    """
    logger = logging.getLogger(name)

    # The module-level DEBUG flag may be absent (it is commented out in this
    # file); look it up defensively and fall back to INFO instead of raising.
    debug_enabled = bool(globals().get("DEBUG", False))
    logger.setLevel(logging.DEBUG if debug_enabled else logging.INFO)

    # Already configured: return before building handlers. Constructing a
    # RotatingFileHandler opens the log file, so building one only to discard
    # it would leak a file handle on every repeated call.
    if logger.handlers:
        return logger

    # Create console handler
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)

    # Determine log file path
    try:
        from django.conf import settings
        from django.core.exceptions import ImproperlyConfigured

        try:
            log_dir = getattr(settings, "LOGS_DIR", DEFAULT_LOGS_DIR)
        except ImproperlyConfigured:
            log_dir = DEFAULT_LOGS_DIR
    except ImportError:
        log_dir = DEFAULT_LOGS_DIR

    # settings.LOGS_DIR may be a plain string; normalize it so `/` works, and
    # make sure the directory exists before the handler opens a file in it.
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    log_file = log_dir / f"{name}.log"

    # Create file handler with rotation
    fh = RotatingFileHandler(
        filename=log_file,
        maxBytes=5 * 1024 * 1024,  # 5 MB
        backupCount=5,
    )
    fh.setLevel(logging.DEBUG)

    # Create formatter and add it to the handlers
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s %(name)s: %(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)

    logger.addHandler(ch)
    logger.addHandler(fh)

    return logger
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# src/swarm/utils/logger_setup.py
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
def setup_logger(name: str, level=logging.DEBUG) -> logging.Logger:
    """
    Return the logger called *name*, set to *level*, with one console handler.

    The level is applied on every call; the stream handler and its formatter
    are attached only when the logger has no handlers yet.

    Args:
        name (str): Name of the logger.
        level (int): Logging level (e.g., logging.DEBUG, logging.INFO).

    Returns:
        logging.Logger: Configured logger.
    """
    configured = logging.getLogger(name)
    configured.setLevel(level)

    if configured.handlers:
        # Already wired up by an earlier call; do not add a duplicate handler.
        return configured

    console = logging.StreamHandler()
    console.setFormatter(
        logging.Formatter("[%(levelname)s] %(asctime)s - %(name)s - %(message)s")
    )
    configured.addHandler(console)
    return configured
|