mindroot 8.2.0__py3-none-any.whl → 8.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindroot might be problematic.
- mindroot/coreplugins/chat/router.py +40 -0
- mindroot/coreplugins/chat/services.py +12 -5
- mindroot/lib/buchatlog.py +140 -0
- mindroot/lib/buchatlog2.py +357 -0
- mindroot/lib/chatlog.py +237 -1
- mindroot/lib/token_counter.py +201 -0
- {mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/METADATA +1 -1
- {mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/RECORD +12 -9
- {mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/WHEEL +1 -1
- {mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/entry_points.txt +0 -0
- {mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/licenses/LICENSE +0 -0
- {mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/top_level.txt +0 -0

mindroot/coreplugins/chat/router.py
CHANGED

@@ -220,3 +220,43 @@ async def upload_file(request: Request, log_id: str, file: UploadFile = File(...
         "mime_type": file.content_type
     }

+
+from lib.chatlog import count_tokens_for_log_id
+
+@router.get("/chat/{log_id}/tokens")
+async def get_token_count(request: Request, log_id: str):
+    """
+    Get token counts for a chat log identified by log_id, including any delegated tasks.
+
+    Parameters:
+    - log_id: The log ID to count tokens for
+
+    Returns:
+    - JSON with token counts or error message if log not found
+    """
+    token_counts = count_tokens_for_log_id(log_id)
+
+    if token_counts is None:
+        return {"status": "error", "message": f"Chat log with ID {log_id} not found"}
+
+    return {"status": "ok", "token_counts": token_counts}
+
+@router.get("/chat/{log_id}/tokens")
+async def get_token_count(request: Request, log_id: str):
+    """
+    Get token counts for a chat log identified by log_id, including any delegated tasks.
+
+    Parameters:
+    - log_id: The log ID to count tokens for
+
+    Returns:
+    - JSON with token counts or error message if log not found
+    """
+    from lib.token_counter import count_tokens_for_log_id
+
+    token_counts = count_tokens_for_log_id(log_id)
+
+    if token_counts is None:
+        return {"status": "error", "message": f"Chat log with ID {log_id} not found"}
+
+    return {"status": "ok", "token_counts": token_counts}
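
For orientation, the new route can be exercised with a plain HTTP GET once a server built from this version is running. The snippet below is an illustrative sketch only, not part of the diff; the base URL, port, and log ID are assumptions, and the final path depends on how the chat router is mounted.

import requests  # assumes the requests package is available

BASE_URL = "http://localhost:8000"  # hypothetical server address
LOG_ID = "example-log-id"           # hypothetical chat log ID

resp = requests.get(f"{BASE_URL}/chat/{LOG_ID}/tokens")
print(resp.json())  # {"status": "ok", "token_counts": {...}} or a "not found" error
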

mindroot/coreplugins/chat/services.py
CHANGED

@@ -267,19 +267,25 @@ async def send_message_to_agent(session_id: str, message: str | List[MessagePart
         parse_error = False

         results, full_cmds = await agent_.chat_commands(context.current_model, context, messages=context.chat_log.get_recent())
-
-
-
-
+        if results is not None:
+            try:
+                for result in results:
+                    if result['cmd'] == 'UNKNOWN':
+                        consecutive_parse_errors += 1
+                        parse_error = True
+            except Exception as e:
+                pass

         if not parse_error:
             consecutive_parse_errors = 0
+        else:
+            await asyncio.sleep(1)

         if consecutive_parse_errors > 6:
             raise Exception("Too many consecutive parse errors, stopping processing.")

         elif consecutive_parse_errors > 3:
-            results.append({"cmd": "UNKNOWN", "args": { "SYSTEM WARNING: Issue valid command list or task processing will be halted. Simplify output."}})
+            results.append({"cmd": "UNKNOWN", "args": { "SYSTEM WARNING: Issue valid command list or task; processing will be halted. Simplify output."}})

         try:
             tmp_data3 = { "results": full_cmds }

@@ -340,6 +346,7 @@ async def send_message_to_agent(session_id: str, message: str | List[MessagePart
             continue_processing = False
         except Exception as e:
             continue_processing = False
+            await asyncio.sleep(1)
             trace = traceback.format_exc()
             msg = str(e)
             descr = msg + "\n\n" + trace
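
Taken together, these hunks add a simple backoff-and-abort policy around command parsing: an UNKNOWN command increments a consecutive-error counter and triggers a one-second sleep, a system warning is injected after more than three consecutive failures, and processing aborts after more than six. A minimal self-contained sketch of that policy follows; it is not the package's code and the function name is illustrative.

import asyncio

async def track_parse_errors(results, consecutive_parse_errors):
    # An UNKNOWN command counts as a parse error and triggers a short backoff.
    parse_error = any(r.get('cmd') == 'UNKNOWN' for r in results or [])
    if parse_error:
        consecutive_parse_errors += 1
        await asyncio.sleep(1)
    else:
        consecutive_parse_errors = 0
    if consecutive_parse_errors > 6:
        raise Exception("Too many consecutive parse errors, stopping processing.")
    return consecutive_parse_errors
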

mindroot/lib/buchatlog.py
ADDED

@@ -0,0 +1,140 @@
+import os
+import json
+from typing import List, Dict
+import sys
+import traceback
+from mindroot.lib.utils.debug import debug_box
+
+class ChatLog:
+    def __init__(self, log_id=0, agent=None, context_length: int = 4096, user: str = None):
+        self.log_id = log_id
+        self.messages = []
+        self.agent = agent
+        if user is None or user == '' or user == 'None':
+            raise ValueError('User must be provided')
+        # make sure user is string
+        if not isinstance(user, str):
+            # does it have a username?
+            if hasattr(user, 'username'):
+                user = user.username
+            else:
+                # throw an error
+                raise ValueError('ChatLog(): user must be a string or have username field')
+        self.user = user
+        if agent is None or agent == '':
+            raise ValueError('Agent must be provided')
+        self.context_length = context_length
+        self.log_dir = os.environ.get('CHATLOG_DIR', 'data/chat')
+        self.log_dir = os.path.join(self.log_dir, self.user)
+        self.log_dir = os.path.join(self.log_dir, self.agent)
+        if not os.path.exists(self.log_dir):
+            os.makedirs(self.log_dir)
+        self.load_log()
+
+    def _get_log_data(self) -> Dict[str, any]:
+        return {
+            'agent': self.agent,
+            'messages': self.messages
+        }
+
+    def _calculate_message_length(self, message: Dict[str, str]) -> int:
+        return len(json.dumps(message)) // 3
+
+    def add_message(self, message: Dict[str, str]) -> None:
+        if len(self.messages)>0 and self.messages[-1]['role'] == message['role']:
+            print("found repeat role")
+            # check if messasge is str
+            # if so, convert to dict with type 'text':
+            if type(message['content']) == str:
+                message['content'] = [{'type':'text', 'text': message['content']}]
+            elif type(message['content']) == list:
+                for part in message['content']:
+                    if part['type'] == 'image':
+                        print("found image")
+                        self.messages.append(message)
+                        self.save_log()
+                        return
+
+            try:
+                cmd_list = json.loads(self.messages[-1]['content'][0]['text'])
+                if type(cmd_list) != list:
+                    debug_box("1")
+                    cmd_list = [cmd_list]
+                new_json = json.loads(message['content'][0]['text'])
+                if type(new_json) != list:
+                    debug_box("2")
+                    new_json = [new_json]
+                new_cmd_list = cmd_list + new_json
+                debug_box("3")
+                self.messages[-1]['content'] = [{ 'type': 'text', 'text': json.dumps(new_cmd_list) }]
+            except Exception as e:
+                # assume previous mesage was not a command, was a string
+                debug_box("4")
+                print("Could not combine commands, probably normal if user message and previous system output, assuming string", e)
+                if type(self.messages[-1]['content']) == str:
+                    new_msg_text = self.messages[-1]['content'] + message['content'][0]['text']
+                else:
+                    new_msg_text = self.messages[-1]['content'][0]['text'] + message['content'][0]['text']
+                self.messages.append({'role': message['role'], 'content': [{'type': 'text', 'text': new_msg_text}]})
+                #print('could not combine commands. probably normal if user message and previous system output', e)
+                #print(self.messages[-1])
+                #print(message)
+                #raise e
+        else:
+            if len(self.messages)>0:
+                print('roles do not repeat, last message role is ', self.messages[-1]['role'], 'new message role is ', message['role'])
+            debug_box("5")
+            self.messages.append(message)
+        self.save_log()
+
+    def get_history(self) -> List[Dict[str, str]]:
+        return self.messages
+
+    def get_recent(self, max_tokens: int = 4096) -> List[Dict[str, str]]:
+        recent_messages = []
+        total_length = 0
+        #print('returning all messages', self.messages)
+        json_messages = json.dumps(self.messages)
+        return json.loads(json_messages)
+
+        #for message in self.messages:
+        #    message_length = self._calculate_message_length(message)
+        #    if total_length + message_length <= max_tokens:
+        #        recent_messages.append(message)
+        #        total_length += message_length
+        #    else:
+        #        break
+        #
+        #return recent_messages
+
+    def save_log(self) -> None:
+        log_file = os.path.join(self.log_dir, f'chatlog_{self.log_id}.json')
+        with open(log_file, 'w') as f:
+            json.dump(self._get_log_data(), f, indent=2)
+
+
+    def load_log(self, log_id = None) -> None:
+        if log_id is None:
+            log_id = self.log_id
+        self.log_id = log_id
+        log_file = os.path.join(self.log_dir, f'chatlog_{log_id}.json')
+        if os.path.exists(log_file):
+            with open(log_file, 'r') as f:
+                log_data = json.load(f)
+                self.agent = log_data.get('agent')
+                self.messages = log_data.get('messages', [])
+            print("Loaded log file at ", log_file)
+            print("Message length: ", len(self.messages))
+        else:
+            print("Could not find log file at ", log_file)
+            self.messages = []
+
+    def delete_log(self) -> None:
+        log_file = os.path.join(self.log_dir, f'chatlog_{self.log_id}.json')
+        if os.path.exists(log_file):
+            os.remove(log_file)
+            print("Deleted log file at ", log_file)
+        else:
+            print("Could not find log file at ", log_file)
+
+

mindroot/lib/buchatlog2.py
ADDED

@@ -0,0 +1,357 @@
+import os
+import json
+from typing import List, Dict
+import sys
+import traceback
+import re
+import time
+from mindroot.lib.utils.debug import debug_box
+
+class ChatLog:
+    def __init__(self, log_id=0, agent=None, context_length: int = 4096, user: str = None):
+        self.log_id = log_id
+        self.messages = []
+        self.agent = agent
+        if user is None or user == '' or user == 'None':
+            raise ValueError('User must be provided')
+        # make sure user is string
+        if not isinstance(user, str):
+            # does it have a username?
+            if hasattr(user, 'username'):
+                user = user.username
+            else:
+                # throw an error
+                raise ValueError('ChatLog(): user must be a string or have username field')
+        self.user = user
+        if agent is None or agent == '':
+            raise ValueError('Agent must be provided')
+        self.context_length = context_length
+        self.log_dir = os.environ.get('CHATLOG_DIR', 'data/chat')
+        self.log_dir = os.path.join(self.log_dir, self.user)
+        self.log_dir = os.path.join(self.log_dir, self.agent)
+        if not os.path.exists(self.log_dir):
+            os.makedirs(self.log_dir)
+        self.load_log()
+    def _get_log_data(self) -> Dict[str, any]:
+        return {
+            'agent': self.agent,
+            'messages': self.messages
+        }
+
+    def _calculate_message_length(self, message: Dict[str, str]) -> int:
+        return len(json.dumps(message)) // 3
+
+    def add_message(self, message: Dict[str, str]) -> None:
+        if len(self.messages)>0 and self.messages[-1]['role'] == message['role']:
+            print("found repeat role")
+            # check if messasge is str
+            # if so, convert to dict with type 'text':
+            if type(message['content']) == str:
+                message['content'] = [{'type':'text', 'text': message['content']}]
+            elif type(message['content']) == list:
+                for part in message['content']:
+                    if part['type'] == 'image':
+                        print("found image")
+                        self.messages.append(message)
+                        self.save_log()
+                        return
+
+            try:
+                cmd_list = json.loads(self.messages[-1]['content'][0]['text'])
+                if type(cmd_list) != list:
+                    debug_box("1")
+                    cmd_list = [cmd_list]
+                new_json = json.loads(message['content'][0]['text'])
+                if type(new_json) != list:
+                    debug_box("2")
+                    new_json = [new_json]
+                new_cmd_list = cmd_list + new_json
+                debug_box("3")
+                self.messages[-1]['content'] = [{ 'type': 'text', 'text': json.dumps(new_cmd_list) }]
+            except Exception as e:
+                # assume previous mesage was not a command, was a string
+                debug_box("4")
+                print("Could not combine commands, probably normal if user message and previous system output, assuming string", e)
+                if type(self.messages[-1]['content']) == str:
+                    new_msg_text = self.messages[-1]['content'] + message['content'][0]['text']
+                else:
+                    new_msg_text = self.messages[-1]['content'][0]['text'] + message['content'][0]['text']
+                self.messages.append({'role': message['role'], 'content': [{'type': 'text', 'text': new_msg_text}]})
+                #print('could not combine commands. probably normal if user message and previous system output', e)
+                #print(self.messages[-1])
+                #print(message)
+                #raise e
+        else:
+            if len(self.messages)>0:
+                print('roles do not repeat, last message role is ', self.messages[-1]['role'], 'new message role is ', message['role'])
+            debug_box("5")
+            self.messages.append(message)
+        self.save_log()
+
+    def get_history(self) -> List[Dict[str, str]]:
+        return self.messages
+
+    def get_recent(self, max_tokens: int = 4096) -> List[Dict[str, str]]:
+        recent_messages = []
+        total_length = 0
+        #print('returning all messages', self.messages)
+        json_messages = json.dumps(self.messages)
+        return json.loads(json_messages)
+
+        #for message in self.messages:
+        #    message_length = self._calculate_message_length(message)
+        #    if total_length + message_length <= max_tokens:
+        #        recent_messages.append(message)
+        #        total_length += message_length
+        #    else:
+        #        break
+        #
+        #return recent_messages
+
+    def save_log(self) -> None:
+        log_file = os.path.join(self.log_dir, f'chatlog_{self.log_id}.json')
+        with open(log_file, 'w') as f:
+            json.dump(self._get_log_data(), f, indent=2)
+
+
+    def load_log(self, log_id = None) -> None:
+        if log_id is None:
+            log_id = self.log_id
+        self.log_id = log_id
+        log_file = os.path.join(self.log_dir, f'chatlog_{log_id}.json')
+        if os.path.exists(log_file):
+            with open(log_file, 'r') as f:
+                log_data = json.load(f)
+                self.agent = log_data.get('agent')
+                self.messages = log_data.get('messages', [])
+            print("Loaded log file at ", log_file)
+            print("Message length: ", len(self.messages))
+        else:
+            print("Could not find log file at ", log_file)
+            self.messages = []
+
+    def delete_log(self) -> None:
+        log_file = os.path.join(self.log_dir, f'chatlog_{self.log_id}.json')
+        if os.path.exists(log_file):
+            os.remove(log_file)
+            print("Deleted log file at ", log_file)
+        else:
+            print("Could not find log file at ", log_file)
+
+    def count_tokens(self) -> Dict[str, int]:
+        """
+        Count tokens in the chat log, providing both sequence totals and cumulative request totals.
+
+        Returns:
+            Dict with the following keys:
+            - input_tokens_sequence: Total tokens in all user messages
+            - output_tokens_sequence: Total tokens in all assistant messages
+            - input_tokens_total: Cumulative tokens sent to LLM across all requests
+        """
+        # Initialize counters
+        input_tokens_sequence = 0  # Total tokens in all user messages
+        output_tokens_sequence = 0  # Total tokens in all assistant messages
+        input_tokens_total = 0  # Cumulative tokens sent to LLM across all requests
+
+        # Process each message
+        for i, message in enumerate(self.messages):
+            # Calculate tokens in this message (rough approximation)
+            message_tokens = len(json.dumps(message)) // 4
+
+            # Add to appropriate sequence counter
+            if message['role'] == 'assistant':
+                output_tokens_sequence += message_tokens
+            else:  # user or system
+                input_tokens_sequence += message_tokens
+
+            # For each assistant message, calculate the input tokens for that request
+            # (which includes all previous messages)
+            if message['role'] == 'assistant':
+                request_input_tokens = 0
+                for j in range(i):
+                    request_input_tokens += len(json.dumps(self.messages[j])) // 4
+                input_tokens_total += request_input_tokens
+
+        return {
+            'input_tokens_sequence': input_tokens_sequence,
+            'output_tokens_sequence': output_tokens_sequence,
+            'input_tokens_total': input_tokens_total
+        }
+
+def find_chatlog_file(log_id: str) -> str:
+    """
+    Find a chatlog file by its log_id.
+
+    Args:
+        log_id: The log ID to search for
+
+    Returns:
+        The full path to the chatlog file if found, None otherwise
+    """
+    chat_dir = os.environ.get('CHATLOG_DIR', 'data/chat')
+
+    # Use os.walk to search through all subdirectories
+    for root, dirs, files in os.walk(chat_dir):
+        for file in files:
+            if file == f"chatlog_{log_id}.json":
+                return os.path.join(root, file)
+
+    return None
+
+def extract_delegate_task_log_ids(messages: List[Dict]) -> List[str]:
+    """
+    Extract log IDs from delegate_task commands in messages.
+
+    Args:
+        messages: List of chat messages
+
+    Returns:
+        List of log IDs found in delegate_task commands
+    """
+    log_ids = []
+
+    for message in messages:
+        if message['role'] == 'assistant':
+            content = message['content']
+            # Handle both string and list content formats
+            if isinstance(content, str):
+                text = content
+            elif isinstance(content, list) and len(content) > 0 and 'text' in content[0]:
+                text = content[0]['text']
+            else:
+                continue
+
+            # Try to parse as JSON
+            try:
+                commands = json.loads(text)
+                if not isinstance(commands, list):
+                    commands = [commands]
+
+                for cmd in commands:
+                    for key, value in cmd.items():
+                        if key == 'delegate_task' and 'log_id' in value:
+                            log_ids.append(value['log_id'])
+            except (json.JSONDecodeError, TypeError, KeyError):
+                # If not JSON, try regex to find log_ids in delegate_task commands
+                matches = re.findall(r'"delegate_task"\s*:\s*{\s*"log_id"\s*:\s*"([^"]+)"', text)
+                log_ids.extend(matches)
+
+    return log_ids
+
+def get_cache_dir() -> str:
+    """
+    Get the directory for token count cache files.
+    Creates the directory if it doesn't exist.
+    """
+    cache_dir = os.environ.get('TOKEN_CACHE_DIR', 'data/token_cache')
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    return cache_dir
+
+def get_cache_path(log_id: str) -> str:
+    """
+    Get the path to the cache file for a specific log_id.
+    """
+    cache_dir = get_cache_dir()
+    return os.path.join(cache_dir, f"tokens_{log_id}.json")
+
+def get_cached_token_counts(log_id: str, log_path: str) -> Dict[str, int]:
+    """
+    Get cached token counts if available and valid.
+
+    Args:
+        log_id: The log ID
+        log_path: Path to the actual log file
+
+    Returns:
+        Cached token counts if valid, None otherwise
+    """
+    cache_path = get_cache_path(log_id)
+
+    # If cache doesn't exist, return None
+    if not os.path.exists(cache_path):
+        return None
+
+    try:
+        # Get modification times
+        log_mtime = os.path.getmtime(log_path)
+        cache_mtime = os.path.getmtime(cache_path)
+        current_time = time.time()
+
+        # If log was modified after cache was created, cache is invalid
+        if log_mtime > cache_mtime:
+            return None
+
+        # Don't recalculate sooner than 3 minutes after last calculation
+        if current_time - cache_mtime < 180:  # 3 minutes in seconds
+            with open(cache_path, 'r') as f:
+                return json.load(f)
+
+        # For logs that haven't been modified in over an hour, consider them "finished"
+        # and use the cache regardless of when it was last calculated
+        if current_time - log_mtime > 3600:  # 1 hour in seconds
+            with open(cache_path, 'r') as f:
+                return json.load(f)
+
+    except (json.JSONDecodeError, IOError) as e:
+        print(f"Error reading token cache: {e}")
+
+    return None
+
+def save_token_counts_to_cache(log_id: str, token_counts: Dict[str, int]) -> None:
+    """
+    Save token counts to cache.
+    """
+    cache_path = get_cache_path(log_id)
+    with open(cache_path, 'w') as f:
+        json.dump(token_counts, f)
+
+def count_tokens_for_log_id(log_id: str) -> Dict[str, int]:
+    """
+    Count tokens for a chat log identified by log_id, including any delegated tasks.
+
+    Args:
+        log_id: The log ID to count tokens for
+
+    Returns:
+        Dictionary with token counts or None if log not found
+    """
+    # Find the chatlog file
+    chatlog_path = find_chatlog_file(log_id)
+    if not chatlog_path:
+        return None
+
+    # Check cache first
+    cached_counts = get_cached_token_counts(log_id, chatlog_path)
+    if cached_counts:
+        print(f"Using cached token counts for {log_id}")
+        return cached_counts
+
+    print(f"Calculating token counts for {log_id}")
+
+    # Load the chat log
+    with open(chatlog_path, 'r') as f:
+        log_data = json.load(f)
+
+    # Create a temporary ChatLog instance to count tokens
+    temp_log = ChatLog(log_id=log_id, user="system", agent=log_data.get('agent', 'unknown'))
+    temp_log.messages = log_data.get('messages', [])
+
+    # Count tokens for this log
+    token_counts = temp_log.count_tokens()
+
+    # Find delegated task log IDs
+    delegated_log_ids = extract_delegate_task_log_ids(temp_log.messages)
+
+    # Recursively count tokens for delegated tasks
+    for delegated_id in delegated_log_ids:
+        delegated_counts = count_tokens_for_log_id(delegated_id)
+        if delegated_counts:
+            token_counts['input_tokens_sequence'] += delegated_counts['input_tokens_sequence']
+            token_counts['output_tokens_sequence'] += delegated_counts['output_tokens_sequence']
+            token_counts['input_tokens_total'] += delegated_counts['input_tokens_total']
+
+    # Save to cache
+    save_token_counts_to_cache(log_id, token_counts)
+
+    return token_counts
mindroot/lib/chatlog.py
CHANGED

@@ -3,6 +3,8 @@ import json
 from typing import List, Dict
 import sys
 import traceback
+import re
+import time
 from mindroot.lib.utils.debug import debug_box

 class ChatLog:

@@ -30,7 +32,6 @@ class ChatLog:
         if not os.path.exists(self.log_dir):
             os.makedirs(self.log_dir)
         self.load_log()
-
     def _get_log_data(self) -> Dict[str, any]:
         return {
             'agent': self.agent,

@@ -137,3 +138,238 @@ class ChatLog:
         else:
             print("Could not find log file at ", log_file)

+    def count_tokens(self) -> Dict[str, int]:
+        """
+        Count tokens in the chat log, providing both sequence totals and cumulative request totals.
+
+        Returns:
+            Dict with the following keys:
+            - input_tokens_sequence: Total tokens in all user messages
+            - output_tokens_sequence: Total tokens in all assistant messages
+            - input_tokens_total: Cumulative tokens sent to LLM across all requests
+        """
+        # Initialize counters
+        input_tokens_sequence = 0  # Total tokens in all user messages
+        output_tokens_sequence = 0  # Total tokens in all assistant messages
+        input_tokens_total = 0  # Cumulative tokens sent to LLM across all requests
+
+        # Process each message
+        for i, message in enumerate(self.messages):
+            # Calculate tokens in this message (rough approximation)
+            message_tokens = len(json.dumps(message)) // 4
+
+            # Add to appropriate sequence counter
+            if message['role'] == 'assistant':
+                output_tokens_sequence += message_tokens
+            else:  # user or system
+                input_tokens_sequence += message_tokens
+
+            # For each assistant message, calculate the input tokens for that request
+            # (which includes all previous messages)
+            if message['role'] == 'assistant':
+                request_input_tokens = 0
+                for j in range(i):
+                    request_input_tokens += len(json.dumps(self.messages[j])) // 4
+                input_tokens_total += request_input_tokens
+
+        return {
+            'input_tokens_sequence': input_tokens_sequence,
+            'output_tokens_sequence': output_tokens_sequence,
+            'input_tokens_total': input_tokens_total
+        }
+
+def find_chatlog_file(log_id: str) -> str:
+    """
+    Find a chatlog file by its log_id.
+
+    Args:
+        log_id: The log ID to search for
+
+    Returns:
+        The full path to the chatlog file if found, None otherwise
+    """
+    chat_dir = os.environ.get('CHATLOG_DIR', 'data/chat')
+
+    # Use os.walk to search through all subdirectories
+    for root, dirs, files in os.walk(chat_dir):
+        for file in files:
+            if file == f"chatlog_{log_id}.json":
+                return os.path.join(root, file)
+
+    return None
+
+def extract_delegate_task_log_ids(messages: List[Dict]) -> List[str]:
+    """
+    Extract log IDs from delegate_task commands in messages.
+
+    Args:
+        messages: List of chat messages
+
+    Returns:
+        List of log IDs found in delegate_task commands
+    """
+    log_ids = []
+
+    for message in messages:
+        if message['role'] == 'assistant':
+            content = message['content']
+            # Handle both string and list content formats
+            if isinstance(content, str):
+                text = content
+            elif isinstance(content, list) and len(content) > 0 and 'text' in content[0]:
+                text = content[0]['text']
+            else:
+                continue
+
+            # Try to parse as JSON
+            try:
+                commands = json.loads(text)
+                if not isinstance(commands, list):
+                    commands = [commands]
+
+                for cmd in commands:
+                    for key, value in cmd.items():
+                        if key == 'delegate_task' and 'log_id' in value:
+                            log_ids.append(value['log_id'])
+            except (json.JSONDecodeError, TypeError, KeyError):
+                # If not JSON, try regex to find log_ids in delegate_task commands
+                matches = re.findall(r'"delegate_task"\s*:\s*{\s*"log_id"\s*:\s*"([^"]+)"', text)
+                log_ids.extend(matches)
+
+    return log_ids
+
+def get_cache_dir() -> str:
+    """
+    Get the directory for token count cache files.
+    Creates the directory if it doesn't exist.
+    """
+    cache_dir = os.environ.get('TOKEN_CACHE_DIR', 'data/token_cache')
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    return cache_dir
+
+def get_cache_path(log_id: str) -> str:
+    """
+    Get the path to the cache file for a specific log_id.
+    """
+    cache_dir = get_cache_dir()
+    return os.path.join(cache_dir, f"tokens_{log_id}.json")
+
+def get_cached_token_counts(log_id: str, log_path: str) -> Dict[str, int]:
+    """
+    Get cached token counts if available and valid.
+
+    Args:
+        log_id: The log ID
+        log_path: Path to the actual log file
+
+    Returns:
+        Cached token counts if valid, None otherwise
+    """
+    cache_path = get_cache_path(log_id)
+
+    # If cache doesn't exist, return None
+    if not os.path.exists(cache_path):
+        return None
+
+    try:
+        # Get modification times
+        log_mtime = os.path.getmtime(log_path)
+        cache_mtime = os.path.getmtime(cache_path)
+        current_time = time.time()
+
+        # If log was modified after cache was created, cache is invalid
+        if log_mtime > cache_mtime:
+            return None
+
+        # Don't recalculate sooner than 3 minutes after last calculation
+        if current_time - cache_mtime < 180:  # 3 minutes in seconds
+            with open(cache_path, 'r') as f:
+                return json.load(f)
+
+        # For logs that haven't been modified in over an hour, consider them "finished"
+        # and use the cache regardless of when it was last calculated
+        if current_time - log_mtime > 3600:  # 1 hour in seconds
+            with open(cache_path, 'r') as f:
+                return json.load(f)
+
+    except (json.JSONDecodeError, IOError) as e:
+        print(f"Error reading token cache: {e}")
+
+    return None
+
+def save_token_counts_to_cache(log_id: str, token_counts: Dict[str, int]) -> None:
+    """
+    Save token counts to cache.
+    """
+    cache_path = get_cache_path(log_id)
+    with open(cache_path, 'w') as f:
+        json.dump(token_counts, f)
+def count_tokens_for_log_id(log_id: str) -> Dict[str, int]:
+    """
+    Count tokens for a chat log identified by log_id, including any delegated tasks.
+
+    Args:
+        log_id: The log ID to count tokens for
+
+    Returns:
+        Dictionary with token counts or None if log not found
+    """
+    # Find the chatlog file
+    chatlog_path = find_chatlog_file(log_id)
+    if not chatlog_path:
+        return None
+
+    # Check cache first
+    cached_counts = get_cached_token_counts(log_id, chatlog_path)
+    if cached_counts:
+        print(f"Using cached token counts for {log_id}")
+        return cached_counts
+
+    print(f"Calculating token counts for {log_id}")
+
+    # Load the chat log
+    with open(chatlog_path, 'r') as f:
+        log_data = json.load(f)
+
+    # Create a temporary ChatLog instance to count tokens
+    temp_log = ChatLog(log_id=log_id, user="system", agent=log_data.get('agent', 'unknown'))
+    temp_log.messages = log_data.get('messages', [])
+
+    # Count tokens for this log
+    parent_counts = temp_log.count_tokens()
+
+    # Create combined counts (starting with parent counts)
+    combined_counts = {
+        'input_tokens_sequence': parent_counts['input_tokens_sequence'],
+        'output_tokens_sequence': parent_counts['output_tokens_sequence'],
+        'input_tokens_total': parent_counts['input_tokens_total']
+    }
+
+    # Find delegated task log IDs
+    delegated_log_ids = extract_delegate_task_log_ids(temp_log.messages)
+
+    # Recursively count tokens for delegated tasks
+    for delegated_id in delegated_log_ids:
+        delegated_counts = count_tokens_for_log_id(delegated_id)
+        if delegated_counts:
+            combined_counts['input_tokens_sequence'] += delegated_counts['input_tokens_sequence']
+            combined_counts['output_tokens_sequence'] += delegated_counts['output_tokens_sequence']
+            combined_counts['input_tokens_total'] += delegated_counts['input_tokens_total']
+
+    # Create final result with both parent and combined counts
+    token_counts = {
+        # Parent session only counts
+        'input_tokens_sequence': parent_counts['input_tokens_sequence'],
+        'output_tokens_sequence': parent_counts['output_tokens_sequence'],
+        'input_tokens_total': parent_counts['input_tokens_total'],
+        # Combined counts (parent + all subtasks)
+        'combined_input_tokens_sequence': combined_counts['input_tokens_sequence'],
+        'combined_output_tokens_sequence': combined_counts['output_tokens_sequence'],
+        'combined_input_tokens_total': combined_counts['input_tokens_total']
+    }
+
+    # Save to cache
+    save_token_counts_to_cache(log_id, token_counts)
+
+    return token_counts
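
The token figures above are heuristic rather than tokenizer-based: each message is serialized to JSON and every four characters are counted as one token. A short worked example of that approximation (illustrative only, not from the package):

import json

message = {"role": "user", "content": [{"type": "text", "text": "Hello, world!"}]}
serialized = json.dumps(message)
print(len(serialized))        # 72 characters of serialized JSON
print(len(serialized) // 4)   # -> 18 "tokens" under the // 4 heuristic
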

mindroot/lib/token_counter.py
ADDED

@@ -0,0 +1,201 @@
+import os
+import json
+import re
+import time
+from typing import Dict, List
+from mindroot.lib.chatlog import ChatLog
+
+def find_chatlog_file(log_id: str) -> str:
+    """
+    Find a chatlog file by its log_id.
+
+    Args:
+        log_id: The log ID to search for
+
+    Returns:
+        The full path to the chatlog file if found, None otherwise
+    """
+    chat_dir = os.environ.get('CHATLOG_DIR', 'data/chat')
+
+    # Use os.walk to search through all subdirectories
+    for root, dirs, files in os.walk(chat_dir):
+        for file in files:
+            if file == f"chatlog_{log_id}.json":
+                return os.path.join(root, file)
+
+    return None
+
+def extract_delegate_task_log_ids(messages: List[Dict]) -> List[str]:
+    """
+    Extract log IDs from delegate_task commands in messages.
+
+    Args:
+        messages: List of chat messages
+
+    Returns:
+        List of log IDs found in delegate_task commands
+    """
+    log_ids = []
+
+    for message in messages:
+        if message['role'] == 'assistant':
+            content = message['content']
+            # Handle both string and list content formats
+            if isinstance(content, str):
+                text = content
+            elif isinstance(content, list) and len(content) > 0 and 'text' in content[0]:
+                text = content[0]['text']
+            else:
+                continue
+
+            # Try to parse as JSON
+            try:
+                commands = json.loads(text)
+                if not isinstance(commands, list):
+                    commands = [commands]
+
+                for cmd in commands:
+                    for key, value in cmd.items():
+                        if key == 'delegate_task' and 'log_id' in value:
+                            log_ids.append(value['log_id'])
+            except (json.JSONDecodeError, TypeError, KeyError):
+                # If not JSON, try regex to find log_ids in delegate_task commands
+                matches = re.findall(r'"delegate_task"\s*:\s*{\s*"log_id"\s*:\s*"([^"]+)"', text)
+                log_ids.extend(matches)
+
+    return log_ids
+
+def get_cache_dir() -> str:
+    """
+    Get the directory for token count cache files.
+    Creates the directory if it doesn't exist.
+    """
+    cache_dir = os.environ.get('TOKEN_CACHE_DIR', 'data/token_cache')
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    return cache_dir
+
+def get_cache_path(log_id: str) -> str:
+    """
+    Get the path to the cache file for a specific log_id.
+    """
+    cache_dir = get_cache_dir()
+    return os.path.join(cache_dir, f"tokens_{log_id}.json")
+
+def get_cached_token_counts(log_id: str, log_path: str) -> Dict[str, int]:
+    """
+    Get cached token counts if available and valid.
+
+    Args:
+        log_id: The log ID
+        log_path: Path to the actual log file
+
+    Returns:
+        Cached token counts if valid, None otherwise
+    """
+    cache_path = get_cache_path(log_id)
+
+    # If cache doesn't exist, return None
+    if not os.path.exists(cache_path):
+        return None
+
+    try:
+        # Get modification times
+        log_mtime = os.path.getmtime(log_path)
+        cache_mtime = os.path.getmtime(cache_path)
+        current_time = time.time()
+
+        # If log was modified after cache was created, cache is invalid
+        if log_mtime > cache_mtime:
+            return None
+
+        # Don't recalculate sooner than 3 minutes after last calculation
+        if current_time - cache_mtime < 180:  # 3 minutes in seconds
+            with open(cache_path, 'r') as f:
+                return json.load(f)
+
+        # For logs that haven't been modified in over an hour, consider them "finished"
+        # and use the cache regardless of when it was last calculated
+        if current_time - log_mtime > 3600:  # 1 hour in seconds
+            with open(cache_path, 'r') as f:
+                return json.load(f)
+
+    except (json.JSONDecodeError, IOError) as e:
+        print(f"Error reading token cache: {e}")
+
+    return None
+
+def save_token_counts_to_cache(log_id: str, token_counts: Dict[str, int]) -> None:
+    """
+    Save token counts to cache.
+    """
+    cache_path = get_cache_path(log_id)
+    with open(cache_path, 'w') as f:
+        json.dump(token_counts, f)
+
+def count_tokens_for_log_id(log_id: str) -> Dict[str, int]:
+    """
+    Count tokens for a chat log identified by log_id, including any delegated tasks.
+
+    Args:
+        log_id: The log ID to count tokens for
+
+    Returns:
+        Dictionary with token counts or None if log not found
+    """
+    # Find the chatlog file
+    chatlog_path = find_chatlog_file(log_id)
+    if not chatlog_path:
+        return None
+
+    # Check cache first
+    cached_counts = get_cached_token_counts(log_id, chatlog_path)
+    if cached_counts:
+        print(f"Using cached token counts for {log_id}")
+        return cached_counts
+
+    print(f"Calculating token counts for {log_id}")
+
+    # Load the chat log
+    with open(chatlog_path, 'r') as f:
+        log_data = json.load(f)
+
+    # Create a temporary ChatLog instance to count tokens
+    temp_log = ChatLog(log_id=log_id, user="system", agent=log_data.get('agent', 'unknown'))
+    temp_log.messages = log_data.get('messages', [])
+
+    # Count tokens for this log
+    parent_counts = temp_log.count_tokens()
+
+    # Create combined counts (starting with parent counts)
+    combined_counts = {}
+    combined_counts['input_tokens_sequence'] = parent_counts['input_tokens_sequence']
+    combined_counts['output_tokens_sequence'] = parent_counts['output_tokens_sequence']
+    combined_counts['input_tokens_total'] = parent_counts['input_tokens_total']
+
+    # Find delegated task log IDs
+    delegated_log_ids = extract_delegate_task_log_ids(temp_log.messages)
+
+    # Recursively count tokens for delegated tasks
+    for delegated_id in delegated_log_ids:
+        delegated_counts = count_tokens_for_log_id(delegated_id)
+        if delegated_counts:
+            combined_counts['input_tokens_sequence'] += delegated_counts['input_tokens_sequence']
+            combined_counts['output_tokens_sequence'] += delegated_counts['output_tokens_sequence']
+            combined_counts['input_tokens_total'] += delegated_counts['input_tokens_total']
+
+    # Create final result with both parent and combined counts
+    token_counts = {}
+    # Parent session only counts
+    token_counts['input_tokens_sequence'] = parent_counts['input_tokens_sequence']
+    token_counts['output_tokens_sequence'] = parent_counts['output_tokens_sequence']
+    token_counts['input_tokens_total'] = parent_counts['input_tokens_total']
+    # Combined counts (parent + all subtasks)
+    token_counts['combined_input_tokens_sequence'] = combined_counts['input_tokens_sequence']
+    token_counts['combined_output_tokens_sequence'] = combined_counts['output_tokens_sequence']
+    token_counts['combined_input_tokens_total'] = combined_counts['input_tokens_total']
+
+    # Save to cache
+    save_token_counts_to_cache(log_id, token_counts)
+
+    return token_counts
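
As a usage sketch, the new helper can be driven directly once this version is installed. The snippet below is not from the package; the log ID is a placeholder, and it assumes CHATLOG_DIR (default data/chat) points at an existing chat data directory.

import os
os.environ.setdefault('CHATLOG_DIR', 'data/chat')  # default used by the module

from mindroot.lib.token_counter import count_tokens_for_log_id

counts = count_tokens_for_log_id("example-log-id")  # placeholder log ID
if counts is None:
    print("chat log not found")
else:
    print(counts['input_tokens_total'], counts['combined_input_tokens_total'])
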

{mindroot-8.2.0.dist-info → mindroot-8.4.0.dist-info}/RECORD
CHANGED

@@ -449,8 +449,8 @@ mindroot/coreplugins/chat/commands.py,sha256=QozVxkLqtP6rlK3tSVLThfZz6nE7G_Yygcm
 mindroot/coreplugins/chat/format_result_msgs.py,sha256=daEdpEyAJIa8b2VkCqSKcw8PaExcB6Qro80XNes_sHA,2
 mindroot/coreplugins/chat/mod.py,sha256=Xydjv3feKJJRbwdiB7raqiQnWtaS_2GcdC9bXYQX3nE,425
 mindroot/coreplugins/chat/models.py,sha256=GRcRuDUAJFpyWERPMxkxUaZ21igNlWeeamriruEKiEQ,692
-mindroot/coreplugins/chat/router.py,sha256=
-mindroot/coreplugins/chat/services.py,sha256=
+mindroot/coreplugins/chat/router.py,sha256=ucSQ6_wztDq2PCUP0D4KHL5JaFUAaBtmCQy-4iI8e8c,9087
+mindroot/coreplugins/chat/services.py,sha256=Ngv_TceFeXn2EzlZbSbR8Iv1E4M7qL7o3q1L7tufFq4,17835
 mindroot/coreplugins/chat/utils.py,sha256=BiE14PpsAcQSO5vbU88klHGm8cAXJDXxgVgva-EXybU,155
 mindroot/coreplugins/chat/static/assistant.png,sha256=oAt1ctkFKLSPBoAZGNnSixooW9ANVIk1GwniauVWDXo,215190
 mindroot/coreplugins/chat/static/mindgen.png,sha256=fN3E3oOFvAGYjJq-Pvg2f75jIMv7kg5WRU0EeEbxCWg,235353

@@ -1765,9 +1765,11 @@ mindroot/coreplugins/user_service/role_service.py,sha256=e6XrxhMC4903C-Y515XSC54
 mindroot/coreplugins/user_service/backup/admin_service.py,sha256=scc59rxlZz4uuVvgjf-9HL2gKi7-uiCdSt6LjWJILR8,4259
 mindroot/coreplugins/user_service/backup/admin_setup.py,sha256=JGszAw8nVtnNiisSUGu9jtoStKGyN44KpbRlKAhDJho,3001
 mindroot/lib/__init__.py,sha256=388n_hMskU0TnZ4xT10US_kFkya-EPBjWcv7AZf_HOk,74
+mindroot/lib/buchatlog.py,sha256=LJZc3ksKgJcStltmHrrwNLaON3EDzhOKVAWj0Wl22wk,5861
+mindroot/lib/buchatlog2.py,sha256=Va9FteBWePEjWD9OZcw-OtQfEb-IoCVGTmJeMRaX9is,13729
 mindroot/lib/butemplates.py,sha256=gfHGPTOjvoEenXsR7xokNuqMjOAPuC2DawheH1Ae4bU,12196
 mindroot/lib/chatcontext.py,sha256=OR63K63NMjaV7kXDf0MIcvOXFZhdiqr7LKgbyfZzjkE,6211
-mindroot/lib/chatlog.py,sha256=
+mindroot/lib/chatlog.py,sha256=6brFJASM7r1qlRX3t1HsXHehEnkaUW5dwmCG3WIPOqc,14690
 mindroot/lib/json_escape.py,sha256=5cAmAdNbnYX2uyfQcnse2fFtNI0CdB-AfZ23RwaDm-k,884
 mindroot/lib/model_selector.py,sha256=Wz-8NZoiclmnhLeCNnI3WCuKFmjsO5HE4bK5F8GpZzU,1397
 mindroot/lib/parent_templates.py,sha256=elcQFFwrFtfAYfQOSTs06aiDDigN1f1R2f8I1V-wj6Q,2731

@@ -1777,6 +1779,7 @@ mindroot/lib/route_decorators.py,sha256=L3E-bn48zhuxk6YPtyc2oP76-5WuV_SmjxtngJeY
 mindroot/lib/session_files.py,sha256=Vl50YfEnKt8ucGSDIavsAdbAcf8IbMG3Fdgm4pEh2Yg,1263
 mindroot/lib/streamcmd.py,sha256=f9n3OtryEkMbNNuFr5BAZn1EpSLUKuDZw-zpo97XxJk,4714
 mindroot/lib/templates.py,sha256=5dODCS6UeC9Y_PdMWlUuQCCZUUt2ICR0S1YF6XrG3eM,15154
+mindroot/lib/token_counter.py,sha256=U3tyw2CG1uK1FmupOHzTkyBwx5UKI30hRrRTqu_-ALQ,7170
 mindroot/lib/auth/__init__.py,sha256=5EZbCTcdlnTHYE0JNk8znWNSO7mOsokMOvRBjb5Mq-M,49
 mindroot/lib/auth/auth.py,sha256=2vhF_LfZcTPt2N2VLWy1ZP7h2pKFv7XM3xW1iRVOTkU,3129
 mindroot/lib/db/organize_models.py,sha256=kiadXfhGjCY16c36l1JmxXcKSH4ShWWEUnHi7WRRn9c,5028

@@ -1814,9 +1817,9 @@ mindroot/protocols/services/stream_chat.py,sha256=fMnPfwaB5fdNMBLTEg8BXKAGvrELKH
 mindroot/registry/__init__.py,sha256=40Xy9bmPHsgdIrOzbtBGzf4XMqXVi9P8oZTJhn0r654,151
 mindroot/registry/component_manager.py,sha256=WZFNPg4SNvpqsM5NFiC2DpgmrJQCyR9cNhrCBpp30Qk,995
 mindroot/registry/data_access.py,sha256=NgNMamxIjaKeYxzxnVaQz1Y-Rm0AI51si3788_JHUTM,5316
-mindroot-8.
-mindroot-8.
-mindroot-8.
-mindroot-8.
-mindroot-8.
-mindroot-8.
+mindroot-8.4.0.dist-info/licenses/LICENSE,sha256=8plAmZh8y9ccuuqFFz4kp7G-cO_qsPgAOoHNvabSB4U,1070
+mindroot-8.4.0.dist-info/METADATA,sha256=dL3FgMag3dUJHTDub3nQuv6Q0j2YDo6h8CuDNpaGKdA,356
+mindroot-8.4.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+mindroot-8.4.0.dist-info/entry_points.txt,sha256=0bpyjMccLttx6VcjDp6zfJPN0Kk0rffor6IdIbP0j4c,50
+mindroot-8.4.0.dist-info/top_level.txt,sha256=gwKm7DmNjhdrCJTYCrxa9Szne4lLpCtrEBltfsX-Mm8,9
+mindroot-8.4.0.dist-info/RECORD,,