theslopmachine 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MANUAL.md +63 -0
- package/README.md +23 -0
- package/RELEASE.md +81 -0
- package/assets/agents/developer.md +294 -0
- package/assets/agents/slopmachine.md +510 -0
- package/assets/skills/beads-operations/SKILL.md +75 -0
- package/assets/skills/clarification-gate/SKILL.md +51 -0
- package/assets/skills/developer-session-lifecycle/SKILL.md +75 -0
- package/assets/skills/final-evaluation-orchestration/SKILL.md +75 -0
- package/assets/skills/frontend-design/SKILL.md +41 -0
- package/assets/skills/get-overlays/SKILL.md +157 -0
- package/assets/skills/planning-gate/SKILL.md +68 -0
- package/assets/skills/submission-packaging/SKILL.md +268 -0
- package/assets/skills/verification-gates/SKILL.md +106 -0
- package/assets/slopmachine/backend-evaluation-prompt.md +275 -0
- package/assets/slopmachine/beads-init.js +428 -0
- package/assets/slopmachine/document-completeness.md +45 -0
- package/assets/slopmachine/engineering-results.md +59 -0
- package/assets/slopmachine/frontend-evaluation-prompt.md +304 -0
- package/assets/slopmachine/implementation-comparison.md +36 -0
- package/assets/slopmachine/quality-document.md +108 -0
- package/assets/slopmachine/templates/AGENTS.md +114 -0
- package/assets/slopmachine/utils/convert_ai_session.py +1837 -0
- package/assets/slopmachine/utils/strip_session_parent.py +66 -0
- package/bin/slopmachine.js +9 -0
- package/package.json +25 -0
- package/src/cli.js +32 -0
- package/src/constants.js +77 -0
- package/src/init.js +179 -0
- package/src/install.js +330 -0
- package/src/utils.js +162 -0
|
@@ -0,0 +1,1837 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
AI Session 统一转换工具
|
|
4
|
+
|
|
5
|
+
支持自动识别并转换以下格式的 AI 对话历史为 OpenAI 标准格式:
|
|
6
|
+
- Claude JSONL (Claude Desktop/API session)
|
|
7
|
+
- Codex JSONL (Codex CLI session)
|
|
8
|
+
- Gemini JSON (Gemini CLI session)
|
|
9
|
+
- Kilocode JSON (Kilocode API conversation history)
|
|
10
|
+
- OpenCode JSON (OpenCode session)
|
|
11
|
+
|
|
12
|
+
输出格式:
|
|
13
|
+
- 符合 OPENAI_FORMAT_SPEC.md 规范
|
|
14
|
+
- 包含 messages 数组和 meta 元数据
|
|
15
|
+
- 支持 reasoning (推理内容)、tool_call (工具调用)、tool_output (工具结果) 等内容类型
|
|
16
|
+
- Token 统计信息统一存储在 meta.token_counts 数组中
|
|
17
|
+
|
|
18
|
+
特性:
|
|
19
|
+
- 自动检测输入文件格式
|
|
20
|
+
- 支持多种编码格式 (UTF-8, UTF-16, GBK 等)
|
|
21
|
+
- 保留完整的元数据和时间戳信息
|
|
22
|
+
- 完全独立运行,不依赖任何项目内其他脚本
|
|
23
|
+
- 支持批量转换指定目录下的所有会话文件
|
|
24
|
+
|
|
25
|
+
使用示例:
|
|
26
|
+
# 单文件转换 - 自动检测格式
|
|
27
|
+
python convert_ai_session.py -i session.json
|
|
28
|
+
|
|
29
|
+
# 单文件转换 - 指定输出文件
|
|
30
|
+
python convert_ai_session.py -i session.jsonl -o output.json
|
|
31
|
+
|
|
32
|
+
# 单文件转换 - 强制指定格式
|
|
33
|
+
python convert_ai_session.py -i session.jsonl --format claude
|
|
34
|
+
|
|
35
|
+
# 批量转换 - 转换指定目录下所有会话文件(仅处理一层目录) 固定输出到convert目录下
|
|
36
|
+
python convert_ai_session.py -d script/session/test
|
|
37
|
+
|
|
38
|
+
# 批量转换 - 转换当前目录下所有会话文件(仅处理一层目录) 固定输出到convert目录下
|
|
39
|
+
python convert_ai_session.py -d .
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# 批量转换 - 指定文件匹配模式
|
|
44
|
+
python convert_ai_session.py -d script/session/test --pattern "*.json" --exclude "*_converted.json"
|
|
45
|
+
|
|
46
|
+
批量转换说明:
|
|
47
|
+
- 仅扫描指定目录下的所有 .json 和 .jsonl 文件(不递归子目录)
|
|
48
|
+
- 默认跳过已转换的文件 (*_converted.json)
|
|
49
|
+
- 转换后的文件命名为: 原文件名_converted.json
|
|
50
|
+
- 可通过 --pattern 和 --exclude 参数自定义文件过滤规则
|
|
51
|
+
- 转换失败的文件会记录错误信息并继续处理其他文件
|
|
52
|
+
- 转换完成后输出统计信息: 成功数/失败数/跳过数
|
|
53
|
+
|
|
54
|
+
作者: liufei
|
|
55
|
+
版本: 1.3.0
|
|
56
|
+
更新日期: 2026-03-18
|
|
57
|
+
"""
|
|
58
|
+
from __future__ import annotations
|
|
59
|
+
|
|
60
|
+
import json
|
|
61
|
+
import sys
|
|
62
|
+
import argparse
|
|
63
|
+
import re
|
|
64
|
+
from pathlib import Path
|
|
65
|
+
from typing import Dict, Any, List, Optional, TextIO
|
|
66
|
+
from collections.abc import Iterable
|
|
67
|
+
from dataclasses import dataclass, field
|
|
68
|
+
from datetime import datetime, timezone
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ============================================================================
|
|
72
|
+
# 格式检测
|
|
73
|
+
# ============================================================================
|
|
74
|
+
|
|
75
|
+
def detect_format(file_path: Path) -> str:
    """Identify the session format of *file_path*.

    Returns one of: 'claude_jsonl' | 'codex_jsonl' | 'kilocode' |
    'opencode' | 'gemini' | 'unknown'.
    """
    # .jsonl files get their own, line-oriented detection.
    if file_path.suffix == '.jsonl':
        return detect_jsonl_format(file_path)

    # Try a sequence of encodings until the file parses as JSON.
    parsed = None
    for enc in ('utf-8-sig', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'gbk', 'gb2312'):
        try:
            with open(file_path, 'r', encoding=enc) as handle:
                parsed = json.load(handle)
        except (json.JSONDecodeError, UnicodeDecodeError):
            continue
        break

    if parsed is None:
        return 'unknown'

    if isinstance(parsed, dict):
        # Gemini: {"sessionId": ..., "messages": [{"type": "user"|"gemini", ...}]}
        if 'sessionId' in parsed and 'messages' in parsed:
            msgs = parsed.get('messages', [])
            if isinstance(msgs, list) and msgs:
                head = msgs[0]
                if isinstance(head, dict) and 'type' in head and head.get('type') in ('user', 'gemini'):
                    return 'gemini'

        # OpenCode: {"info": {"id": ...}, "messages": [...]}
        if 'info' in parsed and 'messages' in parsed:
            meta = parsed.get('info', {})
            if isinstance(meta, dict) and 'id' in meta:
                return 'opencode'

    # Kilocode: top-level list of {"role", "content": [{"type", ...}], "ts"}
    if isinstance(parsed, list) and parsed:
        head = parsed[0]
        if isinstance(head, dict) and 'role' in head and 'content' in head and 'ts' in head:
            body = head.get('content', [])
            if isinstance(body, list) and body and isinstance(body[0], dict) and 'type' in body[0]:
                return 'kilocode'

    return 'unknown'
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def detect_jsonl_format(file_path: Path) -> str:
    """Classify a .jsonl session file by sampling its first lines.

    Returns 'codex_jsonl' | 'claude_jsonl' | 'unknown'.
    """
    try:
        # Sample up to the first 10 non-empty lines.
        sample = []
        with open(file_path, 'r', encoding='utf-8') as handle:
            for idx, raw in enumerate(handle):
                if idx >= 10:
                    break
                stripped = raw.strip()
                if stripped:
                    sample.append(stripped)

        if not sample:
            return 'unknown'

        head = json.loads(sample[0])

        # Claude fingerprints on the first record.
        if 'sessionId' in head:
            return 'claude_jsonl'
        kind = head.get('type')
        if kind in ('user', 'assistant', 'progress', 'file-history-snapshot', 'system'):
            if 'message' in head or 'parentUuid' in head or 'isSidechain' in head:
                return 'claude_jsonl'

        # Codex fingerprints on the first record.
        if 'payload' in head:
            return 'codex_jsonl'
        if kind in ('session_meta', 'turn_context', 'event_msg', 'response_item'):
            return 'codex_jsonl'

        # Ambiguous first line: vote across the remaining sample lines.
        claude_votes = 0
        codex_votes = 0
        for raw in sample[1:]:
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if any(key in record for key in ('sessionId', 'parentUuid', 'isSidechain', 'userType')):
                claude_votes += 1
            if 'payload' in record or record.get('type') in ('session_meta', 'turn_context'):
                codex_votes += 1

        if claude_votes > codex_votes:
            return 'claude_jsonl'
        if codex_votes > claude_votes:
            return 'codex_jsonl'

        # Tie (including zero evidence): default to codex.
        return 'codex_jsonl'

    except Exception as e:
        print(f"警告: 检测 JSONL 格式时出错: {str(e)}")
        return 'unknown'
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ============================================================================
|
|
188
|
+
# Claude JSONL 转换器 (原 claude_jsonl_to_openai_messages.py)
|
|
189
|
+
# ============================================================================
|
|
190
|
+
|
|
191
|
+
def _claude_read_jsonl(stream: TextIO) -> Iterable[dict]:
|
|
192
|
+
"""读取 JSONL 文件,每行一个 JSON 对象"""
|
|
193
|
+
for line_no, line in enumerate(stream, start=1):
|
|
194
|
+
line = line.strip()
|
|
195
|
+
if not line:
|
|
196
|
+
continue
|
|
197
|
+
try:
|
|
198
|
+
obj = json.loads(line)
|
|
199
|
+
except json.JSONDecodeError as exc:
|
|
200
|
+
raise ValueError(f"Invalid JSON at line {line_no}") from exc
|
|
201
|
+
if not isinstance(obj, dict):
|
|
202
|
+
raise ValueError(f"Expected object at line {line_no}, got {type(obj).__name__}")
|
|
203
|
+
yield obj
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@dataclass
class ClaudeConverterOptions:
    """Option flags for the Claude JSONL converter."""
    # Emit assistant "thinking" blocks as {"type": "reasoning"} content parts.
    include_thinking: bool = True
    # Mirror each tool call as a {"type": "tool_call"} content part too.
    include_toolcall_content: bool = True
    # Collect per-message usage into meta.token_counts.
    include_token_count: bool = True
    # When True, the result contains only "messages" (no "meta" key).
    messages_only: bool = False
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@dataclass
class ClaudeConverterState:
    """Mutable accumulator used while converting a Claude session."""
    # Session id captured from the first "user" event (None until seen).
    session_id: str | None = None
    # token_count entries harvested from assistant "usage" blocks.
    token_counts: list = field(default_factory=list)
    # Metadata (session_id/version/git_branch/cwd) from the first user event.
    session_meta: dict = field(default_factory=dict)
    # Events (progress/system/file-history-snapshot) that were not converted.
    skipped_events: list = field(default_factory=list)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def convert_claude_jsonl_to_messages(
    events: Iterable[dict],
    *,
    options: ClaudeConverterOptions,
) -> dict:
    """
    Convert a Claude session JSONL stream into the OpenAI message format.

    Args:
        events: iterator over JSONL event objects
        options: conversion options

    Returns:
        dict with a "messages" list and, unless options.messages_only,
        a "meta" dict (session_meta / token_counts / skipped_events).
    """
    state = ClaudeConverterState()
    messages: list = []

    for obj in events:
        event_type = obj.get("type")
        timestamp = obj.get("timestamp")

        # Capture session metadata from the first "user" event only.
        if event_type == "user" and state.session_id is None:
            state.session_id = obj.get("sessionId")
            state.session_meta = {
                "session_id": obj.get("sessionId"),
                "version": obj.get("version"),
                "git_branch": obj.get("gitBranch"),
                "cwd": obj.get("cwd"),
            }

        # Handle user messages.
        if event_type == "user":
            message = obj.get("message", {})
            role = message.get("role")
            content = message.get("content")

            if role == "user" and isinstance(content, str):
                # Plain text user turn.
                user_msg = {
                    "role": "user",
                    "content": [{"type": "text", "text": content}],
                }
                if timestamp:
                    user_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(user_msg)
            elif role == "user" and isinstance(content, list):
                # Structured content: tool_result items become separate
                # "tool" messages; everything else stays on the user turn.
                # Non-dict items are silently dropped.
                user_msg = {
                    "role": "user",
                    "content": []
                }
                for item in content:
                    if isinstance(item, dict):
                        if item.get("type") == "tool_result":
                            # NOTE(review): tool_result "content" may itself be a
                            # list of blocks in some sessions; it is stored as-is
                            # in the "text" field here — confirm downstream spec.
                            tool_msg = {
                                "role": "tool",
                                "tool_call_id": item.get("tool_use_id", ""),
                                "content": [{"type": "tool_output", "text": item.get("content", "")}]
                            }
                            if timestamp:
                                tool_msg["_metadata"] = {"timestamp": timestamp}
                            messages.append(tool_msg)
                        else:
                            user_msg["content"].append(item)

                # Only emit the user message if it retained non-tool content.
                if user_msg["content"]:
                    if timestamp:
                        user_msg["_metadata"] = {"timestamp": timestamp}
                    messages.append(user_msg)

        # Handle assistant messages.
        elif event_type == "assistant":
            message = obj.get("message", {})
            role = message.get("role")
            content = message.get("content")
            usage = message.get("usage")

            if role == "assistant" and isinstance(content, list):
                assistant_msg = {
                    "role": "assistant",
                    "content": [],
                }

                tool_calls = []

                for item in content:
                    if not isinstance(item, dict):
                        continue

                    item_type = item.get("type")

                    # Thinking blocks -> "reasoning" content parts.
                    if item_type == "thinking" and options.include_thinking:
                        thinking_text = item.get("thinking", "")
                        if thinking_text:
                            assistant_msg["content"].append({
                                "type": "reasoning",
                                "text": thinking_text
                            })

                    # Plain text parts.
                    elif item_type == "text":
                        text = item.get("text", "")
                        if text:
                            assistant_msg["content"].append({
                                "type": "text",
                                "text": text
                            })

                    # Tool invocations.
                    elif item_type == "tool_use":
                        tool_id = item.get("id", "")
                        tool_name = item.get("name", "")
                        tool_input = item.get("input", {})

                        tool_call = {
                            "id": tool_id,
                            "type": "function",
                            "function": {
                                "name": tool_name,
                                "arguments": json.dumps(tool_input, ensure_ascii=False)
                            }
                        }
                        tool_calls.append(tool_call)

                        # Optionally mirror the call inside "content" too.
                        if options.include_toolcall_content:
                            assistant_msg["content"].append({
                                "type": "tool_call",
                                "tool_call_id": tool_id,
                                "name": tool_name,
                                "arguments": json.dumps(tool_input, ensure_ascii=False)
                            })

                # Attach the OpenAI-style tool_calls field when present.
                if tool_calls:
                    assistant_msg["tool_calls"] = tool_calls

                # Attach timestamp metadata.
                if timestamp:
                    assistant_msg["_metadata"] = {"timestamp": timestamp}

                # Only keep messages that carry content or tool calls.
                if assistant_msg["content"] or tool_calls:
                    messages.append(assistant_msg)

            # Collect token usage statistics. Claude reports per-message usage
            # only, so total and last usage are identical per entry.
            if usage and options.include_token_count:
                token_entry = {
                    "type": "token_count",
                    "info": {
                        "total_token_usage": {
                            "input_tokens": usage.get("input_tokens", 0),
                            "cached_input_tokens": usage.get("cache_read_input_tokens", 0),
                            "output_tokens": usage.get("output_tokens", 0),
                            "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
                        },
                        "last_token_usage": {
                            "input_tokens": usage.get("input_tokens", 0),
                            "cached_input_tokens": usage.get("cache_read_input_tokens", 0),
                            "output_tokens": usage.get("output_tokens", 0),
                            "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
                        }
                    },
                    "rate_limits": {
                        "primary": None,
                        "secondary": None,
                        "credits": None,
                        "plan_type": None
                    }
                }
                if timestamp:
                    token_entry["_timestamp"] = timestamp
                state.token_counts.append(token_entry)

        # Record other event types without converting them.
        # NOTE(review): gating skipped-event bookkeeping on include_token_count
        # looks like a copied condition — confirm intended flag.
        elif event_type in ("progress", "system", "file-history-snapshot"):
            if options.include_token_count:
                state.skipped_events.append({
                    "type": event_type,
                    "timestamp": timestamp,
                    "data": obj.get("data") or obj.get("subtype")
                })

    # Assemble the result.
    result: dict = {"messages": messages}
    if not options.messages_only:
        result["meta"] = {
            "session_meta": state.session_meta,
            "token_counts": state.token_counts if options.include_token_count else None,
            "skipped_events_count": len(state.skipped_events),
            # Only the first 10 skipped events are surfaced.
            "skipped_events": state.skipped_events[:10] if state.skipped_events else []
        }

    return result
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
# ============================================================================
|
|
424
|
+
# Codex JSONL 转换器 (原 codex_jsonl_to_openai_messages.py)
|
|
425
|
+
# ============================================================================
|
|
426
|
+
|
|
427
|
+
def _codex_looks_like_agents_instructions(text: str) -> bool:
|
|
428
|
+
"""判断文本是否为 AGENTS.md 指令"""
|
|
429
|
+
t = text.lstrip()
|
|
430
|
+
return t.startswith("# AGENTS.md instructions") or ("## Skills" in t and "<INSTRUCTIONS>" in t)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _codex_looks_like_environment_context(text: str) -> bool:
|
|
434
|
+
"""判断文本是否为环境上下文"""
|
|
435
|
+
t = text.lstrip()
|
|
436
|
+
return t.startswith("<environment_context>") and "</environment_context>" in t
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _codex_as_text_parts(content: Any) -> list:
|
|
440
|
+
"""将 content 转换为文本 parts 列表"""
|
|
441
|
+
if not isinstance(content, list):
|
|
442
|
+
return []
|
|
443
|
+
out = []
|
|
444
|
+
for part in content:
|
|
445
|
+
if not isinstance(part, dict):
|
|
446
|
+
continue
|
|
447
|
+
if "text" in part and isinstance(part["text"], str):
|
|
448
|
+
out.append({"type": "text", "text": part["text"]})
|
|
449
|
+
continue
|
|
450
|
+
out.append(part)
|
|
451
|
+
return out
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _codex_concat_text(content: Any) -> str:
|
|
455
|
+
"""拼接 content 中所有文本"""
|
|
456
|
+
if not isinstance(content, list):
|
|
457
|
+
return ""
|
|
458
|
+
chunks = []
|
|
459
|
+
for part in content:
|
|
460
|
+
if isinstance(part, dict) and isinstance(part.get("text"), str):
|
|
461
|
+
chunks.append(part["text"])
|
|
462
|
+
return "".join(chunks)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _codex_maybe_parse_json_string(value: str) -> Any:
|
|
466
|
+
"""尝试将字符串解析为 JSON"""
|
|
467
|
+
s = value.strip()
|
|
468
|
+
if not s:
|
|
469
|
+
return None
|
|
470
|
+
if not (s.startswith("{") or s.startswith("[")):
|
|
471
|
+
return None
|
|
472
|
+
try:
|
|
473
|
+
return json.loads(s)
|
|
474
|
+
except json.JSONDecodeError:
|
|
475
|
+
return None
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@dataclass
class CodexConverterOptions:
    """Option flags for the Codex JSONL converter."""
    # Re-role harness-injected user messages: environment context -> system,
    # AGENTS.md instructions -> developer.
    promote_harness_messages: bool = True
    # Emit session-level instructions as a leading "developer" message.
    emit_session_instructions: bool = True
    # Mirror each tool call as a {"type": "tool_call"} content part too.
    include_toolcall_content: bool = True
    # Collect token_count events into meta.token_counts.
    include_token_count: bool = True
    # Collect turn_context payloads into meta.turn_contexts.
    include_turn_context: bool = True
    # When True, the result contains only "messages" (no "meta" key).
    messages_only: bool = False
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@dataclass
|
|
490
|
+
class CodexConverterState:
|
|
491
|
+
"""Codex 转换器状态"""
|
|
492
|
+
pending_reasoning: list = field(default_factory=list)
|
|
493
|
+
last_reasoning: str | None = None
|
|
494
|
+
session_instructions: str | None = None
|
|
495
|
+
token_counts: list = field(default_factory=list)
|
|
496
|
+
turn_contexts: list = field(default_factory=list)
|
|
497
|
+
session_meta: dict | None = None
|
|
498
|
+
|
|
499
|
+
def add_reasoning(self, text: str) -> None:
|
|
500
|
+
"""添加推理文本"""
|
|
501
|
+
t = text.strip()
|
|
502
|
+
if not t:
|
|
503
|
+
return
|
|
504
|
+
if self.last_reasoning == t:
|
|
505
|
+
return
|
|
506
|
+
self.pending_reasoning.append(t)
|
|
507
|
+
self.last_reasoning = t
|
|
508
|
+
|
|
509
|
+
def take_reasoning_parts(self) -> list:
|
|
510
|
+
"""取出并清空待处理推理内容"""
|
|
511
|
+
if not self.pending_reasoning:
|
|
512
|
+
return []
|
|
513
|
+
parts = [{"type": "reasoning", "text": t} for t in self.pending_reasoning]
|
|
514
|
+
self.pending_reasoning.clear()
|
|
515
|
+
self.last_reasoning = None
|
|
516
|
+
return parts
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def convert_codex_jsonl_to_messages(
    events: Iterable[dict],
    *,
    options: CodexConverterOptions,
) -> dict:
    """Convert a Codex CLI session JSONL stream into the OpenAI message format.

    Args:
        events: iterator over JSONL event objects
        options: conversion options

    Returns:
        dict with a "messages" list and, unless options.messages_only,
        a "meta" dict (session_meta / turn_contexts / token_counts).
    """
    state = CodexConverterState()
    messages: list = []

    for obj in events:
        timestamp = obj.get("timestamp")
        outer_type = obj.get("type")
        payload = obj.get("payload")

        # session_meta event: capture instructions, optionally emit them
        # as a leading "developer" message.
        if outer_type == "session_meta" and isinstance(payload, dict):
            state.session_meta = payload
            instr = payload.get("instructions")
            if isinstance(instr, str):
                state.session_instructions = instr
                if options.emit_session_instructions and instr.strip():
                    messages.append({
                        "role": "developer",
                        "content": [{"type": "text", "text": instr}],
                    })
            continue

        # turn_context event: stashed into meta, never a message.
        if outer_type == "turn_context" and isinstance(payload, dict):
            if options.include_turn_context:
                ctx = dict(payload)
                if timestamp:
                    ctx["_timestamp"] = timestamp
                state.turn_contexts.append(ctx)
            continue

        # event_msg event: reasoning and token statistics only.
        if outer_type == "event_msg" and isinstance(payload, dict):
            ptype = payload.get("type")
            if ptype == "agent_reasoning":
                text = payload.get("text")
                if isinstance(text, str):
                    state.add_reasoning(text)
                continue
            if ptype == "token_count":
                if options.include_token_count:
                    entry = dict(payload)
                    if timestamp:
                        entry["_timestamp"] = timestamp
                    state.token_counts.append(entry)
                continue
            continue

        # Everything below handles response_item payloads only.
        if outer_type != "response_item" or not isinstance(payload, dict):
            continue

        ptype = payload.get("type")

        # Reasoning summaries buffer onto the next assistant message.
        if ptype == "reasoning":
            summary = payload.get("summary")
            if isinstance(summary, list):
                for item in summary:
                    if isinstance(item, dict) and isinstance(item.get("text"), str):
                        state.add_reasoning(item["text"])
            continue

        # Plain messages.
        if ptype == "message":
            role = payload.get("role")
            content = payload.get("content")
            content_parts = _codex_as_text_parts(content)
            content_text = _codex_concat_text(content)

            if role == "assistant":
                # Prepend any buffered reasoning to the assistant turn.
                assistant_msg = {"role": "assistant", "content": []}
                assistant_msg["content"].extend(state.take_reasoning_parts())
                assistant_msg["content"].extend(content_parts)
                if timestamp:
                    assistant_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(assistant_msg)
                continue

            if role == "user":
                # Optionally re-role harness-injected user messages.
                out_role = "user"
                if options.promote_harness_messages:
                    if _codex_looks_like_environment_context(content_text):
                        out_role = "system"
                    elif _codex_looks_like_agents_instructions(content_text):
                        out_role = "developer"

                # Skip if this would duplicate the session instructions
                # already emitted from session_meta.
                if (
                    out_role == "developer"
                    and state.session_instructions
                    and state.session_instructions.strip() == content_text.strip()
                    and options.emit_session_instructions
                ):
                    continue

                user_msg = {"role": out_role, "content": content_parts}
                if timestamp:
                    user_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(user_msg)
                continue

            # Other roles are preserved as-is.
            if isinstance(role, str) and role:
                other_msg = {"role": role, "content": content_parts}
                if timestamp:
                    other_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(other_msg)
            continue

        # Function / custom tool calls become assistant messages with
        # an OpenAI-style tool_calls field.
        if ptype in ("function_call", "custom_tool_call"):
            call_id = payload.get("call_id")
            name = payload.get("name")
            if not isinstance(call_id, str) or not isinstance(name, str):
                continue

            if ptype == "function_call":
                arguments = payload.get("arguments")
                if isinstance(arguments, dict):
                    args_str = json.dumps(arguments, ensure_ascii=False)
                elif isinstance(arguments, str):
                    args_str = arguments
                else:
                    args_str = ""
            else:
                # custom_tool_call carries its input under "input".
                tool_input = payload.get("input")
                args_str = json.dumps({"input": tool_input}, ensure_ascii=False)

            tool_call = {
                "id": call_id,
                "type": "function",
                "function": {"name": name, "arguments": args_str},
            }

            assistant_msg: dict = {
                "role": "assistant",
                "content": [],
                "tool_calls": [tool_call],
            }
            assistant_msg["content"].extend(state.take_reasoning_parts())
            if options.include_toolcall_content:
                assistant_msg["content"].append({
                    "type": "tool_call",
                    "tool_call_id": call_id,
                    "name": name,
                    "arguments": args_str,
                })
            if timestamp:
                assistant_msg["_metadata"] = {"timestamp": timestamp}
            messages.append(assistant_msg)
            continue

        # Tool call outputs become "tool" messages; a JSON-shaped output
        # with an "output" string is unwrapped.
        if ptype in ("function_call_output", "custom_tool_call_output"):
            call_id = payload.get("call_id")
            output = payload.get("output")
            if not isinstance(call_id, str):
                continue
            if not isinstance(output, str):
                output = "" if output is None else str(output)

            tool_msg: dict = {"role": "tool", "tool_call_id": call_id, "content": []}
            parsed = _codex_maybe_parse_json_string(output)
            if isinstance(parsed, dict) and isinstance(parsed.get("output"), str):
                tool_msg["content"].append({"type": "tool_output", "text": parsed["output"]})
                if isinstance(parsed.get("metadata"), dict):
                    tool_msg["metadata"] = parsed["metadata"]
            else:
                tool_msg["content"].append({"type": "tool_output", "text": output})
            if timestamp:
                tool_msg["_metadata"] = {"timestamp": timestamp}
            messages.append(tool_msg)
            continue

    result: dict = {"messages": messages}
    if not options.messages_only:
        result["meta"] = {
            "session_meta": state.session_meta,
            "turn_contexts": state.turn_contexts,
            "token_counts": state.token_counts if options.include_token_count else None,
        }
    return result
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
# ============================================================================
|
|
709
|
+
# OpenCode JSON 转换器 (原 opencode_jsonl_to_openai_messages.py)
|
|
710
|
+
# ============================================================================
|
|
711
|
+
|
|
712
|
+
@dataclass
class OpenCodeConverterOptions:
    """Option flags for the OpenCode JSON converter."""
    # Include reasoning parts in assistant messages.
    include_reasoning: bool = True
    # Mirror each tool call as a {"type": "tool_call"} content part too.
    include_toolcall_content: bool = True
    # Collect per-message token usage into meta.token_counts.
    include_token_count: bool = True
    # Include the session "info" block in meta.
    include_session_info: bool = True
    # When True, the result contains only the "messages" array.
    messages_only: bool = False
    # Attach a _metadata dict (message id / timestamp / tokens) per message.
    include_timestamps: bool = True
    # Carry the full original tool-call state for metadata generation.
    include_full_tool_metadata: bool = True
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
@dataclass
|
|
725
|
+
class OpenCodeConverterState:
|
|
726
|
+
"""OpenCode 转换器状态"""
|
|
727
|
+
session_info: dict | None = None
|
|
728
|
+
token_counts: list = field(default_factory=list)
|
|
729
|
+
|
|
730
|
+
def add_token_count(self, tokens: dict, timestamp: int | None = None) -> None:
|
|
731
|
+
"""添加 token 统计信息 (Codex 嵌套格式)"""
|
|
732
|
+
input_tokens = tokens.get('input', 0)
|
|
733
|
+
output_tokens = tokens.get('output', 0)
|
|
734
|
+
reasoning_tokens = tokens.get('reasoning', 0)
|
|
735
|
+
cache_read = tokens.get('cache', {}).get('read', 0) if isinstance(tokens.get('cache'), dict) else 0
|
|
736
|
+
|
|
737
|
+
entry = {
|
|
738
|
+
'type': 'token_count',
|
|
739
|
+
'info': {
|
|
740
|
+
'total_token_usage': {
|
|
741
|
+
'input_tokens': input_tokens,
|
|
742
|
+
'cached_input_tokens': cache_read,
|
|
743
|
+
'output_tokens': output_tokens,
|
|
744
|
+
'reasoning_output_tokens': reasoning_tokens,
|
|
745
|
+
'total_tokens': input_tokens + output_tokens
|
|
746
|
+
},
|
|
747
|
+
'last_token_usage': {
|
|
748
|
+
'input_tokens': input_tokens,
|
|
749
|
+
'cached_input_tokens': cache_read,
|
|
750
|
+
'output_tokens': output_tokens,
|
|
751
|
+
'reasoning_output_tokens': reasoning_tokens,
|
|
752
|
+
'total_tokens': input_tokens + output_tokens
|
|
753
|
+
}
|
|
754
|
+
},
|
|
755
|
+
'rate_limits': {
|
|
756
|
+
'primary': None,
|
|
757
|
+
'secondary': None,
|
|
758
|
+
'credits': None,
|
|
759
|
+
'plan_type': None
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
if timestamp:
|
|
763
|
+
entry["_timestamp"] = timestamp
|
|
764
|
+
self.token_counts.append(entry)
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
def _opencode_format_timestamp(timestamp_ms: int | None) -> str | None:
|
|
768
|
+
"""将毫秒时间戳转换为 ISO8601 格式"""
|
|
769
|
+
if timestamp_ms is None:
|
|
770
|
+
return None
|
|
771
|
+
dt = datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
|
|
772
|
+
return dt.isoformat()
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
def _opencode_convert_tool_call(part: dict, include_full_metadata: bool = True):
|
|
776
|
+
"""
|
|
777
|
+
将 OpenCode 的工具调用转换为 OpenAI 格式
|
|
778
|
+
|
|
779
|
+
返回: (tool_call_dict, original_data_dict) 或 None
|
|
780
|
+
"""
|
|
781
|
+
call_id = part.get("callID")
|
|
782
|
+
tool_name = part.get("tool")
|
|
783
|
+
state_obj = part.get("state", {})
|
|
784
|
+
|
|
785
|
+
if not call_id or not tool_name:
|
|
786
|
+
return None
|
|
787
|
+
|
|
788
|
+
# 获取输入参数
|
|
789
|
+
input_data = state_obj.get("input", {})
|
|
790
|
+
if isinstance(input_data, dict):
|
|
791
|
+
args_str = json.dumps(input_data, ensure_ascii=False)
|
|
792
|
+
else:
|
|
793
|
+
args_str = json.dumps({"input": input_data}, ensure_ascii=False)
|
|
794
|
+
|
|
795
|
+
# 标准的 tool_call 格式
|
|
796
|
+
tool_call = {
|
|
797
|
+
"id": call_id,
|
|
798
|
+
"type": "function",
|
|
799
|
+
"function": {
|
|
800
|
+
"name": tool_name,
|
|
801
|
+
"arguments": args_str
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
# 原始数据,用于后续生成 metadata
|
|
806
|
+
original_data = {
|
|
807
|
+
"part_id": part.get("id"),
|
|
808
|
+
"tool": tool_name,
|
|
809
|
+
"state": state_obj
|
|
810
|
+
} if include_full_metadata else {}
|
|
811
|
+
|
|
812
|
+
return tool_call, original_data
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
def _opencode_convert_message(
    message: dict,
    options: OpenCodeConverterOptions,
    state: OpenCodeConverterState
) -> list:
    """Convert one OpenCode message into a list of OpenAI-format messages.

    A user message yields at most one "user" entry. An assistant message can
    yield one "assistant" entry (text/reasoning/tool-call content) followed by
    one "tool" entry per completed tool invocation. Unknown roles yield [].
    """
    info = message.get("info", {})
    parts = message.get("parts", [])
    role = info.get("role")

    # Extract the creation timestamp and token usage from the message header.
    timestamp = info.get("time", {}).get("created")
    tokens = info.get("tokens")
    message_id = info.get("id")

    # Accumulate token statistics on the shared converter state.
    if options.include_token_count and tokens:
        state.add_token_count(tokens, timestamp)

    # --- User message: only text parts are carried over. ---
    if role == "user":
        content_parts = []
        for part in parts:
            if part.get("type") == "text":
                content_parts.append({
                    "type": "text",
                    "text": part.get("text", "")
                })

        if content_parts:
            user_msg = {
                "role": "user",
                "content": content_parts
            }
            if options.include_timestamps:
                user_msg["_metadata"] = {
                    "message_id": message_id,
                    "timestamp": timestamp,
                    "tokens": tokens
                }
            return [user_msg]
        return []

    # --- Assistant message: text, reasoning, tool calls and tool outputs. ---
    if role == "assistant":
        result_messages = []

        text_parts = []
        tool_calls = []
        tool_outputs = []
        tool_call_original_data = {}

        for part in parts:
            part_type = part.get("type")

            # Reasoning content (kept only when the option is enabled).
            if part_type == "reasoning":
                if options.include_reasoning:
                    reasoning_text = part.get("text", "")
                    if reasoning_text.strip():
                        text_parts.append({
                            "type": "reasoning",
                            "text": reasoning_text
                        })

            # Plain text content; whitespace-only parts are dropped.
            elif part_type == "text":
                text = part.get("text", "")
                if text.strip():
                    text_parts.append({
                        "type": "text",
                        "text": text
                    })

            # Tool invocation part.
            elif part_type == "tool":
                state_obj = part.get("state", {})
                status = state_obj.get("status")

                # The call request itself is emitted for any active status.
                if status in ("pending", "running", "completed"):
                    result = _opencode_convert_tool_call(part, options.include_full_tool_metadata)
                    if result:
                        tool_call, original_data = result
                        tool_calls.append(tool_call)

                        if original_data:
                            tool_call_original_data[tool_call["id"]] = original_data

                        # Optionally mirror the call into the content stream.
                        if options.include_toolcall_content:
                            tool_call_content = {
                                "type": "tool_use",
                                "tool_call_id": tool_call["id"],
                                "name": tool_call["function"]["name"],
                                "arguments": tool_call["function"]["arguments"]
                            }
                            text_parts.append(tool_call_content)

                # A completed call also produces an output message later.
                if status == "completed":
                    call_id = part.get("callID")
                    output = state_obj.get("output", "")

                    if call_id:
                        tool_outputs.append({
                            "call_id": call_id,
                            "output": output,
                            "state": state_obj
                        })

        # Build the assistant message (only when there is something to say).
        if text_parts or tool_calls:
            assistant_msg: dict = {
                "role": "assistant",
                "content": text_parts
            }

            if tool_calls:
                assistant_msg["tool_calls"] = tool_calls

            if options.include_timestamps:
                assistant_msg["_metadata"] = {
                    "message_id": message_id,
                    "timestamp": timestamp,
                    "tokens": tokens
                }

            result_messages.append(assistant_msg)

        # Emit one "tool" message per completed tool invocation.
        for tool_output in tool_outputs:
            output_text = tool_output["output"]
            # Coerce non-string outputs; None becomes the empty string.
            if not isinstance(output_text, str):
                output_text = str(output_text) if output_text is not None else ""

            tool_msg = {
                "role": "tool",
                "tool_call_id": tool_output["call_id"],
                "content": [{
                    "type": "tool_output",
                    "text": output_text
                }]
            }

            # When full metadata is enabled, carry over all metadata fields.
            if options.include_full_tool_metadata and "state" in tool_output:
                s = tool_output["state"]
                time_info = s.get("time", {})
                metadata_info = s.get("metadata", {})

                tool_msg["metadata"] = {}

                # Preserve the full metadata (diff, files, diagnostics, ...).
                if isinstance(metadata_info, dict):
                    tool_msg["metadata"] = dict(metadata_info)

                    # Mirror the "exit" field under the "exit_code" alias too.
                    if "exit" in metadata_info:
                        tool_msg["metadata"]["exit_code"] = metadata_info["exit"]

                # Derive duration_seconds from the start/end timestamps (ms).
                if time_info and "start" in time_info and "end" in time_info:
                    duration_ms = time_info["end"] - time_info["start"]
                    tool_msg["metadata"]["duration_seconds"] = round(duration_ms / 1000, 3)

            result_messages.append(tool_msg)

        return result_messages

    return []
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def convert_opencode_to_messages(
    session_data: dict,
    *,
    options: OpenCodeConverterOptions
) -> dict:
    """
    Convert OpenCode session data into the OpenAI messages format.

    Args:
        session_data: OpenCode session JSON data (with "info" and "messages").
        options: Conversion options controlling metadata/token handling.

    Returns:
        A dict with a "messages" list and, unless ``options.messages_only``,
        a "meta" dict (session_meta, turn_contexts, token_counts).
    """
    state = OpenCodeConverterState()
    messages: list = []

    # Session-level header information.
    session_info = session_data.get("info", {})

    # Convert every message; each may expand into several OpenAI messages.
    opencode_messages = session_data.get("messages", [])
    for msg in opencode_messages:
        converted = _opencode_convert_message(msg, options, state)
        messages.extend(converted)

    # Assemble the result payload.
    result: dict = {"messages": messages}

    if not options.messages_only:
        # Build session_meta from the session header. Fields with no
        # OpenCode counterpart are filled with fixed defaults.
        session_meta = {
            "id": session_info.get("id"),
            "timestamp": _opencode_format_timestamp(session_info.get("time", {}).get("created")),
            "cwd": session_info.get("directory"),
            "originator": "ide",
            "cli_version": session_info.get("version"),
            "source": "opencode",
            "model_provider": None,
            "base_instructions": {
                "text": None
            },
            "git": {}
        }

        # Take the model provider from the first assistant message.
        for msg in opencode_messages:
            info = msg.get("info", {})
            if info.get("role") == "assistant":
                model_info = info.get("model", {})
                session_meta["model_provider"] = model_info.get("providerID")
                break

        # Build one turn_context per assistant message. Sub-objects are
        # defensively reset to {} when they are not dicts.
        turn_contexts = []
        for msg in opencode_messages:
            info = msg.get("info", {})
            if info.get("role") == "assistant":
                model_info = info.get("model", {})
                if not isinstance(model_info, dict):
                    model_info = {}
                path_info = info.get("path", {})
                if not isinstance(path_info, dict):
                    path_info = {}
                summary_info = info.get("summary", {})
                if not isinstance(summary_info, dict):
                    summary_info = {}
                time_info = info.get("time", {})
                if not isinstance(time_info, dict):
                    time_info = {}

                turn_context = {
                    "cwd": path_info.get("cwd"),
                    "approval_policy": "auto",
                    "sandbox_policy": {"type": "local"},
                    "model": model_info.get("modelID"),
                    "personality": info.get("agent"),
                    "collaboration_mode": {"mode": "single"},
                    "effort": info.get("mode"),
                    "summary": summary_info.get("title"),
                    "user_instructions": None,
                    # Fixed truncation policy; not present in OpenCode data.
                    "truncation_policy": {"mode": "auto", "limit": 100000},
                    "_timestamp": _opencode_format_timestamp(time_info.get("created"))
                }
                turn_contexts.append(turn_context)

        result["meta"] = {
            "session_meta": session_meta,
            "turn_contexts": turn_contexts,
            "token_counts": state.token_counts if options.include_token_count else None
        }

    return result
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
# ============================================================================
|
|
1084
|
+
# Gemini JSON 转换器
|
|
1085
|
+
# ============================================================================
|
|
1086
|
+
|
|
1087
|
+
def convert_gemini(file_path: Path) -> Dict[str, Any]:
    """
    Convert a Gemini CLI JSON session file to the unified OpenAI format.

    Args:
        file_path: Path to the Gemini session JSON file.

    Returns:
        A dict with "messages" (OpenAI-style) and "meta" (session_meta plus
        per-message token_counts).

    Raises:
        OSError / json.JSONDecodeError: if the file cannot be read or parsed.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    messages = []
    user_messages = 0
    assistant_messages = 0
    token_counts = []

    for msg in data.get('messages', []):
        msg_type = msg.get('type', '')
        timestamp = msg.get('timestamp', '')
        content_text = msg.get('content', '')

        # Map the Gemini message type to an OpenAI role; other types
        # (e.g. system/meta entries) are skipped entirely.
        if msg_type == 'user':
            role = 'user'
            user_messages += 1
        elif msg_type == 'gemini':
            role = 'assistant'
            assistant_messages += 1
        else:
            continue

        content_blocks = []

        # "thoughts" entries become a single merged reasoning block.
        thoughts = msg.get('thoughts', [])
        if thoughts and role == 'assistant':
            reasoning_parts = []
            for thought in thoughts:
                subject = thought.get('subject', '')
                description = thought.get('description', '')
                if subject and description:
                    reasoning_parts.append(f"**{subject}**\n{description}")

            if reasoning_parts:
                content_blocks.append({
                    'type': 'reasoning',
                    'reasoning': '\n\n'.join(reasoning_parts)
                })

        # Main text content.
        if content_text:
            content_blocks.append({
                'type': 'text',
                'text': content_text
            })

        # Tool calls: mirrored both into the content stream and into the
        # OpenAI "tool_calls" field. Serialize the arguments once.
        tool_calls_data = msg.get('toolCalls', [])
        tool_calls = []

        for tool_call in tool_calls_data:
            tool_id = tool_call.get('id', '')
            tool_name = tool_call.get('name', '')
            tool_args = tool_call.get('args', {})
            args_json = json.dumps(tool_args, ensure_ascii=False)

            content_blocks.append({
                'type': 'tool_call',
                'tool_call_id': tool_id,
                'name': tool_name,
                'arguments': args_json
            })

            tool_calls.append({
                'id': tool_id,
                'type': 'function',
                'function': {
                    'name': tool_name,
                    'arguments': args_json
                }
            })

        # Assemble the message object.
        message = {
            'role': role,
            'content': content_blocks
        }

        if tool_calls:
            message['tool_calls'] = tool_calls

        metadata = {}
        if timestamp:
            metadata['timestamp'] = timestamp
        if msg.get('model'):
            metadata['model'] = msg['model']

        if metadata:
            message['_metadata'] = metadata

        messages.append(message)

        # Emit one "tool" message per tool call that carries a result.
        for tool_call in tool_calls_data:
            tool_id = tool_call.get('id', '')
            tool_result = tool_call.get('result', [])

            if tool_result:
                # Only the first functionResponse output is used.
                output_text = ''
                for result_item in tool_result:
                    if isinstance(result_item, dict):
                        func_response = result_item.get('functionResponse', {})
                        response_data = func_response.get('response', {})
                        output_text = response_data.get('output', '')
                        break

                tool_message = {
                    'role': 'tool',
                    'tool_call_id': tool_id,
                    'content': [{
                        'type': 'tool_output',
                        'text': output_text
                    }]
                }
                messages.append(tool_message)

        # Collect per-message token statistics.
        tokens = msg.get('tokens', {})
        if tokens:
            token_count = {
                'type': 'token_count',
                'input_tokens': tokens.get('input', 0),
                'output_tokens': tokens.get('output', 0),
                '_timestamp': timestamp
            }

            if 'cached' in tokens:
                token_count['cache_read_input_tokens'] = tokens['cached']
            if 'thoughts' in tokens:
                token_count['reasoning_tokens'] = tokens['thoughts']
            if 'tool' in tokens:
                token_count['tool_tokens'] = tokens['tool']
            if 'total' in tokens:
                token_count['total_tokens'] = tokens['total']

            token_counts.append(token_count)

    # Session-level metadata.
    session_meta = {
        'source': 'gemini',
        'session_id': data.get('sessionId', ''),
        'message_count': len(messages),
        'user_messages': user_messages,
        'assistant_messages': assistant_messages,
    }

    if data.get('startTime'):
        session_meta['created_at'] = data['startTime']
    if data.get('lastUpdated'):
        session_meta['last_updated_at'] = data['lastUpdated']
    if data.get('projectHash'):
        session_meta['project_hash'] = data['projectHash']

    # Session duration, best-effort: malformed timestamps are ignored.
    if data.get('startTime') and data.get('lastUpdated'):
        try:
            start = datetime.fromisoformat(data['startTime'].replace('Z', '+00:00'))
            end = datetime.fromisoformat(data['lastUpdated'].replace('Z', '+00:00'))
            duration = (end - start).total_seconds()
            session_meta['duration_seconds'] = round(duration, 2)
        except Exception:
            pass

    return {
        'messages': messages,
        'meta': {
            'session_meta': session_meta,
            'token_counts': token_counts
        }
    }
|
|
1262
|
+
|
|
1263
|
+
|
|
1264
|
+
# ============================================================================
|
|
1265
|
+
# Kilocode JSON 转换器
|
|
1266
|
+
# ============================================================================
|
|
1267
|
+
|
|
1268
|
+
def parse_tool_calls_from_text(text: str) -> List[Dict[str, Any]]:
    """Extract XML-style tool invocations embedded in assistant text.

    Each outer ``<tool>...</tool>`` element becomes one tool call; its inner
    ``<param>value</param>`` elements become the call's arguments. Call ids
    are synthesized as ``call_<name>_<index>``.
    """
    tag_re = re.compile(r'<(\w+)>(.*?)</\1>', re.DOTALL)

    calls: List[Dict[str, Any]] = []
    for idx, outer in enumerate(tag_re.finditer(text)):
        name = outer.group(1)
        body = outer.group(2).strip()

        # The same tag pattern is reused one level down for parameters.
        args = {
            inner.group(1): inner.group(2).strip()
            for inner in tag_re.finditer(body)
        }

        calls.append({
            'id': f"call_{name}_{idx}",
            'name': name,
            'arguments': args,
        })

    return calls
|
|
1296
|
+
|
|
1297
|
+
|
|
1298
|
+
def parse_kilocode_content_block(block: Dict[str, Any], timestamp: Optional[int] = None) -> Dict[str, Any]:
    """Normalize a single Kilocode content block to the unified block shape.

    'text' and 'reasoning' blocks map directly; any other kind falls back to
    its 'text' field, or the whole block serialized as JSON.
    """
    kind = block.get('type', '')

    if kind == 'reasoning':
        return {'type': 'reasoning', 'reasoning': block.get('text', '')}
    if kind == 'text':
        return {'type': 'text', 'text': block.get('text', '')}

    # Unknown block kinds: best-effort textual representation.
    fallback = block.get('text', json.dumps(block, ensure_ascii=False))
    return {'type': 'text', 'text': fallback}
|
|
1309
|
+
|
|
1310
|
+
|
|
1311
|
+
def parse_kilocode_content_array(content: List[Dict[str, Any]], timestamp: Optional[int] = None) -> tuple:
    """Parse a Kilocode content array into ``(content_blocks, tool_calls)``.

    Text blocks are scanned for embedded XML tool invocations; each one found
    replaces the text block with a 'tool_call' content block and also yields
    an OpenAI-style entry in the tool_calls list.
    """
    blocks: List[Dict[str, Any]] = []
    calls: List[Dict[str, Any]] = []

    for raw in content:
        parsed = parse_kilocode_content_block(raw, timestamp)

        if parsed['type'] != 'text':
            blocks.append(parsed)
            continue

        embedded = parse_tool_calls_from_text(parsed['text'])
        if not embedded:
            blocks.append(parsed)
            continue

        for call in embedded:
            args_json = json.dumps(call['arguments'], ensure_ascii=False)
            blocks.append({
                'type': 'tool_call',
                'tool_call_id': call['id'],
                'name': call['name'],
                'arguments': args_json,
            })
            calls.append({
                'id': call['id'],
                'type': 'function',
                'function': {'name': call['name'], 'arguments': args_json},
            })

    return blocks, calls
|
|
1347
|
+
|
|
1348
|
+
|
|
1349
|
+
def _kilocode_ms_to_iso(timestamp_ms: int) -> Optional[str]:
    """Render a millisecond Unix timestamp as a UTC ISO8601 string with 'Z'.

    Returns None when the timestamp cannot be converted.
    """
    try:
        # BUGFIX: the original used naive local time but still appended 'Z'
        # (which asserts UTC); convert explicitly in UTC, matching
        # _opencode_format_timestamp elsewhere in this file.
        dt = datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
        return dt.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'
    except Exception:
        return None


def convert_kilocode(file_path: Path) -> Dict[str, Any]:
    """
    Convert a Kilocode conversation-history file (a JSON array) to the
    unified OpenAI format.

    Args:
        file_path: Path to the Kilocode JSON file.

    Returns:
        A dict with "messages" and "meta" (session_meta only).

    Raises:
        OSError / json.JSONDecodeError: if the file cannot be read or parsed.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    messages = []
    user_messages = 0
    assistant_messages = 0
    first_timestamp = None
    last_timestamp = None

    for item in data:
        role = item.get('role', '')
        content = item.get('content', [])
        timestamp = item.get('ts', 0)

        # Track the first/last 'ts' seen to derive session timing.
        if first_timestamp is None:
            first_timestamp = timestamp
        last_timestamp = timestamp

        if role == 'user':
            user_messages += 1
        elif role == 'assistant':
            assistant_messages += 1

        # Normalize the content field: list of blocks, bare string, or
        # anything else (serialized as JSON text).
        if isinstance(content, list):
            content_blocks, tool_calls = parse_kilocode_content_array(content, timestamp)
        elif isinstance(content, str):
            content_blocks = [{'type': 'text', 'text': content}]
            tool_calls = []
        else:
            content_blocks = [{'type': 'text', 'text': json.dumps(content, ensure_ascii=False)}]
            tool_calls = []

        message = {'role': role, 'content': content_blocks}

        if tool_calls:
            message['tool_calls'] = tool_calls

        if timestamp:
            iso = _kilocode_ms_to_iso(timestamp)
            if iso:
                message['_metadata'] = {'timestamp': iso}

        messages.append(message)

    session_meta = {
        'source': 'kilocode',
        'message_count': len(data),
        'user_messages': user_messages,
        'assistant_messages': assistant_messages,
    }

    if first_timestamp:
        iso = _kilocode_ms_to_iso(first_timestamp)
        if iso:
            session_meta['created_at'] = iso

    if last_timestamp:
        iso = _kilocode_ms_to_iso(last_timestamp)
        if iso:
            session_meta['last_updated_at'] = iso

    if first_timestamp and last_timestamp:
        duration_ms = last_timestamp - first_timestamp
        session_meta['duration_seconds'] = round(duration_ms / 1000, 2)

    return {
        'messages': messages,
        'meta': {'session_meta': session_meta}
    }
|
|
1430
|
+
|
|
1431
|
+
|
|
1432
|
+
# ============================================================================
|
|
1433
|
+
# 各格式入口函数(对外统一接口)
|
|
1434
|
+
# ============================================================================
|
|
1435
|
+
|
|
1436
|
+
def convert_claude_jsonl(file_path: Path) -> Dict[str, Any]:
    """Convert a Claude JSONL session event stream to the unified format.

    Each non-blank line of the file is one JSON event; the events are handed
    to the Claude converter with full metadata enabled.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        events = [json.loads(raw) for raw in f if raw.strip()]

    return convert_claude_jsonl_to_messages(
        events, options=ClaudeConverterOptions(messages_only=False)
    )
|
|
1443
|
+
|
|
1444
|
+
|
|
1445
|
+
def convert_codex_jsonl(file_path: Path) -> Dict[str, Any]:
    """Convert a Codex JSONL session event stream to the unified format.

    Each non-blank line of the file is one JSON event; the events are handed
    to the Codex converter with full metadata enabled.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        events = [json.loads(raw) for raw in f if raw.strip()]

    return convert_codex_jsonl_to_messages(
        events, options=CodexConverterOptions(messages_only=False)
    )
|
|
1452
|
+
|
|
1453
|
+
|
|
1454
|
+
def convert_opencode(file_path: Path) -> Dict[str, Any]:
    """Convert an OpenCode session file to the unified format.

    OpenCode exports come from different platforms, so several encodings are
    attempted in order; the first one that parses as JSON wins.

    Raises:
        ValueError: when the file cannot be decoded with any encoding tried.
    """
    candidate_encodings = (
        'utf-8-sig', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'gbk', 'gb2312'
    )

    data = None
    for enc in candidate_encodings:
        try:
            with open(file_path, 'r', encoding=enc) as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            continue
        break

    if data is None:
        raise ValueError(f"无法读取文件 {file_path},尝试了多种编码都失败")

    return convert_opencode_to_messages(
        data, options=OpenCodeConverterOptions(messages_only=False)
    )
|
|
1470
|
+
|
|
1471
|
+
|
|
1472
|
+
# ============================================================================
|
|
1473
|
+
# 主程序
|
|
1474
|
+
# ============================================================================
|
|
1475
|
+
|
|
1476
|
+
def convert_single_file(input_path: Path, output_path: Path, format_type: str) -> bool:
    """
    Convert a single session file to the unified OpenAI format.

    Args:
        input_path: Source session file.
        output_path: Destination JSON file (overwritten if present).
        format_type: One of 'claude_jsonl', 'codex_jsonl', 'gemini',
            'kilocode', 'opencode'.

    Returns:
        True when conversion and writing both succeed, False otherwise
        (errors are reported on stdout, never raised).
    """
    # NOTE: the original also built an unused `format_names` dict here;
    # it was dead code and has been removed.

    # Convert the data, dispatching on the (pre-detected) format.
    try:
        if format_type == 'claude_jsonl':
            result = convert_claude_jsonl(input_path)
        elif format_type == 'codex_jsonl':
            result = convert_codex_jsonl(input_path)
        elif format_type == 'gemini':
            result = convert_gemini(input_path)
        elif format_type == 'kilocode':
            result = convert_kilocode(input_path)
        elif format_type == 'opencode':
            result = convert_opencode(input_path)
        else:
            print(f" ❌ 不支持的格式: {format_type}")
            return False
    except Exception as e:
        print(f" ❌ 转换失败: {str(e)}")
        return False

    # Write the output file.
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f" ❌ 写入输出文件失败: {str(e)}")
        return False

    return True
|
|
1519
|
+
|
|
1520
|
+
|
|
1521
|
+
def process_directory(directory: Path, format_type: str = 'auto') -> None:
    """
    Batch-convert every JSON/JSONL file found in a directory.

    Args:
        directory: Directory to scan (non-recursively).
        format_type: Format name, or 'auto' to detect each file's format.

    Outputs go to a ``converted/`` subdirectory as
    ``<stem>_converted.json``; existing outputs and files already named
    ``*_converted`` are skipped. Progress and a summary are printed.
    """
    # Scan for every .json and .jsonl file in the directory.
    json_files = list(directory.glob('*.json'))
    jsonl_files = list(directory.glob('*.jsonl'))
    all_files = json_files + jsonl_files

    # Skip files that are themselves conversion outputs.
    files_to_process = [f for f in all_files if not f.stem.endswith('_converted')]

    if not files_to_process:
        print(f"❌ 目录中没有找到需要转换的文件: {directory}")
        print(" (已忽略 *_converted.json 文件)")
        return

    # Create the converted/ output directory.
    output_dir = directory / 'converted'
    try:
        output_dir.mkdir(exist_ok=True)
        print(f"输出目录: {output_dir}")
    except Exception as e:
        print(f"❌ 错误: 无法创建输出目录: {str(e)}")
        return

    print(f"找到 {len(files_to_process)} 个文件待处理")
    print()

    success_count = 0
    failed_count = 0
    skipped_count = 0

    for idx, input_path in enumerate(files_to_process, 1):
        print(f"[{idx}/{len(files_to_process)}] 处理: {input_path.name}")

        # Output filename lives in the converted/ subdirectory.
        output_path = output_dir / f"{input_path.stem}_converted.json"

        # Never overwrite an existing output file.
        if output_path.exists():
            print(f" ⚠️ 输出文件已存在,跳过: {output_path.name}")
            skipped_count += 1
            continue

        # Detect the format, or map the user-supplied short name.
        if format_type == 'auto':
            detected_format = detect_format(input_path)
        else:
            format_map = {
                'claude': 'claude_jsonl',
                'codex': 'codex_jsonl',
                'gemini': 'gemini',
                'kilocode': 'kilocode',
                'opencode': 'opencode'
            }
            detected_format = format_map.get(format_type, format_type)

        if detected_format == 'unknown':
            print(f" ❌ 无法识别的文件格式,跳过")
            failed_count += 1
            continue

        # Human-readable format label for progress output.
        format_names = {
            'claude_jsonl': 'Claude JSONL',
            'codex_jsonl': 'Codex JSONL',
            'gemini': 'Gemini JSON',
            'kilocode': 'Kilocode JSON',
            'opencode': 'OpenCode JSON'
        }
        print(f" 格式: {format_names.get(detected_format, detected_format)}")

        # Convert the file; convert_single_file reports its own errors.
        if convert_single_file(input_path, output_path, detected_format):
            file_size = output_path.stat().st_size / 1024
            print(f" ✅ 转换成功 ({file_size:.2f} KB) -> {output_path.name}")
            success_count += 1
        else:
            failed_count += 1

        print()

    # Print the batch summary.
    print("="*80)
    print("批量转换完成!")
    print("="*80)
    print(f"成功: {success_count} 个")
    print(f"失败: {failed_count} 个")
    print(f"跳过: {skipped_count} 个")
    print(f"总计: {len(files_to_process)} 个")
|
|
1615
|
+
|
|
1616
|
+
|
|
1617
|
+
def main():
    """CLI entry point: parse arguments, then run single-file or batch mode."""
    parser = argparse.ArgumentParser(
        description='AI Session 统一转换工具 - 自动识别格式并转换为 OpenAI 标准格式',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
支持的输入格式:
  1. Claude JSONL - Claude Desktop/API session 事件流
  2. Codex JSONL - Codex CLI session 事件流
  3. Gemini JSON - Gemini CLI session 数据
  4. Kilocode JSON - Kilocode API 对话历史数组
  5. OpenCode JSON - OpenCode session 数据

输出格式:
  - OpenAI 标准消息格式
  - 符合 OPENAI_FORMAT_SPEC.md 规范
  - 包含 messages 数组和 meta 元数据

使用示例:
  # 单文件转换 (自动检测格式)
  python convert_ai_session.py -i session.json

  # 单文件转换 (指定输出文件)
  python convert_ai_session.py -i session.jsonl -o output.json

  # 单文件转换 (强制指定格式)
  python convert_ai_session.py -i session.jsonl --format claude

  # 批量转换指定目录下所有文件
  python convert_ai_session.py -d ./sessions

  # 批量转换当前目录下所有文件
  python convert_ai_session.py -d .

  # 批量转换 (强制指定格式)
  python convert_ai_session.py -d ./sessions --format claude

批量处理说明:
  - 批量模式会扫描目录下所有 .json 和 .jsonl 文件
  - 自动创建 converted/ 子目录存放转换后的文件
  - 输出文件命名规则: converted/<原文件名>_converted.json
  - 自动跳过已存在的输出文件和 *_converted.json 文件
  - 使用 -d . 可以处理当前目录下的所有文件
"""
    )

    # -i and -d are mutually exclusive: single-file vs batch mode.
    input_group = parser.add_mutually_exclusive_group(required=True)

    input_group.add_argument(
        '-i', '--input',
        help='输入文件路径 (单文件模式)'
    )

    input_group.add_argument(
        '-d', '--directory',
        help='输入目录路径 (批量处理模式,会扫描目录下所有 .json 和 .jsonl 文件)'
    )

    parser.add_argument(
        '-o', '--output',
        help='输出文件路径 (仅单文件模式有效,默认: <输入文件名>_converted.json)'
    )

    parser.add_argument(
        '--format',
        choices=['claude', 'codex', 'gemini', 'kilocode', 'opencode', 'auto'],
        default='auto',
        help='强制指定输入格式 (默认: auto 自动检测)'
    )

    args = parser.parse_args()

    print("="*80)
    print("AI SESSION 统一转换工具")
    print("="*80)
    print()

    # --- Batch mode ---
    if args.directory:
        directory_path = Path(args.directory)

        if not directory_path.exists():
            print(f"❌ 错误: 目录不存在: {args.directory}")
            sys.exit(1)

        if not directory_path.is_dir():
            print(f"❌ 错误: 不是一个目录: {args.directory}")
            sys.exit(1)

        # -o is meaningless in batch mode; warn and ignore it.
        if args.output:
            print("⚠️ 警告: 批量处理模式下 -o 参数无效,将使用默认命名规则")
            print()

        print(f"批量处理模式")
        print(f"输入目录: {directory_path}")
        print(f"输出规则: converted/<原文件名>_converted.json")
        print()

        process_directory(directory_path, args.format)
        return

    # --- Single-file mode ---
    input_path = Path(args.input)

    if not input_path.exists():
        print(f"❌ 错误: 输入文件不存在: {args.input}")
        sys.exit(1)

    # Decide the output filename (default: <stem>_converted.json alongside).
    if args.output:
        output_path = Path(args.output)
    else:
        output_path = input_path.parent / f"{input_path.stem}_converted.json"

    print(f"单文件处理模式")
    print(f"输入文件: {input_path}")
    print(f"输出文件: {output_path}")
    print()

    # Detect the format, or map the user-supplied short name.
    if args.format == 'auto':
        print("正在检测文件格式...")
        format_type = detect_format(input_path)
    else:
        format_map = {
            'claude': 'claude_jsonl',
            'codex': 'codex_jsonl',
            'gemini': 'gemini',
            'kilocode': 'kilocode',
            'opencode': 'opencode'
        }
        format_type = format_map[args.format]
        print(f"使用指定格式: {args.format}")

    if format_type == 'unknown':
        print("❌ 错误: 无法识别的文件格式")
        print()
        print("支持的格式:")
        print(" - Claude JSONL (*.jsonl)")
        print(" - Codex JSONL (*.jsonl)")
        print(" - Gemini JSON (*.json)")
        print(" - Kilocode JSON (*.json)")
        print(" - OpenCode JSON (*.json)")
        print()
        print("提示: 使用 --format 参数强制指定格式")
        sys.exit(1)

    # Report the detected format with a human-readable label.
    format_names = {
        'claude_jsonl': 'Claude JSONL',
        'codex_jsonl': 'Codex JSONL',
        'gemini': 'Gemini JSON',
        'kilocode': 'Kilocode JSON',
        'opencode': 'OpenCode JSON'
    }
    print(f"✅ 检测到格式: {format_names.get(format_type, format_type)}")
    print()

    # Convert the data, dispatching on the resolved format.
    print("正在转换数据...")
    try:
        if format_type == 'claude_jsonl':
            result = convert_claude_jsonl(input_path)
        elif format_type == 'codex_jsonl':
            result = convert_codex_jsonl(input_path)
        elif format_type == 'gemini':
            result = convert_gemini(input_path)
        elif format_type == 'kilocode':
            result = convert_kilocode(input_path)
        elif format_type == 'opencode':
            result = convert_opencode(input_path)
        else:
            print(f"❌ 错误: 不支持的格式: {format_type}")
            sys.exit(1)
    except Exception as e:
        print(f"❌ 错误: 转换失败: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # Write the output file.
    print("正在写入输出文件...")
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"❌ 错误: 写入输出文件失败: {str(e)}")
        sys.exit(1)

    # Show summary statistics.
    print()
    print("="*80)
    print("✅ 转换完成!")
    print("="*80)
    print()

    if 'meta' in result and 'session_meta' in result['meta']:
        meta = result['meta']['session_meta']
        print("统计信息:")
        if 'message_count' in meta:
            print(f" 总消息数: {meta['message_count']}")
        if 'user_messages' in meta:
            print(f" 用户消息: {meta['user_messages']}")
        if 'assistant_messages' in meta:
            print(f" 助手消息: {meta['assistant_messages']}")
        if 'created_at' in meta:
            print(f" 开始时间: {meta['created_at']}")
        if 'last_updated_at' in meta:
            print(f" 结束时间: {meta['last_updated_at']}")
        if 'duration_seconds' in meta:
            print(f" 会话时长: {meta['duration_seconds']} 秒")
        print()

    print("输出格式: 完整格式 (包含 meta)")
    file_size = output_path.stat().st_size / 1024
    print(f"文件大小: {file_size:.2f} KB")
    print(f"输出文件: {output_path}")
|
|
1834
|
+
|
|
1835
|
+
|
|
1836
|
+
# Script entry point: run the CLI only when executed directly.
if __name__ == "__main__":
    main()
|