theslopmachine 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/MANUAL.md +63 -0
  2. package/README.md +23 -0
  3. package/RELEASE.md +81 -0
  4. package/assets/agents/developer.md +294 -0
  5. package/assets/agents/slopmachine.md +510 -0
  6. package/assets/skills/beads-operations/SKILL.md +75 -0
  7. package/assets/skills/clarification-gate/SKILL.md +51 -0
  8. package/assets/skills/developer-session-lifecycle/SKILL.md +75 -0
  9. package/assets/skills/final-evaluation-orchestration/SKILL.md +75 -0
  10. package/assets/skills/frontend-design/SKILL.md +41 -0
  11. package/assets/skills/get-overlays/SKILL.md +157 -0
  12. package/assets/skills/planning-gate/SKILL.md +68 -0
  13. package/assets/skills/submission-packaging/SKILL.md +268 -0
  14. package/assets/skills/verification-gates/SKILL.md +106 -0
  15. package/assets/slopmachine/backend-evaluation-prompt.md +275 -0
  16. package/assets/slopmachine/beads-init.js +428 -0
  17. package/assets/slopmachine/document-completeness.md +45 -0
  18. package/assets/slopmachine/engineering-results.md +59 -0
  19. package/assets/slopmachine/frontend-evaluation-prompt.md +304 -0
  20. package/assets/slopmachine/implementation-comparison.md +36 -0
  21. package/assets/slopmachine/quality-document.md +108 -0
  22. package/assets/slopmachine/templates/AGENTS.md +114 -0
  23. package/assets/slopmachine/utils/convert_ai_session.py +1837 -0
  24. package/assets/slopmachine/utils/strip_session_parent.py +66 -0
  25. package/bin/slopmachine.js +9 -0
  26. package/package.json +25 -0
  27. package/src/cli.js +32 -0
  28. package/src/constants.js +77 -0
  29. package/src/init.js +179 -0
  30. package/src/install.js +330 -0
  31. package/src/utils.js +162 -0
@@ -0,0 +1,1837 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ AI Session 统一转换工具
4
+
5
+ 支持自动识别并转换以下格式的 AI 对话历史为 OpenAI 标准格式:
6
+ - Claude JSONL (Claude Desktop/API session)
7
+ - Codex JSONL (Codex CLI session)
8
+ - Gemini JSON (Gemini CLI session)
9
+ - Kilocode JSON (Kilocode API conversation history)
10
+ - OpenCode JSON (OpenCode session)
11
+
12
+ 输出格式:
13
+ - 符合 OPENAI_FORMAT_SPEC.md 规范
14
+ - 包含 messages 数组和 meta 元数据
15
+ - 支持 reasoning (推理内容)、tool_call (工具调用)、tool_output (工具结果) 等内容类型
16
+ - Token 统计信息统一存储在 meta.token_counts 数组中
17
+
18
+ 特性:
19
+ - 自动检测输入文件格式
20
+ - 支持多种编码格式 (UTF-8, UTF-16, GBK 等)
21
+ - 保留完整的元数据和时间戳信息
22
+ - 完全独立运行,不依赖任何项目内其他脚本
23
+ - 支持批量转换指定目录下的所有会话文件
24
+
25
+ 使用示例:
26
+ # 单文件转换 - 自动检测格式
27
+ python convert_ai_session.py -i session.json
28
+
29
+ # 单文件转换 - 指定输出文件
30
+ python convert_ai_session.py -i session.jsonl -o output.json
31
+
32
+ # 单文件转换 - 强制指定格式
33
+ python convert_ai_session.py -i session.jsonl --format claude
34
+
35
+ # 批量转换 - 转换指定目录下所有会话文件(仅处理一层目录) 固定输出到convert目录下
36
+ python convert_ai_session.py -d script/session/test
37
+
38
+ # 批量转换 - 转换当前目录下所有会话文件(仅处理一层目录) 固定输出到convert目录下
39
+ python convert_ai_session.py -d .
40
+
41
+
42
+
43
+ # 批量转换 - 指定文件匹配模式
44
+ python convert_ai_session.py -d script/session/test --pattern "*.json" --exclude "*_converted.json"
45
+
46
+ 批量转换说明:
47
+ - 仅扫描指定目录下的所有 .json 和 .jsonl 文件(不递归子目录)
48
+ - 默认跳过已转换的文件 (*_converted.json)
49
+ - 转换后的文件命名为: 原文件名_converted.json
50
+ - 可通过 --pattern 和 --exclude 参数自定义文件过滤规则
51
+ - 转换失败的文件会记录错误信息并继续处理其他文件
52
+ - 转换完成后输出统计信息: 成功数/失败数/跳过数
53
+
54
+ 作者: liufei
55
+ 版本: 1.3.0
56
+ 更新日期: 2026-03-18
57
+ """
58
+ from __future__ import annotations
59
+
60
+ import json
61
+ import sys
62
+ import argparse
63
+ import re
64
+ from pathlib import Path
65
+ from typing import Dict, Any, List, Optional, TextIO
66
+ from collections.abc import Iterable
67
+ from dataclasses import dataclass, field
68
+ from datetime import datetime, timezone
69
+
70
+
71
+ # ============================================================================
72
+ # 格式检测
73
+ # ============================================================================
74
+
75
def detect_format(file_path: Path) -> str:
    """
    Detect which AI-session format a file contains.

    ``.jsonl`` files are delegated to ``detect_jsonl_format``. Any other file
    is parsed as a single JSON document, trying several text encodings in
    turn, and is then classified by the shape of the decoded value.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        'claude_jsonl' | 'codex_jsonl' | 'kilocode' | 'opencode' | 'gemini' | 'unknown'
    """
    # JSONL detection
    if file_path.suffix == '.jsonl':
        return detect_jsonl_format(file_path)

    # JSON detection: the first encoding that yields parseable JSON wins.
    data = None
    for encoding in ['utf-8-sig', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'gbk', 'gb2312']:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                data = json.load(f)
            break
        except (json.JSONDecodeError, UnicodeError):
            # Catch UnicodeError, not only UnicodeDecodeError: the 'utf-16'
            # codec raises a plain UnicodeError ("UTF-16 stream does not start
            # with BOM") for BOM-less input, which previously escaped this
            # loop and made the later encodings unreachable.
            continue

    if data is None:
        return 'unknown'

    # Gemini: {"sessionId": "...", "messages": [...], "startTime": "..."}
    if isinstance(data, dict) and 'sessionId' in data and 'messages' in data:
        messages = data.get('messages', [])
        if isinstance(messages, list) and len(messages) > 0:
            first_msg = messages[0]
            if isinstance(first_msg, dict) and 'type' in first_msg and first_msg.get('type') in ('user', 'gemini'):
                return 'gemini'

    # OpenCode: {"info": {...}, "messages": [...]}
    if isinstance(data, dict) and 'info' in data and 'messages' in data:
        info = data.get('info', {})
        if isinstance(info, dict) and 'id' in info:
            return 'opencode'

    # Kilocode: [{"role": "user", "content": [...], "ts": 123}]
    if isinstance(data, list) and len(data) > 0:
        first_item = data[0]
        if isinstance(first_item, dict) and 'role' in first_item and 'content' in first_item and 'ts' in first_item:
            content = first_item.get('content', [])
            if isinstance(content, list) and len(content) > 0:
                if isinstance(content[0], dict) and 'type' in content[0]:
                    return 'kilocode'

    return 'unknown'
122
+
123
+
124
def detect_jsonl_format(file_path: Path) -> str:
    """
    Decide whether a JSONL file is a Claude or a Codex session.

    Only the first 10 physical lines are examined. The first non-blank line
    decides the format when it carries an unambiguous marker; otherwise the
    remaining sampled lines are scored and the higher score wins, with ties
    (including "no markers at all") resolved to Codex.

    Args:
        file_path: Path of the ``.jsonl`` file to inspect.

    Returns:
        'codex_jsonl' | 'claude_jsonl' | 'unknown'. 'unknown' is returned for
        an empty sample, a first line that is not a JSON object, or any error
        while reading (a warning is printed in the error case).
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM; with
        # plain 'utf-8' the BOM stayed on the first line and json.loads
        # rejected it, so BOM-prefixed files were reported as 'unknown'.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            lines = []
            for i, line in enumerate(f):
                if i >= 10:
                    break
                line = line.strip()
                if line:
                    lines.append(line)

        if not lines:
            return 'unknown'

        first_obj = json.loads(lines[0])
        if not isinstance(first_obj, dict):
            # Both supported formats use one JSON object per line.
            return 'unknown'

        # Claude markers
        if 'sessionId' in first_obj:
            return 'claude_jsonl'

        event_type = first_obj.get('type')
        if event_type in ('user', 'assistant', 'progress', 'file-history-snapshot', 'system'):
            if 'message' in first_obj or 'parentUuid' in first_obj or 'isSidechain' in first_obj:
                return 'claude_jsonl'

        # Codex markers
        if 'payload' in first_obj:
            return 'codex_jsonl'

        if event_type in ('session_meta', 'turn_context', 'event_msg', 'response_item'):
            return 'codex_jsonl'

        # First line was inconclusive: score the remaining sampled lines.
        claude_indicators = 0
        codex_indicators = 0

        for line in lines[1:]:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict):
                # Skip stray scalars/arrays instead of aborting detection.
                continue
            if any(k in obj for k in ('sessionId', 'parentUuid', 'isSidechain', 'userType')):
                claude_indicators += 1
            if 'payload' in obj or obj.get('type') in ('session_meta', 'turn_context'):
                codex_indicators += 1

        if claude_indicators > codex_indicators:
            return 'claude_jsonl'

        # Codex wins outright or on a tie.
        return 'codex_jsonl'

    except Exception as e:
        # Best-effort detection: report the problem and let the caller decide.
        print(f"警告: 检测 JSONL 格式时出错: {str(e)}")
        return 'unknown'
185
+
186
+
187
+ # ============================================================================
188
+ # Claude JSONL 转换器 (原 claude_jsonl_to_openai_messages.py)
189
+ # ============================================================================
190
+
191
+ def _claude_read_jsonl(stream: TextIO) -> Iterable[dict]:
192
+ """读取 JSONL 文件,每行一个 JSON 对象"""
193
+ for line_no, line in enumerate(stream, start=1):
194
+ line = line.strip()
195
+ if not line:
196
+ continue
197
+ try:
198
+ obj = json.loads(line)
199
+ except json.JSONDecodeError as exc:
200
+ raise ValueError(f"Invalid JSON at line {line_no}") from exc
201
+ if not isinstance(obj, dict):
202
+ raise ValueError(f"Expected object at line {line_no}, got {type(obj).__name__}")
203
+ yield obj
204
+
205
+
206
@dataclass
class ClaudeConverterOptions:
    """Option flags for the Claude JSONL converter."""
    # Emit assistant "thinking" items as "reasoning" content parts.
    include_thinking: bool = True
    # Mirror each tool call into the assistant message's content list,
    # in addition to the "tool_calls" field.
    include_toolcall_content: bool = True
    # Collect per-message usage into meta.token_counts.
    include_token_count: bool = True
    # Return only {"messages": [...]} without the "meta" section.
    messages_only: bool = False
213
+
214
+
215
@dataclass
class ClaudeConverterState:
    """Mutable state accumulated while converting a Claude session."""
    # sessionId taken from a user event; None until one has been seen.
    session_id: str | None = None
    # token_count entries built from assistant "usage" data.
    token_counts: list = field(default_factory=list)
    # Session-level metadata (session id, version, git branch, cwd).
    session_meta: dict = field(default_factory=dict)
    # Summaries of events that are not converted into messages.
    skipped_events: list = field(default_factory=list)
222
+
223
+
224
def _claude_token_entry(usage: dict, timestamp: Any) -> dict:
    """Build one token_count entry from a Claude ``usage`` dict."""
    # ``or 0`` also covers explicit nulls, which would otherwise break the sum.
    input_tokens = usage.get("input_tokens") or 0
    output_tokens = usage.get("output_tokens") or 0
    snapshot = {
        "input_tokens": input_tokens,
        "cached_input_tokens": usage.get("cache_read_input_tokens") or 0,
        "output_tokens": output_tokens,
        "total_tokens": input_tokens + output_tokens,
    }
    entry = {
        "type": "token_count",
        "info": {
            # Claude reports usage per message, so both views carry the
            # same numbers (kept as two independent dicts).
            "total_token_usage": dict(snapshot),
            "last_token_usage": dict(snapshot),
        },
        "rate_limits": {
            "primary": None,
            "secondary": None,
            "credits": None,
            "plan_type": None,
        },
    }
    if timestamp:
        entry["_timestamp"] = timestamp
    return entry


def convert_claude_jsonl_to_messages(
    events: Iterable[dict],
    *,
    options: ClaudeConverterOptions,
) -> dict:
    """
    Convert Claude session JSONL events to the OpenAI-style message format.

    User events become ``user`` messages; ``tool_result`` items inside a user
    event become separate ``tool`` messages. Assistant events become
    ``assistant`` messages whose content holds reasoning / text / tool_call
    parts, plus a ``tool_calls`` field when tools were invoked. Events whose
    ``message`` is missing or null are ignored instead of raising.

    Args:
        events: Iterable of decoded JSONL event objects.
        options: Conversion options.

    Returns:
        A dict with ``messages`` and, unless ``options.messages_only`` is set,
        a ``meta`` dict (session_meta, token_counts, skipped-event summary).
    """
    state = ClaudeConverterState()
    messages: list = []

    for obj in events:
        event_type = obj.get("type")
        timestamp = obj.get("timestamp")

        # Capture session metadata from user events until a sessionId is seen.
        if event_type == "user" and state.session_id is None:
            state.session_id = obj.get("sessionId")
            state.session_meta = {
                "session_id": obj.get("sessionId"),
                "version": obj.get("version"),
                "git_branch": obj.get("gitBranch"),
                "cwd": obj.get("cwd"),
            }

        # User messages
        if event_type == "user":
            message = obj.get("message")
            if not isinstance(message, dict):
                # dict.get's default does not apply to an explicit null.
                message = {}
            role = message.get("role")
            content = message.get("content")

            if role == "user" and isinstance(content, str):
                user_msg = {
                    "role": "user",
                    "content": [{"type": "text", "text": content}],
                }
                if timestamp:
                    user_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(user_msg)
            elif role == "user" and isinstance(content, list):
                # A list may mix tool results with ordinary content parts.
                user_msg = {
                    "role": "user",
                    "content": [],
                }
                for item in content:
                    if not isinstance(item, dict):
                        continue
                    if item.get("type") == "tool_result":
                        # NOTE(review): a tool_result "content" that is a list
                        # of blocks is passed through unchanged as "text" --
                        # confirm whether consumers expect a string here.
                        tool_msg = {
                            "role": "tool",
                            "tool_call_id": item.get("tool_use_id", ""),
                            "content": [{"type": "tool_output", "text": item.get("content", "")}],
                        }
                        if timestamp:
                            tool_msg["_metadata"] = {"timestamp": timestamp}
                        messages.append(tool_msg)
                    else:
                        user_msg["content"].append(item)

                # Emit the user message only if non-tool-result content remains.
                if user_msg["content"]:
                    if timestamp:
                        user_msg["_metadata"] = {"timestamp": timestamp}
                    messages.append(user_msg)

        # Assistant messages
        elif event_type == "assistant":
            message = obj.get("message")
            if not isinstance(message, dict):
                message = {}
            role = message.get("role")
            content = message.get("content")
            usage = message.get("usage")

            if role == "assistant" and isinstance(content, list):
                assistant_msg = {
                    "role": "assistant",
                    "content": [],
                }
                tool_calls = []

                for item in content:
                    if not isinstance(item, dict):
                        continue

                    item_type = item.get("type")

                    # Thinking -> reasoning part
                    if item_type == "thinking" and options.include_thinking:
                        thinking_text = item.get("thinking", "")
                        if thinking_text:
                            assistant_msg["content"].append({
                                "type": "reasoning",
                                "text": thinking_text,
                            })

                    # Plain text
                    elif item_type == "text":
                        text = item.get("text", "")
                        if text:
                            assistant_msg["content"].append({
                                "type": "text",
                                "text": text,
                            })

                    # Tool invocation
                    elif item_type == "tool_use":
                        tool_id = item.get("id", "")
                        tool_name = item.get("name", "")
                        arguments = json.dumps(item.get("input", {}), ensure_ascii=False)

                        tool_calls.append({
                            "id": tool_id,
                            "type": "function",
                            "function": {
                                "name": tool_name,
                                "arguments": arguments,
                            },
                        })

                        # Optionally mirror the call into the content list.
                        if options.include_toolcall_content:
                            assistant_msg["content"].append({
                                "type": "tool_call",
                                "tool_call_id": tool_id,
                                "name": tool_name,
                                "arguments": arguments,
                            })

                if tool_calls:
                    assistant_msg["tool_calls"] = tool_calls

                if timestamp:
                    assistant_msg["_metadata"] = {"timestamp": timestamp}

                # Drop messages with neither content nor tool calls.
                if assistant_msg["content"] or tool_calls:
                    messages.append(assistant_msg)

            # Collect token statistics whenever usage data is present.
            if isinstance(usage, dict) and usage and options.include_token_count:
                state.token_counts.append(_claude_token_entry(usage, timestamp))

        # Other event types are only summarized.
        elif event_type in ("progress", "system", "file-history-snapshot"):
            if options.include_token_count:
                state.skipped_events.append({
                    "type": event_type,
                    "timestamp": timestamp,
                    "data": obj.get("data") or obj.get("subtype"),
                })

    # Assemble the result.
    result: dict = {"messages": messages}
    if not options.messages_only:
        result["meta"] = {
            "session_meta": state.session_meta,
            "token_counts": state.token_counts if options.include_token_count else None,
            "skipped_events_count": len(state.skipped_events),
            # Only the first 10 skipped events are included.
            "skipped_events": state.skipped_events[:10] if state.skipped_events else [],
        }

    return result
421
+
422
+
423
+ # ============================================================================
424
+ # Codex JSONL 转换器 (原 codex_jsonl_to_openai_messages.py)
425
+ # ============================================================================
426
+
427
+ def _codex_looks_like_agents_instructions(text: str) -> bool:
428
+ """判断文本是否为 AGENTS.md 指令"""
429
+ t = text.lstrip()
430
+ return t.startswith("# AGENTS.md instructions") or ("## Skills" in t and "<INSTRUCTIONS>" in t)
431
+
432
+
433
+ def _codex_looks_like_environment_context(text: str) -> bool:
434
+ """判断文本是否为环境上下文"""
435
+ t = text.lstrip()
436
+ return t.startswith("<environment_context>") and "</environment_context>" in t
437
+
438
+
439
+ def _codex_as_text_parts(content: Any) -> list:
440
+ """将 content 转换为文本 parts 列表"""
441
+ if not isinstance(content, list):
442
+ return []
443
+ out = []
444
+ for part in content:
445
+ if not isinstance(part, dict):
446
+ continue
447
+ if "text" in part and isinstance(part["text"], str):
448
+ out.append({"type": "text", "text": part["text"]})
449
+ continue
450
+ out.append(part)
451
+ return out
452
+
453
+
454
+ def _codex_concat_text(content: Any) -> str:
455
+ """拼接 content 中所有文本"""
456
+ if not isinstance(content, list):
457
+ return ""
458
+ chunks = []
459
+ for part in content:
460
+ if isinstance(part, dict) and isinstance(part.get("text"), str):
461
+ chunks.append(part["text"])
462
+ return "".join(chunks)
463
+
464
+
465
+ def _codex_maybe_parse_json_string(value: str) -> Any:
466
+ """尝试将字符串解析为 JSON"""
467
+ s = value.strip()
468
+ if not s:
469
+ return None
470
+ if not (s.startswith("{") or s.startswith("[")):
471
+ return None
472
+ try:
473
+ return json.loads(s)
474
+ except json.JSONDecodeError:
475
+ return None
476
+
477
+
478
@dataclass
class CodexConverterOptions:
    """Option flags for the Codex JSONL converter."""
    # Re-label user messages that look like harness input: environment
    # context becomes "system", AGENTS.md instructions become "developer".
    promote_harness_messages: bool = True
    # Emit session_meta "instructions" as a leading "developer" message.
    emit_session_instructions: bool = True
    # Mirror each tool call into the assistant message's content list.
    include_toolcall_content: bool = True
    # Collect token_count events into meta.token_counts.
    include_token_count: bool = True
    # Collect turn_context payloads into meta.turn_contexts.
    include_turn_context: bool = True
    # Return only {"messages": [...]} without the "meta" section.
    messages_only: bool = False
487
+
488
+
489
@dataclass
class CodexConverterState:
    """Mutable state accumulated while converting a Codex session."""
    # Reasoning texts waiting to be attached to the next assistant message.
    pending_reasoning: list = field(default_factory=list)
    # Most recently queued reasoning text, used to drop immediate repeats.
    last_reasoning: str | None = None
    session_instructions: str | None = None
    token_counts: list = field(default_factory=list)
    turn_contexts: list = field(default_factory=list)
    session_meta: dict | None = None

    def add_reasoning(self, text: str) -> None:
        """Queue *text* (stripped) unless it is blank or repeats the previous entry."""
        cleaned = text.strip()
        if cleaned and cleaned != self.last_reasoning:
            self.pending_reasoning.append(cleaned)
            self.last_reasoning = cleaned

    def take_reasoning_parts(self) -> list:
        """Return the queued reasoning as content parts and empty the queue."""
        parts = [{"type": "reasoning", "text": entry} for entry in self.pending_reasoning]
        if parts:
            # Reset only when something was actually drained.
            self.pending_reasoning.clear()
            self.last_reasoning = None
        return parts
517
+
518
+
519
def convert_codex_jsonl_to_messages(
    events: Iterable[dict],
    *,
    options: CodexConverterOptions,
) -> dict:
    """
    Convert Codex CLI session JSONL events to the OpenAI-style message format.

    Events are dispatched on their outer ``type``:
    ``session_meta`` (stored; its instructions may become a developer message),
    ``turn_context`` (collected into meta), ``event_msg`` (reasoning text and
    token counts) and ``response_item`` (messages, tool calls, tool outputs).
    Reasoning text is buffered and attached to the next assistant message or
    tool call that is emitted.

    Args:
        events: Iterable of decoded JSONL event objects.
        options: Conversion options.

    Returns:
        A dict with ``messages`` and, unless ``options.messages_only`` is set,
        a ``meta`` dict holding session_meta, turn_contexts and token_counts.
    """
    state = CodexConverterState()
    messages: list = []

    for obj in events:
        timestamp = obj.get("timestamp")
        outer_type = obj.get("type")
        payload = obj.get("payload")

        # session_meta events
        if outer_type == "session_meta" and isinstance(payload, dict):
            state.session_meta = payload
            instr = payload.get("instructions")
            if isinstance(instr, str):
                state.session_instructions = instr
                if options.emit_session_instructions and instr.strip():
                    messages.append({
                        "role": "developer",
                        "content": [{"type": "text", "text": instr}],
                    })
            continue

        # turn_context events
        if outer_type == "turn_context" and isinstance(payload, dict):
            if options.include_turn_context:
                ctx = dict(payload)
                if timestamp:
                    ctx["_timestamp"] = timestamp
                state.turn_contexts.append(ctx)
            continue

        # event_msg events: only reasoning and token counts are used
        if outer_type == "event_msg" and isinstance(payload, dict):
            ptype = payload.get("type")
            if ptype == "agent_reasoning":
                text = payload.get("text")
                if isinstance(text, str):
                    state.add_reasoning(text)
                continue
            if ptype == "token_count":
                if options.include_token_count:
                    entry = dict(payload)
                    if timestamp:
                        entry["_timestamp"] = timestamp
                    state.token_counts.append(entry)
                continue
            continue

        # Everything below handles response_item payloads only.
        if outer_type != "response_item" or not isinstance(payload, dict):
            continue

        ptype = payload.get("type")

        # Reasoning summaries are buffered, not emitted directly.
        # NOTE(review): reasoning still buffered when the stream ends is never
        # emitted -- confirm this is intended.
        if ptype == "reasoning":
            summary = payload.get("summary")
            if isinstance(summary, list):
                for item in summary:
                    if isinstance(item, dict) and isinstance(item.get("text"), str):
                        state.add_reasoning(item["text"])
            continue

        # Ordinary messages
        if ptype == "message":
            role = payload.get("role")
            content = payload.get("content")
            content_parts = _codex_as_text_parts(content)
            content_text = _codex_concat_text(content)

            if role == "assistant":
                assistant_msg = {"role": "assistant", "content": []}
                # Buffered reasoning goes first, then the message content.
                assistant_msg["content"].extend(state.take_reasoning_parts())
                assistant_msg["content"].extend(content_parts)
                if timestamp:
                    assistant_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(assistant_msg)
                continue

            if role == "user":
                out_role = "user"
                if options.promote_harness_messages:
                    if _codex_looks_like_environment_context(content_text):
                        out_role = "system"
                    elif _codex_looks_like_agents_instructions(content_text):
                        out_role = "developer"

                # Skip a developer message that repeats the session
                # instructions already emitted from session_meta.
                if (
                    out_role == "developer"
                    and state.session_instructions
                    and state.session_instructions.strip() == content_text.strip()
                    and options.emit_session_instructions
                ):
                    continue

                user_msg = {"role": out_role, "content": content_parts}
                if timestamp:
                    user_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(user_msg)
                continue

            # Any other role is kept as is.
            if isinstance(role, str) and role:
                other_msg = {"role": role, "content": content_parts}
                if timestamp:
                    other_msg["_metadata"] = {"timestamp": timestamp}
                messages.append(other_msg)
            continue

        # Function / custom tool calls
        if ptype in ("function_call", "custom_tool_call"):
            call_id = payload.get("call_id")
            name = payload.get("name")
            if not isinstance(call_id, str) or not isinstance(name, str):
                continue

            if ptype == "function_call":
                arguments = payload.get("arguments")
                if isinstance(arguments, dict):
                    args_str = json.dumps(arguments, ensure_ascii=False)
                elif isinstance(arguments, str):
                    args_str = arguments
                else:
                    args_str = ""
            else:
                # Custom tool calls carry their input under "input"; it is
                # wrapped in an object before serialization.
                tool_input = payload.get("input")
                args_str = json.dumps({"input": tool_input}, ensure_ascii=False)

            tool_call = {
                "id": call_id,
                "type": "function",
                "function": {"name": name, "arguments": args_str},
            }

            assistant_msg: dict = {
                "role": "assistant",
                "content": [],
                "tool_calls": [tool_call],
            }
            assistant_msg["content"].extend(state.take_reasoning_parts())
            if options.include_toolcall_content:
                assistant_msg["content"].append({
                    "type": "tool_call",
                    "tool_call_id": call_id,
                    "name": name,
                    "arguments": args_str,
                })
            if timestamp:
                assistant_msg["_metadata"] = {"timestamp": timestamp}
            messages.append(assistant_msg)
            continue

        # Function / custom tool call outputs
        if ptype in ("function_call_output", "custom_tool_call_output"):
            call_id = payload.get("call_id")
            output = payload.get("output")
            if not isinstance(call_id, str):
                continue
            if not isinstance(output, str):
                output = "" if output is None else str(output)

            tool_msg: dict = {"role": "tool", "tool_call_id": call_id, "content": []}
            # When the output is a JSON object with a string "output" field,
            # that field is used and any dict "metadata" is kept alongside.
            parsed = _codex_maybe_parse_json_string(output)
            if isinstance(parsed, dict) and isinstance(parsed.get("output"), str):
                tool_msg["content"].append({"type": "tool_output", "text": parsed["output"]})
                if isinstance(parsed.get("metadata"), dict):
                    tool_msg["metadata"] = parsed["metadata"]
            else:
                tool_msg["content"].append({"type": "tool_output", "text": output})
            if timestamp:
                tool_msg["_metadata"] = {"timestamp": timestamp}
            messages.append(tool_msg)
            continue

    result: dict = {"messages": messages}
    if not options.messages_only:
        result["meta"] = {
            "session_meta": state.session_meta,
            "turn_contexts": state.turn_contexts,
            "token_counts": state.token_counts if options.include_token_count else None,
        }
    return result
706
+
707
+
708
+ # ============================================================================
709
+ # OpenCode JSON 转换器 (原 opencode_jsonl_to_openai_messages.py)
710
+ # ============================================================================
711
+
712
@dataclass
class OpenCodeConverterOptions:
    """Option flags for the OpenCode converter."""
    include_reasoning: bool = True  # include reasoning parts
    include_toolcall_content: bool = True  # mirror tool calls into content
    include_token_count: bool = True  # collect token statistics
    include_session_info: bool = True  # include session information
    messages_only: bool = False  # output only the messages array
    include_timestamps: bool = True  # attach _metadata (id, timestamp, tokens) to each message
    include_full_tool_metadata: bool = True  # keep the full metadata of tool calls
722
+
723
+
724
@dataclass
class OpenCodeConverterState:
    """Mutable state accumulated while converting an OpenCode session."""
    session_info: dict | None = None
    token_counts: list = field(default_factory=list)

    def add_token_count(self, tokens: dict, timestamp: int | None = None) -> None:
        """
        Append one token_count entry in the nested Codex layout.

        Args:
            tokens: OpenCode token dict; reads ``input``, ``output``,
                ``reasoning`` and ``cache.read`` (each defaulting to 0).
            timestamp: Stored as ``_timestamp`` when truthy.
        """
        prompt_tokens = tokens.get('input', 0)
        completion_tokens = tokens.get('output', 0)
        thinking_tokens = tokens.get('reasoning', 0)
        cache = tokens.get('cache')
        cached_tokens = cache.get('read', 0) if isinstance(cache, dict) else 0

        usage = {
            'input_tokens': prompt_tokens,
            'cached_input_tokens': cached_tokens,
            'output_tokens': completion_tokens,
            'reasoning_output_tokens': thinking_tokens,
            'total_tokens': prompt_tokens + completion_tokens,
        }
        record = {
            'type': 'token_count',
            'info': {
                # Same figures in both views, held in separate dicts.
                'total_token_usage': usage,
                'last_token_usage': dict(usage),
            },
            'rate_limits': dict.fromkeys(('primary', 'secondary', 'credits', 'plan_type')),
        }
        if timestamp:
            record["_timestamp"] = timestamp
        self.token_counts.append(record)
765
+
766
+
767
+ def _opencode_format_timestamp(timestamp_ms: int | None) -> str | None:
768
+ """将毫秒时间戳转换为 ISO8601 格式"""
769
+ if timestamp_ms is None:
770
+ return None
771
+ dt = datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
772
+ return dt.isoformat()
773
+
774
+
775
+ def _opencode_convert_tool_call(part: dict, include_full_metadata: bool = True):
776
+ """
777
+ 将 OpenCode 的工具调用转换为 OpenAI 格式
778
+
779
+ 返回: (tool_call_dict, original_data_dict) 或 None
780
+ """
781
+ call_id = part.get("callID")
782
+ tool_name = part.get("tool")
783
+ state_obj = part.get("state", {})
784
+
785
+ if not call_id or not tool_name:
786
+ return None
787
+
788
+ # 获取输入参数
789
+ input_data = state_obj.get("input", {})
790
+ if isinstance(input_data, dict):
791
+ args_str = json.dumps(input_data, ensure_ascii=False)
792
+ else:
793
+ args_str = json.dumps({"input": input_data}, ensure_ascii=False)
794
+
795
+ # 标准的 tool_call 格式
796
+ tool_call = {
797
+ "id": call_id,
798
+ "type": "function",
799
+ "function": {
800
+ "name": tool_name,
801
+ "arguments": args_str
802
+ }
803
+ }
804
+
805
+ # 原始数据,用于后续生成 metadata
806
+ original_data = {
807
+ "part_id": part.get("id"),
808
+ "tool": tool_name,
809
+ "state": state_obj
810
+ } if include_full_metadata else {}
811
+
812
+ return tool_call, original_data
813
+
814
+
815
+ def _opencode_convert_message(
816
+ message: dict,
817
+ options: OpenCodeConverterOptions,
818
+ state: OpenCodeConverterState
819
+ ) -> list:
820
+ """将 OpenCode 的单个 message 转换为 OpenAI 格式的 messages 列表"""
821
+ info = message.get("info", {})
822
+ parts = message.get("parts", [])
823
+ role = info.get("role")
824
+
825
+ # 提取时间戳和 token 信息
826
+ timestamp = info.get("time", {}).get("created")
827
+ tokens = info.get("tokens")
828
+ message_id = info.get("id")
829
+
830
+ # 记录 token 统计
831
+ if options.include_token_count and tokens:
832
+ state.add_token_count(tokens, timestamp)
833
+
834
+ # 用户消息
835
+ if role == "user":
836
+ content_parts = []
837
+ for part in parts:
838
+ if part.get("type") == "text":
839
+ content_parts.append({
840
+ "type": "text",
841
+ "text": part.get("text", "")
842
+ })
843
+
844
+ if content_parts:
845
+ user_msg = {
846
+ "role": "user",
847
+ "content": content_parts
848
+ }
849
+ if options.include_timestamps:
850
+ user_msg["_metadata"] = {
851
+ "message_id": message_id,
852
+ "timestamp": timestamp,
853
+ "tokens": tokens
854
+ }
855
+ return [user_msg]
856
+ return []
857
+
858
+ # 助手消息
859
+ if role == "assistant":
860
+ result_messages = []
861
+
862
+ text_parts = []
863
+ tool_calls = []
864
+ tool_outputs = []
865
+ tool_call_original_data = {}
866
+
867
+ for part in parts:
868
+ part_type = part.get("type")
869
+
870
+ # 推理内容
871
+ if part_type == "reasoning":
872
+ if options.include_reasoning:
873
+ reasoning_text = part.get("text", "")
874
+ if reasoning_text.strip():
875
+ text_parts.append({
876
+ "type": "reasoning",
877
+ "text": reasoning_text
878
+ })
879
+
880
+ # 文本内容
881
+ elif part_type == "text":
882
+ text = part.get("text", "")
883
+ if text.strip():
884
+ text_parts.append({
885
+ "type": "text",
886
+ "text": text
887
+ })
888
+
889
+ # 工具调用
890
+ elif part_type == "tool":
891
+ state_obj = part.get("state", {})
892
+ status = state_obj.get("status")
893
+
894
+ # 工具调用请求
895
+ if status in ("pending", "running", "completed"):
896
+ result = _opencode_convert_tool_call(part, options.include_full_tool_metadata)
897
+ if result:
898
+ tool_call, original_data = result
899
+ tool_calls.append(tool_call)
900
+
901
+ if original_data:
902
+ tool_call_original_data[tool_call["id"]] = original_data
903
+
904
+ if options.include_toolcall_content:
905
+ tool_call_content = {
906
+ "type": "tool_use",
907
+ "tool_call_id": tool_call["id"],
908
+ "name": tool_call["function"]["name"],
909
+ "arguments": tool_call["function"]["arguments"]
910
+ }
911
+ text_parts.append(tool_call_content)
912
+
913
+ # 工具调用结果
914
+ if status == "completed":
915
+ call_id = part.get("callID")
916
+ output = state_obj.get("output", "")
917
+
918
+ if call_id:
919
+ tool_outputs.append({
920
+ "call_id": call_id,
921
+ "output": output,
922
+ "state": state_obj
923
+ })
924
+
925
+ # 构建助手消息
926
+ if text_parts or tool_calls:
927
+ assistant_msg: dict = {
928
+ "role": "assistant",
929
+ "content": text_parts
930
+ }
931
+
932
+ if tool_calls:
933
+ assistant_msg["tool_calls"] = tool_calls
934
+
935
+ if options.include_timestamps:
936
+ assistant_msg["_metadata"] = {
937
+ "message_id": message_id,
938
+ "timestamp": timestamp,
939
+ "tokens": tokens
940
+ }
941
+
942
+ result_messages.append(assistant_msg)
943
+
944
+ # 添加工具输出消息
945
+ for tool_output in tool_outputs:
946
+ output_text = tool_output["output"]
947
+ if not isinstance(output_text, str):
948
+ output_text = str(output_text) if output_text is not None else ""
949
+
950
+ tool_msg = {
951
+ "role": "tool",
952
+ "tool_call_id": tool_output["call_id"],
953
+ "content": [{
954
+ "type": "tool_output",
955
+ "text": output_text
956
+ }]
957
+ }
958
+
959
+ # 如果启用完整元数据,保留所有 metadata 信息
960
+ if options.include_full_tool_metadata and "state" in tool_output:
961
+ s = tool_output["state"]
962
+ time_info = s.get("time", {})
963
+ metadata_info = s.get("metadata", {})
964
+
965
+ tool_msg["metadata"] = {}
966
+
967
+ # 保留完整的 metadata (包括 diff, files, diagnostics 等)
968
+ if isinstance(metadata_info, dict):
969
+ tool_msg["metadata"] = dict(metadata_info)
970
+
971
+ # 如果 metadata 中有 exit 字段,也添加 exit_code 别名
972
+ if "exit" in metadata_info:
973
+ tool_msg["metadata"]["exit_code"] = metadata_info["exit"]
974
+
975
+ # 添加 duration_seconds
976
+ if time_info and "start" in time_info and "end" in time_info:
977
+ duration_ms = time_info["end"] - time_info["start"]
978
+ tool_msg["metadata"]["duration_seconds"] = round(duration_ms / 1000, 3)
979
+
980
+ result_messages.append(tool_msg)
981
+
982
+ return result_messages
983
+
984
+ return []
985
+
986
+
987
def convert_opencode_to_messages(
    session_data: dict,
    *,
    options: OpenCodeConverterOptions
) -> dict:
    """
    Convert OpenCode session data into the OpenAI-style messages format.

    Args:
        session_data: Parsed OpenCode session JSON. The "info" and "messages"
            keys are read; either may be missing or null.
        options: Conversion options. ``messages_only`` and
            ``include_token_count`` are read here, and the object is passed
            through unchanged to the per-message converter.

    Returns:
        A dict with a "messages" list and, unless ``options.messages_only``
        is set, a "meta" dict containing "session_meta", "turn_contexts" and
        "token_counts".
    """

    def _as_dict(value) -> dict:
        # A session file may hold null (or another non-object) where an
        # object is expected; treat that the same as "field missing".
        return value if isinstance(value, dict) else {}

    state = OpenCodeConverterState()
    session_info = _as_dict(session_data.get("info"))
    opencode_messages = session_data.get("messages") or []

    # Convert every message; one source message may yield several outputs.
    messages: list = []
    for msg in opencode_messages:
        messages.extend(_opencode_convert_message(msg, options, state))

    result: dict = {"messages": messages}
    if options.messages_only:
        return result

    session_meta = {
        "id": session_info.get("id"),
        "timestamp": _opencode_format_timestamp(
            _as_dict(session_info.get("time")).get("created")
        ),
        "cwd": session_info.get("directory"),
        "originator": "ide",
        "cli_version": session_info.get("version"),
        "source": "opencode",
        "model_provider": None,
        "base_instructions": {
            "text": None
        },
        "git": {}
    }

    # One pass over the assistant messages: the first one supplies the
    # session-level model provider, and each one yields a turn context.
    turn_contexts = []
    provider_resolved = False
    for msg in opencode_messages:
        info = _as_dict(_as_dict(msg).get("info"))
        if info.get("role") != "assistant":
            continue

        model_info = _as_dict(info.get("model"))
        path_info = _as_dict(info.get("path"))
        summary_info = _as_dict(info.get("summary"))
        time_info = _as_dict(info.get("time"))

        if not provider_resolved:
            session_meta["model_provider"] = model_info.get("providerID")
            provider_resolved = True

        turn_contexts.append({
            "cwd": path_info.get("cwd"),
            "approval_policy": "auto",
            "sandbox_policy": {"type": "local"},
            "model": model_info.get("modelID"),
            "personality": info.get("agent"),
            "collaboration_mode": {"mode": "single"},
            "effort": info.get("mode"),
            "summary": summary_info.get("title"),
            "user_instructions": None,
            "truncation_policy": {"mode": "auto", "limit": 100000},
            "_timestamp": _opencode_format_timestamp(time_info.get("created"))
        })

    result["meta"] = {
        "session_meta": session_meta,
        "turn_contexts": turn_contexts,
        "token_counts": state.token_counts if options.include_token_count else None
    }

    return result
1081
+
1082
+
1083
+ # ============================================================================
1084
+ # Gemini JSON 转换器
1085
+ # ============================================================================
1086
+
1087
def convert_gemini(file_path: Path) -> Dict[str, Any]:
    """Convert a Gemini CLI session JSON file into the OpenAI-style message format.

    Args:
        file_path: Path to a UTF-8 JSON file whose top-level object holds a
            'messages' list plus optional 'sessionId', 'startTime',
            'lastUpdated' and 'projectHash' fields.

    Returns:
        A dict with 'messages' and 'meta'. Each converted user/assistant
        message is followed by one 'tool' message per tool call that carries
        a result. 'meta' contains 'session_meta' and 'token_counts'.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    messages = []
    user_messages = 0
    assistant_messages = 0
    token_counts = []

    # "or []" also covers an explicit JSON null, which .get(key, []) lets through.
    for msg in data.get('messages') or []:
        msg_type = msg.get('type', '')
        timestamp = msg.get('timestamp', '')
        content_text = msg.get('content', '')

        # Only 'user' and 'gemini' entries become messages; other types are skipped.
        if msg_type == 'user':
            role = 'user'
            user_messages += 1
        elif msg_type == 'gemini':
            role = 'assistant'
            assistant_messages += 1
        else:
            continue

        content_blocks = []

        # Thoughts (reasoning) are kept for assistant messages only, and only
        # when both the subject and the description are non-empty.
        thoughts = msg.get('thoughts', [])
        if thoughts and role == 'assistant':
            reasoning_parts = []
            for thought in thoughts:
                subject = thought.get('subject', '')
                description = thought.get('description', '')
                if subject and description:
                    reasoning_parts.append(f"**{subject}**\n{description}")

            if reasoning_parts:
                content_blocks.append({
                    'type': 'reasoning',
                    'reasoning': '\n\n'.join(reasoning_parts)
                })

        # Main text content.
        if content_text:
            content_blocks.append({
                'type': 'text',
                'text': content_text
            })

        # Tool calls appear twice: as content blocks and in 'tool_calls'.
        tool_calls_data = msg.get('toolCalls') or []
        tool_calls = []

        for tool_call in tool_calls_data:
            tool_id = tool_call.get('id', '')
            tool_name = tool_call.get('name', '')
            # Serialise the arguments once and reuse the string in both places.
            arguments_json = json.dumps(tool_call.get('args', {}), ensure_ascii=False)

            content_blocks.append({
                'type': 'tool_call',
                'tool_call_id': tool_id,
                'name': tool_name,
                'arguments': arguments_json
            })

            tool_calls.append({
                'id': tool_id,
                'type': 'function',
                'function': {
                    'name': tool_name,
                    'arguments': arguments_json
                }
            })

        message = {
            'role': role,
            'content': content_blocks
        }

        if tool_calls:
            message['tool_calls'] = tool_calls

        metadata = {}
        if timestamp:
            metadata['timestamp'] = timestamp
        if msg.get('model'):
            metadata['model'] = msg['model']

        if metadata:
            message['_metadata'] = metadata

        messages.append(message)

        # Emit a 'tool' message for every tool call that has a result.
        for tool_call in tool_calls_data:
            tool_result = tool_call.get('result') or []
            if not tool_result:
                continue

            # The output is taken from the first dict-shaped result item.
            output_text = ''
            for result_item in tool_result:
                if isinstance(result_item, dict):
                    func_response = result_item.get('functionResponse') or {}
                    response_data = func_response.get('response') or {}
                    output_text = response_data.get('output', '')
                    break

            messages.append({
                'role': 'tool',
                'tool_call_id': tool_call.get('id', ''),
                'content': [{
                    'type': 'tool_output',
                    'text': output_text
                }]
            })

        # Collect token statistics; optional counters are copied only if present.
        tokens = msg.get('tokens', {})
        if tokens:
            token_count = {
                'type': 'token_count',
                'input_tokens': tokens.get('input', 0),
                'output_tokens': tokens.get('output', 0),
                '_timestamp': timestamp
            }

            if 'cached' in tokens:
                token_count['cache_read_input_tokens'] = tokens['cached']
            if 'thoughts' in tokens:
                token_count['reasoning_tokens'] = tokens['thoughts']
            if 'tool' in tokens:
                token_count['tool_tokens'] = tokens['tool']
            if 'total' in tokens:
                token_count['total_tokens'] = tokens['total']

            token_counts.append(token_count)

    # Session-level metadata. 'message_count' counts emitted messages,
    # including the generated 'tool' messages.
    session_meta = {
        'source': 'gemini',
        'session_id': data.get('sessionId', ''),
        'message_count': len(messages),
        'user_messages': user_messages,
        'assistant_messages': assistant_messages,
    }

    if data.get('startTime'):
        session_meta['created_at'] = data['startTime']
    if data.get('lastUpdated'):
        session_meta['last_updated_at'] = data['lastUpdated']
    if data.get('projectHash'):
        session_meta['project_hash'] = data['projectHash']

    # Session duration is best-effort: unparsable timestamps just omit it.
    if data.get('startTime') and data.get('lastUpdated'):
        try:
            # fromisoformat() on older Pythons rejects a trailing 'Z'.
            start = datetime.fromisoformat(data['startTime'].replace('Z', '+00:00'))
            end = datetime.fromisoformat(data['lastUpdated'].replace('Z', '+00:00'))
            session_meta['duration_seconds'] = round((end - start).total_seconds(), 2)
        except (AttributeError, TypeError, ValueError):
            pass

    return {
        'messages': messages,
        'meta': {
            'session_meta': session_meta,
            'token_counts': token_counts
        }
    }
1262
+
1263
+
1264
+ # ============================================================================
1265
+ # Kilocode JSON 转换器
1266
+ # ============================================================================
1267
+
1268
def parse_tool_calls_from_text(text: str) -> List[Dict[str, Any]]:
    """Extract XML-style tool calls from free text.

    Each top-level ``<name>...</name>`` element becomes one tool call named
    after the tag. Elements found inside its body become string arguments
    (tag -> stripped inner text; a repeated tag keeps the last value).

    Returns:
        A list of dicts with 'id' (``call_<name>_<index>``), 'name' and
        'arguments', in order of appearance.
    """
    element_re = re.compile(r'<(\w+)>(.*?)</\1>', re.DOTALL)

    calls = []
    for position, outer in enumerate(element_re.finditer(text)):
        name = outer.group(1)
        body = outer.group(2).strip()
        # The same pattern is reused to pull the parameters out of the body.
        params = {
            inner.group(1): inner.group(2).strip()
            for inner in element_re.finditer(body)
        }
        calls.append({
            'id': f"call_{name}_{position}",
            'name': name,
            'arguments': params
        })

    return calls
1296
+
1297
+
1298
def parse_kilocode_content_block(block: Dict[str, Any], timestamp: Optional[int] = None) -> Dict[str, Any]:
    """Normalise one Kilocode content block.

    'text' blocks stay text and 'reasoning' blocks become reasoning blocks.
    Any other block type is turned into a text block holding its 'text'
    field, or the JSON dump of the whole block when it has none.
    ``timestamp`` is accepted but not used.
    """
    kind = block.get('type', '')

    if kind == 'reasoning':
        return {'type': 'reasoning', 'reasoning': block.get('text', '')}

    if kind != 'text':
        serialized = json.dumps(block, ensure_ascii=False)
        return {'type': 'text', 'text': block.get('text', serialized)}

    return {'type': 'text', 'text': block.get('text', '')}
1309
+
1310
+
1311
def parse_kilocode_content_array(content: List[Dict[str, Any]], timestamp: Optional[int] = None) -> tuple:
    """Convert a Kilocode content array into content blocks and tool calls.

    Each block is normalised first. A text block whose text contains
    XML-style tool calls is replaced by one 'tool_call' block per call, and
    the same calls are collected in function-call form. Every other block is
    passed through unchanged.

    Returns:
        A ``(content_blocks, tool_calls)`` tuple of lists.
    """
    blocks = []
    calls = []

    for raw in content:
        parsed = parse_kilocode_content_block(raw, timestamp)

        # Only text blocks are scanned for embedded tool calls.
        found = parse_tool_calls_from_text(parsed['text']) if parsed['type'] == 'text' else []
        if not found:
            blocks.append(parsed)
            continue

        for tool in found:
            blocks.append({
                'type': 'tool_call',
                'tool_call_id': tool['id'],
                'name': tool['name'],
                'arguments': json.dumps(tool['arguments'], ensure_ascii=False)
            })
            calls.append({
                'id': tool['id'],
                'type': 'function',
                'function': {
                    'name': tool['name'],
                    'arguments': json.dumps(tool['arguments'], ensure_ascii=False)
                }
            })

    return blocks, calls
1347
+
1348
+
1349
def convert_kilocode(file_path: Path) -> Dict[str, Any]:
    """Convert a Kilocode conversation-history JSON file to the unified format.

    Args:
        file_path: Path to a UTF-8 JSON file containing a list of items, each
            with 'role', 'content' (string or list of blocks) and 'ts'
            (epoch milliseconds).

    Returns:
        A dict with 'messages' and 'meta' -> 'session_meta'. Timestamps are
        rendered as ISO-8601 UTC strings with millisecond precision.
    """
    # Imported locally so the block does not rely on a module-level import
    # that may not exist.
    from datetime import timezone

    def _iso_utc(epoch_ms) -> Optional[str]:
        """Format epoch milliseconds as '...Z' (UTC); None if not convertible."""
        try:
            # tz=timezone.utc is required: without it fromtimestamp() returns
            # local time, which would be mislabelled by the trailing 'Z'.
            moment = datetime.fromtimestamp(epoch_ms / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            return None
        return moment.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    messages = []
    user_messages = 0
    assistant_messages = 0
    first_timestamp = None
    last_timestamp = None

    for item in data:
        role = item.get('role', '')
        content = item.get('content', [])
        timestamp = item.get('ts', 0)

        # Track the session's first and last timestamps.
        if first_timestamp is None:
            first_timestamp = timestamp
        last_timestamp = timestamp

        if role == 'user':
            user_messages += 1
        elif role == 'assistant':
            assistant_messages += 1

        if isinstance(content, list):
            content_blocks, tool_calls = parse_kilocode_content_array(content, timestamp)
        elif isinstance(content, str):
            content_blocks = [{'type': 'text', 'text': content}]
            tool_calls = []
        else:
            # Unexpected content shape: keep it as its JSON text.
            content_blocks = [{'type': 'text', 'text': json.dumps(content, ensure_ascii=False)}]
            tool_calls = []

        message = {'role': role, 'content': content_blocks}

        if tool_calls:
            message['tool_calls'] = tool_calls

        if timestamp:
            formatted = _iso_utc(timestamp)
            if formatted is not None:
                message['_metadata'] = {'timestamp': formatted}

        messages.append(message)

    session_meta = {
        'source': 'kilocode',
        'message_count': len(data),
        'user_messages': user_messages,
        'assistant_messages': assistant_messages,
    }

    if first_timestamp:
        created_at = _iso_utc(first_timestamp)
        if created_at is not None:
            session_meta['created_at'] = created_at

    if last_timestamp:
        last_updated_at = _iso_utc(last_timestamp)
        if last_updated_at is not None:
            session_meta['last_updated_at'] = last_updated_at

    if first_timestamp and last_timestamp:
        duration_ms = last_timestamp - first_timestamp
        session_meta['duration_seconds'] = round(duration_ms / 1000, 2)

    return {
        'messages': messages,
        'meta': {'session_meta': session_meta}
    }
1430
+
1431
+
1432
+ # ============================================================================
1433
+ # 各格式入口函数(对外统一接口)
1434
+ # ============================================================================
1435
+
1436
def convert_claude_jsonl(file_path: Path) -> Dict[str, Any]:
    """Convert a Claude JSONL session file to the unified format.

    Every non-blank line of the UTF-8 file is parsed as one JSON event. The
    events are then converted with ``messages_only`` disabled.
    """
    events = []
    with open(file_path, 'r', encoding='utf-8') as stream:
        for raw_line in stream:
            if raw_line.strip():
                events.append(json.loads(raw_line))

    return convert_claude_jsonl_to_messages(
        events,
        options=ClaudeConverterOptions(messages_only=False),
    )
1443
+
1444
+
1445
def convert_codex_jsonl(file_path: Path) -> Dict[str, Any]:
    """Convert a Codex JSONL session file to the unified format.

    Every non-blank line of the UTF-8 file is parsed as one JSON event. The
    events are then converted with ``messages_only`` disabled.
    """
    events = []
    with open(file_path, 'r', encoding='utf-8') as stream:
        for raw_line in stream:
            if raw_line.strip():
                events.append(json.loads(raw_line))

    return convert_codex_jsonl_to_messages(
        events,
        options=CodexConverterOptions(messages_only=False),
    )
1452
+
1453
+
1454
def convert_opencode(file_path: Path) -> Dict[str, Any]:
    """Convert an OpenCode session JSON file to the unified format.

    The file is decoded by trying a list of encodings in order; the first one
    that both decodes and parses as JSON wins.

    Args:
        file_path: Path to the OpenCode session JSON file.

    Returns:
        The conversion result, produced with ``messages_only`` disabled.

    Raises:
        ValueError: If no candidate encoding yields parseable JSON.
    """
    data = None
    for encoding in ['utf-8-sig', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'gbk', 'gb2312']:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                data = json.load(f)
            break
        except (json.JSONDecodeError, UnicodeError):
            # UnicodeError, not just UnicodeDecodeError: the 'utf-16' codec
            # raises a plain UnicodeError when the stream has no BOM, and
            # that must not abort the remaining fallbacks.
            continue

    if data is None:
        raise ValueError(f"无法读取文件 {file_path},尝试了多种编码都失败")

    options = OpenCodeConverterOptions(messages_only=False)
    return convert_opencode_to_messages(data, options=options)
1470
+
1471
+
1472
+ # ============================================================================
1473
+ # 主程序
1474
+ # ============================================================================
1475
+
1476
def convert_single_file(input_path: Path, output_path: Path, format_type: str) -> bool:
    """
    Convert one session file and write the result as JSON.

    Args:
        input_path: Source session file.
        output_path: Destination file (UTF-8 JSON, 2-space indent).
        format_type: Internal format key: 'claude_jsonl', 'codex_jsonl',
            'gemini', 'kilocode' or 'opencode'.

    Returns:
        True on success. False if the format is unsupported, the conversion
        raised, or the output could not be written; the reason is printed.
    """
    # Convert. Any converter failure is reported and turned into False so a
    # batch run can carry on with the next file.
    try:
        if format_type == 'claude_jsonl':
            result = convert_claude_jsonl(input_path)
        elif format_type == 'codex_jsonl':
            result = convert_codex_jsonl(input_path)
        elif format_type == 'gemini':
            result = convert_gemini(input_path)
        elif format_type == 'kilocode':
            result = convert_kilocode(input_path)
        elif format_type == 'opencode':
            result = convert_opencode(input_path)
        else:
            print(f" ❌ 不支持的格式: {format_type}")
            return False
    except Exception as e:
        print(f" ❌ 转换失败: {e}")
        return False

    # Write the output file.
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f" ❌ 写入输出文件失败: {e}")
        return False

    return True
1519
+
1520
+
1521
def process_directory(directory: Path, format_type: str = 'auto') -> None:
    """
    Batch-convert every .json / .jsonl file directly inside a directory.

    Files whose stem ends in '_converted' are ignored. Results are written
    to ``<directory>/converted/<stem>_converted.json``; a file whose output
    already exists is skipped. Progress and a final summary are printed.

    Args:
        directory: Directory to scan (not recursive).
        format_type: CLI format name ('claude', 'codex', 'gemini',
            'kilocode', 'opencode'), or 'auto' to detect each file's format.
    """
    # Loop-invariant lookup tables: CLI name -> internal key, key -> label.
    format_map = {
        'claude': 'claude_jsonl',
        'codex': 'codex_jsonl',
        'gemini': 'gemini',
        'kilocode': 'kilocode',
        'opencode': 'opencode'
    }
    format_names = {
        'claude_jsonl': 'Claude JSONL',
        'codex_jsonl': 'Codex JSONL',
        'gemini': 'Gemini JSON',
        'kilocode': 'Kilocode JSON',
        'opencode': 'OpenCode JSON'
    }

    # Sorted so the processing order does not depend on the filesystem's
    # listing order. That order decides which of 'a.json' / 'a.jsonl' gets
    # the shared output name 'a_converted.json'.
    all_files = sorted([*directory.glob('*.json'), *directory.glob('*.jsonl')])

    # Drop files that are themselves conversion outputs.
    files_to_process = [f for f in all_files if not f.stem.endswith('_converted')]

    if not files_to_process:
        print(f"❌ 目录中没有找到需要转换的文件: {directory}")
        print(" (已忽略 *_converted.json 文件)")
        return

    # Create the 'converted' output directory.
    output_dir = directory / 'converted'
    try:
        output_dir.mkdir(exist_ok=True)
        print(f"输出目录: {output_dir}")
    except Exception as e:
        print(f"❌ 错误: 无法创建输出目录: {e}")
        return

    total = len(files_to_process)
    print(f"找到 {total} 个文件待处理")
    print()

    success_count = 0
    failed_count = 0
    skipped_count = 0

    for idx, input_path in enumerate(files_to_process, 1):
        print(f"[{idx}/{total}] 处理: {input_path.name}")

        output_path = output_dir / f"{input_path.stem}_converted.json"

        # Never overwrite an existing output.
        if output_path.exists():
            print(f" ⚠️ 输出文件已存在,跳过: {output_path.name}")
            skipped_count += 1
            continue

        # Resolve the format: detect it, or map the forced CLI name.
        if format_type == 'auto':
            detected_format = detect_format(input_path)
        else:
            detected_format = format_map.get(format_type, format_type)

        if detected_format == 'unknown':
            print(" ❌ 无法识别的文件格式,跳过")
            failed_count += 1
            continue

        print(f" 格式: {format_names.get(detected_format, detected_format)}")

        if convert_single_file(input_path, output_path, detected_format):
            file_size = output_path.stat().st_size / 1024
            print(f" ✅ 转换成功 ({file_size:.2f} KB) -> {output_path.name}")
            success_count += 1
        else:
            failed_count += 1

        print()

    # Summary.
    print("="*80)
    print("批量转换完成!")
    print("="*80)
    print(f"成功: {success_count} 个")
    print(f"失败: {failed_count} 个")
    print(f"跳过: {skipped_count} 个")
    print(f"总计: {total} 个")
1615
+
1616
+
1617
def main():
    """Command-line entry point for the session converter.

    Parses the arguments, then either batch-converts a directory (``-d``) or
    converts one file (``-i``, optional ``-o``). The input format is
    auto-detected unless ``--format`` forces it. Exits with status 1 on a
    missing input, an unrecognised format, or a conversion/write failure.
    """
    usage_epilog = """
支持的输入格式:
1. Claude JSONL - Claude Desktop/API session 事件流
2. Codex JSONL - Codex CLI session 事件流
3. Gemini JSON - Gemini CLI session 数据
4. Kilocode JSON - Kilocode API 对话历史数组
5. OpenCode JSON - OpenCode session 数据

输出格式:
- OpenAI 标准消息格式
- 符合 OPENAI_FORMAT_SPEC.md 规范
- 包含 messages 数组和 meta 元数据

使用示例:
# 单文件转换 (自动检测格式)
python convert_ai_session.py -i session.json

# 单文件转换 (指定输出文件)
python convert_ai_session.py -i session.jsonl -o output.json

# 单文件转换 (强制指定格式)
python convert_ai_session.py -i session.jsonl --format claude

# 批量转换指定目录下所有文件
python convert_ai_session.py -d ./sessions

# 批量转换当前目录下所有文件
python convert_ai_session.py -d .

# 批量转换 (强制指定格式)
python convert_ai_session.py -d ./sessions --format claude

批量处理说明:
- 批量模式会扫描目录下所有 .json 和 .jsonl 文件
- 自动创建 converted/ 子目录存放转换后的文件
- 输出文件命名规则: converted/<原文件名>_converted.json
- 自动跳过已存在的输出文件和 *_converted.json 文件
- 使用 -d . 可以处理当前目录下的所有文件
"""
    parser = argparse.ArgumentParser(
        description='AI Session 统一转换工具 - 自动识别格式并转换为 OpenAI 标准格式',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_epilog
    )

    # -i and -d are mutually exclusive, and exactly one is required.
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument('-i', '--input', help='输入文件路径 (单文件模式)')
    source_group.add_argument(
        '-d', '--directory',
        help='输入目录路径 (批量处理模式,会扫描目录下所有 .json 和 .jsonl 文件)'
    )
    parser.add_argument(
        '-o', '--output',
        help='输出文件路径 (仅单文件模式有效,默认: <输入文件名>_converted.json)'
    )
    parser.add_argument(
        '--format',
        choices=['claude', 'codex', 'gemini', 'kilocode', 'opencode', 'auto'],
        default='auto',
        help='强制指定输入格式 (默认: auto 自动检测)'
    )

    args = parser.parse_args()

    rule = "="*80
    print(rule)
    print("AI SESSION 统一转换工具")
    print(rule)
    print()

    # ---- Batch mode ----
    if args.directory:
        batch_root = Path(args.directory)

        if not batch_root.exists():
            print(f"❌ 错误: 目录不存在: {args.directory}")
            sys.exit(1)

        if not batch_root.is_dir():
            print(f"❌ 错误: 不是一个目录: {args.directory}")
            sys.exit(1)

        if args.output:
            print("⚠️ 警告: 批量处理模式下 -o 参数无效,将使用默认命名规则")
            print()

        print("批量处理模式")
        print(f"输入目录: {batch_root}")
        print("输出规则: converted/<原文件名>_converted.json")
        print()

        process_directory(batch_root, args.format)
        return

    # ---- Single-file mode ----
    input_path = Path(args.input)

    if not input_path.exists():
        print(f"❌ 错误: 输入文件不存在: {args.input}")
        sys.exit(1)

    # Default output sits next to the input as <stem>_converted.json.
    output_path = (
        Path(args.output)
        if args.output
        else input_path.parent / f"{input_path.stem}_converted.json"
    )

    print("单文件处理模式")
    print(f"输入文件: {input_path}")
    print(f"输出文件: {output_path}")
    print()

    # Resolve the format key: auto-detect, or map the forced CLI name.
    if args.format == 'auto':
        print("正在检测文件格式...")
        format_type = detect_format(input_path)
    else:
        cli_to_key = {
            'claude': 'claude_jsonl',
            'codex': 'codex_jsonl',
            'gemini': 'gemini',
            'kilocode': 'kilocode',
            'opencode': 'opencode'
        }
        format_type = cli_to_key[args.format]
        print(f"使用指定格式: {args.format}")

    if format_type == 'unknown':
        print("❌ 错误: 无法识别的文件格式")
        print()
        print("支持的格式:")
        for supported in (
            " - Claude JSONL (*.jsonl)",
            " - Codex JSONL (*.jsonl)",
            " - Gemini JSON (*.json)",
            " - Kilocode JSON (*.json)",
            " - OpenCode JSON (*.json)",
        ):
            print(supported)
        print()
        print("提示: 使用 --format 参数强制指定格式")
        sys.exit(1)

    # Report the format in human-readable form.
    display_names = {
        'claude_jsonl': 'Claude JSONL',
        'codex_jsonl': 'Codex JSONL',
        'gemini': 'Gemini JSON',
        'kilocode': 'Kilocode JSON',
        'opencode': 'OpenCode JSON'
    }
    print(f"✅ 检测到格式: {display_names.get(format_type, format_type)}")
    print()

    # Convert.
    print("正在转换数据...")
    converters = {
        'claude_jsonl': convert_claude_jsonl,
        'codex_jsonl': convert_codex_jsonl,
        'gemini': convert_gemini,
        'kilocode': convert_kilocode,
        'opencode': convert_opencode
    }
    try:
        converter = converters.get(format_type)
        if converter is None:
            print(f"❌ 错误: 不支持的格式: {format_type}")
            sys.exit(1)
        result = converter(input_path)
    except Exception as e:
        print(f"❌ 错误: 转换失败: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # Write the output file.
    print("正在写入输出文件...")
    try:
        with open(output_path, 'w', encoding='utf-8') as sink:
            json.dump(result, sink, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"❌ 错误: 写入输出文件失败: {str(e)}")
        sys.exit(1)

    # Final report.
    print()
    print(rule)
    print("✅ 转换完成!")
    print(rule)
    print()

    if 'meta' in result and 'session_meta' in result['meta']:
        meta = result['meta']['session_meta']
        print("统计信息:")
        # (key, text before the value, text after the value); only keys
        # present in the metadata are printed.
        stat_lines = (
            ('message_count', " 总消息数: ", ""),
            ('user_messages', " 用户消息: ", ""),
            ('assistant_messages', " 助手消息: ", ""),
            ('created_at', " 开始时间: ", ""),
            ('last_updated_at', " 结束时间: ", ""),
            ('duration_seconds', " 会话时长: ", " 秒"),
        )
        for key, prefix, suffix in stat_lines:
            if key in meta:
                print(f"{prefix}{meta[key]}{suffix}")
        print()

    print("输出格式: 完整格式 (包含 meta)")
    size_kb = output_path.stat().st_size / 1024
    print(f"文件大小: {size_kb:.2f} KB")
    print(f"输出文件: {output_path}")
1835
+
1836
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()