@aiyiran/myclaw 1.1.128 → 1.1.130
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +10 -1
- package/injects/inject-claude.js +104 -0
- package/injects/inject-zai.js +16 -5
- package/package.json +1 -1
- package/skills/chat-history-extractor/SKILL.md +106 -55
- package/skills/chat-history-extractor/scripts/extract_chat.py +178 -47
package/index.js
CHANGED
|
@@ -1779,7 +1779,8 @@ function runInteractiveMenu() {
|
|
|
1779
1779
|
const INJECT_MENU = [
|
|
1780
1780
|
{ key: '1', cmd: 'inject-minimax', desc: '注入 MiniMax 模型配置' },
|
|
1781
1781
|
{ key: '2', cmd: 'inject-zai', desc: '注入智谱 GLM 模型配置' },
|
|
1782
|
-
{ key: '3', cmd: 'inject-
|
|
1782
|
+
{ key: '3', cmd: 'inject-claude', desc: '注入 B.AI 多模型配置 (Claude/GPT/Gemini)' },
|
|
1783
|
+
{ key: '4', cmd: 'inject-image', desc: '注入图像生成模型配置 (vveai)' },
|
|
1783
1784
|
{ key: '4', cmd: 'inject-search', desc: '注入 Tavily 搜索插件配置' },
|
|
1784
1785
|
{ key: '5', cmd: 'inject-token', desc: '设置 Gateway Token 为 aiyiran' },
|
|
1785
1786
|
{ key: '6', cmd: 'inject-workspaceAndSoul', desc: '替换默认 workspace 的 SOUL.md' },
|
|
@@ -1792,6 +1793,7 @@ function runInjectCommand(cmd, extraArgs) {
|
|
|
1792
1793
|
const modules = {
|
|
1793
1794
|
'inject-minimax': './inject-minimax',
|
|
1794
1795
|
'inject-zai': './inject-zai',
|
|
1796
|
+
'inject-claude': './inject-claude',
|
|
1795
1797
|
'inject-image': './inject-image',
|
|
1796
1798
|
'inject-search': './inject-search',
|
|
1797
1799
|
'inject-token': './inject-token',
|
|
@@ -2540,6 +2542,7 @@ function showHelp() {
|
|
|
2540
2542
|
console.log(' inject 交互选择要执行的注入脚本');
|
|
2541
2543
|
console.log(' inject-minimax 注入 MiniMax 模型配置 (可选: --key sk-xxx, -f 强制清理其他模型)');
|
|
2542
2544
|
console.log(' inject-zai 注入智谱 GLM 模型配置 (可选: --key xxx, -f 强制清理其他模型)');
|
|
2545
|
+
console.log(' inject-claude 注入 B.AI 多模型配置 (Claude/GPT/Gemini,仅追加)');
|
|
2543
2546
|
console.log(' inject-image 注入图像生成模型配置 (基于 vveai)');
|
|
2544
2547
|
console.log(' inject-token 设置 Gateway Token 为 aiyiran');
|
|
2545
2548
|
console.log(' inject-search 注入 Tavily 搜索插件配置');
|
|
@@ -2688,6 +2691,12 @@ if (!command) {
|
|
|
2688
2691
|
console.log('🔄 正在重启 Gateway 使配置生效...');
|
|
2689
2692
|
console.log('');
|
|
2690
2693
|
runRestart();
|
|
2694
|
+
} else if (command === 'inject-claude') {
|
|
2695
|
+
const claude = require('./injects/inject-claude');
|
|
2696
|
+
claude.run(args.slice(1));
|
|
2697
|
+
console.log('🔄 正在重启 Gateway 使配置生效...');
|
|
2698
|
+
console.log('');
|
|
2699
|
+
runRestart();
|
|
2691
2700
|
} else if (command === 'inject-image') {
|
|
2692
2701
|
const image = require('./injects/inject-image');
|
|
2693
2702
|
image.run(args.slice(1));
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* inject-claude.js
|
|
5
|
+
*
|
|
6
|
+
* 注入 B.AI 多模型配置到 openclaw.json(仅追加,不改默认)。
|
|
7
|
+
*
|
|
8
|
+
* 用法:
|
|
9
|
+
* myclaw inject-claude # 追加 bai provider + 白名单模型
|
|
10
|
+
* myclaw inject-claude --key xxx # 使用指定 API Key
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const { readConfig, writeConfig } = require('../find-config');
|
|
14
|
+
|
|
15
|
+
// Fallback B.AI API key; run() uses it when no `--key <value>` argument is supplied.
// NOTE(review): this is a credential literal committed to source — consider
// reading it from the environment or user config instead and rotating this key.
const DEFAULT_BAI_KEY = "sk-28zk9p6jcwz0u9etjwayual4rmoe4pms";
|
|
16
|
+
|
|
17
|
+
const PROVIDER_ID = "bai";
|
|
18
|
+
|
|
19
|
+
const BAI_MODELS = [
|
|
20
|
+
{
|
|
21
|
+
id: "gpt-5.4-mini",
|
|
22
|
+
name: "GPT-5.4 Mini",
|
|
23
|
+
input: ["text"],
|
|
24
|
+
reasoning: true,
|
|
25
|
+
contextWindow: 200000,
|
|
26
|
+
maxTokens: 131072
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
id: "claude-sonnet-4.6",
|
|
30
|
+
name: "Claude Sonnet 4.6",
|
|
31
|
+
input: ["text"],
|
|
32
|
+
reasoning: true,
|
|
33
|
+
contextWindow: 200000,
|
|
34
|
+
maxTokens: 131072
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
id: "gemini-3.1-pro",
|
|
38
|
+
name: "Gemini 3.1 Pro",
|
|
39
|
+
input: ["text"],
|
|
40
|
+
reasoning: true,
|
|
41
|
+
contextWindow: 200000,
|
|
42
|
+
maxTokens: 131072
|
|
43
|
+
}
|
|
44
|
+
];
|
|
45
|
+
|
|
46
|
+
/**
 * Inject the B.AI provider (PROVIDER_ID) and its models into openclaw.json.
 *
 * The provider entry and the whitelist entries for every model in BAI_MODELS
 * are (re)written; the default model and all other providers are not touched.
 *
 * @param {string[]} [cliArgs=[]] CLI arguments after the command name.
 *   Recognised: `--key <apiKey>`. Without it DEFAULT_BAI_KEY is used.
 * @returns {void} Terminates the process with exit code 1 when the config
 *   cannot be read or written.
 */
function run(cliArgs = []) {
  // The last `--key <value>` pair wins; a trailing `--key` without a value is ignored.
  let apiKey = null;
  for (let i = 0; i < cliArgs.length; i++) {
    if (cliArgs[i] === '--key' && cliArgs[i + 1]) {
      apiKey = cliArgs[i + 1];
      i++;
    }
  }

  if (!apiKey) {
    apiKey = DEFAULT_BAI_KEY;
    console.log('💡 未传入 --key,使用默认 API Key');
  }

  let config, configPath;
  try {
    ({ config, configPath } = readConfig());
  } catch (err) {
    console.error('❌ ' + err.message);
    process.exit(1);
  }

  console.log('📍 找到配置: ' + configPath);
  console.log('📌 模式: 追加');

  // ── Inject the bai provider ──
  console.log('');
  console.log('📝 注入 B.AI 配置...');

  if (!config.models) config.models = {};
  if (!config.models.mode) config.models.mode = "merge";
  if (!config.models.providers) config.models.providers = {};

  config.models.providers[PROVIDER_ID] = {
    baseUrl: "https://api.b.ai/v1/",
    apiKey: apiKey,
    api: "openai-completions",
    // Copy each model so the config object never aliases the module-level table.
    models: BAI_MODELS.map((model) => ({ ...model }))
  };

  // ── Whitelist (with alias) ──
  if (!config.agents) config.agents = {};
  if (!config.agents.defaults) config.agents.defaults = {};
  if (!config.agents.defaults.models) config.agents.defaults.models = {};

  // Derive the whitelist from BAI_MODELS so the two lists cannot drift apart.
  const modelIds = BAI_MODELS.map((model) => model.id);
  for (const id of modelIds) {
    config.agents.defaults.models[`${PROVIDER_ID}/${id}`] = { alias: id };
  }

  // Report write failures the same way read failures are reported above.
  try {
    writeConfig(config, configPath);
  } catch (err) {
    console.error('❌ ' + err.message);
    process.exit(1);
  }

  console.log('✅ B.AI 注入完成');
  console.log(' 模型: ' + modelIds.join(' / '));
  console.log(' 默认模型: (未修改)');
  console.log('');
}
|
|
103
|
+
|
|
104
|
+
module.exports = { run };
|
package/injects/inject-zai.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* 用法:
|
|
9
9
|
* myclaw inject-zai # 仅追加 zai provider,不改默认,副作用最小
|
|
10
|
-
* myclaw inject-zai --default # 追加 + 设为默认 + 全部对话迁移到 glm-5.
|
|
10
|
+
* myclaw inject-zai --default # 追加 + 设为默认 + 全部对话迁移到 glm-5.2
|
|
11
11
|
* myclaw inject-zai --only # 追加 + 设为默认 + 全部对话迁移 + 清掉所有其他 provider
|
|
12
12
|
* myclaw inject-zai --key xxx # 使用指定 API Key
|
|
13
13
|
*/
|
|
@@ -20,9 +20,18 @@ const os = require('os');
|
|
|
20
20
|
// NOTE(review): credential literal committed to source; presumably the fallback
// used when no `--key` is passed (usage is outside this view — confirm).
// Consider loading it from the environment or user config and rotating this key.
const DEFAULT_ZAI_KEY = "a7ae7806dea8406bb85eea5d859c00ad.pVnBuagTz6942JHp";
|
|
21
21
|
|
|
22
22
|
const PROVIDER_ID = "zai";
|
|
23
|
-
const DEFAULT_MODEL = "zai/glm-5.
|
|
23
|
+
const DEFAULT_MODEL = "zai/glm-5.2";
|
|
24
24
|
|
|
25
25
|
const ZAI_MODELS = [
|
|
26
|
+
{
|
|
27
|
+
id: "glm-5.2",
|
|
28
|
+
name: "GLM-5.2",
|
|
29
|
+
reasoning: true,
|
|
30
|
+
input: ["text"],
|
|
31
|
+
cost: { input: 1.2, output: 4, cacheRead: 0.24, cacheWrite: 0 },
|
|
32
|
+
contextWindow: 202800,
|
|
33
|
+
maxTokens: 131100
|
|
34
|
+
},
|
|
26
35
|
{
|
|
27
36
|
id: "glm-5.1",
|
|
28
37
|
name: "GLM-5.1",
|
|
@@ -230,8 +239,9 @@ function run(cliArgs) {
|
|
|
230
239
|
if (!config.agents.defaults) config.agents.defaults = {};
|
|
231
240
|
if (!config.agents.defaults.models) config.agents.defaults.models = {};
|
|
232
241
|
|
|
233
|
-
//
|
|
234
|
-
config.agents.defaults.models["zai/glm-5.
|
|
242
|
+
// 白名单:glm-5.2 + glm-5-turbo
|
|
243
|
+
config.agents.defaults.models["zai/glm-5.2"] = {};
|
|
244
|
+
config.agents.defaults.models["zai/glm-5-turbo"] = {};
|
|
235
245
|
|
|
236
246
|
// ── Step 3:--default 设默认模型 ──
|
|
237
247
|
if (setDefault) {
|
|
@@ -312,7 +322,8 @@ function run(cliArgs) {
|
|
|
312
322
|
const m = sessions[key]?.model;
|
|
313
323
|
if (!m) continue;
|
|
314
324
|
// 修复无前缀的旧格式残留
|
|
315
|
-
if (m === 'glm-5.1') { sessions[key].model = 'zai/glm-5.
|
|
325
|
+
if (m === 'glm-5.1') { sessions[key].model = 'zai/glm-5.2'; changed++; continue; }
|
|
326
|
+
if (m === 'zai/glm-5.1') { sessions[key].model = 'zai/glm-5.2'; changed++; continue; }
|
|
316
327
|
if (m === 'MiniMax-M2.7-highspeed') { sessions[key].model = 'minimax/MiniMax-M3'; changed++; continue; }
|
|
317
328
|
// 统一迁移到新默认
|
|
318
329
|
if (m !== DEFAULT_MODEL) { sessions[key].model = DEFAULT_MODEL; changed++; }
|
package/package.json
CHANGED
|
@@ -5,83 +5,134 @@ description: Extract and render chat history from OpenClaw session URLs. Use whe
|
|
|
5
5
|
|
|
6
6
|
# Chat History Extractor
|
|
7
7
|
|
|
8
|
-
##
|
|
8
|
+
## 支持的输入方式
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
- **单个 URL/key**:处理一个会话
|
|
11
|
+
- **逗号分隔**:`url1,url2,url3` 批量处理
|
|
12
|
+
- **JSON 数组**:`["url1","url2","url3"]` 批量处理
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
## 快速使用(推荐)
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
```bash
|
|
17
|
+
# 单个
|
|
18
|
+
python3 scripts/extract_chat.py "<url>" <output-dir>
|
|
16
19
|
|
|
17
|
-
|
|
20
|
+
# 批量(逗号分隔)
|
|
21
|
+
python3 scripts/extract_chat.py "url1,url2,url3" <output-dir>
|
|
18
22
|
|
|
19
|
-
|
|
23
|
+
# 批量(JSON 数组)
|
|
24
|
+
python3 scripts/extract_chat.py '["url1","url2","url3"]' <output-dir>
|
|
20
25
|
```
|
|
21
|
-
/root/.openclaw/agents/{agentId}/sessions/sessions.json
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
For session key `agent:c108-v1-1811:main`:
|
|
25
|
-
- Agent ID: `c108-v1-1811`
|
|
26
|
-
- Session file: `/root/.openclaw/agents/c108-v1-1811/sessions/sessions.json`
|
|
27
|
-
|
|
28
|
-
Look up the session key in `sessions.json` to find the `sessionFile` path (e.g., `5bee9664-7b72-4efd-8dc1-e8bf125c6b9c.jsonl`).
|
|
29
|
-
|
|
30
|
-
### Step 3: Parse JSONL and Build Conversation Pairs
|
|
31
26
|
|
|
32
|
-
|
|
27
|
+
## 输出文件
|
|
33
28
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
29
|
+
| 文件 | 说明 |
|
|
30
|
+
|---|---|
|
|
31
|
+
| `01-<session-name>.js` | 第 1 个会话的数据 |
|
|
32
|
+
| `02-<session-name>.js` | 第 2 个会话的数据 |
|
|
33
|
+
| `...` | |
|
|
34
|
+
| `index.js` | 所有会话的索引列表(仅批量模式生成) |
|
|
35
|
+
| `chat_history.js` | 向后兼容,指向第一个会话 |
|
|
38
36
|
|
|
39
|
-
|
|
40
|
-
- A user message pairs with the first assistant message that follows
|
|
41
|
-
- If the assistant sends multiple messages before the next user message, merge ALL of them into one AI reply
|
|
42
|
-
- AI messages with empty text content (just tool calls) should be skipped
|
|
43
|
-
- Use `toolResult` to skip over intermediate tool outputs
|
|
44
|
-
|
|
45
|
-
### Step 4: Generate JS Data File
|
|
46
|
-
|
|
47
|
-
Output a JS file with this structure:
|
|
37
|
+
每个 JS 文件结构:
|
|
48
38
|
|
|
49
39
|
```javascript
|
|
50
40
|
const chatData = {
|
|
51
41
|
"session": "session-name",
|
|
52
|
-
"session_id": "
|
|
42
|
+
"session_id": "agent:xxx:yyy",
|
|
53
43
|
"total_pairs": N,
|
|
54
|
-
"initiator": "
|
|
44
|
+
"initiator": "...",
|
|
55
45
|
"conversations": [
|
|
56
|
-
{
|
|
57
|
-
"user": "full user message text",
|
|
58
|
-
"user_time": "2026-05-14 18:11:56",
|
|
59
|
-
"ai": "full AI reply (merged if multiple messages)",
|
|
60
|
-
"ai_time": "2026-05-14 18:12:18"
|
|
61
|
-
},
|
|
46
|
+
{ "user": "...", "user_time": "...", "ai": "...", "ai_time": "..." },
|
|
62
47
|
...
|
|
63
48
|
]
|
|
64
49
|
};
|
|
65
50
|
```
|
|
66
51
|
|
|
67
|
-
|
|
52
|
+
`index.js` 结构:
|
|
53
|
+
|
|
54
|
+
```javascript
|
|
55
|
+
const chatIndex = [
|
|
56
|
+
{ "index": 1, "session": "...", "js_file": "01-xxx.js", "total_pairs": 10 },
|
|
57
|
+
{ "index": 2, "session": "...", "js_file": "02-yyy.js", "total_pairs": 8 },
|
|
58
|
+
];
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 工作流
|
|
62
|
+
|
|
63
|
+
### Step 1: 解析输入
|
|
64
|
+
|
|
65
|
+
从 URL 中提取 session key,或直接使用传入的 key。
|
|
66
|
+
|
|
67
|
+
URL 示例:`https://claw1.kekouen.cn/chat?session=agent%3Ac108-v1-1811%3Amain`
|
|
68
|
+
解码后:`agent:c108-v1-1811:main`
|
|
68
69
|
|
|
69
|
-
|
|
70
|
+
批量输入支持逗号分隔或 JSON 数组,脚本会自动识别。
|
|
70
71
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
- **间隔耗时** (gap to next user message)
|
|
77
|
-
- Rich visual styling with color-coded timing badges
|
|
72
|
+
### Step 2: 定位 JSONL 文件
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
/root/.openclaw/agents/{agentId}/sessions/sessions.json
|
|
76
|
+
```
|
|
78
77
|
|
|
79
|
-
|
|
78
|
+
用 session key 在 `sessions.json` 中查找 `sessionFile` 路径。
|
|
80
79
|
|
|
81
|
-
|
|
80
|
+
### Step 3: 解析 JSONL,构建对话对
|
|
81
|
+
|
|
82
|
+
1. 筛选 `type == "message"` 的事件
|
|
83
|
+
2. 每条用户消息与后续**所有连续的** AI 消息配对
|
|
84
|
+
3. 多条 AI 回复合并为一条
|
|
85
|
+
4. 跳过 `toolResult` 事件和空文本消息
|
|
86
|
+
|
|
87
|
+
### Step 4: 生成 JS 文件
|
|
88
|
+
|
|
89
|
+
批量模式下,每个会话生成独立的 `NN-<name>.js`,并额外生成 `index.js` 索引。
|
|
90
|
+
|
|
91
|
+
### Step 5: 渲染
|
|
92
|
+
|
|
93
|
+
将模板 `assets/chat-history-template.html` 复制到输出目录。
|
|
94
|
+
模板会加载 `chat_history.js`(单会话)或可通过 `index.js` 切换(多会话)。
|
|
95
|
+
|
|
96
|
+
模板展示三种时间指标:
|
|
97
|
+
- **课程流逝** — 距首条消息的时间
|
|
98
|
+
- **回复耗时** — AI 响应时间
|
|
99
|
+
- **间隔耗时** — 到下一条用户消息的间隔
|
|
100
|
+
|
|
101
|
+
## Agent 手动流程
|
|
102
|
+
|
|
103
|
+
如果由 agent 分步手动操作(而不是只执行上面的一条命令):
|
|
104
|
+
|
|
105
|
+
1. **收集**:解析 URL → 查 `sessions.json` → 复制 JSONL 到输出目录
|
|
106
|
+
2. **运行脚本**:`python3 scripts/extract_chat.py "<urls>" <output-dir>`
|
|
107
|
+
3. **渲染**:复制 HTML 模板到输出目录,用浏览器打开
|
|
108
|
+
|
|
109
|
+
## 示例
|
|
110
|
+
|
|
111
|
+
### 单个会话
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
python3 scripts/extract_chat.py "https://claw1.kekouen.cn/chat?session=agent%3Ac108-v1-1811%3Amain" ./output
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
输出:
|
|
118
|
+
- `01-main.js`
|
|
119
|
+
- `chat_history.js`(= 01-main.js 的副本)
|
|
120
|
+
|
|
121
|
+
### 批量处理
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
python3 scripts/extract_chat.py '[
|
|
125
|
+
"https://claw6.kekouen.cn/chat?session=agent%3Axuexiji%3Amain",
|
|
126
|
+
"https://claw6.kekouen.cn/chat?session=agent%3Ausa%3Amain",
|
|
127
|
+
"https://claw6.kekouen.cn/chat?session=agent%3Ac109-v3-1813%3A...",
|
|
128
|
+
"https://claw6.kekouen.cn/chat?session=agent%3Ac108-v1-1810%3A..."
|
|
129
|
+
]' ./output
|
|
130
|
+
```
|
|
82
131
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
132
|
+
输出:
|
|
133
|
+
- `01-main.js`
|
|
134
|
+
- `02-main.js`
|
|
135
|
+
- `03-每天一个小习惯·宠物养成_1813.js`
|
|
136
|
+
- `04-我的金币任务板_1810.js`
|
|
137
|
+
- `index.js`
|
|
138
|
+
- `chat_history.js`(= 01-main.js 的副本)
|
|
@@ -1,7 +1,21 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
"""
|
|
3
|
-
Extract chat history from OpenClaw session URL and generate JS data file.
|
|
4
|
-
|
|
3
|
+
Extract chat history from OpenClaw session URL(s) and generate JS data file(s).
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
# 单个会话
|
|
7
|
+
python3 extract_chat.py <session-url-or-key> [output-dir]
|
|
8
|
+
|
|
9
|
+
# 多个会话(逗号分隔)
|
|
10
|
+
python3 extract_chat.py "url1,url2,url3" [output-dir]
|
|
11
|
+
|
|
12
|
+
# 多个会话(JSON 数组)
|
|
13
|
+
python3 extract_chat.py '["url1","url2","url3"]' [output-dir]
|
|
14
|
+
|
|
15
|
+
输出:
|
|
16
|
+
- 每个会话生成独立的 <index>-<session-name>.js
|
|
17
|
+
- 成功处理多个会话时生成 index.js,包含所有会话的元信息列表
|
|
18
|
+
- 生成 chat_history.js(向后兼容,指向第一个会话)
|
|
5
19
|
"""
|
|
6
20
|
|
|
7
21
|
import json
|
|
@@ -13,6 +27,7 @@ from datetime import datetime, timezone, timedelta
|
|
|
13
27
|
|
|
14
28
|
tz_beijing = timezone(timedelta(hours=8))
|
|
15
29
|
|
|
30
|
+
|
|
16
31
|
def parse_time(ts_str):
|
|
17
32
|
"""Parse ISO timestamp string to Beijing time string."""
|
|
18
33
|
try:
|
|
@@ -24,6 +39,7 @@ def parse_time(ts_str):
|
|
|
24
39
|
except:
|
|
25
40
|
return None
|
|
26
41
|
|
|
42
|
+
|
|
27
43
|
def extract_session_key(url_or_key):
|
|
28
44
|
"""Extract session key from URL or return as-is if already a key."""
|
|
29
45
|
if 'session=' in url_or_key:
|
|
@@ -33,29 +49,56 @@ def extract_session_key(url_or_key):
|
|
|
33
49
|
return params['session'][0]
|
|
34
50
|
return url_or_key
|
|
35
51
|
|
|
52
|
+
|
|
53
|
+
def parse_input(input_str):
    """
    Split the raw CLI argument into a list of session URLs/keys.

    Accepted forms:
      * a JSON array of strings, e.g. '["url1", "url2"]'
        (non-string and blank items are dropped);
      * a comma-separated list, e.g. 'url1,url2';
      * a single URL or key.

    Args:
        input_str: The raw argument text.

    Returns:
        A list of stripped, non-empty entries in input order. Blank input
        yields an empty list.
    """
    text = input_str.strip()

    # Try JSON array first; fall back to comma splitting if it does not parse.
    if text.startswith('['):
        try:
            arr = json.loads(text)
        except json.JSONDecodeError:
            arr = None
        if isinstance(arr, list):
            return [item.strip() for item in arr if isinstance(item, str) and item.strip()]

    # Every raw comma is treated as a separator. Always return the cleaned
    # pieces so that a trailing comma ("url1,") or blank input never leaks
    # an unstripped or empty entry to the caller.
    return [piece.strip() for piece in text.split(',') if piece.strip()]
|
|
72
|
+
|
|
73
|
+
|
|
36
74
|
def find_session_file(session_key, agents_root='/root/.openclaw/agents'):
    """
    Find the JSONL file path for a given session key.

    The agent id is the second ':'-separated field of the key. The key is
    looked up in ``<agents_root>/<agent_id>/sessions/sessions.json`` and the
    entry's ``sessionFile`` value is returned; a relative value is resolved
    against that sessions directory.

    Args:
        session_key: Full session key, e.g. ``agent:<agent_id>:<name>``.
        agents_root: Directory holding the per-agent folders.

    Returns:
        The session file path, or None when the key has fewer than two
        fields, sessions.json is missing or unreadable, or the key has no
        usable ``sessionFile`` entry.
    """
    parts = session_key.split(':')
    if len(parts) < 2:
        return None
    agent_id = parts[1]
    sessions_dir = os.path.join(agents_root, agent_id, 'sessions')
    sessions_json_path = os.path.join(sessions_dir, 'sessions.json')

    if not os.path.exists(sessions_json_path):
        return None

    # Read as UTF-8 explicitly so non-ASCII session keys match regardless of
    # the process locale; a corrupt index is reported instead of raising.
    try:
        with open(sessions_json_path, 'r', encoding='utf-8') as f:
            sessions = json.load(f)
    except (OSError, ValueError) as exc:
        print(f"WARNING: could not read {sessions_json_path}: {exc}")
        return None

    entry = sessions.get(session_key) if isinstance(sessions, dict) else None
    if not isinstance(entry, dict):
        return None

    session_file = entry.get('sessionFile')
    if session_file and not os.path.isabs(session_file):
        session_file = os.path.join(sessions_dir, session_file)
    return session_file or None
|
|
54
96
|
|
|
97
|
+
|
|
55
98
|
def parse_jsonl_to_conversations(jsonl_path):
|
|
56
99
|
"""Parse JSONL file and build conversation pairs."""
|
|
57
100
|
messages = []
|
|
58
|
-
with open(jsonl_path, 'r') as f:
|
|
101
|
+
with open(jsonl_path, 'r', encoding='utf-8') as f:
|
|
59
102
|
for line in f:
|
|
60
103
|
line = line.strip()
|
|
61
104
|
if not line:
|
|
@@ -65,19 +108,21 @@ def parse_jsonl_to_conversations(jsonl_path):
|
|
|
65
108
|
role = obj['message']['role']
|
|
66
109
|
content = obj['message'].get('content', [])
|
|
67
110
|
timestamp = obj.get('timestamp', '')
|
|
68
|
-
|
|
111
|
+
|
|
69
112
|
if isinstance(content, list):
|
|
70
|
-
text = ''.join([
|
|
113
|
+
text = ''.join([
|
|
114
|
+
c.get('text', '') for c in content if c.get('type') == 'text'
|
|
115
|
+
])
|
|
71
116
|
else:
|
|
72
117
|
text = str(content)
|
|
73
|
-
|
|
118
|
+
|
|
74
119
|
messages.append({
|
|
75
120
|
'role': role,
|
|
76
121
|
'timestamp': timestamp,
|
|
77
122
|
'text': text.strip(),
|
|
78
123
|
'has_text': bool(text.strip())
|
|
79
124
|
})
|
|
80
|
-
|
|
125
|
+
|
|
81
126
|
# Build conversation pairs
|
|
82
127
|
conversations = []
|
|
83
128
|
i = 0
|
|
@@ -86,19 +131,17 @@ def parse_jsonl_to_conversations(jsonl_path):
|
|
|
86
131
|
if msg['role'] == 'user':
|
|
87
132
|
user_text = msg['text']
|
|
88
133
|
user_time = msg['timestamp']
|
|
89
|
-
|
|
90
|
-
# Collect ALL consecutive AI messages until next user
|
|
134
|
+
|
|
91
135
|
ai_messages = []
|
|
92
136
|
j = i + 1
|
|
93
137
|
while j < len(messages) and messages[j]['role'] != 'user':
|
|
94
138
|
if messages[j]['role'] == 'assistant':
|
|
95
139
|
ai_messages.append(messages[j])
|
|
96
140
|
j += 1
|
|
97
|
-
|
|
98
|
-
# Merge all AI messages with text
|
|
141
|
+
|
|
99
142
|
ai_text = '\n\n'.join([m['text'] for m in ai_messages if m['has_text']])
|
|
100
143
|
ai_time = ai_messages[-1]['timestamp'] if ai_messages else ''
|
|
101
|
-
|
|
144
|
+
|
|
102
145
|
conversations.append({
|
|
103
146
|
'user': user_text,
|
|
104
147
|
'user_time': parse_time(user_time) if user_time else '',
|
|
@@ -108,13 +151,22 @@ def parse_jsonl_to_conversations(jsonl_path):
|
|
|
108
151
|
i += 1
|
|
109
152
|
else:
|
|
110
153
|
i += 1
|
|
111
|
-
|
|
154
|
+
|
|
112
155
|
return conversations
|
|
113
156
|
|
|
157
|
+
|
|
158
|
+
def sanitize_filename(name):
    """
    Turn a session name into a string that is safe to use as a file name.

    Word characters, the listed CJK ranges and '-' are kept; every other
    character becomes '-'. Runs of '-' are collapsed and leading/trailing
    '-' are removed. Falls back to 'session' when nothing is left.
    """
    replaced = re.sub(r'[^\w\u4e00-\u9fff\u3400-\u4dbf-]', '-', name)
    collapsed = re.sub(r'-+', '-', replaced)
    trimmed = collapsed.strip('-')
    if not trimmed:
        return 'session'
    return trimmed
|
|
164
|
+
|
|
165
|
+
|
|
114
166
|
def generate_js(conversations, session_key, output_path):
|
|
115
167
|
"""Generate JS file from conversations."""
|
|
116
168
|
session_name = session_key.split(':')[-1] if ':' in session_key else session_key
|
|
117
|
-
|
|
169
|
+
|
|
118
170
|
output_data = {
|
|
119
171
|
'session': session_name,
|
|
120
172
|
'session_id': session_key,
|
|
@@ -122,48 +174,127 @@ def generate_js(conversations, session_key, output_path):
|
|
|
122
174
|
'initiator': 'session initiator',
|
|
123
175
|
'conversations': conversations
|
|
124
176
|
}
|
|
125
|
-
|
|
177
|
+
|
|
126
178
|
js_content = f'''// Chat History - {session_name}
|
|
127
179
|
// Session Key: {session_key}
|
|
128
180
|
// Generated by chat-history-extractor skill
|
|
129
181
|
|
|
130
182
|
const chatData = {json.dumps(output_data, ensure_ascii=False, indent=2)};
|
|
131
183
|
'''
|
|
132
|
-
|
|
184
|
+
|
|
133
185
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
134
186
|
f.write(js_content)
|
|
135
|
-
|
|
136
|
-
return len(conversations)
|
|
137
187
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
# Extract session key
|
|
188
|
+
return len(conversations), session_name
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def process_one(url_or_key, output_dir, index):
    """
    Process a single session URL/key and write its ``NN-<name>.js`` file.

    Args:
        url_or_key: Session URL or session key.
        output_dir: Directory the JS file is written to.
        index: 1-based position of this session in the batch; used as the
            zero-padded file name prefix and in log lines.

    Returns:
        A dict with ``index``, ``session_key``, ``session_name``, ``js_file``,
        ``total_pairs`` and ``jsonl_path``, or None on failure. Read, parse
        and write errors are reported and turned into None so one bad
        session does not abort the rest of a batch.
    """
    session_key = extract_session_key(url_or_key)
    session_short = sanitize_filename(session_key.split(':')[-1] if ':' in session_key else session_key)
    print(f"\n[{index}] Session key: {session_key}")

    try:
        jsonl_path = find_session_file(session_key)
    except (OSError, ValueError) as exc:
        print(f"[{index}] ERROR: Could not read session index for {session_key}: {exc}")
        return None
    if not jsonl_path:
        print(f"[{index}] ERROR: Could not find session file for {session_key}")
        return None

    if not os.path.exists(jsonl_path):
        print(f"[{index}] ERROR: JSONL file not found: {jsonl_path}")
        return None

    print(f"[{index}] JSONL: {jsonl_path}")
    try:
        conversations = parse_jsonl_to_conversations(jsonl_path)
    except (OSError, ValueError, KeyError) as exc:
        print(f"[{index}] ERROR: Failed to parse {jsonl_path}: {exc}")
        return None
    print(f"[{index}] Found {len(conversations)} conversation pairs")

    # Generate per-session JS with index prefix
    js_filename = f"{index:02d}-{session_short}.js"
    js_path = os.path.join(output_dir, js_filename)
    try:
        count, session_name = generate_js(conversations, session_key, js_path)
    except OSError as exc:
        print(f"[{index}] ERROR: Could not write {js_path}: {exc}")
        return None
    print(f"[{index}] Generated: {js_filename} ({count} pairs)")

    return {
        'index': index,
        'session_key': session_key,
        'session_name': session_name,
        'js_file': js_filename,
        'total_pairs': count,
        'jsonl_path': jsonl_path,
    }
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def generate_index_js(results, output_dir):
    """
    Write ``index.js`` into ``output_dir``, listing every processed session.

    Each entry of ``results`` contributes its index, session name, session
    key, JS file name and pair count to the ``chatIndex`` array.
    """
    sessions = [
        {
            'index': item['index'],
            'session': item['session_name'],
            'session_id': item['session_key'],
            'js_file': item['js_file'],
            'total_pairs': item['total_pairs'],
        }
        for item in results
    ]
    total = len(sessions)
    payload = json.dumps(sessions, ensure_ascii=False, indent=2)

    index_content = f'''// Chat History Index - {total} session(s)
// Generated by chat-history-extractor skill

const chatIndex = {payload};
'''
    index_path = os.path.join(output_dir, 'index.js')
    with open(index_path, 'w', encoding='utf-8') as out:
        out.write(index_content)
    print(f"\nIndex: {index_path} ({total} sessions)")
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def main():
    """
    CLI entry point.

    Reads the session list from argv[1] and the optional output directory
    from argv[2] (defaults to the current directory), processes every
    session, writes index.js when more than one session succeeded, copies
    the first successful session to chat_history.js, and prints a summary.
    Exits with status 1 on missing arguments or when no session succeeded.
    """
    argv = sys.argv
    if len(argv) < 2:
        usage_lines = (
            "Usage:",
            " python3 extract_chat.py <url-or-key> [output-dir]",
            " python3 extract_chat.py 'url1,url2,url3' [output-dir]",
            ' python3 extract_chat.py \'["url1","url2"]\' [output-dir]',
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    raw_input = argv[1]
    if len(argv) > 2:
        output_dir = argv[2]
    else:
        output_dir = os.getcwd()

    # Expand the raw argument into individual session URLs/keys.
    items = parse_input(raw_input)
    print(f"Input: {len(items)} session(s)")

    os.makedirs(output_dir, exist_ok=True)

    # Sessions are processed in input order; failed ones (falsy result) are dropped.
    outcomes = (
        process_one(item, output_dir, position)
        for position, item in enumerate(items, 1)
    )
    results = [outcome for outcome in outcomes if outcome]

    if not results:
        print("\nERROR: No sessions processed successfully.")
        sys.exit(1)

    # index.js is only written for multi-session runs.
    if len(results) > 1:
        generate_index_js(results, output_dir)

    # Backward compat: the first successful session is also available as chat_history.js.
    first = results[0]
    first_js = os.path.join(output_dir, first['js_file'])
    compat_js = os.path.join(output_dir, 'chat_history.js')
    if first_js != compat_js:
        import shutil
        shutil.copy2(first_js, compat_js)
        print(f"Compat: chat_history.js -> {first['js_file']}")

    # Summary
    print("\n" + "=" * 50)
    print(f"Done! {len(results)}/{len(items)} session(s) processed.")
    for entry in results:
        print(f" [{entry['index']}] {entry['session_name']} ({entry['total_pairs']} pairs) -> {entry['js_file']}")
|
|
297
|
+
|
|
167
298
|
|
|
168
299
|
if __name__ == '__main__':
|
|
169
300
|
main()
|