ai-worklog 1.0.3 → 1.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-worklog",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "AI 对话工作日志自动收集工具 —— 从 Claude Code / Codex 对话记录生成每日工作日志并推送到 GitLab",
5
5
  "bin": {
6
6
  "ai-worklog": "./bin/index.js"
@@ -102,15 +102,17 @@ def utc_to_local_date(utc_ts: str) -> Optional[str]:
102
102
 
103
103
  # ─── Claude Code 数据收集 ─────────────────────────────────────────────────────
104
104
 
105
- def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
105
+ def collect_claude_sessions(target_date: str) -> dict[str, list[list[tuple[str, str]]]]:
106
106
  """
107
107
  遍历 ~/.claude/projects/ 下所有 jsonl,
108
- 按 message timestamp 过滤当天(UTC→本地),提取 user 消息,
108
+ 按 message timestamp 过滤当天(UTC→本地),
109
+ 每个 jsonl 文件作为一个对话,提取 (用户消息, Claude回复) 轮次对,
109
110
  按项目名(cwd basename)分组。
110
111
 
111
- 返回: {project_name: [message1, message2, ...]}
112
+ 返回: {project_name: [session1, session2, ...]}
113
+ session = [(user_msg, assistant_msg), ...]
112
114
  """
113
- results: dict[str, list[str]] = {}
115
+ results: dict[str, list[list[tuple[str, str]]]] = {}
114
116
 
115
117
  if not CLAUDE_PROJECTS_DIR.exists():
116
118
  return results
@@ -124,10 +126,46 @@ def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
124
126
  return results
125
127
 
126
128
 
129
+ def _extract_user_text(content) -> str:
130
+ """从 message.content 中提取用户文字(过滤 tool_result 等结构)"""
131
+ texts = []
132
+ if isinstance(content, str):
133
+ texts = [content]
134
+ elif isinstance(content, list):
135
+ for c in content:
136
+ if isinstance(c, dict) and c.get("type") == "text":
137
+ texts.append(c.get("text", ""))
138
+ elif isinstance(c, str):
139
+ texts.append(c)
140
+ return " ".join(t for t in texts if t).strip()
141
+
142
+
143
+ def _extract_assistant_text(content) -> str:
144
+ """从 assistant message.content 中提取最终文字回复,跳过 thinking 和 tool_use"""
145
+ if not isinstance(content, list):
146
+ return ""
147
+ parts = [
148
+ c.get("text", "").strip()
149
+ for c in content
150
+ if isinstance(c, dict) and c.get("type") == "text" and c.get("text", "").strip()
151
+ ]
152
+ return " ".join(parts)
153
+
154
+
127
155
  def _parse_claude_jsonl(
128
- jsonl_file: Path, target_date: str, results: dict[str, list[str]]
156
+ jsonl_file: Path, target_date: str, results: dict[str, list[list[tuple[str, str]]]]
129
157
  ) -> None:
130
- """解析单个 Claude Code jsonl 文件"""
158
+ """
159
+ 解析单个 Claude Code jsonl 文件。
160
+ 每个文件视为一个对话 session,提取当天的 (用户消息, Claude回复) 轮次对。
161
+ Claude 回复只取 text 块,跳过 thinking 和 tool_use。
162
+ """
163
+ session_turns: list[tuple[str, str]] = []
164
+ pending_user: str = ""
165
+ pending_user_date: str = ""
166
+ pending_assistant: str = ""
167
+ cwd: str = ""
168
+
131
169
  try:
132
170
  with open(jsonl_file, encoding="utf-8", errors="ignore") as f:
133
171
  for line in f:
@@ -139,34 +177,43 @@ def _parse_claude_jsonl(
139
177
  except json.JSONDecodeError:
140
178
  continue
141
179
 
142
- if entry.get("type") != "user":
143
- continue
180
+ if entry.get("cwd"):
181
+ cwd = entry["cwd"]
144
182
 
145
- # 按时间戳过滤日期
146
- ts = entry.get("timestamp", "")
147
- if ts and utc_to_local_date(ts) != target_date:
148
- continue
183
+ entry_type = entry.get("type", "")
149
184
 
150
- # 提取项目名
151
- cwd = entry.get("cwd", "")
152
- project_name = os.path.basename(cwd) if cwd else "unknown"
153
-
154
- # 提取消息内容
155
- message = entry.get("message", {})
156
- content = message.get("content", "")
157
- texts = []
158
- if isinstance(content, str):
159
- texts = [content]
160
- elif isinstance(content, list):
161
- for c in content:
162
- if isinstance(c, dict) and c.get("type") == "text":
163
- texts.append(c.get("text", ""))
164
- elif isinstance(c, str):
165
- texts.append(c)
166
-
167
- for text in texts:
168
- if text and not is_system_message(text):
169
- results.setdefault(project_name, []).append(text.strip())
185
+ if entry_type == "user":
186
+ user_text = _extract_user_text(entry.get("message", {}).get("content", ""))
187
+
188
+ if not user_text or is_system_message(user_text):
189
+ continue # tool_result 或系统消息,跳过
190
+
191
+ # 保存上一个 turn(日期匹配才入列)
192
+ if pending_user and pending_user_date == target_date:
193
+ session_turns.append((pending_user, pending_assistant))
194
+
195
+ # 开始新 turn
196
+ ts = entry.get("timestamp", "")
197
+ pending_user = user_text
198
+ pending_user_date = utc_to_local_date(ts) if ts else ""
199
+ pending_assistant = ""
200
+
201
+ elif entry_type == "assistant":
202
+ if not pending_user:
203
+ continue
204
+ assistant_text = _extract_assistant_text(
205
+ entry.get("message", {}).get("content", [])
206
+ )
207
+ if assistant_text:
208
+ pending_assistant = assistant_text # 用最新文字覆盖(多步工具调用后取最终回复)
209
+
210
+ # 最后一个 pending turn
211
+ if pending_user and pending_user_date == target_date:
212
+ session_turns.append((pending_user, pending_assistant))
213
+
214
+ if session_turns:
215
+ project_name = os.path.basename(cwd) if cwd else "unknown"
216
+ results.setdefault(project_name, []).append(session_turns)
170
217
 
171
218
  except Exception as e:
172
219
  print(f"警告: 解析 {jsonl_file} 失败: {e}", file=sys.stderr)
@@ -259,51 +306,68 @@ def _call_api_claude_cli(prompt: str) -> str:
259
306
 
260
307
  def generate_summary(
261
308
  target_date: str,
262
- claude_data: dict[str, list[str]],
309
+ claude_data: dict[str, list[list[tuple[str, str]]]],
263
310
  codex_data: dict[str, list[str]],
264
311
  ) -> str:
265
312
  """调用 Claude API 生成结构化工作日志"""
266
313
 
267
314
  # 合并数据(同项目名合并)
268
315
  all_projects: dict[str, dict] = {}
269
- for proj, msgs in claude_data.items():
270
- all_projects.setdefault(proj, {"claude": [], "codex": []})["claude"].extend(msgs)
316
+ for proj, sessions in claude_data.items():
317
+ all_projects.setdefault(proj, {"sessions": [], "codex": []})["sessions"].extend(sessions)
271
318
  for proj, msgs in codex_data.items():
272
- all_projects.setdefault(proj, {"claude": [], "codex": []})["codex"].extend(msgs)
319
+ all_projects.setdefault(proj, {"sessions": [], "codex": []})["codex"].extend(msgs)
273
320
 
274
- total_claude = sum(len(v["claude"]) for v in all_projects.values())
275
- total_codex = sum(len(v["codex"]) for v in all_projects.values())
321
+ total_claude = sum(
322
+ sum(len(turns) for turns in data["sessions"])
323
+ for data in all_projects.values()
324
+ )
325
+ total_codex = sum(len(data["codex"]) for data in all_projects.values())
276
326
  total_sessions = total_claude + total_codex
277
327
 
278
328
  if total_sessions == 0:
279
329
  return _generate_empty_log(target_date)
280
330
 
281
- # 构建给 AI 的原始数据(去重 + 截断,控制 token 消耗)
331
+ def trim(text: str, max_chars: int) -> str:
332
+ return text if len(text) <= max_chars else text[:max_chars] + "…"
333
+
282
334
  def dedup_and_trim(msgs: list[str], max_chars: int = 200) -> list[str]:
283
335
  seen: set[str] = set()
284
336
  result = []
285
337
  for m in msgs:
286
- key = m[:80] # 用前 80 字符去重
338
+ key = m[:80]
287
339
  if key in seen:
288
340
  continue
289
341
  seen.add(key)
290
- result.append(m if len(m) <= max_chars else m[:max_chars] + "…")
342
+ result.append(trim(m, max_chars))
291
343
  return result
292
344
 
293
345
  project_sections = []
294
346
  for proj_name, data in all_projects.items():
295
- msgs_parts = []
296
- if data["claude"]:
297
- trimmed = dedup_and_trim(data["claude"])
298
- msgs_parts.append(f"[Claude Code {len(data['claude'])} 条,去重后 {len(trimmed)} 条]")
299
- for i, m in enumerate(trimmed, 1):
300
- msgs_parts.append(f" {i}. {m}")
347
+ parts = []
348
+
349
+ # Claude 对话:按 session 分,每个 session 里是轮次对
350
+ for si, turns in enumerate(data["sessions"], 1):
351
+ parts.append(f" [对话 {si}]({len(turns)} 轮)")
352
+ for user_msg, asst_msg in turns:
353
+ parts.append(f" [用户] {trim(user_msg, 200)}")
354
+ if asst_msg:
355
+ parts.append(f" [Claude] {trim(asst_msg, 150)}")
356
+
357
+ # Codex 对话:没有回复,直接列用户消息
301
358
  if data["codex"]:
302
359
  trimmed = dedup_and_trim(data["codex"])
303
- msgs_parts.append(f"[Codex {len(data['codex'])} 条,去重后 {len(trimmed)} 条]")
304
- for i, m in enumerate(trimmed, 1):
305
- msgs_parts.append(f" {i}. {m}")
306
- project_sections.append(f"项目: {proj_name}\n" + "\n".join(msgs_parts))
360
+ parts.append(f" [Codex 对话]({len(data['codex'])} 条)")
361
+ for m in trimmed:
362
+ parts.append(f" [用户] {m}")
363
+
364
+ total_turns = sum(len(t) for t in data["sessions"])
365
+ header = (
366
+ f"项目: {proj_name}"
367
+ f"(Claude {len(data['sessions'])} 个对话 {total_turns} 轮,"
368
+ f"Codex {len(data['codex'])} 条)"
369
+ )
370
+ project_sections.append(header + "\n" + "\n".join(parts))
307
371
 
308
372
  raw_data = "\n\n".join(project_sections)
309
373
 
@@ -586,8 +650,11 @@ def main():
586
650
  # 1. 收集数据
587
651
  print("收集 Claude Code 对话记录...")
588
652
  claude_data = collect_claude_sessions(target_date)
589
- total_claude = sum(len(v) for v in claude_data.values())
590
- print(f" 找到 {len(claude_data)} 个项目,{total_claude} 条消息")
653
+ total_claude_turns = sum(
654
+ sum(len(turns) for turns in sessions) for sessions in claude_data.values()
655
+ )
656
+ total_claude_sessions = sum(len(sessions) for sessions in claude_data.values())
657
+ print(f" 找到 {len(claude_data)} 个项目,{total_claude_sessions} 个对话,{total_claude_turns} 轮交互")
591
658
 
592
659
  print("收集 Codex 对话记录...")
593
660
  codex_data = collect_codex_sessions(target_date)
@@ -596,10 +663,19 @@ def main():
596
663
 
597
664
  if args.dry_run:
598
665
  print("\n[dry-run] 数据预览:")
599
- for proj, msgs in {**claude_data, **codex_data}.items():
600
- print(f" [{proj}] {len(msgs)} 条")
666
+ for proj, sessions in claude_data.items():
667
+ total_turns = sum(len(t) for t in sessions)
668
+ print(f" [Claude/{proj}] {len(sessions)} 个对话,{total_turns} 轮")
669
+ for si, turns in enumerate(sessions[:2], 1):
670
+ print(f" 对话{si}: {len(turns)} 轮")
671
+ for user_msg, asst_msg in turns[:2]:
672
+ print(f" [用户] {user_msg[:60]}")
673
+ if asst_msg:
674
+ print(f" [Claude] {asst_msg[:60]}")
675
+ for proj, msgs in codex_data.items():
676
+ print(f" [Codex/{proj}] {len(msgs)} 条")
601
677
  for m in msgs[:2]:
602
- print(f" - {m[:80]}")
678
+ print(f" - {m[:60]}")
603
679
  return
604
680
 
605
681
  # 2. 生成摘要(通过 claude -p 复用 Claude Code CLI 认证)