ai-worklog 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-worklog",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "description": "AI 对话工作日志自动收集工具 —— 从 Claude Code / Codex 对话记录生成每日工作日志并推送到 GitLab",
5
5
  "bin": {
6
6
  "ai-worklog": "./bin/index.js"
@@ -102,15 +102,17 @@ def utc_to_local_date(utc_ts: str) -> Optional[str]:
102
102
 
103
103
  # ─── Claude Code 数据收集 ─────────────────────────────────────────────────────
104
104
 
105
- def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
105
+ def collect_claude_sessions(target_date: str) -> dict[str, list[list[tuple[str, str]]]]:
106
106
  """
107
107
  遍历 ~/.claude/projects/ 下所有 jsonl,
108
- 按 message timestamp 过滤当天(UTC→本地),提取 user 消息,
108
+ 按 message timestamp 过滤当天(UTC→本地),
109
+ 每个 jsonl 文件作为一个对话,提取 (用户消息, Claude回复) 轮次对,
109
110
  按项目名(cwd basename)分组。
110
111
 
111
- 返回: {project_name: [message1, message2, ...]}
112
+ 返回: {project_name: [session1, session2, ...]}
113
+ session = [(user_msg, assistant_msg), ...]
112
114
  """
113
- results: dict[str, list[str]] = {}
115
+ results: dict[str, list[list[tuple[str, str]]]] = {}
114
116
 
115
117
  if not CLAUDE_PROJECTS_DIR.exists():
116
118
  return results
@@ -124,10 +126,46 @@ def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
124
126
  return results
125
127
 
126
128
 
129
+ def _extract_user_text(content) -> str:
130
+ """从 message.content 中提取用户文字(过滤 tool_result 等结构)"""
131
+ texts = []
132
+ if isinstance(content, str):
133
+ texts = [content]
134
+ elif isinstance(content, list):
135
+ for c in content:
136
+ if isinstance(c, dict) and c.get("type") == "text":
137
+ texts.append(c.get("text", ""))
138
+ elif isinstance(c, str):
139
+ texts.append(c)
140
+ return " ".join(t for t in texts if t).strip()
141
+
142
+
143
+ def _extract_assistant_text(content) -> str:
144
+ """从 assistant message.content 中提取最终文字回复,跳过 thinking 和 tool_use"""
145
+ if not isinstance(content, list):
146
+ return ""
147
+ parts = [
148
+ c.get("text", "").strip()
149
+ for c in content
150
+ if isinstance(c, dict) and c.get("type") == "text" and c.get("text", "").strip()
151
+ ]
152
+ return " ".join(parts)
153
+
154
+
127
155
  def _parse_claude_jsonl(
128
- jsonl_file: Path, target_date: str, results: dict[str, list[str]]
156
+ jsonl_file: Path, target_date: str, results: dict[str, list[list[tuple[str, str]]]]
129
157
  ) -> None:
130
- """解析单个 Claude Code jsonl 文件"""
158
+ """
159
+ 解析单个 Claude Code jsonl 文件。
160
+ 每个文件视为一个对话 session,提取当天的 (用户消息, Claude回复) 轮次对。
161
+ Claude 回复只取 text 块,跳过 thinking 和 tool_use。
162
+ """
163
+ session_turns: list[tuple[str, str]] = []
164
+ pending_user: str = ""
165
+ pending_user_date: str = ""
166
+ pending_assistant: str = ""
167
+ cwd: str = ""
168
+
131
169
  try:
132
170
  with open(jsonl_file, encoding="utf-8", errors="ignore") as f:
133
171
  for line in f:
@@ -139,34 +177,43 @@ def _parse_claude_jsonl(
139
177
  except json.JSONDecodeError:
140
178
  continue
141
179
 
142
- if entry.get("type") != "user":
143
- continue
180
+ if entry.get("cwd"):
181
+ cwd = entry["cwd"]
144
182
 
145
- # 按时间戳过滤日期
146
- ts = entry.get("timestamp", "")
147
- if ts and utc_to_local_date(ts) != target_date:
148
- continue
183
+ entry_type = entry.get("type", "")
149
184
 
150
- # 提取项目名
151
- cwd = entry.get("cwd", "")
152
- project_name = os.path.basename(cwd) if cwd else "unknown"
153
-
154
- # 提取消息内容
155
- message = entry.get("message", {})
156
- content = message.get("content", "")
157
- texts = []
158
- if isinstance(content, str):
159
- texts = [content]
160
- elif isinstance(content, list):
161
- for c in content:
162
- if isinstance(c, dict) and c.get("type") == "text":
163
- texts.append(c.get("text", ""))
164
- elif isinstance(c, str):
165
- texts.append(c)
166
-
167
- for text in texts:
168
- if text and not is_system_message(text):
169
- results.setdefault(project_name, []).append(text.strip())
185
+ if entry_type == "user":
186
+ user_text = _extract_user_text(entry.get("message", {}).get("content", ""))
187
+
188
+ if not user_text or is_system_message(user_text):
189
+ continue # tool_result 或系统消息,跳过
190
+
191
+ # 保存上一个 turn(日期匹配才入列)
192
+ if pending_user and pending_user_date == target_date:
193
+ session_turns.append((pending_user, pending_assistant))
194
+
195
+ # 开始新 turn
196
+ ts = entry.get("timestamp", "")
197
+ pending_user = user_text
198
+ pending_user_date = utc_to_local_date(ts) if ts else ""
199
+ pending_assistant = ""
200
+
201
+ elif entry_type == "assistant":
202
+ if not pending_user:
203
+ continue
204
+ assistant_text = _extract_assistant_text(
205
+ entry.get("message", {}).get("content", [])
206
+ )
207
+ if assistant_text:
208
+ pending_assistant = assistant_text # 用最新文字覆盖(多步工具调用后取最终回复)
209
+
210
+ # 最后一个 pending turn
211
+ if pending_user and pending_user_date == target_date:
212
+ session_turns.append((pending_user, pending_assistant))
213
+
214
+ if session_turns:
215
+ project_name = os.path.basename(cwd) if cwd else "unknown"
216
+ results.setdefault(project_name, []).append(session_turns)
170
217
 
171
218
  except Exception as e:
172
219
  print(f"警告: 解析 {jsonl_file} 失败: {e}", file=sys.stderr)
@@ -259,41 +306,68 @@ def _call_api_claude_cli(prompt: str) -> str:
259
306
 
260
307
  def generate_summary(
261
308
  target_date: str,
262
- claude_data: dict[str, list[str]],
309
+ claude_data: dict[str, list[list[tuple[str, str]]]],
263
310
  codex_data: dict[str, list[str]],
264
311
  ) -> str:
265
312
  """调用 Claude API 生成结构化工作日志"""
266
313
 
267
314
  # 合并数据(同项目名合并)
268
315
  all_projects: dict[str, dict] = {}
269
- for proj, msgs in claude_data.items():
270
- all_projects.setdefault(proj, {"claude": [], "codex": []})["claude"].extend(msgs)
316
+ for proj, sessions in claude_data.items():
317
+ all_projects.setdefault(proj, {"sessions": [], "codex": []})["sessions"].extend(sessions)
271
318
  for proj, msgs in codex_data.items():
272
- all_projects.setdefault(proj, {"claude": [], "codex": []})["codex"].extend(msgs)
319
+ all_projects.setdefault(proj, {"sessions": [], "codex": []})["codex"].extend(msgs)
273
320
 
274
- total_claude = sum(len(v["claude"]) for v in all_projects.values())
275
- total_codex = sum(len(v["codex"]) for v in all_projects.values())
321
+ total_claude = sum(
322
+ sum(len(turns) for turns in data["sessions"])
323
+ for data in all_projects.values()
324
+ )
325
+ total_codex = sum(len(data["codex"]) for data in all_projects.values())
276
326
  total_sessions = total_claude + total_codex
277
327
 
278
328
  if total_sessions == 0:
279
329
  return _generate_empty_log(target_date)
280
330
 
281
- # 构建给 AI 的原始数据
331
+ def trim(text: str, max_chars: int) -> str:
332
+ return text if len(text) <= max_chars else text[:max_chars] + "…"
333
+
334
+ def dedup_and_trim(msgs: list[str], max_chars: int = 200) -> list[str]:
335
+ seen: set[str] = set()
336
+ result = []
337
+ for m in msgs:
338
+ key = m[:80]
339
+ if key in seen:
340
+ continue
341
+ seen.add(key)
342
+ result.append(trim(m, max_chars))
343
+ return result
344
+
282
345
  project_sections = []
283
346
  for proj_name, data in all_projects.items():
284
- msgs_parts = []
285
- if data["claude"]:
286
- msgs_parts.append(f"[Claude Code 对话 {len(data['claude'])} 条]")
287
- for i, m in enumerate(data["claude"], 1):
288
- # 截断过长消息
289
- msg = m if len(m) <= 500 else m[:500] + "..."
290
- msgs_parts.append(f" {i}. {msg}")
347
+ parts = []
348
+
349
+ # Claude 对话:按 session 分,每个 session 里是轮次对
350
+ for si, turns in enumerate(data["sessions"], 1):
351
+ parts.append(f" [对话 {si}]({len(turns)} 轮)")
352
+ for user_msg, asst_msg in turns:
353
+ parts.append(f" [用户] {trim(user_msg, 200)}")
354
+ if asst_msg:
355
+ parts.append(f" [Claude] {trim(asst_msg, 150)}")
356
+
357
+ # Codex 对话:没有回复,直接列用户消息
291
358
  if data["codex"]:
292
- msgs_parts.append(f"[Codex 对话 {len(data['codex'])} 条]")
293
- for i, m in enumerate(data["codex"], 1):
294
- msg = m if len(m) <= 500 else m[:500] + "..."
295
- msgs_parts.append(f" {i}. {msg}")
296
- project_sections.append(f"项目: {proj_name}\n" + "\n".join(msgs_parts))
359
+ trimmed = dedup_and_trim(data["codex"])
360
+ parts.append(f" [Codex 对话]({len(data['codex'])} 条)")
361
+ for m in trimmed:
362
+ parts.append(f" [用户] {m}")
363
+
364
+ total_turns = sum(len(t) for t in data["sessions"])
365
+ header = (
366
+ f"项目: {proj_name}"
367
+ f"(Claude {len(data['sessions'])} 个对话 {total_turns} 轮,"
368
+ f"Codex {len(data['codex'])} 条)"
369
+ )
370
+ project_sections.append(header + "\n" + "\n".join(parts))
297
371
 
298
372
  raw_data = "\n\n".join(project_sections)
299
373
 
@@ -462,7 +536,9 @@ def git_commit_and_push(log_file: Path, target_date: str, push: bool = True) ->
462
536
  """全自动 git init → GitLab 项目创建 → commit → push"""
463
537
 
464
538
  def run(cmd: list[str], check_err: bool = True) -> subprocess.CompletedProcess:
465
- r = subprocess.run(cmd, cwd=REPO_DIR, capture_output=True, text=True)
539
+ env = os.environ.copy()
540
+ env["LC_ALL"] = "C" # 强制英文输出,保证字符串匹配不受本地化影响
541
+ r = subprocess.run(cmd, cwd=REPO_DIR, capture_output=True, text=True, env=env)
466
542
  if check_err and r.returncode != 0:
467
543
  print(f"命令失败: {' '.join(cmd)}\n{r.stderr.strip()}", file=sys.stderr)
468
544
  return r
@@ -496,7 +572,7 @@ def git_commit_and_push(log_file: Path, target_date: str, push: bool = True) ->
496
572
  run(["git", "add", "-A"])
497
573
  run(["git", "commit", "-m", "init: 初始化工作日志仓库"])
498
574
 
499
- # ── 4. 提交日志 ────────────────────────────────────────────────────────────
575
+ # ── 4. 提交日志 ───────────────────────────────────────────────────────────
500
576
  rel_path = log_file.relative_to(REPO_DIR)
501
577
  print(f"Git: 添加 {rel_path}")
502
578
  if run(["git", "add", str(rel_path)]).returncode != 0:
@@ -574,8 +650,11 @@ def main():
574
650
  # 1. 收集数据
575
651
  print("收集 Claude Code 对话记录...")
576
652
  claude_data = collect_claude_sessions(target_date)
577
- total_claude = sum(len(v) for v in claude_data.values())
578
- print(f" 找到 {len(claude_data)} 个项目,{total_claude} 条消息")
653
+ total_claude_turns = sum(
654
+ sum(len(turns) for turns in sessions) for sessions in claude_data.values()
655
+ )
656
+ total_claude_sessions = sum(len(sessions) for sessions in claude_data.values())
657
+ print(f" 找到 {len(claude_data)} 个项目,{total_claude_sessions} 个对话,{total_claude_turns} 轮交互")
579
658
 
580
659
  print("收集 Codex 对话记录...")
581
660
  codex_data = collect_codex_sessions(target_date)
@@ -584,10 +663,19 @@ def main():
584
663
 
585
664
  if args.dry_run:
586
665
  print("\n[dry-run] 数据预览:")
587
- for proj, msgs in {**claude_data, **codex_data}.items():
588
- print(f" [{proj}] {len(msgs)} 条")
666
+ for proj, sessions in claude_data.items():
667
+ total_turns = sum(len(t) for t in sessions)
668
+ print(f" [Claude/{proj}] {len(sessions)} 个对话,{total_turns} 轮")
669
+ for si, turns in enumerate(sessions[:2], 1):
670
+ print(f" 对话{si}: {len(turns)} 轮")
671
+ for user_msg, asst_msg in turns[:2]:
672
+ print(f" [用户] {user_msg[:60]}")
673
+ if asst_msg:
674
+ print(f" [Claude] {asst_msg[:60]}")
675
+ for proj, msgs in codex_data.items():
676
+ print(f" [Codex/{proj}] {len(msgs)} 条")
589
677
  for m in msgs[:2]:
590
- print(f" - {m[:80]}")
678
+ print(f" - {m[:60]}")
591
679
  return
592
680
 
593
681
  # 2. 生成摘要(通过 claude -p 复用 Claude Code CLI 认证)