ai-worklog 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-worklog",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "AI 对话工作日志自动收集工具 —— 从 Claude Code / Codex 对话记录生成每日工作日志并推送到 GitLab",
5
5
  "bin": {
6
6
  "ai-worklog": "./bin/index.js"
@@ -102,32 +102,84 @@ def utc_to_local_date(utc_ts: str) -> Optional[str]:
102
102
 
103
103
  # ─── Claude Code 数据收集 ─────────────────────────────────────────────────────
104
104
 
105
- def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
105
+ def collect_claude_sessions(
106
+ target_date: str,
107
+ prev_file_counts: Optional[dict[str, int]] = None,
108
+ ) -> tuple[dict[str, list[list[tuple[str, str]]]], dict[str, int]]:
106
109
  """
107
- 遍历 ~/.claude/projects/ 下所有 jsonl
108
- 按 message timestamp 过滤当天(UTC→本地),提取 user 消息,
109
- 按项目名(cwd basename)分组。
110
+ 遍历 ~/.claude/projects/ 下所有 jsonl,提取当天的对话轮次对。
110
111
 
111
- 返回: {project_name: [message1, message2, ...]}
112
+ prev_file_counts: 上次生成时各文件已处理的轮次数。
113
+ 传入时只返回新增轮次(增量模式)。
114
+
115
+ 返回:
116
+ sessions: {project_name: [session, ...]},session = [(user, assistant), ...]
117
+ file_counts: {jsonl_file_path: 本次处理的总轮次数}(用于保存 checkpoint)
112
118
  """
113
- results: dict[str, list[str]] = {}
119
+ sessions: dict[str, list[list[tuple[str, str]]]] = {}
120
+ file_counts: dict[str, int] = {}
114
121
 
115
122
  if not CLAUDE_PROJECTS_DIR.exists():
116
- return results
123
+ return sessions, file_counts
117
124
 
118
125
  for project_dir in CLAUDE_PROJECTS_DIR.iterdir():
119
126
  if not project_dir.is_dir():
120
127
  continue
121
128
  for jsonl_file in project_dir.glob("*.jsonl"):
122
- _parse_claude_jsonl(jsonl_file, target_date, results)
129
+ turns, cwd = _parse_claude_jsonl(jsonl_file, target_date)
130
+ file_key = str(jsonl_file)
131
+ file_counts[file_key] = len(turns)
123
132
 
124
- return results
133
+ prev = (prev_file_counts or {}).get(file_key, 0)
134
+ new_turns = turns[prev:] # 增量模式下只取新增部分;首次 prev=0 取全部
135
+
136
+ if new_turns:
137
+ project_name = os.path.basename(cwd) if cwd else "unknown"
138
+ sessions.setdefault(project_name, []).append(new_turns)
139
+
140
+ return sessions, file_counts
141
+
142
+
143
+ def _extract_user_text(content) -> str:
144
+ """从 message.content 中提取用户文字(过滤 tool_result 等结构)"""
145
+ texts = []
146
+ if isinstance(content, str):
147
+ texts = [content]
148
+ elif isinstance(content, list):
149
+ for c in content:
150
+ if isinstance(c, dict) and c.get("type") == "text":
151
+ texts.append(c.get("text", ""))
152
+ elif isinstance(c, str):
153
+ texts.append(c)
154
+ return " ".join(t for t in texts if t).strip()
155
+
156
+
157
+ def _extract_assistant_text(content) -> str:
158
+ """从 assistant message.content 中提取最终文字回复,跳过 thinking 和 tool_use"""
159
+ if not isinstance(content, list):
160
+ return ""
161
+ parts = [
162
+ c.get("text", "").strip()
163
+ for c in content
164
+ if isinstance(c, dict) and c.get("type") == "text" and c.get("text", "").strip()
165
+ ]
166
+ return " ".join(parts)
125
167
 
126
168
 
127
169
  def _parse_claude_jsonl(
128
- jsonl_file: Path, target_date: str, results: dict[str, list[str]]
129
- ) -> None:
130
- """解析单个 Claude Code jsonl 文件"""
170
+ jsonl_file: Path, target_date: str
171
+ ) -> tuple[list[tuple[str, str]], str]:
172
+ """
173
+ 解析单个 Claude Code jsonl 文件。
174
+ 返回 (当天所有轮次对, cwd)。
175
+ Claude 回复只取 text 块,跳过 thinking 和 tool_use。
176
+ """
177
+ turns: list[tuple[str, str]] = []
178
+ pending_user: str = ""
179
+ pending_user_date: str = ""
180
+ pending_assistant: str = ""
181
+ cwd: str = ""
182
+
131
183
  try:
132
184
  with open(jsonl_file, encoding="utf-8", errors="ignore") as f:
133
185
  for line in f:
@@ -139,38 +191,45 @@ def _parse_claude_jsonl(
139
191
  except json.JSONDecodeError:
140
192
  continue
141
193
 
142
- if entry.get("type") != "user":
143
- continue
194
+ if entry.get("cwd"):
195
+ cwd = entry["cwd"]
144
196
 
145
- # 按时间戳过滤日期
146
- ts = entry.get("timestamp", "")
147
- if ts and utc_to_local_date(ts) != target_date:
148
- continue
197
+ entry_type = entry.get("type", "")
149
198
 
150
- # 提取项目名
151
- cwd = entry.get("cwd", "")
152
- project_name = os.path.basename(cwd) if cwd else "unknown"
199
+ if entry_type == "user":
200
+ user_text = _extract_user_text(entry.get("message", {}).get("content", ""))
153
201
 
154
- # 提取消息内容
155
- message = entry.get("message", {})
156
- content = message.get("content", "")
157
- texts = []
158
- if isinstance(content, str):
159
- texts = [content]
160
- elif isinstance(content, list):
161
- for c in content:
162
- if isinstance(c, dict) and c.get("type") == "text":
163
- texts.append(c.get("text", ""))
164
- elif isinstance(c, str):
165
- texts.append(c)
166
-
167
- for text in texts:
168
- if text and not is_system_message(text):
169
- results.setdefault(project_name, []).append(text.strip())
202
+ if not user_text or is_system_message(user_text):
203
+ continue # tool_result 或系统消息,跳过
204
+
205
+ # 保存上一个 turn(日期匹配才入列)
206
+ if pending_user and pending_user_date == target_date:
207
+ turns.append((pending_user, pending_assistant))
208
+
209
+ # 开始新 turn
210
+ ts = entry.get("timestamp", "")
211
+ pending_user = user_text
212
+ pending_user_date = utc_to_local_date(ts) if ts else ""
213
+ pending_assistant = ""
214
+
215
+ elif entry_type == "assistant":
216
+ if not pending_user:
217
+ continue
218
+ assistant_text = _extract_assistant_text(
219
+ entry.get("message", {}).get("content", [])
220
+ )
221
+ if assistant_text:
222
+ pending_assistant = assistant_text # 多步工具调用后取最终回复
223
+
224
+ # 最后一个 pending turn
225
+ if pending_user and pending_user_date == target_date:
226
+ turns.append((pending_user, pending_assistant))
170
227
 
171
228
  except Exception as e:
172
229
  print(f"警告: 解析 {jsonl_file} 失败: {e}", file=sys.stderr)
173
230
 
231
+ return turns, cwd
232
+
174
233
 
175
234
  # ─── Codex 数据收集 ───────────────────────────────────────────────────────────
176
235
 
@@ -259,51 +318,68 @@ def _call_api_claude_cli(prompt: str) -> str:
259
318
 
260
319
  def generate_summary(
261
320
  target_date: str,
262
- claude_data: dict[str, list[str]],
321
+ claude_data: dict[str, list[list[tuple[str, str]]]],
263
322
  codex_data: dict[str, list[str]],
264
323
  ) -> str:
265
324
  """调用 Claude API 生成结构化工作日志"""
266
325
 
267
326
  # 合并数据(同项目名合并)
268
327
  all_projects: dict[str, dict] = {}
269
- for proj, msgs in claude_data.items():
270
- all_projects.setdefault(proj, {"claude": [], "codex": []})["claude"].extend(msgs)
328
+ for proj, sessions in claude_data.items():
329
+ all_projects.setdefault(proj, {"sessions": [], "codex": []})["sessions"].extend(sessions)
271
330
  for proj, msgs in codex_data.items():
272
- all_projects.setdefault(proj, {"claude": [], "codex": []})["codex"].extend(msgs)
331
+ all_projects.setdefault(proj, {"sessions": [], "codex": []})["codex"].extend(msgs)
273
332
 
274
- total_claude = sum(len(v["claude"]) for v in all_projects.values())
275
- total_codex = sum(len(v["codex"]) for v in all_projects.values())
333
+ total_claude = sum(
334
+ sum(len(turns) for turns in data["sessions"])
335
+ for data in all_projects.values()
336
+ )
337
+ total_codex = sum(len(data["codex"]) for data in all_projects.values())
276
338
  total_sessions = total_claude + total_codex
277
339
 
278
340
  if total_sessions == 0:
279
341
  return _generate_empty_log(target_date)
280
342
 
281
- # 构建给 AI 的原始数据(去重 + 截断,控制 token 消耗)
343
+ def trim(text: str, max_chars: int) -> str:
344
+ return text if len(text) <= max_chars else text[:max_chars] + "…"
345
+
282
346
  def dedup_and_trim(msgs: list[str], max_chars: int = 200) -> list[str]:
283
347
  seen: set[str] = set()
284
348
  result = []
285
349
  for m in msgs:
286
- key = m[:80] # 用前 80 字符去重
350
+ key = m[:80]
287
351
  if key in seen:
288
352
  continue
289
353
  seen.add(key)
290
- result.append(m if len(m) <= max_chars else m[:max_chars] + "…")
354
+ result.append(trim(m, max_chars))
291
355
  return result
292
356
 
293
357
  project_sections = []
294
358
  for proj_name, data in all_projects.items():
295
- msgs_parts = []
296
- if data["claude"]:
297
- trimmed = dedup_and_trim(data["claude"])
298
- msgs_parts.append(f"[Claude Code {len(data['claude'])} 条,去重后 {len(trimmed)} 条]")
299
- for i, m in enumerate(trimmed, 1):
300
- msgs_parts.append(f" {i}. {m}")
359
+ parts = []
360
+
361
+ # Claude 对话:按 session 分,每个 session 里是轮次对
362
+ for si, turns in enumerate(data["sessions"], 1):
363
+ parts.append(f" [对话 {si}]({len(turns)} 轮)")
364
+ for user_msg, asst_msg in turns:
365
+ parts.append(f" [用户] {trim(user_msg, 200)}")
366
+ if asst_msg:
367
+ parts.append(f" [Claude] {trim(asst_msg, 150)}")
368
+
369
+ # Codex 对话:没有回复,直接列用户消息
301
370
  if data["codex"]:
302
371
  trimmed = dedup_and_trim(data["codex"])
303
- msgs_parts.append(f"[Codex {len(data['codex'])} 条,去重后 {len(trimmed)} 条]")
304
- for i, m in enumerate(trimmed, 1):
305
- msgs_parts.append(f" {i}. {m}")
306
- project_sections.append(f"项目: {proj_name}\n" + "\n".join(msgs_parts))
372
+ parts.append(f" [Codex 对话]({len(data['codex'])} 条)")
373
+ for m in trimmed:
374
+ parts.append(f" [用户] {m}")
375
+
376
+ total_turns = sum(len(t) for t in data["sessions"])
377
+ header = (
378
+ f"项目: {proj_name}"
379
+ f"(Claude {len(data['sessions'])} 个对话 {total_turns} 轮,"
380
+ f"Codex {len(data['codex'])} 条)"
381
+ )
382
+ project_sections.append(header + "\n" + "\n".join(parts))
307
383
 
308
384
  raw_data = "\n\n".join(project_sections)
309
385
 
@@ -395,6 +471,90 @@ def save_log(target_date: str, content: str) -> Path:
395
471
  return log_file
396
472
 
397
473
 
474
+ # ─── Checkpoint(增量更新支持)────────────────────────────────────────────────
475
+
476
def _checkpoint_path(target_date: str) -> Path:
    """Return the checkpoint file path for *target_date*.

    The file is ``LOGS_DIR/<year>/<target_date>.meta.json``, where ``<year>``
    is the first four characters of ``target_date``.
    """
    return LOGS_DIR / target_date[:4] / f"{target_date}.meta.json"


def load_checkpoint(target_date: str) -> Optional[dict]:
    """Load the checkpoint written by the previous run for *target_date*.

    Returns the parsed checkpoint dict, or ``None`` when the file is missing,
    unreadable, not valid JSON, or does not have the expected shape: a dict
    with a ``generated_at`` entry and a ``file_counts`` dict whose values are
    non-negative integers. Rejecting malformed content here means a damaged
    or hand-edited file is treated as "no checkpoint" rather than failing
    later when a caller indexes into it.
    """
    path = _checkpoint_path(target_date)
    try:
        # UTF-8 matches save_checkpoint; the payload is written with
        # ensure_ascii=False, so it may contain non-ASCII file paths.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None

    counts = data.get("file_counts") if isinstance(data, dict) else None
    if not isinstance(counts, dict) or "generated_at" not in data:
        return None
    if not all(isinstance(n, int) and n >= 0 for n in counts.values()):
        return None
    return data


def save_checkpoint(target_date: str, file_counts: dict[str, int]) -> None:
    """Persist the checkpoint for *target_date*.

    Writes a JSON object with ``generated_at`` (current local time, ISO
    format with second precision) and ``file_counts`` (the mapping passed
    in), creating the parent directory if needed.
    """
    path = _checkpoint_path(target_date)
    path.parent.mkdir(parents=True, exist_ok=True)
    data = {
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "file_counts": file_counts,
    }
    # Explicit UTF-8: with ensure_ascii=False the text may hold characters
    # that the platform's default encoding cannot represent.
    path.write_text(
        json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8"
    )
501
+
502
+
503
def generate_incremental_update(
    target_date: str,
    existing_log: str,
    new_claude_data: dict[str, list[list[tuple[str, str]]]],
    new_codex_data: dict[str, list[str]],
) -> str:
    """Ask the model to merge newly collected conversations into an existing log.

    Args:
        target_date: Date of the log being updated. Not used when building
            the prompt; kept for a signature parallel to the full generator.
        existing_log: The previously generated log text.
        new_claude_data: ``{project: [session, ...]}`` where each session is
            a list of ``(user, assistant)`` turn pairs added since last run.
        new_codex_data: ``{project: [user_message, ...]}`` of new Codex
            messages.

    Returns:
        The updated log returned by ``_call_api_claude_cli``. When there is
        nothing new to merge, or when the call raises, ``existing_log`` is
        returned unchanged.

    NOTE(review): a failed call is indistinguishable from "no change" for
    the caller; a caller that advances its checkpoint after this call would
    skip the unmerged turns on the next run — confirm that is acceptable.
    """

    def trim(text: str, max_chars: int) -> str:
        return text if len(text) <= max_chars else text[:max_chars] + "…"

    # Nothing new: skip the model round-trip and keep the log as it is.
    has_new_claude = any(
        turns for sessions in new_claude_data.values() for turns in sessions
    )
    has_new_codex = any(new_codex_data.values())
    if not (has_new_claude or has_new_codex):
        return existing_log

    project_sections = []
    for proj_name, sessions in new_claude_data.items():
        parts = []
        for si, turns in enumerate(sessions, 1):
            parts.append(f" [新增对话 {si}]({len(turns)} 轮)")
            for user_msg, asst_msg in turns:
                parts.append(f" [用户] {trim(user_msg, 200)}")
                if asst_msg:
                    parts.append(f" [Claude] {trim(asst_msg, 150)}")
        total_turns = sum(len(t) for t in sessions)
        project_sections.append(
            f"项目: {proj_name}(新增 {len(sessions)} 个对话 {total_turns} 轮)\n"
            + "\n".join(parts)
        )
    for proj_name, msgs in new_codex_data.items():
        parts = [f" [Codex] {trim(m, 200)}" for m in msgs]
        project_sections.append(
            f"项目: {proj_name}(Codex 新增 {len(msgs)} 条)\n" + "\n".join(parts)
        )

    new_data_text = "\n\n".join(project_sections)
    # Timestamp the model is told to place in the log footer.
    now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    prompt = f"""以下是今天已生成的工作日志:

{existing_log}

---
以下是在上次生成之后新增的 AI 对话记录:

{new_data_text}

请在已有日志基础上进行更新,要求:
1. 更新「今日概览」中的统计数字(累计到最新)
2. 在对应项目的「主要工作」中追加新工作内容(保留原有内容,不要重复)
3. 如有新项目,新增对应章节
4. 更新「今日总结」以反映全天整体工作
5. 将页脚的生成时间改为 {now_str}
6. 只输出完整的更新后日志,不要加任何额外说明"""

    try:
        return _call_api_claude_cli(prompt)
    except Exception as e:
        # Best effort: report the failure and keep the existing log rather
        # than losing it.
        print(f"API 请求失败: {e}", file=sys.stderr)
        return existing_log
556
+
557
+
398
558
  GITLAB_HOST = "gitcode.lingjingai.cn"
399
559
 
400
560
 
@@ -583,31 +743,60 @@ def main():
583
743
  target_date = parse_date_arg(args.date)
584
744
  print(f"=== 收集 {target_date} 的工作日志 ===")
585
745
 
586
- # 1. 收集数据
746
+ # 1. 检查是否存在 checkpoint(判断是否为增量更新)
747
+ checkpoint = load_checkpoint(target_date)
748
+ is_incremental = checkpoint is not None
749
+ prev_file_counts = checkpoint["file_counts"] if is_incremental else {}
750
+
751
+ if is_incremental:
752
+ print(f"检测到已有日志(生成于 {checkpoint['generated_at']}),进入增量更新模式")
753
+
754
+ # 2. 收集数据(增量模式只返回新增轮次)
587
755
  print("收集 Claude Code 对话记录...")
588
- claude_data = collect_claude_sessions(target_date)
589
- total_claude = sum(len(v) for v in claude_data.values())
590
- print(f" 找到 {len(claude_data)} 个项目,{total_claude} 条消息")
756
+ claude_data, file_counts = collect_claude_sessions(target_date, prev_file_counts if is_incremental else None)
757
+ total_claude_turns = sum(sum(len(t) for t in s) for s in claude_data.values())
758
+ total_claude_sessions = sum(len(s) for s in claude_data.values())
759
+ label = "新增" if is_incremental else "找到"
760
+ print(f" {label} {len(claude_data)} 个项目,{total_claude_sessions} 个对话,{total_claude_turns} 轮交互")
591
761
 
592
762
  print("收集 Codex 对话记录...")
593
763
  codex_data = collect_codex_sessions(target_date)
594
764
  total_codex = sum(len(v) for v in codex_data.values())
595
- print(f" 找到 {len(codex_data)} 个项目,{total_codex} 条消息")
765
+ print(f" {label} {len(codex_data)} 个项目,{total_codex} 条消息")
596
766
 
597
767
  if args.dry_run:
598
- print("\n[dry-run] 数据预览:")
599
- for proj, msgs in {**claude_data, **codex_data}.items():
600
- print(f" [{proj}] {len(msgs)} 条")
768
+ print(f"\n[dry-run] {'新增' if is_incremental else ''}数据预览:")
769
+ for proj, sessions in claude_data.items():
770
+ total_turns = sum(len(t) for t in sessions)
771
+ print(f" [Claude/{proj}] {len(sessions)} 个对话,{total_turns} 轮")
772
+ for si, turns in enumerate(sessions[:2], 1):
773
+ print(f" 对话{si}: {len(turns)} 轮")
774
+ for user_msg, asst_msg in turns[:2]:
775
+ print(f" [用户] {user_msg[:60]}")
776
+ if asst_msg:
777
+ print(f" [Claude] {asst_msg[:60]}")
778
+ for proj, msgs in codex_data.items():
779
+ print(f" [Codex/{proj}] {len(msgs)} 条")
601
780
  for m in msgs[:2]:
602
- print(f" - {m[:80]}")
781
+ print(f" - {m[:60]}")
603
782
  return
604
783
 
605
- # 2. 生成摘要(通过 claude -p 复用 Claude Code CLI 认证)
606
- print("调用 Claude API 生成摘要...")
607
- content = generate_summary(target_date, claude_data, codex_data)
784
+ # 3. 生成日志内容
785
+ if is_incremental:
786
+ if total_claude_turns == 0 and total_codex == 0:
787
+ print("没有新增内容,无需更新")
788
+ return
789
+ print("调用 Claude API 更新日志(增量)...")
790
+ log_file_path = LOGS_DIR / target_date[:4] / f"{target_date}.md"
791
+ existing_log = log_file_path.read_text(encoding="utf-8") if log_file_path.exists() else ""
792
+ content = generate_incremental_update(target_date, existing_log, claude_data, codex_data)
793
+ else:
794
+ print("调用 Claude API 生成摘要...")
795
+ content = generate_summary(target_date, claude_data, codex_data)
608
796
 
609
- # 4. 保存文件
797
+ # 4. 保存文件 + checkpoint
610
798
  log_file = save_log(target_date, content)
799
+ save_checkpoint(target_date, file_counts)
611
800
 
612
801
  # 5. Git 操作
613
802
  if not args.no_git: