ai-worklog 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-worklog",
3
- "version": "1.0.4",
3
+ "version": "1.0.5",
4
4
  "description": "AI 对话工作日志自动收集工具 —— 从 Claude Code / Codex 对话记录生成每日工作日志并推送到 GitLab",
5
5
  "bin": {
6
6
  "ai-worklog": "./bin/index.js"
@@ -102,28 +102,42 @@ def utc_to_local_date(utc_ts: str) -> Optional[str]:
102
102
 
103
103
  # ─── Claude Code 数据收集 ─────────────────────────────────────────────────────
104
104
 
105
def collect_claude_sessions(
    target_date: str,
    prev_file_counts: Optional[dict[str, int]] = None,
) -> tuple[dict[str, list[list[tuple[str, str]]]], dict[str, int]]:
    """
    Walk every ``*.jsonl`` file under each sub-directory of
    ``CLAUDE_PROJECTS_DIR`` and collect the (user, assistant) turn pairs that
    ``_parse_claude_jsonl`` returns for ``target_date``.

    Args:
        target_date: Date string forwarded unchanged to ``_parse_claude_jsonl``.
        prev_file_counts: Number of turns already processed per jsonl file
            during the previous run (incremental mode).  When given, only the
            turns after that offset are returned.  Entries that are not
            non-negative integers are treated as 0, because the mapping is
            loaded from a checkpoint file on disk and may be corrupted.

    Returns:
        ``(sessions, file_counts)`` where

        * ``sessions`` maps a project name (basename of the session's cwd, or
          ``"unknown"``) to a list of sessions; each session is a list of
          ``(user, assistant)`` tuples.  Files without new turns are omitted.
        * ``file_counts`` maps ``str(jsonl_path)`` to the total number of turns
          found for ``target_date`` in this run (used to save the checkpoint).
    """
    sessions: dict[str, list[list[tuple[str, str]]]] = {}
    file_counts: dict[str, int] = {}

    if not CLAUDE_PROJECTS_DIR.exists():
        return sessions, file_counts

    # Hoisted out of the loop; anything that is not a mapping is ignored.
    known_counts = prev_file_counts if isinstance(prev_file_counts, dict) else {}

    for project_dir in CLAUDE_PROJECTS_DIR.iterdir():
        if not project_dir.is_dir():
            continue
        for jsonl_file in project_dir.glob("*.jsonl"):
            turns, cwd = _parse_claude_jsonl(jsonl_file, target_date)
            file_key = str(jsonl_file)
            file_counts[file_key] = len(turns)

            prev = known_counts.get(file_key, 0)
            # A negative offset would slice from the END of the list and a
            # non-int would raise TypeError; fall back to "nothing processed".
            if isinstance(prev, bool) or not isinstance(prev, int) or prev < 0:
                prev = 0

            # Incremental mode keeps only the tail; on the first run prev == 0
            # so every turn is kept.
            new_turns = turns[prev:]
            if not new_turns:
                continue

            project_name = os.path.basename(cwd) if cwd else "unknown"
            sessions.setdefault(project_name, []).append(new_turns)

    return sessions, file_counts
127
141
 
128
142
 
129
143
  def _extract_user_text(content) -> str:
@@ -153,14 +167,14 @@ def _extract_assistant_text(content) -> str:
153
167
 
154
168
 
155
169
  def _parse_claude_jsonl(
156
- jsonl_file: Path, target_date: str, results: dict[str, list[list[tuple[str, str]]]]
157
- ) -> None:
170
+ jsonl_file: Path, target_date: str
171
+ ) -> tuple[list[tuple[str, str]], str]:
158
172
  """
159
173
  解析单个 Claude Code jsonl 文件。
160
- 每个文件视为一个对话 session,提取当天的 (用户消息, Claude回复) 轮次对。
174
+ 返回 (当天所有轮次对, cwd)
161
175
  Claude 回复只取 text 块,跳过 thinking 和 tool_use。
162
176
  """
163
- session_turns: list[tuple[str, str]] = []
177
+ turns: list[tuple[str, str]] = []
164
178
  pending_user: str = ""
165
179
  pending_user_date: str = ""
166
180
  pending_assistant: str = ""
@@ -190,7 +204,7 @@ def _parse_claude_jsonl(
190
204
 
191
205
  # 保存上一个 turn(日期匹配才入列)
192
206
  if pending_user and pending_user_date == target_date:
193
- session_turns.append((pending_user, pending_assistant))
207
+ turns.append((pending_user, pending_assistant))
194
208
 
195
209
  # 开始新 turn
196
210
  ts = entry.get("timestamp", "")
@@ -205,19 +219,17 @@ def _parse_claude_jsonl(
205
219
  entry.get("message", {}).get("content", [])
206
220
  )
207
221
  if assistant_text:
208
- pending_assistant = assistant_text # 用最新文字覆盖(多步工具调用后取最终回复)
222
+ pending_assistant = assistant_text # 多步工具调用后取最终回复
209
223
 
210
224
  # 最后一个 pending turn
211
225
  if pending_user and pending_user_date == target_date:
212
- session_turns.append((pending_user, pending_assistant))
213
-
214
- if session_turns:
215
- project_name = os.path.basename(cwd) if cwd else "unknown"
216
- results.setdefault(project_name, []).append(session_turns)
226
+ turns.append((pending_user, pending_assistant))
217
227
 
218
228
  except Exception as e:
219
229
  print(f"警告: 解析 {jsonl_file} 失败: {e}", file=sys.stderr)
220
230
 
231
+ return turns, cwd
232
+
221
233
 
222
234
  # ─── Codex 数据收集 ───────────────────────────────────────────────────────────
223
235
 
@@ -459,6 +471,90 @@ def save_log(target_date: str, content: str) -> Path:
459
471
  return log_file
460
472
 
461
473
 
474
+ # ─── Checkpoint(增量更新支持)────────────────────────────────────────────────
475
+
476
def _checkpoint_path(target_date: str) -> Path:
    """Return the checkpoint file path for ``target_date``.

    The file is ``LOGS_DIR/<first four characters of target_date>/<target_date>.meta.json``.
    """
    year = target_date[:4]
    return LOGS_DIR / year / f"{target_date}.meta.json"


def load_checkpoint(target_date: str) -> Optional[dict]:
    """Read the checkpoint written by the previous run for ``target_date``.

    Returns:
        The checkpoint dict, guaranteed to contain a dict under
        ``"file_counts"`` and some value under ``"generated_at"``
        (``"unknown"`` when the file did not record one), or ``None`` when the
        file is missing, unreadable, not valid JSON, or not shaped like a
        checkpoint.
    """
    path = _checkpoint_path(target_date)
    try:
        # Explicit UTF-8: the file is written with ensure_ascii=False, so it
        # may contain non-ASCII file paths that a locale default encoding
        # cannot decode.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None

    # Valid JSON is not necessarily a valid checkpoint; reject anything the
    # caller could not index safely.
    if not isinstance(data, dict) or not isinstance(data.get("file_counts"), dict):
        return None

    data.setdefault("generated_at", "unknown")
    return data


def save_checkpoint(target_date: str, file_counts: dict[str, int]) -> None:
    """Persist the checkpoint for ``target_date``.

    Args:
        target_date: Date string used to derive the checkpoint path.
        file_counts: Number of turns processed per jsonl file in this run.

    The parent directory is created if needed.  The file is written as UTF-8
    JSON containing ``generated_at`` (local time, second precision) and
    ``file_counts``.
    """
    path = _checkpoint_path(target_date)
    path.parent.mkdir(parents=True, exist_ok=True)
    data = {
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "file_counts": file_counts,
    }
    # ensure_ascii=False keeps non-ASCII paths readable, which requires an
    # encoding that can represent them regardless of the system locale.
    path.write_text(
        json.dumps(data, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
501
+
502
+
503
def generate_incremental_update(
    target_date: str,
    existing_log: str,
    new_claude_data: dict[str, list[list[tuple[str, str]]]],
    new_codex_data: dict[str, list[str]],
) -> str:
    """Ask the AI to merge newly collected conversations into an existing log.

    Args:
        target_date: Date of the log.  Not used when building the prompt; kept
            for interface symmetry with the caller.
        existing_log: Full text of the log generated earlier.
        new_claude_data: ``{project: [session, ...]}`` where each session is a
            list of ``(user, assistant)`` tuples added since the last run.
        new_codex_data: ``{project: [message, ...]}`` of Codex messages.

    Returns:
        The updated log text returned by ``_call_api_claude_cli``.  When the
        call raises, or returns something that is not a non-blank string,
        ``existing_log`` is returned unchanged so the caller never replaces a
        good log with an empty one.
    """

    def trim(text: str, max_chars: int) -> str:
        # Keep the prompt bounded: long messages are cut and marked with "…".
        return text if len(text) <= max_chars else text[:max_chars] + "…"

    project_sections = []
    for proj_name, sessions in new_claude_data.items():
        parts = []
        for si, turns in enumerate(sessions, 1):
            parts.append(f" [新增对话 {si}]({len(turns)} 轮)")
            for user_msg, asst_msg in turns:
                parts.append(f" [用户] {trim(user_msg, 200)}")
                if asst_msg:
                    parts.append(f" [Claude] {trim(asst_msg, 150)}")
        total_turns = sum(len(t) for t in sessions)
        project_sections.append(
            f"项目: {proj_name}(新增 {len(sessions)} 个对话 {total_turns} 轮)\n" + "\n".join(parts)
        )
    for proj_name, msgs in new_codex_data.items():
        parts = [f" [Codex] {trim(m, 200)}" for m in msgs]
        project_sections.append(f"项目: {proj_name}(Codex 新增 {len(msgs)} 条)\n" + "\n".join(parts))

    new_data_text = "\n\n".join(project_sections)
    now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    prompt = f"""以下是今天已生成的工作日志:

{existing_log}

---
以下是在上次生成之后新增的 AI 对话记录:

{new_data_text}

请在已有日志基础上进行更新,要求:
1. 更新「今日概览」中的统计数字(累计到最新)
2. 在对应项目的「主要工作」中追加新工作内容(保留原有内容,不要重复)
3. 如有新项目,新增对应章节
4. 更新「今日总结」以反映全天整体工作
5. 将页脚的生成时间改为 {now_str}
6. 只输出完整的更新后日志,不要加任何额外说明"""

    try:
        updated = _call_api_claude_cli(prompt)
    except Exception as e:
        print(f"API 请求失败: {e}", file=sys.stderr)
        return existing_log  # keep the previous log when the call fails

    # A blank reply would otherwise be saved over the existing log.
    if not isinstance(updated, str) or not updated.strip():
        print("API 返回内容为空,保留原有日志", file=sys.stderr)
        return existing_log

    return updated
556
+
557
+
462
558
  GITLAB_HOST = "gitcode.lingjingai.cn"
463
559
 
464
560
 
@@ -647,22 +743,29 @@ def main():
647
743
  target_date = parse_date_arg(args.date)
648
744
  print(f"=== 收集 {target_date} 的工作日志 ===")
649
745
 
650
- # 1. 收集数据
746
+ # 1. 检查是否存在 checkpoint(判断是否为增量更新)
747
+ checkpoint = load_checkpoint(target_date)
748
+ is_incremental = checkpoint is not None
749
+ prev_file_counts = checkpoint["file_counts"] if is_incremental else {}
750
+
751
+ if is_incremental:
752
+ print(f"检测到已有日志(生成于 {checkpoint['generated_at']}),进入增量更新模式")
753
+
754
+ # 2. 收集数据(增量模式只返回新增轮次)
651
755
  print("收集 Claude Code 对话记录...")
652
- claude_data = collect_claude_sessions(target_date)
653
- total_claude_turns = sum(
654
- sum(len(turns) for turns in sessions) for sessions in claude_data.values()
655
- )
656
- total_claude_sessions = sum(len(sessions) for sessions in claude_data.values())
657
- print(f" 找到 {len(claude_data)} 个项目,{total_claude_sessions} 个对话,{total_claude_turns} 轮交互")
756
+ claude_data, file_counts = collect_claude_sessions(target_date, prev_file_counts if is_incremental else None)
757
+ total_claude_turns = sum(sum(len(t) for t in s) for s in claude_data.values())
758
+ total_claude_sessions = sum(len(s) for s in claude_data.values())
759
+ label = "新增" if is_incremental else "找到"
760
+ print(f" {label} {len(claude_data)} 个项目,{total_claude_sessions} 个对话,{total_claude_turns} 轮交互")
658
761
 
659
762
  print("收集 Codex 对话记录...")
660
763
  codex_data = collect_codex_sessions(target_date)
661
764
  total_codex = sum(len(v) for v in codex_data.values())
662
- print(f" 找到 {len(codex_data)} 个项目,{total_codex} 条消息")
765
+ print(f" {label} {len(codex_data)} 个项目,{total_codex} 条消息")
663
766
 
664
767
  if args.dry_run:
665
- print("\n[dry-run] 数据预览:")
768
+ print(f"\n[dry-run] {'新增' if is_incremental else ''}数据预览:")
666
769
  for proj, sessions in claude_data.items():
667
770
  total_turns = sum(len(t) for t in sessions)
668
771
  print(f" [Claude/{proj}] {len(sessions)} 个对话,{total_turns} 轮")
@@ -678,12 +781,22 @@ def main():
678
781
  print(f" - {m[:60]}")
679
782
  return
680
783
 
681
- # 2. 生成摘要(通过 claude -p 复用 Claude Code CLI 认证)
682
- print("调用 Claude API 生成摘要...")
683
- content = generate_summary(target_date, claude_data, codex_data)
784
+ # 3. 生成日志内容
785
+ if is_incremental:
786
+ if total_claude_turns == 0 and total_codex == 0:
787
+ print("没有新增内容,无需更新")
788
+ return
789
+ print("调用 Claude API 更新日志(增量)...")
790
+ log_file_path = LOGS_DIR / target_date[:4] / f"{target_date}.md"
791
+ existing_log = log_file_path.read_text(encoding="utf-8") if log_file_path.exists() else ""
792
+ content = generate_incremental_update(target_date, existing_log, claude_data, codex_data)
793
+ else:
794
+ print("调用 Claude API 生成摘要...")
795
+ content = generate_summary(target_date, claude_data, codex_data)
684
796
 
685
- # 4. 保存文件
797
+ # 4. 保存文件 + checkpoint
686
798
  log_file = save_log(target_date, content)
799
+ save_checkpoint(target_date, file_counts)
687
800
 
688
801
  # 5. Git 操作
689
802
  if not args.no_git: