npm - ai-worklog - Versions diffs - 1.0.2 → 1.0.4 - Mend

ai-worklog 1.0.2 → 1.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (2)

package/package.json +1 -1
package/scripts/collect_work_log.py +146 -58

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-worklog",
-  "version": "1.0.2",
+  "version": "1.0.4",
   "description": "AI 对话工作日志自动收集工具 —— 从 Claude Code / Codex 对话记录生成每日工作日志并推送到 GitLab",
   "bin": {
     "ai-worklog": "./bin/index.js"

package/scripts/collect_work_log.py CHANGED Viewed

@@ -102,15 +102,17 @@ def utc_to_local_date(utc_ts: str) -> Optional[str]:
 # ─── Claude Code 数据收集 ─────────────────────────────────────────────────────
-def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
+def collect_claude_sessions(target_date: str) -> dict[str, list[list[tuple[str, str]]]]:
     """
     遍历 ~/.claude/projects/ 下所有 jsonl，
-    按 message timestamp 过滤当天（UTC→本地），提取 user 消息，
+    按 message timestamp 过滤当天（UTC→本地），
+    每个 jsonl 文件作为一个对话，提取 (用户消息, Claude回复) 轮次对，
     按项目名（cwd basename）分组。
-    返回: {project_name: [message1, message2, ...]}
+    返回: {project_name: [session1, session2, ...]}
+           session = [(user_msg, assistant_msg), ...]
     """
-    results: dict[str, list[str]] = {}
+    results: dict[str, list[list[tuple[str, str]]]] = {}
     if not CLAUDE_PROJECTS_DIR.exists():
         return results
@@ -124,10 +126,46 @@ def collect_claude_sessions(target_date: str) -> dict[str, list[str]]:
     return results
+def _extract_user_text(content) -> str:
+    """从 message.content 中提取用户文字（过滤 tool_result 等结构）"""
+    texts = []
+    if isinstance(content, str):
+        texts = [content]
+    elif isinstance(content, list):
+        for c in content:
+            if isinstance(c, dict) and c.get("type") == "text":
+                texts.append(c.get("text", ""))
+            elif isinstance(c, str):
+                texts.append(c)
+    return " ".join(t for t in texts if t).strip()
+def _extract_assistant_text(content) -> str:
+    """从 assistant message.content 中提取最终文字回复，跳过 thinking 和 tool_use"""
+    if not isinstance(content, list):
+        return ""
+    parts = [
+        c.get("text", "").strip()
+        for c in content
+        if isinstance(c, dict) and c.get("type") == "text" and c.get("text", "").strip()
+    ]
+    return " ".join(parts)
 def _parse_claude_jsonl(
-    jsonl_file: Path, target_date: str, results: dict[str, list[str]]
+    jsonl_file: Path, target_date: str, results: dict[str, list[list[tuple[str, str]]]]
 ) -> None:
-    """解析单个 Claude Code jsonl 文件"""
+    """
+    解析单个 Claude Code jsonl 文件。
+    每个文件视为一个对话 session，提取当天的 (用户消息, Claude回复) 轮次对。
+    Claude 回复只取 text 块，跳过 thinking 和 tool_use。
+    """
+    session_turns: list[tuple[str, str]] = []
+    pending_user: str = ""
+    pending_user_date: str = ""
+    pending_assistant: str = ""
+    cwd: str = ""
     try:
         with open(jsonl_file, encoding="utf-8", errors="ignore") as f:
             for line in f:
@@ -139,34 +177,43 @@ def _parse_claude_jsonl(
                 except json.JSONDecodeError:
                     continue
-                if entry.get("type") != "user":
-                    continue
+                if entry.get("cwd"):
+                    cwd = entry["cwd"]
-                # 按时间戳过滤日期
-                ts = entry.get("timestamp", "")
-                if ts and utc_to_local_date(ts) != target_date:
-                    continue
+                entry_type = entry.get("type", "")
-                # 提取项目名
-                cwd = entry.get("cwd", "")
-                project_name = os.path.basename(cwd) if cwd else "unknown"
-                # 提取消息内容
-                message = entry.get("message", {})
-                content = message.get("content", "")
-                texts = []
-                if isinstance(content, str):
-                    texts = [content]
-                elif isinstance(content, list):
-                    for c in content:
-                        if isinstance(c, dict) and c.get("type") == "text":
-                            texts.append(c.get("text", ""))
-                        elif isinstance(c, str):
-                            texts.append(c)
-                for text in texts:
-                    if text and not is_system_message(text):
-                        results.setdefault(project_name, []).append(text.strip())
+                if entry_type == "user":
+                    user_text = _extract_user_text(entry.get("message", {}).get("content", ""))
+                    if not user_text or is_system_message(user_text):
+                        continue  # tool_result 或系统消息，跳过
+                    # 保存上一个 turn（日期匹配才入列）
+                    if pending_user and pending_user_date == target_date:
+                        session_turns.append((pending_user, pending_assistant))
+                    # 开始新 turn
+                    ts = entry.get("timestamp", "")
+                    pending_user = user_text
+                    pending_user_date = utc_to_local_date(ts) if ts else ""
+                    pending_assistant = ""
+                elif entry_type == "assistant":
+                    if not pending_user:
+                        continue
+                    assistant_text = _extract_assistant_text(
+                        entry.get("message", {}).get("content", [])
+                    )
+                    if assistant_text:
+                        pending_assistant = assistant_text  # 用最新文字覆盖（多步工具调用后取最终回复）
+        # 最后一个 pending turn
+        if pending_user and pending_user_date == target_date:
+            session_turns.append((pending_user, pending_assistant))
+        if session_turns:
+            project_name = os.path.basename(cwd) if cwd else "unknown"
+            results.setdefault(project_name, []).append(session_turns)
     except Exception as e:
         print(f"警告: 解析 {jsonl_file} 失败: {e}", file=sys.stderr)
@@ -259,41 +306,68 @@ def _call_api_claude_cli(prompt: str) -> str:
 def generate_summary(
     target_date: str,
-    claude_data: dict[str, list[str]],
+    claude_data: dict[str, list[list[tuple[str, str]]]],
     codex_data: dict[str, list[str]],
 ) -> str:
     """调用 Claude API 生成结构化工作日志"""
     # 合并数据（同项目名合并）
     all_projects: dict[str, dict] = {}
-    for proj, msgs in claude_data.items():
-        all_projects.setdefault(proj, {"claude": [], "codex": []})["claude"].extend(msgs)
+    for proj, sessions in claude_data.items():
+        all_projects.setdefault(proj, {"sessions": [], "codex": []})["sessions"].extend(sessions)
     for proj, msgs in codex_data.items():
-        all_projects.setdefault(proj, {"claude": [], "codex": []})["codex"].extend(msgs)
+        all_projects.setdefault(proj, {"sessions": [], "codex": []})["codex"].extend(msgs)
-    total_claude = sum(len(v["claude"]) for v in all_projects.values())
-    total_codex = sum(len(v["codex"]) for v in all_projects.values())
+    total_claude = sum(
+        sum(len(turns) for turns in data["sessions"])
+        for data in all_projects.values()
+    )
+    total_codex = sum(len(data["codex"]) for data in all_projects.values())
     total_sessions = total_claude + total_codex
     if total_sessions == 0:
         return _generate_empty_log(target_date)
-    # 构建给 AI 的原始数据
+    def trim(text: str, max_chars: int) -> str:
+        return text if len(text) <= max_chars else text[:max_chars] + "…"
+    def dedup_and_trim(msgs: list[str], max_chars: int = 200) -> list[str]:
+        seen: set[str] = set()
+        result = []
+        for m in msgs:
+            key = m[:80]
+            if key in seen:
+                continue
+            seen.add(key)
+            result.append(trim(m, max_chars))
+        return result
     project_sections = []
     for proj_name, data in all_projects.items():
-        msgs_parts = []
-        if data["claude"]:
-            msgs_parts.append(f"[Claude Code 对话 {len(data['claude'])} 条]")
-            for i, m in enumerate(data["claude"], 1):
-                # 截断过长消息
-                msg = m if len(m) <= 500 else m[:500] + "..."
-                msgs_parts.append(f"  {i}. {msg}")
+        parts = []
+        # Claude 对话：按 session 分，每个 session 里是轮次对
+        for si, turns in enumerate(data["sessions"], 1):
+            parts.append(f"  [对话 {si}]（{len(turns)} 轮）")
+            for user_msg, asst_msg in turns:
+                parts.append(f"    [用户] {trim(user_msg, 200)}")
+                if asst_msg:
+                    parts.append(f"    [Claude] {trim(asst_msg, 150)}")
+        # Codex 对话：没有回复，直接列用户消息
         if data["codex"]:
-            msgs_parts.append(f"[Codex 对话 {len(data['codex'])} 条]")
-            for i, m in enumerate(data["codex"], 1):
-                msg = m if len(m) <= 500 else m[:500] + "..."
-                msgs_parts.append(f"  {i}. {msg}")
-        project_sections.append(f"项目: {proj_name}\n" + "\n".join(msgs_parts))
+            trimmed = dedup_and_trim(data["codex"])
+            parts.append(f"  [Codex 对话]（{len(data['codex'])} 条）")
+            for m in trimmed:
+                parts.append(f"    [用户] {m}")
+        total_turns = sum(len(t) for t in data["sessions"])
+        header = (
+            f"项目: {proj_name}"
+            f"（Claude {len(data['sessions'])} 个对话 {total_turns} 轮，"
+            f"Codex {len(data['codex'])} 条）"
+        )
+        project_sections.append(header + "\n" + "\n".join(parts))
     raw_data = "\n\n".join(project_sections)
@@ -462,7 +536,9 @@ def git_commit_and_push(log_file: Path, target_date: str, push: bool = True) ->
     """全自动 git init → GitLab 项目创建 → commit → push"""
     def run(cmd: list[str], check_err: bool = True) -> subprocess.CompletedProcess:
-        r = subprocess.run(cmd, cwd=REPO_DIR, capture_output=True, text=True)
+        env = os.environ.copy()
+        env["LC_ALL"] = "C"  # 强制英文输出，保证字符串匹配不受本地化影响
+        r = subprocess.run(cmd, cwd=REPO_DIR, capture_output=True, text=True, env=env)
         if check_err and r.returncode != 0:
             print(f"命令失败: {' '.join(cmd)}\n{r.stderr.strip()}", file=sys.stderr)
         return r
@@ -496,7 +572,7 @@ def git_commit_and_push(log_file: Path, target_date: str, push: bool = True) ->
         run(["git", "add", "-A"])
         run(["git", "commit", "-m", "init: 初始化工作日志仓库"])
-    # ── 4. 提交日志 ────────────────────────────────────────────────────────────
+    # ── 4. 提交日志 ───────────────────────────────────────────────────────────
     rel_path = log_file.relative_to(REPO_DIR)
     print(f"Git: 添加 {rel_path}")
     if run(["git", "add", str(rel_path)]).returncode != 0:
@@ -574,8 +650,11 @@ def main():
     # 1. 收集数据
     print("收集 Claude Code 对话记录...")
     claude_data = collect_claude_sessions(target_date)
-    total_claude = sum(len(v) for v in claude_data.values())
-    print(f"  找到 {len(claude_data)} 个项目，{total_claude} 条消息")
+    total_claude_turns = sum(
+        sum(len(turns) for turns in sessions) for sessions in claude_data.values()
+    )
+    total_claude_sessions = sum(len(sessions) for sessions in claude_data.values())
+    print(f"  找到 {len(claude_data)} 个项目，{total_claude_sessions} 个对话，{total_claude_turns} 轮交互")
     print("收集 Codex 对话记录...")
     codex_data = collect_codex_sessions(target_date)
@@ -584,10 +663,19 @@ def main():
     if args.dry_run:
         print("\n[dry-run] 数据预览：")
-        for proj, msgs in {**claude_data, **codex_data}.items():
-            print(f"  [{proj}] {len(msgs)} 条")
+        for proj, sessions in claude_data.items():
+            total_turns = sum(len(t) for t in sessions)
+            print(f"  [Claude/{proj}] {len(sessions)} 个对话，{total_turns} 轮")
+            for si, turns in enumerate(sessions[:2], 1):
+                print(f"    对话{si}: {len(turns)} 轮")
+                for user_msg, asst_msg in turns[:2]:
+                    print(f"      [用户] {user_msg[:60]}")
+                    if asst_msg:
+                        print(f"      [Claude] {asst_msg[:60]}")
+        for proj, msgs in codex_data.items():
+            print(f"  [Codex/{proj}] {len(msgs)} 条")
             for m in msgs[:2]:
-                print(f"    - {m[:80]}")
+                print(f"    - {m[:60]}")
         return
     # 2. 生成摘要（通过 claude -p 复用 Claude Code CLI 认证）