erk-0.4.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- erk/__init__.py +12 -0
- erk/__main__.py +6 -0
- erk/agent_docs/__init__.py +5 -0
- erk/agent_docs/models.py +123 -0
- erk/agent_docs/operations.py +666 -0
- erk/artifacts/__init__.py +5 -0
- erk/artifacts/artifact_health.py +623 -0
- erk/artifacts/detection.py +16 -0
- erk/artifacts/discovery.py +343 -0
- erk/artifacts/models.py +63 -0
- erk/artifacts/staleness.py +56 -0
- erk/artifacts/state.py +100 -0
- erk/artifacts/sync.py +624 -0
- erk/cli/__init__.py +0 -0
- erk/cli/activation.py +132 -0
- erk/cli/alias.py +53 -0
- erk/cli/cli.py +221 -0
- erk/cli/commands/__init__.py +0 -0
- erk/cli/commands/admin.py +153 -0
- erk/cli/commands/artifact/__init__.py +1 -0
- erk/cli/commands/artifact/check.py +260 -0
- erk/cli/commands/artifact/group.py +31 -0
- erk/cli/commands/artifact/list_cmd.py +89 -0
- erk/cli/commands/artifact/show.py +62 -0
- erk/cli/commands/artifact/sync_cmd.py +39 -0
- erk/cli/commands/branch/__init__.py +26 -0
- erk/cli/commands/branch/assign_cmd.py +152 -0
- erk/cli/commands/branch/checkout_cmd.py +357 -0
- erk/cli/commands/branch/create_cmd.py +161 -0
- erk/cli/commands/branch/list_cmd.py +82 -0
- erk/cli/commands/branch/unassign_cmd.py +197 -0
- erk/cli/commands/cc/__init__.py +15 -0
- erk/cli/commands/cc/jsonl_cmd.py +20 -0
- erk/cli/commands/cc/session/AGENTS.md +30 -0
- erk/cli/commands/cc/session/CLAUDE.md +1 -0
- erk/cli/commands/cc/session/__init__.py +15 -0
- erk/cli/commands/cc/session/list_cmd.py +167 -0
- erk/cli/commands/cc/session/show_cmd.py +175 -0
- erk/cli/commands/completion.py +89 -0
- erk/cli/commands/completions.py +165 -0
- erk/cli/commands/config.py +327 -0
- erk/cli/commands/docs/__init__.py +1 -0
- erk/cli/commands/docs/group.py +16 -0
- erk/cli/commands/docs/sync.py +121 -0
- erk/cli/commands/docs/validate.py +102 -0
- erk/cli/commands/doctor.py +243 -0
- erk/cli/commands/down.py +171 -0
- erk/cli/commands/exec/__init__.py +1 -0
- erk/cli/commands/exec/group.py +164 -0
- erk/cli/commands/exec/scripts/AGENTS.md +79 -0
- erk/cli/commands/exec/scripts/CLAUDE.md +1 -0
- erk/cli/commands/exec/scripts/__init__.py +5 -0
- erk/cli/commands/exec/scripts/add_reaction_to_comment.py +69 -0
- erk/cli/commands/exec/scripts/add_remote_execution_note.py +68 -0
- erk/cli/commands/exec/scripts/check_impl.py +152 -0
- erk/cli/commands/exec/scripts/ci_update_pr_body.py +294 -0
- erk/cli/commands/exec/scripts/create_extraction_branch.py +138 -0
- erk/cli/commands/exec/scripts/create_extraction_plan.py +242 -0
- erk/cli/commands/exec/scripts/create_issue_from_session.py +103 -0
- erk/cli/commands/exec/scripts/create_plan_from_context.py +103 -0
- erk/cli/commands/exec/scripts/create_worker_impl_from_issue.py +93 -0
- erk/cli/commands/exec/scripts/detect_trunk_branch.py +121 -0
- erk/cli/commands/exec/scripts/exit_plan_mode_hook.py +777 -0
- erk/cli/commands/exec/scripts/extract_latest_plan.py +49 -0
- erk/cli/commands/exec/scripts/extract_session_from_issue.py +150 -0
- erk/cli/commands/exec/scripts/find_project_dir.py +214 -0
- erk/cli/commands/exec/scripts/generate_pr_summary.py +112 -0
- erk/cli/commands/exec/scripts/get_closing_text.py +98 -0
- erk/cli/commands/exec/scripts/get_embedded_prompt.py +62 -0
- erk/cli/commands/exec/scripts/get_plan_metadata.py +95 -0
- erk/cli/commands/exec/scripts/get_pr_body_footer.py +70 -0
- erk/cli/commands/exec/scripts/get_pr_discussion_comments.py +149 -0
- erk/cli/commands/exec/scripts/get_pr_review_comments.py +155 -0
- erk/cli/commands/exec/scripts/impl_init.py +158 -0
- erk/cli/commands/exec/scripts/impl_signal.py +375 -0
- erk/cli/commands/exec/scripts/impl_verify.py +49 -0
- erk/cli/commands/exec/scripts/issue_title_to_filename.py +34 -0
- erk/cli/commands/exec/scripts/list_sessions.py +296 -0
- erk/cli/commands/exec/scripts/mark_impl_ended.py +188 -0
- erk/cli/commands/exec/scripts/mark_impl_started.py +188 -0
- erk/cli/commands/exec/scripts/marker.py +163 -0
- erk/cli/commands/exec/scripts/objective_save_to_issue.py +109 -0
- erk/cli/commands/exec/scripts/plan_save_to_issue.py +269 -0
- erk/cli/commands/exec/scripts/plan_update_issue.py +147 -0
- erk/cli/commands/exec/scripts/post_extraction_comment.py +237 -0
- erk/cli/commands/exec/scripts/post_or_update_pr_summary.py +133 -0
- erk/cli/commands/exec/scripts/post_pr_inline_comment.py +143 -0
- erk/cli/commands/exec/scripts/post_workflow_started_comment.py +168 -0
- erk/cli/commands/exec/scripts/preprocess_session.py +777 -0
- erk/cli/commands/exec/scripts/quick_submit.py +32 -0
- erk/cli/commands/exec/scripts/rebase_with_conflict_resolution.py +260 -0
- erk/cli/commands/exec/scripts/reply_to_discussion_comment.py +173 -0
- erk/cli/commands/exec/scripts/resolve_review_thread.py +170 -0
- erk/cli/commands/exec/scripts/session_id_injector_hook.py +52 -0
- erk/cli/commands/exec/scripts/setup_impl_from_issue.py +159 -0
- erk/cli/commands/exec/scripts/slot_objective.py +102 -0
- erk/cli/commands/exec/scripts/tripwires_reminder_hook.py +20 -0
- erk/cli/commands/exec/scripts/update_dispatch_info.py +116 -0
- erk/cli/commands/exec/scripts/user_prompt_hook.py +113 -0
- erk/cli/commands/exec/scripts/validate_plan_content.py +98 -0
- erk/cli/commands/exec/scripts/wrap_plan_in_metadata_block.py +34 -0
- erk/cli/commands/implement.py +695 -0
- erk/cli/commands/implement_shared.py +649 -0
- erk/cli/commands/info/__init__.py +14 -0
- erk/cli/commands/info/release_notes_cmd.py +128 -0
- erk/cli/commands/init.py +801 -0
- erk/cli/commands/land_cmd.py +690 -0
- erk/cli/commands/log_cmd.py +137 -0
- erk/cli/commands/md/__init__.py +5 -0
- erk/cli/commands/md/check.py +118 -0
- erk/cli/commands/md/group.py +14 -0
- erk/cli/commands/navigation_helpers.py +430 -0
- erk/cli/commands/objective/__init__.py +16 -0
- erk/cli/commands/objective/list_cmd.py +47 -0
- erk/cli/commands/objective_helpers.py +132 -0
- erk/cli/commands/plan/__init__.py +32 -0
- erk/cli/commands/plan/check_cmd.py +174 -0
- erk/cli/commands/plan/close_cmd.py +69 -0
- erk/cli/commands/plan/create_cmd.py +120 -0
- erk/cli/commands/plan/docs/__init__.py +18 -0
- erk/cli/commands/plan/docs/extract_cmd.py +53 -0
- erk/cli/commands/plan/docs/unextract_cmd.py +38 -0
- erk/cli/commands/plan/docs/unextracted_cmd.py +72 -0
- erk/cli/commands/plan/extraction/__init__.py +16 -0
- erk/cli/commands/plan/extraction/complete_cmd.py +101 -0
- erk/cli/commands/plan/extraction/create_raw_cmd.py +63 -0
- erk/cli/commands/plan/get.py +71 -0
- erk/cli/commands/plan/list_cmd.py +754 -0
- erk/cli/commands/plan/log_cmd.py +440 -0
- erk/cli/commands/plan/start_cmd.py +459 -0
- erk/cli/commands/planner/__init__.py +40 -0
- erk/cli/commands/planner/configure_cmd.py +73 -0
- erk/cli/commands/planner/connect_cmd.py +96 -0
- erk/cli/commands/planner/create_cmd.py +148 -0
- erk/cli/commands/planner/list_cmd.py +51 -0
- erk/cli/commands/planner/register_cmd.py +105 -0
- erk/cli/commands/planner/set_default_cmd.py +23 -0
- erk/cli/commands/planner/unregister_cmd.py +43 -0
- erk/cli/commands/pr/__init__.py +23 -0
- erk/cli/commands/pr/check_cmd.py +112 -0
- erk/cli/commands/pr/checkout_cmd.py +165 -0
- erk/cli/commands/pr/fix_conflicts_cmd.py +82 -0
- erk/cli/commands/pr/parse_pr_reference.py +10 -0
- erk/cli/commands/pr/submit_cmd.py +360 -0
- erk/cli/commands/pr/sync_cmd.py +181 -0
- erk/cli/commands/prepare_cwd_recovery.py +60 -0
- erk/cli/commands/project/__init__.py +16 -0
- erk/cli/commands/project/init_cmd.py +91 -0
- erk/cli/commands/run/__init__.py +17 -0
- erk/cli/commands/run/list_cmd.py +189 -0
- erk/cli/commands/run/logs_cmd.py +54 -0
- erk/cli/commands/run/shared.py +19 -0
- erk/cli/commands/shell_integration.py +29 -0
- erk/cli/commands/slot/__init__.py +23 -0
- erk/cli/commands/slot/check_cmd.py +277 -0
- erk/cli/commands/slot/common.py +314 -0
- erk/cli/commands/slot/init_pool_cmd.py +157 -0
- erk/cli/commands/slot/list_cmd.py +228 -0
- erk/cli/commands/slot/repair_cmd.py +190 -0
- erk/cli/commands/stack/__init__.py +23 -0
- erk/cli/commands/stack/consolidate_cmd.py +470 -0
- erk/cli/commands/stack/list_cmd.py +79 -0
- erk/cli/commands/stack/move_cmd.py +309 -0
- erk/cli/commands/stack/split_old/README.md +64 -0
- erk/cli/commands/stack/split_old/__init__.py +5 -0
- erk/cli/commands/stack/split_old/command.py +233 -0
- erk/cli/commands/stack/split_old/display.py +116 -0
- erk/cli/commands/stack/split_old/plan.py +216 -0
- erk/cli/commands/status.py +58 -0
- erk/cli/commands/submit.py +768 -0
- erk/cli/commands/up.py +154 -0
- erk/cli/commands/upgrade.py +82 -0
- erk/cli/commands/wt/__init__.py +29 -0
- erk/cli/commands/wt/checkout_cmd.py +110 -0
- erk/cli/commands/wt/create_cmd.py +998 -0
- erk/cli/commands/wt/current_cmd.py +35 -0
- erk/cli/commands/wt/delete_cmd.py +573 -0
- erk/cli/commands/wt/list_cmd.py +332 -0
- erk/cli/commands/wt/rename_cmd.py +66 -0
- erk/cli/config.py +242 -0
- erk/cli/constants.py +29 -0
- erk/cli/core.py +65 -0
- erk/cli/debug.py +9 -0
- erk/cli/ensure-conversion-tasks.md +288 -0
- erk/cli/ensure.py +628 -0
- erk/cli/github_parsing.py +96 -0
- erk/cli/graphite.py +81 -0
- erk/cli/graphite_command.py +80 -0
- erk/cli/help_formatter.py +345 -0
- erk/cli/output.py +361 -0
- erk/cli/presets/dagster.toml +12 -0
- erk/cli/presets/generic.toml +12 -0
- erk/cli/prompt_hooks_templates/README.md +68 -0
- erk/cli/script_output.py +32 -0
- erk/cli/shell_integration/bash_wrapper.sh +32 -0
- erk/cli/shell_integration/fish_wrapper.fish +39 -0
- erk/cli/shell_integration/handler.py +338 -0
- erk/cli/shell_integration/zsh_wrapper.sh +32 -0
- erk/cli/shell_utils.py +171 -0
- erk/cli/subprocess_utils.py +92 -0
- erk/cli/uvx_detection.py +59 -0
- erk/core/__init__.py +0 -0
- erk/core/claude_executor.py +511 -0
- erk/core/claude_settings.py +317 -0
- erk/core/command_log.py +406 -0
- erk/core/commit_message_generator.py +234 -0
- erk/core/completion.py +10 -0
- erk/core/consolidation_utils.py +177 -0
- erk/core/context.py +570 -0
- erk/core/display/__init__.py +4 -0
- erk/core/display/abc.py +24 -0
- erk/core/display/real.py +30 -0
- erk/core/display_utils.py +526 -0
- erk/core/file_utils.py +87 -0
- erk/core/health_checks.py +1315 -0
- erk/core/health_checks_dogfooder/__init__.py +85 -0
- erk/core/health_checks_dogfooder/deprecated_dot_agent_config.py +64 -0
- erk/core/health_checks_dogfooder/legacy_claude_docs.py +69 -0
- erk/core/health_checks_dogfooder/legacy_config_locations.py +122 -0
- erk/core/health_checks_dogfooder/legacy_erk_docs_agent.py +61 -0
- erk/core/health_checks_dogfooder/legacy_erk_kits_folder.py +60 -0
- erk/core/health_checks_dogfooder/legacy_hook_settings.py +104 -0
- erk/core/health_checks_dogfooder/legacy_kit_yaml.py +78 -0
- erk/core/health_checks_dogfooder/legacy_kits_toml.py +43 -0
- erk/core/health_checks_dogfooder/outdated_erk_skill.py +43 -0
- erk/core/implementation_queue/__init__.py +1 -0
- erk/core/implementation_queue/github/__init__.py +8 -0
- erk/core/implementation_queue/github/abc.py +7 -0
- erk/core/implementation_queue/github/noop.py +38 -0
- erk/core/implementation_queue/github/printing.py +43 -0
- erk/core/implementation_queue/github/real.py +119 -0
- erk/core/init_utils.py +227 -0
- erk/core/output_filter.py +338 -0
- erk/core/plan_store/__init__.py +6 -0
- erk/core/planner/__init__.py +1 -0
- erk/core/planner/registry_abc.py +8 -0
- erk/core/planner/registry_fake.py +129 -0
- erk/core/planner/registry_real.py +195 -0
- erk/core/planner/types.py +7 -0
- erk/core/pr_utils.py +30 -0
- erk/core/release_notes.py +263 -0
- erk/core/repo_discovery.py +126 -0
- erk/core/script_writer.py +41 -0
- erk/core/services/__init__.py +1 -0
- erk/core/services/plan_list_service.py +94 -0
- erk/core/shell.py +51 -0
- erk/core/user_feedback.py +11 -0
- erk/core/version_check.py +55 -0
- erk/core/workflow_display.py +75 -0
- erk/core/worktree_pool.py +190 -0
- erk/core/worktree_utils.py +300 -0
- erk/data/CHANGELOG.md +438 -0
- erk/data/__init__.py +1 -0
- erk/data/claude/agents/devrun.md +180 -0
- erk/data/claude/commands/erk/__init__.py +0 -0
- erk/data/claude/commands/erk/create-extraction-plan.md +360 -0
- erk/data/claude/commands/erk/fix-conflicts.md +25 -0
- erk/data/claude/commands/erk/git-pr-push.md +345 -0
- erk/data/claude/commands/erk/implement-stacked-plan.md +96 -0
- erk/data/claude/commands/erk/land.md +193 -0
- erk/data/claude/commands/erk/objective-create.md +370 -0
- erk/data/claude/commands/erk/objective-list.md +34 -0
- erk/data/claude/commands/erk/objective-next-plan.md +220 -0
- erk/data/claude/commands/erk/objective-update-with-landed-pr.md +216 -0
- erk/data/claude/commands/erk/plan-implement.md +202 -0
- erk/data/claude/commands/erk/plan-save.md +45 -0
- erk/data/claude/commands/erk/plan-submit.md +39 -0
- erk/data/claude/commands/erk/pr-address.md +367 -0
- erk/data/claude/commands/erk/pr-submit.md +58 -0
- erk/data/claude/skills/dignified-python/SKILL.md +48 -0
- erk/data/claude/skills/dignified-python/cli-patterns.md +155 -0
- erk/data/claude/skills/dignified-python/dignified-python-core.md +1190 -0
- erk/data/claude/skills/dignified-python/subprocess.md +99 -0
- erk/data/claude/skills/dignified-python/versions/python-3.10.md +517 -0
- erk/data/claude/skills/dignified-python/versions/python-3.11.md +536 -0
- erk/data/claude/skills/dignified-python/versions/python-3.12.md +662 -0
- erk/data/claude/skills/dignified-python/versions/python-3.13.md +653 -0
- erk/data/claude/skills/erk-diff-analysis/SKILL.md +27 -0
- erk/data/claude/skills/erk-diff-analysis/references/commit-message-prompt.md +78 -0
- erk/data/claude/skills/learned-docs/SKILL.md +362 -0
- erk/data/github/actions/setup-claude-erk/action.yml +11 -0
- erk/data/github/prompts/dignified-python-review.md +125 -0
- erk/data/github/workflows/dignified-python-review.yml +61 -0
- erk/data/github/workflows/erk-impl.yml +251 -0
- erk/hooks/__init__.py +1 -0
- erk/hooks/decorators.py +319 -0
- erk/status/__init__.py +8 -0
- erk/status/collectors/__init__.py +9 -0
- erk/status/collectors/base.py +52 -0
- erk/status/collectors/git.py +76 -0
- erk/status/collectors/github.py +81 -0
- erk/status/collectors/graphite.py +80 -0
- erk/status/collectors/impl.py +145 -0
- erk/status/models/__init__.py +4 -0
- erk/status/models/status_data.py +404 -0
- erk/status/orchestrator.py +169 -0
- erk/status/renderers/__init__.py +5 -0
- erk/status/renderers/simple.py +322 -0
- erk/tui/AGENTS.md +193 -0
- erk/tui/CLAUDE.md +1 -0
- erk/tui/__init__.py +1 -0
- erk/tui/app.py +1404 -0
- erk/tui/commands/__init__.py +1 -0
- erk/tui/commands/executor.py +66 -0
- erk/tui/commands/provider.py +165 -0
- erk/tui/commands/real_executor.py +63 -0
- erk/tui/commands/registry.py +121 -0
- erk/tui/commands/types.py +36 -0
- erk/tui/data/__init__.py +1 -0
- erk/tui/data/provider.py +492 -0
- erk/tui/data/types.py +104 -0
- erk/tui/filtering/__init__.py +1 -0
- erk/tui/filtering/logic.py +43 -0
- erk/tui/filtering/types.py +55 -0
- erk/tui/jsonl_viewer/__init__.py +1 -0
- erk/tui/jsonl_viewer/app.py +61 -0
- erk/tui/jsonl_viewer/models.py +208 -0
- erk/tui/jsonl_viewer/widgets.py +204 -0
- erk/tui/sorting/__init__.py +6 -0
- erk/tui/sorting/logic.py +55 -0
- erk/tui/sorting/types.py +68 -0
- erk/tui/styles/dash.tcss +95 -0
- erk/tui/widgets/__init__.py +1 -0
- erk/tui/widgets/command_output.py +112 -0
- erk/tui/widgets/plan_table.py +276 -0
- erk/tui/widgets/status_bar.py +116 -0
- erk-0.4.5.dist-info/METADATA +376 -0
- erk-0.4.5.dist-info/RECORD +331 -0
- erk-0.4.5.dist-info/WHEEL +4 -0
- erk-0.4.5.dist-info/entry_points.txt +2 -0
- erk-0.4.5.dist-info/licenses/LICENSE.md +3 -0
erk/cli/commands/exec/scripts/preprocess_session.py

@@ -0,0 +1,777 @@
+#!/usr/bin/env python3
+"""
+Session Log Preprocessor
+
+Compresses JSONL session logs to XML format by removing metadata and deduplicating messages.
+This command is invoked via erk exec preprocess-session <log-path>.
+"""
+
+import json
+import tempfile
+from pathlib import Path
+
+import click
+
+
+def escape_xml(text: str) -> str:
+    """Minimal XML escaping for special characters."""
+    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+
+def is_empty_session(entries: list[dict]) -> bool:
+    """Check if session contains only metadata with no meaningful content.
+
+    Empty sessions are characterized by:
+    - Fewer than 3 entries (too small to be meaningful)
+    - Only metadata/system entries without substantive interaction
+
+    Args:
+        entries: List of session entries to check
+
+    Returns:
+        True if session is empty/meaningless, False otherwise
+    """
+    if len(entries) < 3:
+        return True
+
+    # Check if there's any meaningful content
+    has_user_message = False
+    has_assistant_response = False
+
+    for entry in entries:
+        entry_type = entry.get("type")
+        if entry_type == "user":
+            content = entry.get("message", {}).get("content", "")
+            if isinstance(content, list):
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                content = " ".join(text_parts)
+            if content and len(str(content).strip()) > 0:
+                has_user_message = True
+
+        elif entry_type == "assistant":
+            content_blocks = entry.get("message", {}).get("content", [])
+            for block in content_blocks:
+                if block.get("type") == "text" and block.get("text", "").strip():
+                    has_assistant_response = True
+                    break
+
+    # Session is empty if it lacks meaningful interaction
+    return not (has_user_message and has_assistant_response)
+
+
+def is_warmup_session(entries: list[dict]) -> bool:
+    """Check if session is a warmup containing only boilerplate acknowledgment.
+
+    Warmup sessions contain predictable patterns like:
+    - "I've reviewed"
+    - "I'm ready"
+    - "loaded the instructions"
+
+    Args:
+        entries: List of session entries to check
+
+    Returns:
+        True if session is a warmup, False otherwise
+    """
+    if not entries:
+        return False
+
+    # Look for warmup keyword in first user message
+    for entry in entries:
+        if entry.get("type") == "user":
+            content = entry.get("message", {}).get("content", "")
+            if isinstance(content, list):
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                content = " ".join(text_parts)
+
+            content_lower = str(content).lower()
+            if "warmup" in content_lower:
+                return True
+            break
+
+    return False
+
+
+def deduplicate_documentation_blocks(entries: list[dict]) -> list[dict]:
+    """Replace duplicate command documentation blocks with marker text.
+
+    Command documentation can appear verbatim multiple times, consuming
+    significant tokens. This function detects duplicate blocks by content hash
+    and replaces them with a reference marker.
+
+    Args:
+        entries: List of session entries
+
+    Returns:
+        Modified entries with duplicate documentation replaced by markers
+    """
+    import hashlib
+
+    seen_docs: dict[str, int] = {}  # hash -> first occurrence count
+    occurrence_counter: dict[str, int] = {}  # hash -> current occurrence
+    deduplicated = []
+
+    for entry in entries:
+        if entry.get("type") == "user":
+            content = entry.get("message", {}).get("content", "")
+            if isinstance(content, list):
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                content = " ".join(text_parts)
+
+            content_str = str(content)
+
+            # Detect command documentation by markers
+            is_doc = any(
+                marker in content_str
+                for marker in [
+                    "/erk:plan-save-issue",
+                    "/erk:plan-implement",
+                    "/gt:submit-branch",
+                    "/gt:pr-update",
+                    "command-message>",
+                    "command-name>",
+                ]
+            )
+
+            if is_doc and len(content_str) > 500:
+                # Hash the content
+                content_hash = hashlib.sha256(content_str.encode()).hexdigest()[:16]
+
+                if content_hash not in seen_docs:
+                    # First occurrence - keep it
+                    seen_docs[content_hash] = 1
+                    occurrence_counter[content_hash] = 1
+                    deduplicated.append(entry)
+                else:
+                    # Duplicate - replace with marker
+                    occurrence_counter[content_hash] += 1
+                    occurrence_num = occurrence_counter[content_hash]
+
+                    # Create marker entry
+                    marker_entry = entry.copy()
+                    marker_content = (
+                        f"[Duplicate command documentation block omitted - "
+                        f"hash {content_hash}, occurrence #{occurrence_num}]"
+                    )
+
+                    # Preserve structure
+                    if isinstance(entry.get("message", {}).get("content"), list):
+                        marker_entry["message"] = {
+                            "content": [{"type": "text", "text": marker_content}]
+                        }
+                    else:
+                        marker_entry["message"] = {"content": marker_content}
+
+                    deduplicated.append(marker_entry)
+            else:
+                deduplicated.append(entry)
+        else:
+            deduplicated.append(entry)
+
+    return deduplicated
+
+
+def truncate_parameter_value(value: str, max_length: int = 200) -> str:
+    """Truncate long parameter values while preserving identifiability.
+
+    Special handling for file paths to preserve structure.
+
+    Args:
+        value: Parameter value to truncate
+        max_length: Maximum length (default 200)
+
+    Returns:
+        Truncated value with context markers
+    """
+    if len(value) <= max_length:
+        return value
+
+    # Detect file paths - check for path separators and no spaces
+    has_slash = "/" in value
+    has_no_spaces_early = " " not in value[: min(100, len(value))]
+
+    if has_slash and has_no_spaces_early:
+        # Likely a file path - preserve start and end structure
+        parts = value.split("/")
+        if len(parts) > 3:
+            # Build path keeping first 2 parts and last 2 parts
+            first_parts = "/".join(parts[:2])
+            last_parts = "/".join(parts[-2:])
+            return f"{first_parts}/.../{last_parts}"
+
+    # General text - keep beginning and end with marker
+    keep_chars = (max_length - 20) // 2
+    truncated_count = len(value) - max_length
+    return f"{value[:keep_chars]}...[truncated {truncated_count} chars]...{value[-keep_chars:]}"
+
+
+def truncate_tool_parameters(entries: list[dict]) -> list[dict]:
+    """Truncate verbose tool parameters to reduce token usage.
+
+    Tool parameters can be extremely long (20+ lines), especially prompts.
+    This function truncates them while preserving identifiability.
+
+    Args:
+        entries: List of session entries
+
+    Returns:
+        Modified entries with truncated parameters
+    """
+    truncated = []
+
+    for entry in entries:
+        if entry.get("type") == "assistant":
+            message = entry.get("message", {})
+            content_blocks = message.get("content", [])
+
+            modified_blocks = []
+            for block in content_blocks:
+                if block.get("type") == "tool_use":
+                    # Truncate input parameters
+                    input_params = block.get("input", {})
+                    truncated_params = {}
+                    for key, value in input_params.items():
+                        value_str = str(value)
+                        if len(value_str) > 200:
+                            truncated_params[key] = truncate_parameter_value(value_str)
+                        else:
+                            truncated_params[key] = value
+
+                    # Create modified block
+                    modified_block = block.copy()
+                    modified_block["input"] = truncated_params
+                    modified_blocks.append(modified_block)
+                else:
+                    modified_blocks.append(block)
+
+            # Update entry
+            modified_entry = entry.copy()
+            modified_entry["message"] = message.copy()
+            modified_entry["message"]["content"] = modified_blocks
+            truncated.append(modified_entry)
+        else:
+            truncated.append(entry)
+
+    return truncated
+
+
+def prune_tool_result_content(result_text: str) -> str:
+    """Prune verbose tool results to first 30 lines, preserving errors.
+
+    Tool results can be extremely long. This function keeps the first 30 lines
+    (which usually contain the most relevant context) and preserves any lines
+    containing error keywords.
+
+    Args:
+        result_text: Tool result text to prune
+
+    Returns:
+        Pruned result text with error preservation
+    """
+    lines = result_text.split("\n")
+
+    if len(lines) <= 30:
+        return result_text
+
+    # Keep first 30 lines
+    kept_lines = lines[:30]
+
+    # Scan remaining lines for errors
+    error_keywords = ["error", "exception", "failed", "failure", "fatal", "warning"]
+    error_lines = []
+
+    for line in lines[30:]:
+        line_lower = line.lower()
+        if any(keyword in line_lower for keyword in error_keywords):
+            error_lines.append(line)
+
+    # Combine
+    if error_lines:
+        result_lines = kept_lines + [f"\n... [{len(lines) - 30} lines omitted] ...\n"] + error_lines
+    else:
+        result_lines = kept_lines + [f"\n... [{len(lines) - 30} lines omitted] ..."]
+
+    return "\n".join(result_lines)
+
+
+def is_log_discovery_operation(entry: dict) -> bool:
+    """Check if entry is a log discovery bash command (pwd, ls, etc.).
+
+    These are implementation mechanics that don't provide semantic value
+    for plan enhancement.
+
+    Args:
+        entry: Session entry to check
+
+    Returns:
+        True if entry is a log discovery operation, False otherwise
+    """
+    if entry.get("type") != "assistant":
+        return False
+
+    content_blocks = entry.get("message", {}).get("content", [])
+
+    for block in content_blocks:
+        if block.get("type") == "tool_use":
+            tool_name = block.get("name", "")
+            if tool_name != "Bash":
+                continue
+
+            # Check command parameter
+            input_params = block.get("input", {})
+            command = input_params.get("command", "")
+
+            # Log discovery patterns
+            log_discovery_patterns = [
+                "pwd",
+                "ls ~/.claude/projects/",
+                "ls ~/.claude",
+                "find ~/.claude",
+                "echo $SESSION_ID",
+            ]
+
+            for pattern in log_discovery_patterns:
+                if pattern in command:
+                    return True
+
+    return False
+
+
+def deduplicate_assistant_messages(entries: list[dict]) -> list[dict]:
+    """Remove duplicate assistant text when tool_use present."""
+    deduplicated = []
+    prev_assistant_text = None
+
+    for entry in entries:
+        if entry["type"] == "assistant":
+            message_content = entry["message"].get("content", [])
+
+            # Extract text and tool uses separately
+            text_blocks = [c for c in message_content if c.get("type") == "text"]
+            tool_uses = [c for c in message_content if c.get("type") == "tool_use"]
+
+            current_text = text_blocks[0]["text"] if text_blocks else None
+
+            # If text same as previous AND there's a tool_use, drop the duplicate text
+            if current_text == prev_assistant_text and tool_uses:
+                # Keep only tool_use content
+                entry["message"]["content"] = tool_uses
+
+            prev_assistant_text = current_text
+
+        deduplicated.append(entry)
+
+    return deduplicated
+
+
+def generate_compressed_xml(
+    entries: list[dict], source_label: str | None = None, enable_pruning: bool = True
+) -> str:
+    """Generate coarse-grained XML from filtered entries.
+
+    Args:
+        entries: List of session entries to convert to XML
+        source_label: Optional label for agent logs
+        enable_pruning: Whether to prune tool results (default: True)
+
+    Returns:
+        XML string representation of the session
+    """
+    xml_lines = ["<session>"]
+
+    # Add source label if provided (for agent logs)
+    if source_label:
+        xml_lines.append(f' <meta source="{escape_xml(source_label)}" />')
+
+    # Extract session metadata once (from first entry with gitBranch)
+    for entry in entries:
+        # Check in the original entry structure (before filtering)
+        if "gitBranch" in entry:
+            branch = entry["gitBranch"]
+            xml_lines.append(f' <meta branch="{escape_xml(branch)}" />')
+            break
+
+    for entry in entries:
+        entry_type = entry["type"]
+        message = entry.get("message", {})
+
+        if entry_type == "user":
+            # Extract user content
+            content = message.get("content", "")
+            if isinstance(content, list):
+                # Handle list of content blocks
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                    elif isinstance(block, str):
+                        text_parts.append(block)
+                content = "\n".join(text_parts)
+            xml_lines.append(f" <user>{escape_xml(content)}</user>")
+
+        elif entry_type == "assistant":
+            # Extract text and tool uses
+            content_blocks = message.get("content", [])
+            for content in content_blocks:
+                if content.get("type") == "text":
+                    text = content.get("text", "")
+                    if text.strip():  # Only include non-empty text
+                        xml_lines.append(f" <assistant>{escape_xml(text)}</assistant>")
+                elif content.get("type") == "tool_use":
+                    tool_name = content.get("name", "")
+                    tool_id = content.get("id", "")
+                    escaped_name = escape_xml(tool_name)
+                    escaped_id = escape_xml(tool_id)
+                    xml_lines.append(f' <tool_use name="{escaped_name}" id="{escaped_id}">')
+                    input_params = content.get("input", {})
+                    for key, value in input_params.items():
+                        escaped_key = escape_xml(key)
+                        escaped_value = escape_xml(str(value))
+                        xml_lines.append(f'    <param name="{escaped_key}">{escaped_value}</param>')
+                    xml_lines.append(" </tool_use>")
+
+        elif entry_type == "tool_result":
+            # Handle tool results - apply pruning if enabled
+            content_blocks = message.get("content", [])
+            tool_use_id = message.get("tool_use_id", "")
+
+            # Extract result content
+            result_parts = []
+            for block in content_blocks:
+                if isinstance(block, dict):
+                    if block.get("type") == "text":
+                        result_parts.append(block.get("text", ""))
+                    elif "text" in block:
+                        result_parts.append(block["text"])
+                elif isinstance(block, str):
+                    result_parts.append(block)
+
+            result_text = "\n".join(result_parts)
+
+            # Apply pruning if enabled
+            if enable_pruning:
+                result_text = prune_tool_result_content(result_text)
+
+            xml_lines.append(f' <tool_result tool="{escape_xml(tool_use_id)}">')
+            xml_lines.append(escape_xml(result_text))
+            xml_lines.append(" </tool_result>")
+
+    xml_lines.append("</session>")
+    return "\n".join(xml_lines)
+
+
+def process_log_file(
+    log_path: Path,
+    session_id: str | None = None,
+    source_label: str | None = None,
+    enable_filtering: bool = True,
+) -> tuple[list[dict], int, int]:
+    """Process a single JSONL log file and return filtered entries.
+
+    Args:
+        log_path: Path to the JSONL log file
+        session_id: Optional session ID to filter entries by
+        source_label: Optional label for agent logs
+        enable_filtering: Whether to apply optimization filters (default: True)
+
+    Returns:
+        Tuple of (filtered entries, total entries count, skipped entries count)
+    """
+    entries = []
+    total_entries = 0
+    skipped_entries = 0
+
+    for line in log_path.read_text(encoding="utf-8").splitlines():
+        if not line.strip():
+            continue
+
+        entry = json.loads(line)
+        total_entries += 1
+
+        # Filter by session ID if provided
+        if session_id is not None:
+            entry_session = entry.get("sessionId")
+            # Include if sessionId matches OR if sessionId field missing (backward compat)
+            if entry_session is not None and entry_session != session_id:
+                skipped_entries += 1
+                continue
+
+        # Filter out noise entries
+        if entry.get("type") == "file-history-snapshot":
+            continue
+
+        # Filter log discovery operations if filtering enabled
+        if enable_filtering and is_log_discovery_operation(entry):
+            continue
+
+        # Keep minimal fields but preserve gitBranch for metadata extraction
+        filtered = {
+            "type": entry["type"],
+            "message": entry.get("message", {}),
+        }
+
+        # Preserve gitBranch for metadata (will be extracted in XML generation)
+        if "gitBranch" in entry:
+            filtered["gitBranch"] = entry["gitBranch"]
+
+        # Drop usage metadata from assistant messages
+        if "usage" in filtered["message"]:
+            del filtered["message"]["usage"]
+
+        entries.append(filtered)
+
+    return entries, total_entries, skipped_entries
+
+
+def discover_agent_logs(session_log_path: Path) -> list[Path]:
+    """Discover agent logs in the same directory as the session log."""
+    log_dir = session_log_path.parent
+    agent_logs = sorted(log_dir.glob("agent-*.jsonl"))
+    return agent_logs
+
+
+def discover_planning_agent_logs(session_log_path: Path, parent_session_id: str) -> list[Path]:
+    """
+    Discover agent logs from Plan subagents only.
+
+    Algorithm:
+    1. Parse parent session JSONL to find Task tool invocations
+    2. Filter for entries where input.subagent_type == "Plan"
+    3. Extract agent IDs via temporal correlation with agent logs
+    4. Return only agent logs matching Plan subagents
+
+    Args:
+        session_log_path: Path to the main session log file
+        parent_session_id: Session ID of the parent session
+
+    Returns:
+        List of agent log paths from Plan subagents only.
+        Empty list if no Plan subagents found.
+    """
+    log_dir = session_log_path.parent
+
+    # Step 1: Find all Task tool invocations with subagent_type="Plan"
+    plan_task_timestamps: list[float] = []
+
+    for line in session_log_path.read_text(encoding="utf-8").splitlines():
+        if not line.strip():
+            continue
+
+        entry = json.loads(line)
+
+        # Look for assistant messages with tool_use content
+        if entry.get("type") == "assistant":
+            message = entry.get("message", {})
+            content = message.get("content", [])
+
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "tool_use":
+                        # Check if this is a Task tool with subagent_type="Plan"
+                        if block.get("name") == "Task":
+                            tool_input = block.get("input", {})
+                            if tool_input.get("subagent_type") == "Plan":
+                                # Record timestamp for correlation
+                                timestamp = message.get("timestamp")
+                                if timestamp is not None:
+                                    plan_task_timestamps.append(timestamp)
+
+    # If no Plan tasks found, return empty list (fallback to main session only)
+    if not plan_task_timestamps:
+        return []
+
+    # Step 2: Discover all agent logs
+    all_agent_logs = sorted(log_dir.glob("agent-*.jsonl"))
+
+    # Step 3: Filter agent logs by temporal correlation
+    planning_agent_logs: list[Path] = []
+
+    for agent_log in all_agent_logs:
+        # Read first entry to check sessionId and timestamp
+        if not agent_log.exists():
+            continue
+        first_line = agent_log.read_text(encoding="utf-8").splitlines()[0]
+        if not first_line.strip():
+            continue
+
+        first_entry = json.loads(first_line)
+
+        # Check if this agent log belongs to our parent session
+        if first_entry.get("sessionId") != parent_session_id:
+            continue
+
+        # Check if this agent log's timestamp correlates with a Plan Task
+        agent_timestamp = first_entry.get("message", {}).get("timestamp")
+        if agent_timestamp is None:
+            continue
+
+        # Match if within 1 second of any Plan Task timestamp
+        for plan_timestamp in plan_task_timestamps:
+            if abs(agent_timestamp - plan_timestamp) <= 1.0:
+                planning_agent_logs.append(agent_log)
+                break
+
+    return planning_agent_logs
+
+
+@click.command(name="preprocess-session")
+@click.argument("log_path", type=click.Path(exists=True, path_type=Path))
+@click.option(
+    "--session-id",
+    type=str,
+    default=None,
+    help="Filter JSONL entries by session ID before preprocessing",
+)
+@click.option(
+    "--include-agents/--no-include-agents",
+    default=True,
+    help="Include agent logs from same directory (default: True)",
+)
+@click.option(
+    "--no-filtering",
+    is_flag=True,
+    help="Disable all filtering optimizations (raw output)",
+)
+@click.option(
+    "--stdout",
+    is_flag=True,
+    help="Output XML to stdout instead of temp file",
+)
+def preprocess_session(
+    log_path: Path, session_id: str | None, include_agents: bool, no_filtering: bool, stdout: bool
+) -> None:
+    """Preprocess session log JSONL to compressed XML format.
+
+    By default, automatically discovers and includes agent logs (agent-*.jsonl)
+    from the same directory as the main session log.
+
+    All optimization filters are enabled by default for maximum token reduction:
+    - Empty session filtering
+    - Warmup session filtering
+    - Documentation deduplication
+    - Parameter truncation
+    - Tool result pruning
+    - Log discovery operation filtering
+
+    Use --no-filtering to disable all optimizations and get raw output.
+
+    Args:
+        log_path: Path to the main session JSONL file
+        session_id: Optional session ID to filter entries by
+        include_agents: Whether to include agent logs
+        no_filtering: Disable all filtering optimizations
+    """
+    enable_filtering = not no_filtering
+
+    # Process main session log
+    entries, total_entries, skipped_entries = process_log_file(
+        log_path, session_id=session_id, enable_filtering=enable_filtering
+    )
+
+    # Apply filtering operations if enabled
+    if enable_filtering:
+        # Check for empty/warmup sessions
+        if is_empty_session(entries):
+            click.echo("⚠️ Empty session detected - skipping output", err=True)
+            return
+
+        if is_warmup_session(entries):
+            click.echo("⚠️ Warmup session detected - skipping output", err=True)
+            return
+
+        # Apply documentation deduplication
+        entries = deduplicate_documentation_blocks(entries)
+
+        # Apply parameter truncation
+        entries = truncate_tool_parameters(entries)
+
+    # Apply standard deduplication (always enabled)
+    entries = deduplicate_assistant_messages(entries)
+
+    # Show diagnostic output if filtering by session ID
+    if session_id is not None:
+        click.echo(f"✅ Filtered JSONL by session ID: {session_id[:8]}...", err=True)
+        click.echo(
+            f"📊 Included {total_entries - skipped_entries} entries, "
+            f"skipped {skipped_entries} entries",
+            err=True,
+        )
+
+    # Generate main session XML
+    xml_sections = [generate_compressed_xml(entries, enable_pruning=enable_filtering)]
+
+    # Discover and process agent logs if requested
+    if include_agents:
+        agent_logs = discover_agent_logs(log_path)
+        for agent_log in agent_logs:
+            agent_entries, agent_total, agent_skipped = process_log_file(
+                agent_log, session_id=session_id, enable_filtering=enable_filtering
+            )
+
+            # Apply filtering for agent logs
+            if enable_filtering:
+                if is_empty_session(agent_entries):
+                    continue
+                if is_warmup_session(agent_entries):
+                    continue
+                agent_entries = deduplicate_documentation_blocks(agent_entries)
+                agent_entries = truncate_tool_parameters(agent_entries)
+
+            agent_entries = deduplicate_assistant_messages(agent_entries)
+
+            # Generate XML with source label
+            source_label = f"agent-{agent_log.stem.replace('agent-', '')}"
+            agent_xml = generate_compressed_xml(
+                agent_entries, source_label=source_label, enable_pruning=enable_filtering
+            )
+            xml_sections.append(agent_xml)
+
+    # Combine all XML sections
+    xml_content = "\n\n".join(xml_sections)
+
+    # Calculate compression metrics (only when filtering is enabled)
+    if enable_filtering:
+        original_size = sum(len(log_path.read_text(encoding="utf-8")) for _ in [log_path])
+        compressed_size = len(xml_content)
+        if original_size > 0:
+            reduction_pct = ((original_size - compressed_size) / original_size) * 100
+            stats_msg = (
+                f"📉 Token reduction: {reduction_pct:.1f}% "
+                f"({original_size:,} → {compressed_size:,} chars)"
+            )
+            # Route stats to stderr when stdout contains XML
+            click.echo(stats_msg, err=True)
+
+    if stdout:
+        # Output XML directly to stdout
+        click.echo(xml_content)
+    else:
+        # Write to temp file and print path (backward compatible)
+        # Use NamedTemporaryFile to avoid conflicts when multiple tests use same filename
+        filename_session_id = log_path.stem  # Extract session ID from filename
+        with tempfile.NamedTemporaryFile(
+            mode="w",
+            encoding="utf-8",
+            prefix=f"session-{filename_session_id}-",
+            suffix="-compressed.xml",
+            delete=False,
+            dir=tempfile.gettempdir(),
+        ) as f:
+            f.write(xml_content)
+            temp_file = Path(f.name)
+
+        # Print path to stdout for command capture
+        click.echo(str(temp_file))
+
+
+if __name__ == "__main__":
+    preprocess_session()
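
For orientation, a minimal usage sketch of the module above. This is an illustration, not part of the packaged wheel: the sample entries and the expected XML shape are assumptions based on the code shown, and the import path follows the file listing.

# Illustrative sketch only; assumes erk 0.4.5 is installed. The entries below are hypothetical.
from erk.cli.commands.exec.scripts.preprocess_session import (
    deduplicate_assistant_messages,
    generate_compressed_xml,
)

entries = [
    {"type": "user", "message": {"content": "Summarize the repo layout"}},
    {
        "type": "assistant",
        "message": {
            "content": [
                {"type": "text", "text": "Listing files first."},
                {"type": "tool_use", "name": "Bash", "id": "tool_1", "input": {"command": "ls"}},
            ]
        },
    },
]

entries = deduplicate_assistant_messages(entries)
print(generate_compressed_xml(entries, enable_pruning=True))
# Expected shape (whitespace approximate):
# <session>
#  <user>Summarize the repo layout</user>
#  <assistant>Listing files first.</assistant>
#  <tool_use name="Bash" id="tool_1">
#     <param name="command">ls</param>
#  </tool_use>
# </session>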