gobby 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gobby/__init__.py +3 -0
- gobby/adapters/__init__.py +30 -0
- gobby/adapters/base.py +93 -0
- gobby/adapters/claude_code.py +276 -0
- gobby/adapters/codex.py +1292 -0
- gobby/adapters/gemini.py +343 -0
- gobby/agents/__init__.py +37 -0
- gobby/agents/codex_session.py +120 -0
- gobby/agents/constants.py +112 -0
- gobby/agents/context.py +362 -0
- gobby/agents/definitions.py +133 -0
- gobby/agents/gemini_session.py +111 -0
- gobby/agents/registry.py +618 -0
- gobby/agents/runner.py +968 -0
- gobby/agents/session.py +259 -0
- gobby/agents/spawn.py +916 -0
- gobby/agents/spawners/__init__.py +77 -0
- gobby/agents/spawners/base.py +142 -0
- gobby/agents/spawners/cross_platform.py +266 -0
- gobby/agents/spawners/embedded.py +225 -0
- gobby/agents/spawners/headless.py +226 -0
- gobby/agents/spawners/linux.py +125 -0
- gobby/agents/spawners/macos.py +277 -0
- gobby/agents/spawners/windows.py +308 -0
- gobby/agents/tty_config.py +319 -0
- gobby/autonomous/__init__.py +32 -0
- gobby/autonomous/progress_tracker.py +447 -0
- gobby/autonomous/stop_registry.py +269 -0
- gobby/autonomous/stuck_detector.py +383 -0
- gobby/cli/__init__.py +67 -0
- gobby/cli/__main__.py +8 -0
- gobby/cli/agents.py +529 -0
- gobby/cli/artifacts.py +266 -0
- gobby/cli/daemon.py +329 -0
- gobby/cli/extensions.py +526 -0
- gobby/cli/github.py +263 -0
- gobby/cli/init.py +53 -0
- gobby/cli/install.py +614 -0
- gobby/cli/installers/__init__.py +37 -0
- gobby/cli/installers/antigravity.py +65 -0
- gobby/cli/installers/claude.py +363 -0
- gobby/cli/installers/codex.py +192 -0
- gobby/cli/installers/gemini.py +294 -0
- gobby/cli/installers/git_hooks.py +377 -0
- gobby/cli/installers/shared.py +737 -0
- gobby/cli/linear.py +250 -0
- gobby/cli/mcp.py +30 -0
- gobby/cli/mcp_proxy.py +698 -0
- gobby/cli/memory.py +304 -0
- gobby/cli/merge.py +384 -0
- gobby/cli/projects.py +79 -0
- gobby/cli/sessions.py +622 -0
- gobby/cli/tasks/__init__.py +30 -0
- gobby/cli/tasks/_utils.py +658 -0
- gobby/cli/tasks/ai.py +1025 -0
- gobby/cli/tasks/commits.py +169 -0
- gobby/cli/tasks/crud.py +685 -0
- gobby/cli/tasks/deps.py +135 -0
- gobby/cli/tasks/labels.py +63 -0
- gobby/cli/tasks/main.py +273 -0
- gobby/cli/tasks/search.py +178 -0
- gobby/cli/tui.py +34 -0
- gobby/cli/utils.py +513 -0
- gobby/cli/workflows.py +927 -0
- gobby/cli/worktrees.py +481 -0
- gobby/config/__init__.py +129 -0
- gobby/config/app.py +551 -0
- gobby/config/extensions.py +167 -0
- gobby/config/features.py +472 -0
- gobby/config/llm_providers.py +98 -0
- gobby/config/logging.py +66 -0
- gobby/config/mcp.py +346 -0
- gobby/config/persistence.py +247 -0
- gobby/config/servers.py +141 -0
- gobby/config/sessions.py +250 -0
- gobby/config/tasks.py +784 -0
- gobby/hooks/__init__.py +104 -0
- gobby/hooks/artifact_capture.py +213 -0
- gobby/hooks/broadcaster.py +243 -0
- gobby/hooks/event_handlers.py +723 -0
- gobby/hooks/events.py +218 -0
- gobby/hooks/git.py +169 -0
- gobby/hooks/health_monitor.py +171 -0
- gobby/hooks/hook_manager.py +856 -0
- gobby/hooks/hook_types.py +575 -0
- gobby/hooks/plugins.py +813 -0
- gobby/hooks/session_coordinator.py +396 -0
- gobby/hooks/verification_runner.py +268 -0
- gobby/hooks/webhooks.py +339 -0
- gobby/install/claude/commands/gobby/bug.md +51 -0
- gobby/install/claude/commands/gobby/chore.md +51 -0
- gobby/install/claude/commands/gobby/epic.md +52 -0
- gobby/install/claude/commands/gobby/eval.md +235 -0
- gobby/install/claude/commands/gobby/feat.md +49 -0
- gobby/install/claude/commands/gobby/nit.md +52 -0
- gobby/install/claude/commands/gobby/ref.md +52 -0
- gobby/install/claude/hooks/HOOK_SCHEMAS.md +632 -0
- gobby/install/claude/hooks/hook_dispatcher.py +364 -0
- gobby/install/claude/hooks/validate_settings.py +102 -0
- gobby/install/claude/hooks-template.json +118 -0
- gobby/install/codex/hooks/hook_dispatcher.py +153 -0
- gobby/install/codex/prompts/forget.md +7 -0
- gobby/install/codex/prompts/memories.md +7 -0
- gobby/install/codex/prompts/recall.md +7 -0
- gobby/install/codex/prompts/remember.md +13 -0
- gobby/install/gemini/hooks/hook_dispatcher.py +268 -0
- gobby/install/gemini/hooks-template.json +138 -0
- gobby/install/shared/plugins/code_guardian.py +456 -0
- gobby/install/shared/plugins/example_notify.py +331 -0
- gobby/integrations/__init__.py +10 -0
- gobby/integrations/github.py +145 -0
- gobby/integrations/linear.py +145 -0
- gobby/llm/__init__.py +40 -0
- gobby/llm/base.py +120 -0
- gobby/llm/claude.py +578 -0
- gobby/llm/claude_executor.py +503 -0
- gobby/llm/codex.py +322 -0
- gobby/llm/codex_executor.py +513 -0
- gobby/llm/executor.py +316 -0
- gobby/llm/factory.py +34 -0
- gobby/llm/gemini.py +258 -0
- gobby/llm/gemini_executor.py +339 -0
- gobby/llm/litellm.py +287 -0
- gobby/llm/litellm_executor.py +303 -0
- gobby/llm/resolver.py +499 -0
- gobby/llm/service.py +236 -0
- gobby/mcp_proxy/__init__.py +29 -0
- gobby/mcp_proxy/actions.py +175 -0
- gobby/mcp_proxy/daemon_control.py +198 -0
- gobby/mcp_proxy/importer.py +436 -0
- gobby/mcp_proxy/lazy.py +325 -0
- gobby/mcp_proxy/manager.py +798 -0
- gobby/mcp_proxy/metrics.py +609 -0
- gobby/mcp_proxy/models.py +139 -0
- gobby/mcp_proxy/registries.py +215 -0
- gobby/mcp_proxy/schema_hash.py +381 -0
- gobby/mcp_proxy/semantic_search.py +706 -0
- gobby/mcp_proxy/server.py +549 -0
- gobby/mcp_proxy/services/__init__.py +0 -0
- gobby/mcp_proxy/services/fallback.py +306 -0
- gobby/mcp_proxy/services/recommendation.py +224 -0
- gobby/mcp_proxy/services/server_mgmt.py +214 -0
- gobby/mcp_proxy/services/system.py +72 -0
- gobby/mcp_proxy/services/tool_filter.py +231 -0
- gobby/mcp_proxy/services/tool_proxy.py +309 -0
- gobby/mcp_proxy/stdio.py +565 -0
- gobby/mcp_proxy/tools/__init__.py +27 -0
- gobby/mcp_proxy/tools/agents.py +1103 -0
- gobby/mcp_proxy/tools/artifacts.py +207 -0
- gobby/mcp_proxy/tools/hub.py +335 -0
- gobby/mcp_proxy/tools/internal.py +337 -0
- gobby/mcp_proxy/tools/memory.py +543 -0
- gobby/mcp_proxy/tools/merge.py +422 -0
- gobby/mcp_proxy/tools/metrics.py +283 -0
- gobby/mcp_proxy/tools/orchestration/__init__.py +23 -0
- gobby/mcp_proxy/tools/orchestration/cleanup.py +619 -0
- gobby/mcp_proxy/tools/orchestration/monitor.py +380 -0
- gobby/mcp_proxy/tools/orchestration/orchestrate.py +746 -0
- gobby/mcp_proxy/tools/orchestration/review.py +736 -0
- gobby/mcp_proxy/tools/orchestration/utils.py +16 -0
- gobby/mcp_proxy/tools/session_messages.py +1056 -0
- gobby/mcp_proxy/tools/task_dependencies.py +219 -0
- gobby/mcp_proxy/tools/task_expansion.py +591 -0
- gobby/mcp_proxy/tools/task_github.py +393 -0
- gobby/mcp_proxy/tools/task_linear.py +379 -0
- gobby/mcp_proxy/tools/task_orchestration.py +77 -0
- gobby/mcp_proxy/tools/task_readiness.py +522 -0
- gobby/mcp_proxy/tools/task_sync.py +351 -0
- gobby/mcp_proxy/tools/task_validation.py +843 -0
- gobby/mcp_proxy/tools/tasks/__init__.py +25 -0
- gobby/mcp_proxy/tools/tasks/_context.py +112 -0
- gobby/mcp_proxy/tools/tasks/_crud.py +516 -0
- gobby/mcp_proxy/tools/tasks/_factory.py +176 -0
- gobby/mcp_proxy/tools/tasks/_helpers.py +129 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle.py +517 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +301 -0
- gobby/mcp_proxy/tools/tasks/_resolution.py +55 -0
- gobby/mcp_proxy/tools/tasks/_search.py +215 -0
- gobby/mcp_proxy/tools/tasks/_session.py +125 -0
- gobby/mcp_proxy/tools/workflows.py +973 -0
- gobby/mcp_proxy/tools/worktrees.py +1264 -0
- gobby/mcp_proxy/transports/__init__.py +0 -0
- gobby/mcp_proxy/transports/base.py +95 -0
- gobby/mcp_proxy/transports/factory.py +44 -0
- gobby/mcp_proxy/transports/http.py +139 -0
- gobby/mcp_proxy/transports/stdio.py +213 -0
- gobby/mcp_proxy/transports/websocket.py +136 -0
- gobby/memory/backends/__init__.py +116 -0
- gobby/memory/backends/mem0.py +408 -0
- gobby/memory/backends/memu.py +485 -0
- gobby/memory/backends/null.py +111 -0
- gobby/memory/backends/openmemory.py +537 -0
- gobby/memory/backends/sqlite.py +304 -0
- gobby/memory/context.py +87 -0
- gobby/memory/manager.py +1001 -0
- gobby/memory/protocol.py +451 -0
- gobby/memory/search/__init__.py +66 -0
- gobby/memory/search/text.py +127 -0
- gobby/memory/viz.py +258 -0
- gobby/prompts/__init__.py +13 -0
- gobby/prompts/defaults/expansion/system.md +119 -0
- gobby/prompts/defaults/expansion/user.md +48 -0
- gobby/prompts/defaults/external_validation/agent.md +72 -0
- gobby/prompts/defaults/external_validation/external.md +63 -0
- gobby/prompts/defaults/external_validation/spawn.md +83 -0
- gobby/prompts/defaults/external_validation/system.md +6 -0
- gobby/prompts/defaults/features/import_mcp.md +22 -0
- gobby/prompts/defaults/features/import_mcp_github.md +17 -0
- gobby/prompts/defaults/features/import_mcp_search.md +16 -0
- gobby/prompts/defaults/features/recommend_tools.md +32 -0
- gobby/prompts/defaults/features/recommend_tools_hybrid.md +35 -0
- gobby/prompts/defaults/features/recommend_tools_llm.md +30 -0
- gobby/prompts/defaults/features/server_description.md +20 -0
- gobby/prompts/defaults/features/server_description_system.md +6 -0
- gobby/prompts/defaults/features/task_description.md +31 -0
- gobby/prompts/defaults/features/task_description_system.md +6 -0
- gobby/prompts/defaults/features/tool_summary.md +17 -0
- gobby/prompts/defaults/features/tool_summary_system.md +6 -0
- gobby/prompts/defaults/research/step.md +58 -0
- gobby/prompts/defaults/validation/criteria.md +47 -0
- gobby/prompts/defaults/validation/validate.md +38 -0
- gobby/prompts/loader.py +346 -0
- gobby/prompts/models.py +113 -0
- gobby/py.typed +0 -0
- gobby/runner.py +488 -0
- gobby/search/__init__.py +23 -0
- gobby/search/protocol.py +104 -0
- gobby/search/tfidf.py +232 -0
- gobby/servers/__init__.py +7 -0
- gobby/servers/http.py +636 -0
- gobby/servers/models.py +31 -0
- gobby/servers/routes/__init__.py +23 -0
- gobby/servers/routes/admin.py +416 -0
- gobby/servers/routes/dependencies.py +118 -0
- gobby/servers/routes/mcp/__init__.py +24 -0
- gobby/servers/routes/mcp/hooks.py +135 -0
- gobby/servers/routes/mcp/plugins.py +121 -0
- gobby/servers/routes/mcp/tools.py +1337 -0
- gobby/servers/routes/mcp/webhooks.py +159 -0
- gobby/servers/routes/sessions.py +582 -0
- gobby/servers/websocket.py +766 -0
- gobby/sessions/__init__.py +13 -0
- gobby/sessions/analyzer.py +322 -0
- gobby/sessions/lifecycle.py +240 -0
- gobby/sessions/manager.py +563 -0
- gobby/sessions/processor.py +225 -0
- gobby/sessions/summary.py +532 -0
- gobby/sessions/transcripts/__init__.py +41 -0
- gobby/sessions/transcripts/base.py +125 -0
- gobby/sessions/transcripts/claude.py +386 -0
- gobby/sessions/transcripts/codex.py +143 -0
- gobby/sessions/transcripts/gemini.py +195 -0
- gobby/storage/__init__.py +21 -0
- gobby/storage/agents.py +409 -0
- gobby/storage/artifact_classifier.py +341 -0
- gobby/storage/artifacts.py +285 -0
- gobby/storage/compaction.py +67 -0
- gobby/storage/database.py +357 -0
- gobby/storage/inter_session_messages.py +194 -0
- gobby/storage/mcp.py +680 -0
- gobby/storage/memories.py +562 -0
- gobby/storage/merge_resolutions.py +550 -0
- gobby/storage/migrations.py +860 -0
- gobby/storage/migrations_legacy.py +1359 -0
- gobby/storage/projects.py +166 -0
- gobby/storage/session_messages.py +251 -0
- gobby/storage/session_tasks.py +97 -0
- gobby/storage/sessions.py +817 -0
- gobby/storage/task_dependencies.py +223 -0
- gobby/storage/tasks/__init__.py +42 -0
- gobby/storage/tasks/_aggregates.py +180 -0
- gobby/storage/tasks/_crud.py +449 -0
- gobby/storage/tasks/_id.py +104 -0
- gobby/storage/tasks/_lifecycle.py +311 -0
- gobby/storage/tasks/_manager.py +889 -0
- gobby/storage/tasks/_models.py +300 -0
- gobby/storage/tasks/_ordering.py +119 -0
- gobby/storage/tasks/_path_cache.py +110 -0
- gobby/storage/tasks/_queries.py +343 -0
- gobby/storage/tasks/_search.py +143 -0
- gobby/storage/workflow_audit.py +393 -0
- gobby/storage/worktrees.py +547 -0
- gobby/sync/__init__.py +29 -0
- gobby/sync/github.py +333 -0
- gobby/sync/linear.py +304 -0
- gobby/sync/memories.py +284 -0
- gobby/sync/tasks.py +641 -0
- gobby/tasks/__init__.py +8 -0
- gobby/tasks/build_verification.py +193 -0
- gobby/tasks/commits.py +633 -0
- gobby/tasks/context.py +747 -0
- gobby/tasks/criteria.py +342 -0
- gobby/tasks/enhanced_validator.py +226 -0
- gobby/tasks/escalation.py +263 -0
- gobby/tasks/expansion.py +626 -0
- gobby/tasks/external_validator.py +764 -0
- gobby/tasks/issue_extraction.py +171 -0
- gobby/tasks/prompts/expand.py +327 -0
- gobby/tasks/research.py +421 -0
- gobby/tasks/tdd.py +352 -0
- gobby/tasks/tree_builder.py +263 -0
- gobby/tasks/validation.py +712 -0
- gobby/tasks/validation_history.py +357 -0
- gobby/tasks/validation_models.py +89 -0
- gobby/tools/__init__.py +0 -0
- gobby/tools/summarizer.py +170 -0
- gobby/tui/__init__.py +5 -0
- gobby/tui/api_client.py +281 -0
- gobby/tui/app.py +327 -0
- gobby/tui/screens/__init__.py +25 -0
- gobby/tui/screens/agents.py +333 -0
- gobby/tui/screens/chat.py +450 -0
- gobby/tui/screens/dashboard.py +377 -0
- gobby/tui/screens/memory.py +305 -0
- gobby/tui/screens/metrics.py +231 -0
- gobby/tui/screens/orchestrator.py +904 -0
- gobby/tui/screens/sessions.py +412 -0
- gobby/tui/screens/tasks.py +442 -0
- gobby/tui/screens/workflows.py +289 -0
- gobby/tui/screens/worktrees.py +174 -0
- gobby/tui/widgets/__init__.py +21 -0
- gobby/tui/widgets/chat.py +210 -0
- gobby/tui/widgets/conductor.py +104 -0
- gobby/tui/widgets/menu.py +132 -0
- gobby/tui/widgets/message_panel.py +160 -0
- gobby/tui/widgets/review_gate.py +224 -0
- gobby/tui/widgets/task_tree.py +99 -0
- gobby/tui/widgets/token_budget.py +166 -0
- gobby/tui/ws_client.py +258 -0
- gobby/utils/__init__.py +3 -0
- gobby/utils/daemon_client.py +235 -0
- gobby/utils/git.py +222 -0
- gobby/utils/id.py +38 -0
- gobby/utils/json_helpers.py +161 -0
- gobby/utils/logging.py +376 -0
- gobby/utils/machine_id.py +135 -0
- gobby/utils/metrics.py +589 -0
- gobby/utils/project_context.py +182 -0
- gobby/utils/project_init.py +263 -0
- gobby/utils/status.py +256 -0
- gobby/utils/validation.py +80 -0
- gobby/utils/version.py +23 -0
- gobby/workflows/__init__.py +4 -0
- gobby/workflows/actions.py +1310 -0
- gobby/workflows/approval_flow.py +138 -0
- gobby/workflows/artifact_actions.py +103 -0
- gobby/workflows/audit_helpers.py +110 -0
- gobby/workflows/autonomous_actions.py +286 -0
- gobby/workflows/context_actions.py +394 -0
- gobby/workflows/definitions.py +130 -0
- gobby/workflows/detection_helpers.py +208 -0
- gobby/workflows/engine.py +485 -0
- gobby/workflows/evaluator.py +669 -0
- gobby/workflows/git_utils.py +96 -0
- gobby/workflows/hooks.py +169 -0
- gobby/workflows/lifecycle_evaluator.py +613 -0
- gobby/workflows/llm_actions.py +70 -0
- gobby/workflows/loader.py +333 -0
- gobby/workflows/mcp_actions.py +60 -0
- gobby/workflows/memory_actions.py +272 -0
- gobby/workflows/premature_stop.py +164 -0
- gobby/workflows/session_actions.py +139 -0
- gobby/workflows/state_actions.py +123 -0
- gobby/workflows/state_manager.py +104 -0
- gobby/workflows/stop_signal_actions.py +163 -0
- gobby/workflows/summary_actions.py +344 -0
- gobby/workflows/task_actions.py +249 -0
- gobby/workflows/task_enforcement_actions.py +901 -0
- gobby/workflows/templates.py +52 -0
- gobby/workflows/todo_actions.py +84 -0
- gobby/workflows/webhook.py +223 -0
- gobby/workflows/webhook_executor.py +399 -0
- gobby/worktrees/__init__.py +5 -0
- gobby/worktrees/git.py +690 -0
- gobby/worktrees/merge/__init__.py +20 -0
- gobby/worktrees/merge/conflict_parser.py +177 -0
- gobby/worktrees/merge/resolver.py +485 -0
- gobby-0.2.5.dist-info/METADATA +351 -0
- gobby-0.2.5.dist-info/RECORD +383 -0
- gobby-0.2.5.dist-info/WHEEL +5 -0
- gobby-0.2.5.dist-info/entry_points.txt +2 -0
- gobby-0.2.5.dist-info/licenses/LICENSE.md +193 -0
- gobby-0.2.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Claude Code transcript parser.
|
|
3
|
+
|
|
4
|
+
Parses JSONL transcript files generated by Claude Code CLI.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from gobby.sessions.transcripts.base import ParsedMessage, TokenUsage
|
|
15
|
+
|
|
16
|
+
# Module-level logger; ClaudeTranscriptParser uses it when no logger is passed in.
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ClaudeTranscriptParser:
|
|
20
|
+
"""
|
|
21
|
+
Parses JSONL transcript files from Claude Code.
|
|
22
|
+
|
|
23
|
+
Implements the TranscriptParser protocol for Claude Code's specific
|
|
24
|
+
transcript format. Session boundaries are marked by /clear commands.
|
|
25
|
+
|
|
26
|
+
This is a stateless utility class that provides methods for reading,
|
|
27
|
+
parsing, and analyzing transcript files. It does not maintain any
|
|
28
|
+
session state and can be shared across multiple sessions.
|
|
29
|
+
|
|
30
|
+
Thread-safe: All methods are stateless and can be called concurrently.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
def __init__(self, logger_instance: logging.Logger | None = None):
|
|
34
|
+
"""
|
|
35
|
+
Initialize ClaudeTranscriptParser.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
logger_instance: Optional logger instance to use. If not provided,
|
|
39
|
+
uses the module-level logger.
|
|
40
|
+
"""
|
|
41
|
+
self.logger = logger_instance or logger
|
|
42
|
+
|
|
43
|
+
def extract_last_messages(
|
|
44
|
+
self, turns: list[dict[str, Any]], num_pairs: int = 2
|
|
45
|
+
) -> list[dict[str, Any]]:
|
|
46
|
+
"""
|
|
47
|
+
Extract last N user<>agent message pairs from transcript.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
turns: List of transcript turns
|
|
51
|
+
num_pairs: Number of user/agent message pairs to extract (default: 2)
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
List of message dicts with "role" and "content" fields, ordered chronologically
|
|
55
|
+
|
|
56
|
+
Example:
|
|
57
|
+
>>> parser = ClaudeTranscriptParser()
|
|
58
|
+
>>> messages = parser.read_jsonl("/path/to/transcript.jsonl")
|
|
59
|
+
>>> last_msgs = parser.extract_last_messages(messages, num_pairs=3)
|
|
60
|
+
>>> len(last_msgs)
|
|
61
|
+
6 # 3 pairs = 6 messages
|
|
62
|
+
"""
|
|
63
|
+
messages: list[dict[str, str]] = []
|
|
64
|
+
for turn in reversed(turns):
|
|
65
|
+
# Claude Code transcript structure has message nested
|
|
66
|
+
message = turn.get("message", {})
|
|
67
|
+
role = message.get("role")
|
|
68
|
+
if role in ["user", "assistant"]:
|
|
69
|
+
content = message.get("content", "")
|
|
70
|
+
|
|
71
|
+
# Assistant messages have content as array of blocks
|
|
72
|
+
if isinstance(content, list):
|
|
73
|
+
text_parts = []
|
|
74
|
+
for block in content:
|
|
75
|
+
if isinstance(block, dict) and block.get("type") == "text":
|
|
76
|
+
text_parts.append(block.get("text", ""))
|
|
77
|
+
content = " ".join(text_parts)
|
|
78
|
+
|
|
79
|
+
messages.insert(0, {"role": role, "content": str(content)})
|
|
80
|
+
if len(messages) >= num_pairs * 2:
|
|
81
|
+
break
|
|
82
|
+
return messages
|
|
83
|
+
|
|
84
|
+
def extract_turns_since_clear(
|
|
85
|
+
self, turns: list[dict[str, Any]], max_turns: int = 50
|
|
86
|
+
) -> list[dict[str, Any]]:
|
|
87
|
+
"""
|
|
88
|
+
Extract turns since the most recent /clear, up to max_turns.
|
|
89
|
+
|
|
90
|
+
Logic:
|
|
91
|
+
1. Find most recent /clear in the transcript (handling consecutive /clears as one boundary)
|
|
92
|
+
2. Start from the turn AFTER the last /clear
|
|
93
|
+
3. Go back up to max_turns but stop if we hit another /clear
|
|
94
|
+
4. Consecutive /clear commands are treated as a single conversation boundary
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
turns: List of all transcript turns
|
|
98
|
+
max_turns: Maximum number of turns to extract (default: 50)
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
List of turns representing the current conversation segment
|
|
102
|
+
|
|
103
|
+
Example:
|
|
104
|
+
Turn 100: user message
|
|
105
|
+
Turn 101: /clear
|
|
106
|
+
Turn 102: /clear (consecutive)
|
|
107
|
+
Turn 103: user message
|
|
108
|
+
Turn 104: agent message
|
|
109
|
+
|
|
110
|
+
-> Returns turns 103-104 (after the /clear cluster)
|
|
111
|
+
"""
|
|
112
|
+
if not turns:
|
|
113
|
+
return []
|
|
114
|
+
|
|
115
|
+
# Find the most recent /clear by scanning backwards
|
|
116
|
+
# We want the LAST /clear in any consecutive cluster
|
|
117
|
+
most_recent_clear_idx = None
|
|
118
|
+
for i in range(len(turns) - 1, -1, -1):
|
|
119
|
+
if self.is_session_boundary(turns[i]):
|
|
120
|
+
# Found a /clear - this is the most recent one
|
|
121
|
+
most_recent_clear_idx = i
|
|
122
|
+
break
|
|
123
|
+
|
|
124
|
+
# If no /clear found, just take the last max_turns
|
|
125
|
+
if most_recent_clear_idx is None:
|
|
126
|
+
return turns[-max_turns:] if len(turns) > max_turns else turns
|
|
127
|
+
|
|
128
|
+
# Start after this /clear (which is the last in any cluster since we scanned backwards)
|
|
129
|
+
start_idx = most_recent_clear_idx + 1
|
|
130
|
+
end_idx = len(turns)
|
|
131
|
+
|
|
132
|
+
# Now go backwards from the /clear we found to check for:
|
|
133
|
+
# 1. Another /clear (conversation boundary)
|
|
134
|
+
# 2. Max turns limit
|
|
135
|
+
# We want at most max_turns AFTER the most recent /clear
|
|
136
|
+
# So if we have 150 total turns and most_recent_clear is at 100,
|
|
137
|
+
# we want to limit to turns 101-150 (50 turns) if end is at 150
|
|
138
|
+
|
|
139
|
+
# If the segment after most_recent_clear is already <= max_turns, we're done
|
|
140
|
+
segment_size = end_idx - start_idx
|
|
141
|
+
if segment_size <= max_turns:
|
|
142
|
+
# Check if there's a previous /clear we should respect
|
|
143
|
+
search_idx = most_recent_clear_idx - 1
|
|
144
|
+
|
|
145
|
+
# Skip consecutive /clears going backwards
|
|
146
|
+
while search_idx >= 0 and self.is_session_boundary(turns[search_idx]):
|
|
147
|
+
search_idx -= 1
|
|
148
|
+
|
|
149
|
+
# Search for previous /clear boundary (no max_turns limit since current segment is small)
|
|
150
|
+
for i in range(search_idx, -1, -1):
|
|
151
|
+
if self.is_session_boundary(turns[i]):
|
|
152
|
+
# Found previous /clear - make sure we start after it
|
|
153
|
+
boundary_idx = i
|
|
154
|
+
# Skip forward over consecutive /clears
|
|
155
|
+
while boundary_idx < most_recent_clear_idx and self.is_session_boundary(
|
|
156
|
+
turns[boundary_idx + 1]
|
|
157
|
+
):
|
|
158
|
+
boundary_idx += 1
|
|
159
|
+
start_idx = max(start_idx, boundary_idx + 1)
|
|
160
|
+
break
|
|
161
|
+
|
|
162
|
+
return turns[start_idx:end_idx]
|
|
163
|
+
|
|
164
|
+
# Segment is > max_turns, so we need to limit it
|
|
165
|
+
# Take the last max_turns from the segment
|
|
166
|
+
start_idx = end_idx - max_turns
|
|
167
|
+
|
|
168
|
+
# But make sure we don't cross a /clear boundary
|
|
169
|
+
search_idx = most_recent_clear_idx - 1
|
|
170
|
+
|
|
171
|
+
# Skip consecutive /clears going backwards
|
|
172
|
+
while search_idx >= 0 and self.is_session_boundary(turns[search_idx]):
|
|
173
|
+
search_idx -= 1
|
|
174
|
+
|
|
175
|
+
# Check if there's a /clear between start_idx and most_recent_clear_idx
|
|
176
|
+
for i in range(most_recent_clear_idx - 1, start_idx - 1, -1):
|
|
177
|
+
if self.is_session_boundary(turns[i]):
|
|
178
|
+
# Found a /clear in our window - start after it
|
|
179
|
+
boundary_idx = i
|
|
180
|
+
while boundary_idx < most_recent_clear_idx and self.is_session_boundary(
|
|
181
|
+
turns[boundary_idx + 1]
|
|
182
|
+
):
|
|
183
|
+
boundary_idx += 1
|
|
184
|
+
start_idx = boundary_idx + 1
|
|
185
|
+
break
|
|
186
|
+
|
|
187
|
+
return turns[start_idx:end_idx]
|
|
188
|
+
|
|
189
|
+
def is_session_boundary(self, turn: dict[str, Any]) -> bool:
|
|
190
|
+
"""
|
|
191
|
+
Check if a turn is a session boundary (/clear command).
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
turn: Transcript turn dict
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
True if turn contains a /clear command marker
|
|
198
|
+
|
|
199
|
+
Example:
|
|
200
|
+
>>> parser = ClaudeTranscriptParser()
|
|
201
|
+
>>> turn = {"type": "user", "message": {"content": "<command-name>/clear</command-name>"}}
|
|
202
|
+
>>> parser.is_session_boundary(turn)
|
|
203
|
+
True
|
|
204
|
+
"""
|
|
205
|
+
if turn.get("type") != "user":
|
|
206
|
+
return False
|
|
207
|
+
|
|
208
|
+
message = turn.get("message", {})
|
|
209
|
+
content = message.get("content", "")
|
|
210
|
+
|
|
211
|
+
# Check for /clear command marker
|
|
212
|
+
# Check for /clear command marker
|
|
213
|
+
return "<command-name>/clear</command-name>" in str(content)
|
|
214
|
+
|
|
215
|
+
def parse_line(self, line: str, index: int) -> ParsedMessage | None:
|
|
216
|
+
"""
|
|
217
|
+
Parse a single line from the transcript JSONL.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
line: Raw JSON line string
|
|
221
|
+
index: Line index (0-based)
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
ParsedMessage object or None if line should be skipped
|
|
225
|
+
"""
|
|
226
|
+
if not line.strip():
|
|
227
|
+
return None
|
|
228
|
+
|
|
229
|
+
try:
|
|
230
|
+
data = json.loads(line)
|
|
231
|
+
except json.JSONDecodeError:
|
|
232
|
+
self.logger.warning(f"Invalid JSON at line {index}")
|
|
233
|
+
return None
|
|
234
|
+
|
|
235
|
+
# Extract basic fields
|
|
236
|
+
msg_type = data.get("type", "unknown")
|
|
237
|
+
timestamp_str = data.get("timestamp") or datetime.now(UTC).isoformat()
|
|
238
|
+
try:
|
|
239
|
+
timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
|
|
240
|
+
except ValueError:
|
|
241
|
+
timestamp = datetime.now(UTC)
|
|
242
|
+
|
|
243
|
+
# Claude Code format handling
|
|
244
|
+
role = "unknown"
|
|
245
|
+
content = ""
|
|
246
|
+
content_type = "text"
|
|
247
|
+
tool_name = None
|
|
248
|
+
tool_input = None
|
|
249
|
+
tool_result = None
|
|
250
|
+
|
|
251
|
+
if msg_type == "user":
|
|
252
|
+
role = "user"
|
|
253
|
+
msg_data = data.get("message", {})
|
|
254
|
+
content = str(msg_data.get("content", ""))
|
|
255
|
+
|
|
256
|
+
elif msg_type in ("agent", "assistant"):
|
|
257
|
+
role = "assistant"
|
|
258
|
+
msg_data = data.get("message", {})
|
|
259
|
+
content_blocks = msg_data.get("content", [])
|
|
260
|
+
|
|
261
|
+
# Handle list of blocks (Claude 3 format)
|
|
262
|
+
if isinstance(content_blocks, list):
|
|
263
|
+
text_parts = []
|
|
264
|
+
for block in content_blocks:
|
|
265
|
+
if not isinstance(block, dict):
|
|
266
|
+
continue
|
|
267
|
+
|
|
268
|
+
block_type = block.get("type")
|
|
269
|
+
|
|
270
|
+
if block_type == "text":
|
|
271
|
+
text_parts.append(block.get("text", ""))
|
|
272
|
+
|
|
273
|
+
elif block_type == "tool_use":
|
|
274
|
+
content_type = "tool_use"
|
|
275
|
+
tool_name = block.get("name")
|
|
276
|
+
tool_input = block.get("input")
|
|
277
|
+
# We capture the tool use ID as content if needed,
|
|
278
|
+
# but for now we append nothing to text content
|
|
279
|
+
|
|
280
|
+
elif block_type == "tool_result":
|
|
281
|
+
content_type = "tool_result"
|
|
282
|
+
# Tool results usually come in a separate message or block
|
|
283
|
+
# For now we map strictly to transcript lines
|
|
284
|
+
|
|
285
|
+
content = " ".join(text_parts)
|
|
286
|
+
else:
|
|
287
|
+
content = str(content_blocks)
|
|
288
|
+
|
|
289
|
+
elif msg_type == "tool_result":
|
|
290
|
+
role = "tool"
|
|
291
|
+
content_type = "tool_result"
|
|
292
|
+
tool_name = data.get("tool_name")
|
|
293
|
+
tool_result = data.get("result")
|
|
294
|
+
content = str(tool_result)
|
|
295
|
+
|
|
296
|
+
else:
|
|
297
|
+
# Skip unknown message types (e.g., 'progress', 'error' internal events)
|
|
298
|
+
return None
|
|
299
|
+
|
|
300
|
+
return ParsedMessage(
|
|
301
|
+
index=index,
|
|
302
|
+
role=role,
|
|
303
|
+
content=content,
|
|
304
|
+
content_type=content_type,
|
|
305
|
+
tool_name=tool_name,
|
|
306
|
+
tool_input=tool_input,
|
|
307
|
+
tool_result=tool_result,
|
|
308
|
+
timestamp=timestamp,
|
|
309
|
+
raw_json=data,
|
|
310
|
+
usage=self._extract_usage(data),
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
def _extract_usage(self, data: dict[str, Any]) -> TokenUsage | None:
|
|
314
|
+
"""Extract token usage from message data."""
|
|
315
|
+
# Check for top-level usage field (some formats)
|
|
316
|
+
usage_data = data.get("usage")
|
|
317
|
+
|
|
318
|
+
# Check inside message object (standard Claude API format)
|
|
319
|
+
if not usage_data:
|
|
320
|
+
usage_data = data.get("message", {}).get("usage")
|
|
321
|
+
|
|
322
|
+
if not usage_data:
|
|
323
|
+
return None
|
|
324
|
+
|
|
325
|
+
# Use explicit presence checks to handle 0 correctly
|
|
326
|
+
input_tokens = (
|
|
327
|
+
usage_data["input_tokens"]
|
|
328
|
+
if "input_tokens" in usage_data
|
|
329
|
+
else usage_data.get("inputTokens", 0)
|
|
330
|
+
)
|
|
331
|
+
output_tokens = (
|
|
332
|
+
usage_data["output_tokens"]
|
|
333
|
+
if "output_tokens" in usage_data
|
|
334
|
+
else usage_data.get("outputTokens", 0)
|
|
335
|
+
)
|
|
336
|
+
cache_creation_tokens = (
|
|
337
|
+
usage_data["cache_creation_input_tokens"]
|
|
338
|
+
if "cache_creation_input_tokens" in usage_data
|
|
339
|
+
else usage_data.get("cacheCreationInputTokens", 0)
|
|
340
|
+
)
|
|
341
|
+
cache_read_tokens = (
|
|
342
|
+
usage_data["cache_read_input_tokens"]
|
|
343
|
+
if "cache_read_input_tokens" in usage_data
|
|
344
|
+
else usage_data.get("cacheReadInputTokens", 0)
|
|
345
|
+
)
|
|
346
|
+
# Cost might be calculated or provided
|
|
347
|
+
total_cost_usd = (
|
|
348
|
+
usage_data["cost"] if "cost" in usage_data else usage_data.get("total_cost")
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
return TokenUsage(
|
|
352
|
+
input_tokens=input_tokens,
|
|
353
|
+
output_tokens=output_tokens,
|
|
354
|
+
cache_creation_tokens=cache_creation_tokens,
|
|
355
|
+
cache_read_tokens=cache_read_tokens,
|
|
356
|
+
total_cost_usd=total_cost_usd,
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
def parse_lines(self, lines: list[str], start_index: int = 0) -> list[ParsedMessage]:
|
|
360
|
+
"""
|
|
361
|
+
Parse a list of transcript lines.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
lines: List of JSON line strings
|
|
365
|
+
start_index: Starting index for messages
|
|
366
|
+
|
|
367
|
+
Returns:
|
|
368
|
+
List of parsed ParsedMessage objects
|
|
369
|
+
"""
|
|
370
|
+
parsed_messages = []
|
|
371
|
+
current_index = start_index
|
|
372
|
+
|
|
373
|
+
for line in lines:
|
|
374
|
+
message = self.parse_line(line, current_index)
|
|
375
|
+
if message:
|
|
376
|
+
parsed_messages.append(message)
|
|
377
|
+
current_index += 1
|
|
378
|
+
|
|
379
|
+
return parsed_messages
|
|
380
|
+
|
|
381
|
+
# Backward-compatible alias
|
|
382
|
+
is_clear_command = is_session_boundary
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# Backward-compatible alias for existing code: TranscriptProcessor is the
# same class object as ClaudeTranscriptParser.
TranscriptProcessor = ClaudeTranscriptParser
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Codex transcript parser.
|
|
3
|
+
|
|
4
|
+
Parses JSONL transcript files generated by Codex CLI.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from gobby.sessions.transcripts.base import ParsedMessage, TokenUsage
|
|
15
|
+
|
|
16
|
+
# Module-level logger; CodexTranscriptParser uses it when no logger is passed in.
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CodexTranscriptParser:
|
|
20
|
+
"""
|
|
21
|
+
Parses JSONL transcript files from Codex.
|
|
22
|
+
|
|
23
|
+
Implements the TranscriptParser protocol for Codex's transcript format.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(self, logger_instance: logging.Logger | None = None):
    """
    Initialize CodexTranscriptParser.

    Args:
        logger_instance: Optional logger to use. When omitted (or falsy),
            the module-level logger is used instead.
    """
    if logger_instance:
        self.logger = logger_instance
    else:
        self.logger = logger
|
|
28
|
+
|
|
29
|
+
def extract_last_messages(
|
|
30
|
+
self, turns: list[dict[str, Any]], num_pairs: int = 2
|
|
31
|
+
) -> list[dict[str, Any]]:
|
|
32
|
+
messages: list[dict[str, str]] = []
|
|
33
|
+
for turn in reversed(turns):
|
|
34
|
+
role = turn.get("role")
|
|
35
|
+
content = turn.get("content")
|
|
36
|
+
|
|
37
|
+
if role in ["user", "assistant", "system"]:
|
|
38
|
+
messages.insert(0, {"role": role, "content": str(content)})
|
|
39
|
+
if len(messages) >= num_pairs * 2:
|
|
40
|
+
break
|
|
41
|
+
return messages
|
|
42
|
+
|
|
43
|
+
def extract_turns_since_clear(
|
|
44
|
+
self, turns: list[dict[str, Any]], max_turns: int = 50
|
|
45
|
+
) -> list[dict[str, Any]]:
|
|
46
|
+
# Codex likely uses a new session or clear command
|
|
47
|
+
# For now, default to tail
|
|
48
|
+
return turns[-max_turns:] if len(turns) > max_turns else turns
|
|
49
|
+
|
|
50
|
+
def is_session_boundary(self, turn: dict[str, Any]) -> bool:
|
|
51
|
+
return False
|
|
52
|
+
|
|
53
|
+
def parse_line(self, line: str, index: int) -> ParsedMessage | None:
|
|
54
|
+
if not line.strip():
|
|
55
|
+
return None
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
data = json.loads(line)
|
|
59
|
+
except json.JSONDecodeError:
|
|
60
|
+
self.logger.warning(f"Invalid JSON at line {index}")
|
|
61
|
+
return None
|
|
62
|
+
|
|
63
|
+
timestamp = datetime.now(UTC)
|
|
64
|
+
if "timestamp" in data:
|
|
65
|
+
try:
|
|
66
|
+
timestamp = datetime.fromisoformat(data["timestamp"].replace("Z", "+00:00"))
|
|
67
|
+
except ValueError:
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
# Assume simple schema for Codex: {"role": "user", "content": "..."}
|
|
71
|
+
role = data.get("role")
|
|
72
|
+
content = data.get("content", "")
|
|
73
|
+
|
|
74
|
+
if not role:
|
|
75
|
+
return None
|
|
76
|
+
|
|
77
|
+
content_type = "text"
|
|
78
|
+
tool_name = None
|
|
79
|
+
|
|
80
|
+
# Check for tool use if Codex supports it in this format
|
|
81
|
+
if role == "tool" or "tool_calls" in data:
|
|
82
|
+
pass # Placeholder for tool logic
|
|
83
|
+
|
|
84
|
+
return ParsedMessage(
|
|
85
|
+
index=index,
|
|
86
|
+
role=role,
|
|
87
|
+
content=str(content),
|
|
88
|
+
content_type=content_type,
|
|
89
|
+
tool_name=tool_name,
|
|
90
|
+
tool_input=None,
|
|
91
|
+
tool_result=None,
|
|
92
|
+
timestamp=timestamp,
|
|
93
|
+
raw_json=data,
|
|
94
|
+
usage=self._extract_usage(data),
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
def _extract_usage(self, data: dict[str, Any]) -> TokenUsage | None:
|
|
98
|
+
"""Extract token usage from Codex message data."""
|
|
99
|
+
# Codex CLI typically logs these at top level or in usage field
|
|
100
|
+
usage_data = data if "input_tokens" in data else data.get("usage")
|
|
101
|
+
|
|
102
|
+
if not usage_data:
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
# Use explicit presence checks to handle 0 correctly
|
|
106
|
+
input_tokens = (
|
|
107
|
+
usage_data["input_tokens"]
|
|
108
|
+
if "input_tokens" in usage_data
|
|
109
|
+
else usage_data.get("inputTokens", 0)
|
|
110
|
+
)
|
|
111
|
+
output_tokens = (
|
|
112
|
+
usage_data["output_tokens"]
|
|
113
|
+
if "output_tokens" in usage_data
|
|
114
|
+
else usage_data.get("outputTokens", 0)
|
|
115
|
+
)
|
|
116
|
+
cache_read_tokens = (
|
|
117
|
+
usage_data["cached_tokens"]
|
|
118
|
+
if "cached_tokens" in usage_data
|
|
119
|
+
else usage_data.get("cachedTokens", 0)
|
|
120
|
+
)
|
|
121
|
+
# Total cost often provided directly
|
|
122
|
+
total_cost_usd = (
|
|
123
|
+
usage_data["cost"] if "cost" in usage_data else usage_data.get("total_cost")
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
return TokenUsage(
|
|
127
|
+
input_tokens=input_tokens,
|
|
128
|
+
output_tokens=output_tokens,
|
|
129
|
+
cache_read_tokens=cache_read_tokens,
|
|
130
|
+
total_cost_usd=total_cost_usd,
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
def parse_lines(self, lines: list[str], start_index: int = 0) -> list[ParsedMessage]:
|
|
134
|
+
parsed_messages = []
|
|
135
|
+
current_index = start_index
|
|
136
|
+
|
|
137
|
+
for line in lines:
|
|
138
|
+
message = self.parse_line(line, current_index)
|
|
139
|
+
if message:
|
|
140
|
+
parsed_messages.append(message)
|
|
141
|
+
current_index += 1
|
|
142
|
+
|
|
143
|
+
return parsed_messages
|