claude-code-statusline 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_code_statusline/__init__.py +6 -0
- claude_code_statusline/config/__init__.py +0 -0
- claude_code_statusline/config/defaults.py +29 -0
- claude_code_statusline/config/loader.py +123 -0
- claude_code_statusline/config/schema.py +25 -0
- claude_code_statusline/parsers/__init__.py +0 -0
- claude_code_statusline/parsers/jsonl.py +330 -0
- claude_code_statusline/parsers/tokens.py +159 -0
- claude_code_statusline/renderer.py +159 -0
- claude_code_statusline/statusline.py +126 -0
- claude_code_statusline/types.py +49 -0
- claude_code_statusline/utils/__init__.py +0 -0
- claude_code_statusline/utils/colors.py +112 -0
- claude_code_statusline/utils/debug.py +48 -0
- claude_code_statusline/utils/formatting.py +57 -0
- claude_code_statusline/utils/git.py +120 -0
- claude_code_statusline/utils/models.py +194 -0
- claude_code_statusline/widgets/__init__.py +0 -0
- claude_code_statusline/widgets/base.py +36 -0
- claude_code_statusline/widgets/builtin/__init__.py +29 -0
- claude_code_statusline/widgets/builtin/context.py +74 -0
- claude_code_statusline/widgets/builtin/cost.py +111 -0
- claude_code_statusline/widgets/builtin/directory.py +33 -0
- claude_code_statusline/widgets/builtin/git.py +104 -0
- claude_code_statusline/widgets/builtin/model.py +31 -0
- claude_code_statusline/widgets/builtin/separator.py +26 -0
- claude_code_statusline/widgets/builtin/session.py +54 -0
- claude_code_statusline/widgets/registry.py +65 -0
- claude_code_statusline-0.7.1.dist-info/METADATA +439 -0
- claude_code_statusline-0.7.1.dist-info/RECORD +34 -0
- claude_code_statusline-0.7.1.dist-info/WHEEL +5 -0
- claude_code_statusline-0.7.1.dist-info/entry_points.txt +2 -0
- claude_code_statusline-0.7.1.dist-info/licenses/LICENSE +21 -0
- claude_code_statusline-0.7.1.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Default configuration for Claude Code Status Line."""
|
|
2
|
+
|
|
3
|
+
from .schema import StatusLineConfig, WidgetConfigModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_default_config() -> StatusLineConfig:
    """Build the out-of-the-box status line configuration.

    The result is a single line of widgets with a ``separator`` widget
    placed between every pair of neighbours.
    """
    # Keyword arguments for each non-separator widget, in display order.
    # Widgets without a "color" entry rely on the model's default.
    widget_specs = [
        {"type": "model", "color": "cyan"},
        {"type": "directory", "color": "blue"},
        {"type": "git-branch", "color": "magenta"},
        {"type": "context-percentage"},
        {"type": "cost"},
        {"type": "lines-changed"},
        {"type": "session-id"},
        {"type": "session-clock"},
    ]

    first_line = []
    for position, spec in enumerate(widget_specs):
        if position > 0:
            first_line.append(WidgetConfigModel(type="separator"))
        first_line.append(WidgetConfigModel(**spec))

    return StatusLineConfig(version=1, lines=[first_line])
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Configuration file loading and saving."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from pydantic import ValidationError
|
|
11
|
+
|
|
12
|
+
from .defaults import get_default_config
|
|
13
|
+
from .schema import StatusLineConfig
|
|
14
|
+
|
|
15
|
+
# Module-level cache for the parsed configuration.
# _cached_config holds the last configuration object that was loaded.
_cached_config: Optional[StatusLineConfig] = None
# _cached_mtime holds the config file's modification time recorded with it.
_cached_mtime: float = 0.0
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_config_dir() -> Path:
    """Get the configuration directory path.

    Returns:
        ``$XDG_CONFIG_HOME/claude-statusline`` when ``XDG_CONFIG_HOME`` is set
        to a non-empty value, otherwise ``~/.config/claude-statusline``.
    """
    # An empty XDG_CONFIG_HOME must behave like an unset one; otherwise
    # Path("") would make the config directory relative to the current
    # working directory.
    config_home = os.getenv("XDG_CONFIG_HOME") or os.path.expanduser("~/.config")
    return Path(config_home) / "claude-statusline"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_config_path() -> Path:
    """Return the path of ``config.yaml`` inside the configuration directory."""
    config_dir = get_config_dir()
    return config_dir / "config.yaml"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_missing_widgets(config: StatusLineConfig) -> list[str]:
    """List default widget types that the given config does not use.

    Separator widgets are ignored on both sides. The result is sorted.
    """

    def _widget_types(cfg: StatusLineConfig) -> set[str]:
        # Collect every non-separator widget type across all lines.
        found = set()
        for row in cfg.lines:
            for widget in row:
                if widget.type != "separator":
                    found.add(widget.type)
        return found

    expected = _widget_types(get_default_config())
    present = _widget_types(config)
    return sorted(expected.difference(present))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def load_config() -> StatusLineConfig:
    """Load configuration from the YAML file with mtime-based caching.

    If the config file doesn't exist, it is created with defaults; when it
    cannot be written, the defaults are still returned and a warning is
    printed to stderr.

    If the config cannot be read, parsed or validated (including a YAML
    document whose top level is not a mapping, or a file that is not valid
    UTF-8), a warning is printed to stderr and the defaults are returned.

    Returns:
        The loaded configuration, or the default configuration on failure.
    """
    global _cached_config, _cached_mtime

    import sys

    config_path = get_config_path()

    # Serve the cached object while the file's mtime is unchanged.
    if _cached_config is not None:
        try:
            if config_path.stat().st_mtime == _cached_mtime:
                return _cached_config
        except OSError:
            # The file cannot be stat'ed any more; fall through and reload.
            pass

    if not config_path.exists():
        config = get_default_config()
        try:
            save_config(config)
        except OSError as e:
            # Persisting the defaults is best-effort: an unwritable config
            # directory must not prevent the caller from getting a config.
            print(
                f"Warning: Could not write default config to {config_path}: {e}",
                file=sys.stderr,
            )
        _cached_config = config
        try:
            _cached_mtime = config_path.stat().st_mtime
        except OSError:
            _cached_mtime = 0.0
        return config

    try:
        with open(config_path, encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        # An empty file parses to None; treat it as an empty mapping.
        if config_data is None:
            config_data = {}

        # A list or scalar at the top level cannot be unpacked as keyword
        # arguments; report it through the normal fallback path.
        if not isinstance(config_data, dict):
            raise TypeError(
                "top-level YAML value must be a mapping, "
                f"got {type(config_data).__name__}"
            )

        config = StatusLineConfig(**config_data)
    except (
        yaml.YAMLError,
        ValidationError,
        OSError,
        UnicodeDecodeError,
        TypeError,
    ) as e:
        print(
            f"Warning: Failed to load config from {config_path}: {e}",
            file=sys.stderr,
        )
        print("Using default configuration.", file=sys.stderr)
        return get_default_config()

    missing = get_missing_widgets(config)
    if missing:
        print(
            f"Warning: Config is missing widgets from defaults: {', '.join(missing)}. "
            f"Delete {config_path} to regenerate with new defaults.",
            file=sys.stderr,
        )

    _cached_config = config
    try:
        _cached_mtime = config_path.stat().st_mtime
    except OSError:
        _cached_mtime = 0.0

    return config
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def save_config(config: StatusLineConfig) -> None:
    """Write the configuration to the YAML config file.

    The parent directory is created first when it does not exist.
    """
    target = get_config_path()
    target.parent.mkdir(parents=True, exist_ok=True)

    payload = config.model_dump(mode="python")

    with target.open("w", encoding="utf-8") as handle:
        # sort_keys=False keeps the keys in the order model_dump produced.
        yaml.dump(payload, handle, default_flow_style=False, sort_keys=False)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Configuration schema using Pydantic for validation."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class WidgetConfigModel(BaseModel):
    """Settings for one widget instance on the status line."""

    # Widget kind.
    type: str
    # Instance identifier; a fresh UUID4 string when none is supplied.
    id: str = Field(default_factory=lambda: f"{uuid4()}")
    # Optional color name; None when unset.
    color: Optional[str] = Field(default=None)
    # Whether the widget is marked bold.
    bold: bool = Field(default=False)
    # Free-form string-to-string options for the widget.
    metadata: dict[str, str] = Field(default_factory=dict)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StatusLineConfig(BaseModel):
    """Top-level status line configuration."""

    # Reject keys that are not declared on this model.
    model_config = {"extra": "forbid"}

    # Version number of the configuration.
    version: int = Field(default=1)
    # Status line rows; each row is an ordered list of widgets.
    lines: list[list[WidgetConfigModel]] = Field(default_factory=list)
|
|
File without changes
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""JSONL transcript parsing utilities."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
from ..utils.debug import debug_log
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ParsedTranscript:
    """Outcome of parsing one transcript file."""

    # Session identifier taken from the transcript ("" when none was found).
    session_id: str = ""
    # Characters counted as contributing to the context.
    context_chars: int = 0
    # Size recorded for the whole transcript file.
    total_file_chars: int = 0
    # Number of compact boundary markers encountered.
    boundaries_found: int = 0
    # True when the file contained JSONL content.
    is_jsonl: bool = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class ExclusionRules:
    """Rules describing which transcript lines are left out of token counting.

    These fields are stored in Claude Code's JSONL files for debugging and
    UI purposes but are not included in the actual context sent to Claude.
    """

    # Keys whose mere presence on a line excludes that line.
    metadata_fields: list[str] = field(
        default_factory=lambda: ["snapshot", "leafUuid"]
    )
    # Values of the line's "type" key that exclude the line.
    excluded_types: list[str] = field(
        default_factory=lambda: ["summary", "system"]
    )
    # Keys that exclude the line when their value is truthy (none by default).
    excluded_flags: list[str] = field(default_factory=list)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Shared rule set built from the ExclusionRules defaults.
EXCLUSION_RULES = ExclusionRules()
# Fallback estimation for transcripts without message.usage.
CHARS_PER_TOKEN = 3.31
# System overhead (in tokens) for the character-based fallback.
DEFAULT_SYSTEM_OVERHEAD_TOKENS = 21_400
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_system_overhead_tokens() -> int:
    """Return the system overhead in tokens.

    Reads ``CLAUDE_CODE_SYSTEM_OVERHEAD`` from the environment; when the
    variable is unset or not a valid integer, the default is returned.
    """
    raw_value = os.environ.get("CLAUDE_CODE_SYSTEM_OVERHEAD")
    candidate = DEFAULT_SYSTEM_OVERHEAD_TOKENS if raw_value is None else raw_value
    try:
        overhead = int(candidate)
    except (ValueError, TypeError):
        overhead = DEFAULT_SYSTEM_OVERHEAD_TOKENS
    return overhead
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def calculate_total_tokens(transcript: ParsedTranscript) -> int:
    """Estimate the total token count for a parsed transcript.

    The estimate is character-based and is kept for backwards
    compatibility. For real token counts, use
    parsers.tokens.get_token_metrics() instead.

    Args:
        transcript: Parsed transcript with character counts

    Returns:
        Estimated conversation tokens plus the system overhead tokens
    """
    estimated_conversation = int(transcript.context_chars // CHARS_PER_TOKEN)
    return estimated_conversation + get_system_overhead_tokens()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def safe_get_file_size(file_path: str) -> int:
    """Return the size of ``file_path`` in bytes, or 0 when it cannot be read."""
    try:
        size = os.stat(file_path).st_size
    except OSError:
        size = 0
    return size
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def is_real_compact_boundary(data: dict) -> bool:
    """Tell whether a transcript line is a real compact boundary marker.

    A line qualifies when its type is "system", its subtype is
    "compact_boundary", and its "compactMetadata" value is a dict that
    contains a "trigger" key.

    Args:
        data: Parsed JSON line from transcript

    Returns:
        True if this line is a valid compact boundary marker
    """
    if data.get("type") != "system":
        return False
    if data.get("subtype") != "compact_boundary":
        return False

    compact_metadata = data.get("compactMetadata")
    return isinstance(compact_metadata, dict) and "trigger" in compact_metadata
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def should_exclude_line(
    data: dict, rules: ExclusionRules = EXCLUSION_RULES
) -> tuple[bool, str]:
    """Decide whether a transcript line is left out of token counting.

    Checks run in this order: metadata fields present on the line, the
    line's type, then truthy flags. The first match wins.

    Args:
        data: Parsed JSON line from transcript
        rules: Exclusion rules configuration (defaults to global rules)

    Returns:
        Tuple of (should_exclude, reason) where reason is empty if not excluded
    """
    present_field = next(
        (name for name in rules.metadata_fields if name in data), None
    )
    if present_field is not None:
        return True, f"has {present_field}"

    line_type = data.get("type")
    if line_type in rules.excluded_types:
        return True, f"type={line_type}"

    raised_flag = next(
        (name for name in rules.excluded_flags if data.get(name)), None
    )
    if raised_flag is not None:
        return True, f"{raised_flag}=true"

    return False, ""
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def extract_message_content_chars(
    data: dict, session_id: str = "", detailed_debug: bool = False
) -> int:
    """Count the characters of a line's message that contribute to context.

    Only the message's role and content fields are counted; other message
    fields (model, type, stop_reason, etc.) are ignored. Items of type
    "image" inside a list-valued content are left out of the count.

    Args:
        data: Parsed JSON line from transcript
        session_id: Session identifier for debug logging
        detailed_debug: Whether to log detailed debug information

    Returns:
        Length of the JSON serialization of the kept role/content fields,
        or 0 when the line has no message
    """
    message = data.get("message", {})
    if not message:
        return 0

    counted_fields = {}
    per_field_chars = {}

    if "role" in message:
        role_value = message["role"]
        per_field_chars["role"] = len(json.dumps(role_value))
        counted_fields["role"] = role_value

    if "content" in message:
        raw_content = message["content"]
        kept_content = raw_content
        skipped_images = 0

        if isinstance(raw_content, list):
            kept_content = []
            for part in raw_content:
                is_image = isinstance(part, dict) and part.get("type") == "image"
                if not is_image:
                    kept_content.append(part)
                    continue
                skipped_images += 1
                debug_log(
                    "Skipping base64-encoded image in message (not counted as text tokens)",
                    session_id,
                )

        per_field_chars["content"] = len(json.dumps(kept_content))
        counted_fields["content"] = kept_content

        if skipped_images > 0:
            debug_log(
                f"Excluded {skipped_images} base64-encoded image(s) from character count",
                session_id,
            )

    char_total = len(json.dumps(counted_fields))

    if detailed_debug and per_field_chars:
        role = message.get("role", "unknown")
        debug_log(
            f"Message field breakdown ({role}): {per_field_chars}", session_id
        )

    return char_total
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def parse_transcript(file_path: str) -> ParsedTranscript:
    """Parse transcript file in a single pass.

    Only lines that decode to a JSON object are treated as transcript
    entries. Blank lines, malformed JSON and JSON values of any other type
    (numbers, strings, lists, ...) are skipped. Bytes that are not valid
    UTF-8 are replaced rather than aborting the parse.

    When the file has at least one compact boundary, only the characters
    after the latest boundary are reported as context.

    Args:
        file_path: Path to the JSONL transcript file

    Returns:
        ParsedTranscript with character counts and session metadata. A
        default ParsedTranscript is returned when the path is missing or not
        a file; when no line is a JSON object, the file size is used as
        the context size and is_jsonl is False.
    """
    if not file_path or not os.path.isfile(file_path):
        debug_log("No valid transcript file", transcript_path=file_path)
        return ParsedTranscript()

    total_file_chars = safe_get_file_size(file_path)
    debug_log(
        f"Parsing transcript: {file_path} ({total_file_chars} chars)",
        transcript_path=file_path,
    )

    try:
        # errors="replace": one undecodable byte must not raise
        # UnicodeDecodeError and discard the whole transcript.
        with open(file_path, encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except OSError as e:
        debug_log(f"Failed to read file: {e}", transcript_path=file_path)
        return ParsedTranscript(total_file_chars=total_file_chars)

    # Single-pass parsing: track all state simultaneously
    session_id = ""
    boundary_count = 0
    is_jsonl = False
    detailed_debug = os.getenv("CLAUDE_CODE_STATUSLINE_DEBUG")

    # Characters counted since the latest compact boundary, or since the
    # start of the file when no boundary has been seen. Earlier characters
    # never contribute to the result, so they are not tracked.
    chars_after_latest_boundary = 0

    message_type_counts: dict[str, int] = {}
    message_type_chars: dict[str, int] = {}
    excluded_line_counts: dict[str, int] = {}
    total_excluded_lines = 0

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue

        try:
            data = json.loads(stripped)
        except json.JSONDecodeError:
            continue

        # json.loads also accepts scalars and arrays; those have no .get()
        # and are not transcript entries.
        if not isinstance(data, dict):
            continue

        is_jsonl = True  # At least one valid JSON object line

        if not session_id and data.get("sessionId"):
            session_id = data["sessionId"]

        if is_real_compact_boundary(data):
            boundary_count += 1
            # Everything before the boundary no longer counts as context.
            chars_after_latest_boundary = 0
            continue

        should_exclude, exclusion_reason = should_exclude_line(data)
        if should_exclude:
            total_excluded_lines += 1
            excluded_line_counts[exclusion_reason] = (
                excluded_line_counts.get(exclusion_reason, 0) + 1
            )
            continue

        chars = extract_message_content_chars(
            data, session_id, bool(detailed_debug)
        )
        chars_after_latest_boundary += chars

        if detailed_debug and chars > 0:
            msg_type = data.get("type", "unknown")
            message = data.get("message")
            # Guard against an explicit null / non-object message.
            role = message.get("role", "") if isinstance(message, dict) else ""
            type_key = f"{msg_type}:{role}" if role else msg_type

            message_type_counts[type_key] = message_type_counts.get(type_key, 0) + 1
            message_type_chars[type_key] = (
                message_type_chars.get(type_key, 0) + chars
            )

    if not is_jsonl:
        debug_log("File is not JSONL format, using fallback", transcript_path=file_path)
        return ParsedTranscript(
            session_id="",
            total_file_chars=total_file_chars,
            context_chars=total_file_chars,
            is_jsonl=False,
        )

    message_content_chars = chars_after_latest_boundary

    debug_log(f"Session: {session_id}, boundaries: {boundary_count}", session_id)
    debug_log(
        f"Message content chars: {message_content_chars}/{total_file_chars}", session_id
    )

    if detailed_debug:
        if total_excluded_lines > 0:
            debug_log(
                f"=== Excluded {total_excluded_lines} lines from token counting ===",
                session_id,
            )
            for reason, count in sorted(
                excluded_line_counts.items(), key=lambda x: x[1], reverse=True
            ):
                debug_log(f"  {reason}: {count} lines", session_id)
            debug_log("=" * 40, session_id)

        if message_type_chars:
            debug_log("=== Token Breakdown by Message Type ===", session_id)
            sorted_types = sorted(
                message_type_chars.items(), key=lambda x: x[1], reverse=True
            )
            total_tracked = sum(message_type_chars.values())

            for msg_type, chars in sorted_types:
                count = message_type_counts.get(msg_type, 0)
                tokens = int(chars // CHARS_PER_TOKEN)
                percentage = (chars * 100) // total_tracked if total_tracked > 0 else 0
                debug_log(
                    f"  {msg_type}: {count} messages, {chars} chars, {tokens} tokens ({percentage}%)",
                    session_id,
                )
            debug_log("=" * 40, session_id)

    return ParsedTranscript(
        session_id=session_id,
        context_chars=message_content_chars,
        total_file_chars=total_file_chars,
        boundaries_found=boundary_count,
        is_jsonl=True,
    )
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Real token extraction from JSONL message.usage fields."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from ..types import SessionMetrics, TokenMetrics
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _parse_timestamp(value: object) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp string; return None if missing or malformed."""
    if not isinstance(value, str) or not value:
        return None
    try:
        # datetime.fromisoformat() only accepts a trailing "Z" from
        # Python 3.11 on, so it is rewritten as an explicit UTC offset.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None


def _usage_count(usage: dict, key: str) -> int:
    """Return usage[key] as an int, treating missing, null or non-int values as 0."""
    value = usage.get(key)
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return 0


def parse_transcript(
    transcript_path: str,
) -> tuple[TokenMetrics, Optional[SessionMetrics]]:
    """Extract token metrics and session duration in a single file read.

    Combines the logic of get_token_metrics() and get_session_duration()
    to avoid reading the transcript file twice.

    Token totals are summed over every line that carries a ``message.usage``
    object. ``context_length`` is taken from the most recent entry (by
    timestamp) that is not a sidechain entry, not an API error message, and
    has a non-null ``stop_reason``. Session duration spans the first and the
    last parseable timestamp in file order.

    Lines that are blank, not valid JSON, or not JSON objects are skipped,
    as are entries whose ``message`` or ``usage`` is not an object. Usage
    counters that are missing or null count as 0. Bytes that are not valid
    UTF-8 are replaced instead of aborting the read.

    Args:
        transcript_path: Path to the JSONL transcript file

    Returns:
        Tuple of (TokenMetrics, SessionMetrics). SessionMetrics is None when
        no timestamp could be parsed. When the path is missing, not a file,
        or unreadable, the result is (TokenMetrics(transcript_exists=False),
        None).
    """
    if not transcript_path or not os.path.isfile(transcript_path):
        return TokenMetrics(transcript_exists=False), None

    input_tokens = 0
    output_tokens = 0
    cached_tokens = 0
    context_length = 0

    most_recent_time: Optional[datetime] = None
    most_recent_usage: Optional[dict] = None

    first_timestamp: Optional[datetime] = None
    last_timestamp: Optional[datetime] = None

    try:
        # errors="replace": iterating the file must not raise
        # UnicodeDecodeError, which `except OSError` would not catch.
        with open(transcript_path, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                try:
                    data = json.loads(line)
                except ValueError:  # includes json.JSONDecodeError
                    continue

                # Scalars and arrays are valid JSON but not transcript entries.
                if not isinstance(data, dict):
                    continue

                # Timestamp is parsed once and reused for both the session
                # duration and the "most recent message" selection.
                timestamp = _parse_timestamp(data.get("timestamp"))
                if timestamp is not None:
                    if first_timestamp is None:
                        first_timestamp = timestamp
                    last_timestamp = timestamp

                # Extract token usage
                message = data.get("message")
                if not isinstance(message, dict):
                    continue
                usage = message.get("usage")
                if not isinstance(usage, dict) or not usage:
                    continue

                # Sum totals across all messages
                input_tokens += _usage_count(usage, "input_tokens")
                output_tokens += _usage_count(usage, "output_tokens")
                cached_tokens += _usage_count(usage, "cache_read_input_tokens")
                cached_tokens += _usage_count(usage, "cache_creation_input_tokens")

                # Track most recent main chain entry for context_length.
                # Only completed messages qualify (stop_reason not null).
                if (
                    data.get("isSidechain", False)
                    or data.get("isApiErrorMessage", False)
                    or timestamp is None
                    or message.get("stop_reason") is None
                ):
                    continue

                if most_recent_time is None or timestamp > most_recent_time:
                    most_recent_time = timestamp
                    most_recent_usage = usage

    except OSError:
        return TokenMetrics(transcript_exists=False), None

    # Calculate context_length from most recent main chain message
    if most_recent_usage:
        context_length = (
            _usage_count(most_recent_usage, "input_tokens")
            + _usage_count(most_recent_usage, "cache_read_input_tokens")
            + _usage_count(most_recent_usage, "cache_creation_input_tokens")
        )

    total_tokens = input_tokens + output_tokens + cached_tokens

    token_metrics = TokenMetrics(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        cached_tokens=cached_tokens,
        total_tokens=total_tokens,
        context_length=context_length,
        transcript_exists=True,
    )

    # Build session metrics
    session_metrics = None
    if first_timestamp and last_timestamp:
        duration_seconds = int((last_timestamp - first_timestamp).total_seconds())
        session_metrics = SessionMetrics(
            start_time=first_timestamp,
            last_activity=last_timestamp,
            duration_seconds=duration_seconds,
        )

    return token_metrics, session_metrics
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def format_duration(duration_seconds: int) -> str:
    """Render a duration given in seconds as a short human-readable string.

    Args:
        duration_seconds: Duration in seconds

    Returns:
        "<1m" for anything under a minute, otherwise hours and/or minutes
        such as "45m", "2hr" or "2hr 15m" (seconds are dropped)
    """
    if duration_seconds < 60:
        return "<1m"

    hours, leftover_seconds = divmod(duration_seconds, 3600)
    minutes = leftover_seconds // 60

    parts = []
    if hours:
        parts.append(f"{hours}hr")
    # Minutes are shown unless they are zero and hours are already shown.
    if minutes or not hours:
        parts.append(f"{minutes}m")
    return " ".join(parts)
|