okstra 0.60.1 → 0.60.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/BUILD.json +2 -2
- package/runtime/python/okstra_ctl/render_final_report.py +6 -1
- package/runtime/python/okstra_token_usage/claude.py +27 -6
- package/runtime/python/okstra_token_usage/collect.py +55 -3
- package/runtime/templates/reports/final-report.template.md +17 -17
package/package.json
CHANGED
package/runtime/BUILD.json
CHANGED
|
@@ -522,7 +522,12 @@ def render_to_file(
|
|
|
522
522
|
except json.JSONDecodeError as exc:
|
|
523
523
|
raise FinalReportRenderError(f"invalid JSON in {data_path}: {exc}") from exc
|
|
524
524
|
|
|
525
|
-
|
|
525
|
+
# 템플릿은 data.json 위치가 아니라 이 모듈(설치본은 ~/.okstra/lib/python,
|
|
526
|
+
# repo 는 scripts/) 위치 기준으로 찾는다. data_path 를 start 로 넘기면
|
|
527
|
+
# 프로젝트의 .okstra 트리만 위로 뒤지다 templates/reports 를 못 찾아
|
|
528
|
+
# 설치본에서 항상 'could not locate template' 으로 실패한다(OKSTRA_HOME 을
|
|
529
|
+
# 수동 설정해야 했던 원인). 프로젝트별 override 는 --template 으로 한다.
|
|
530
|
+
resolved_template = template_path or find_default_template()
|
|
526
531
|
rendered = render(
|
|
527
532
|
data,
|
|
528
533
|
template_path=resolved_template,
|
|
@@ -7,8 +7,21 @@ from .jsonl_io import iter_jsonl
|
|
|
7
7
|
from .paths import claude_project_dir
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def claude_session_totals(
|
|
11
|
-
|
|
10
|
+
def claude_session_totals(
|
|
11
|
+
jsonl_path: Path, *, since: str | None = None, until: str | None = None
|
|
12
|
+
) -> dict:
|
|
13
|
+
"""Return totals + agentName + assistant model + time window for a Claude session jsonl.
|
|
14
|
+
|
|
15
|
+
``since`` / ``until`` are ISO-8601 timestamp strings (UTC ``...Z``). When
|
|
16
|
+
given, only records whose ``timestamp`` falls within ``[since, until]`` are
|
|
17
|
+
counted toward tokens / tool_uses / duration. This is the run-scoping seam:
|
|
18
|
+
an **in-session** lead writes its run into the user's whole-session jsonl,
|
|
19
|
+
so without a window the totals swallow every unrelated turn (observed:
|
|
20
|
+
lead billed 1.7억 tokens / $416 / 3h for a single requirements-discovery
|
|
21
|
+
run). ``agentName`` / ``model`` are session metadata and are read from the
|
|
22
|
+
whole file regardless of the window. Records without a timestamp are kept
|
|
23
|
+
(conservative — never silently drop usage when we can't place it in time).
|
|
24
|
+
"""
|
|
12
25
|
input_t = output_t = cache_create_t = cache_read_t = 0
|
|
13
26
|
cache_create_5m_t = cache_create_1h_t = 0
|
|
14
27
|
tool_uses = 0
|
|
@@ -20,6 +33,12 @@ def claude_session_totals(jsonl_path: Path) -> dict:
|
|
|
20
33
|
if agent_name is None and rec.get("agentName"):
|
|
21
34
|
agent_name = rec["agentName"]
|
|
22
35
|
msg = rec.get("message") or {}
|
|
36
|
+
ts = rec.get("timestamp") or (msg.get("timestamp") if isinstance(msg, dict) else None)
|
|
37
|
+
in_window = not (ts and ((since and ts < since) or (until and ts > until)))
|
|
38
|
+
if rec.get("type") == "assistant" and model is None and msg.get("model"):
|
|
39
|
+
model = msg["model"]
|
|
40
|
+
if not in_window:
|
|
41
|
+
continue
|
|
23
42
|
usage = msg.get("usage")
|
|
24
43
|
if usage:
|
|
25
44
|
input_t += usage.get("input_tokens", 0) or 0
|
|
@@ -39,12 +58,9 @@ def claude_session_totals(jsonl_path: Path) -> dict:
|
|
|
39
58
|
else:
|
|
40
59
|
cache_create_5m_t += cc_total
|
|
41
60
|
if rec.get("type") == "assistant":
|
|
42
|
-
if model is None and msg.get("model"):
|
|
43
|
-
model = msg["model"]
|
|
44
61
|
for block in (msg.get("content") or []):
|
|
45
62
|
if isinstance(block, dict) and block.get("type") == "tool_use":
|
|
46
63
|
tool_uses += 1
|
|
47
|
-
ts = rec.get("timestamp") or (msg.get("timestamp") if isinstance(msg, dict) else None)
|
|
48
64
|
if ts:
|
|
49
65
|
if first_ts is None or ts < first_ts:
|
|
50
66
|
first_ts = ts
|
|
@@ -58,7 +74,12 @@ def claude_session_totals(jsonl_path: Path) -> dict:
|
|
|
58
74
|
duration_ms = max(0, int((b - a).total_seconds() * 1000))
|
|
59
75
|
except ValueError:
|
|
60
76
|
duration_ms = 0
|
|
61
|
-
total
|
|
77
|
+
# '처리 토큰' total 에서 cache_read 는 제외한다. claude 는 매 턴 직전까지의
|
|
78
|
+
# 컨텍스트 전체를 캐시에서 재읽기(cache_read)하므로, 단순 합산하면 같은 토큰을
|
|
79
|
+
# 턴 수만큼 중복 카운트해 처리량이 비현실적으로 부풀려진다(예: in-session
|
|
80
|
+
# lead 가 1.7억으로 표시됨). cache_read 는 cacheReadTokens 로 따로 노출되고,
|
|
81
|
+
# 비용은 pricing 이 0.1x 단가로 별도 반영하므로 total 에서 빼도 비용은 불변.
|
|
82
|
+
total = input_t + output_t + cache_create_t
|
|
62
83
|
return {
|
|
63
84
|
"totalTokens": total,
|
|
64
85
|
"inputTokens": input_t,
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
4
|
import json
|
|
5
|
-
from datetime import datetime
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from okstra_project.dirs import OKSTRA_RELATIVE
|
|
8
8
|
|
|
@@ -86,9 +86,61 @@ def _aggregate_totals(items: list[dict]) -> dict:
|
|
|
86
86
|
return aggregate
|
|
87
87
|
|
|
88
88
|
|
|
89
|
+
def _run_window_suffix(team_state_path: Path) -> str | None:
|
|
90
|
+
"""``team-state-<task-type>-<seq>.json`` → ``<task-type>-<seq>``.
|
|
91
|
+
|
|
92
|
+
이 접미사로 *같은 run* 의 run-manifest / status 를 정확히 짚는다. task 디렉토리
|
|
93
|
+
한 곳에 여러 run(재시도·이전 phase·레거시 타임스탬프)의 산출물이 섞여 있어,
|
|
94
|
+
glob 으로 아무거나 집으면 엉뚱한 run 의 시각을 쓰게 된다(관측: 가장 오래된
|
|
95
|
+
레거시 manifest 의 createdAt 을 집어 윈도우가 한 달로 벌어짐)."""
|
|
96
|
+
name = team_state_path.name
|
|
97
|
+
if not (name.startswith("team-state-") and name.endswith(".json")):
|
|
98
|
+
return None
|
|
99
|
+
return name[len("team-state-"):-len(".json")]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _run_manifest_created_at(run_dir: Path, suffix: str) -> str | None:
|
|
103
|
+
p = run_dir / "manifests" / f"run-manifest-{suffix}.json"
|
|
104
|
+
try:
|
|
105
|
+
return json.loads(p.read_text()).get("createdAt")
|
|
106
|
+
except (OSError, json.JSONDecodeError):
|
|
107
|
+
return None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _run_end_estimate(run_dir: Path, suffix: str) -> str | None:
|
|
111
|
+
"""run 종료 근사 — 같은 run 의 status 산출물 mtime(reconcile 후 고정, Phase 7
|
|
112
|
+
재렌더로도 바뀌지 않음). 완료 전(status 부재)이면 None."""
|
|
113
|
+
p = run_dir / "status" / f"final-{suffix}.status"
|
|
114
|
+
try:
|
|
115
|
+
mtime = p.stat().st_mtime
|
|
116
|
+
except OSError:
|
|
117
|
+
return None
|
|
118
|
+
return datetime.fromtimestamp(mtime, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _resolve_run_window(team_state_path: Path, state: dict) -> tuple[str | None, str | None]:
    """Resolve the ``(start, end)`` ISO timestamp window of this run.

    An in-session lead writes its run into the user's *whole-session* jsonl,
    so summing without a window mixes every unrelated turn (other tasks,
    conversation) into the lead's tokens and duration (observed: 170M tokens
    / $416 / 3h attributed to the lead for a single requirements-discovery
    run). Token aggregation is scoped to this window so only this run counts.

    Start is the ``createdAt`` of this run's run-manifest. End is resolved in
    order: ``state["runEndedAt"]``, then the mtime of this run's status
    artifact, then the current time (run still in progress).

    Returns ``(None, None)`` when no run suffix can be derived from
    ``team_state_path`` — the safe fallback to the previous behaviour of
    counting everything without a window.
    """
    run_suffix = _run_window_suffix(team_state_path)
    if not run_suffix:
        return None, None

    # The team-state file sits one directory below the run directory.
    run_dir = team_state_path.parent.parent
    window_start = _run_manifest_created_at(run_dir, run_suffix)

    # Try each end-of-run source in turn, stopping at the first truthy value.
    window_end = state.get("runEndedAt")
    if not window_end:
        window_end = _run_end_estimate(run_dir, run_suffix)
    if not window_end:
        window_end = utc_now()
    return window_start, window_end
|
|
138
|
+
|
|
139
|
+
|
|
89
140
|
def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
|
|
90
141
|
state = json.loads(team_state_path.read_text())
|
|
91
142
|
cwd = project_root or _infer_project_root(team_state_path, state)
|
|
143
|
+
run_since, run_until = _resolve_run_window(team_state_path, state)
|
|
92
144
|
task_key = state.get("taskKey", "")
|
|
93
145
|
# Prefer the team name actually persisted in team-state (set during Phase 3
|
|
94
146
|
# when TeamCreate succeeded); only fall back to the `okstra-<task-id>`
|
|
@@ -130,7 +182,7 @@ def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
|
|
|
130
182
|
if sid == lead_sid:
|
|
131
183
|
lead_path = path
|
|
132
184
|
continue
|
|
133
|
-
totals = claude_session_totals(path)
|
|
185
|
+
totals = claude_session_totals(path, since=run_since, until=run_until)
|
|
134
186
|
agent = totals.get("agentName")
|
|
135
187
|
if agent:
|
|
136
188
|
by_agent.setdefault(agent, []).append((sid, path, totals))
|
|
@@ -139,7 +191,7 @@ def collect(team_state_path: Path, project_root: Path | None = None) -> dict:
|
|
|
139
191
|
|
|
140
192
|
# Lead.
|
|
141
193
|
if lead_path is not None:
|
|
142
|
-
totals = claude_session_totals(lead_path)
|
|
194
|
+
totals = claude_session_totals(lead_path, since=run_since, until=run_until)
|
|
143
195
|
state["leadUsage"] = usage_block(totals, source="claude-jsonl")
|
|
144
196
|
state["leadUsage"]["sessionId"] = lead_sid
|
|
145
197
|
else:
|
|
@@ -32,6 +32,23 @@ implementation-option: {{ frontmatter.implementationOption | yaml_scalar }}
|
|
|
32
32
|
{% for row in followUpTasks if row.origin == 'phase-continuation' %}{% if loop.first %}- Next Step: run `/okstra-run`, select task `{{ header.taskKey }}`, choose task-type `{{ row.suggestedTaskType }}`
|
|
33
33
|
{% endif %}{% endfor %}- Okstra Version: `{{ header.okstraVersion }}`
|
|
34
34
|
|
|
35
|
+
{% if clarificationCarryIn and clarificationCarryIn.sourceFile %}
|
|
36
|
+
## 0. Clarification Response Carried In From Previous Run
|
|
37
|
+
|
|
38
|
+
- Source file: `{{ clarificationCarryIn.sourceFile }}`
|
|
39
|
+
- {{ t("sectionIntro.clarificationCarryIn") }}
|
|
40
|
+
|
|
41
|
+
{% endif %}
|
|
42
|
+
## Summary of the Problem or Verification Target
|
|
43
|
+
|
|
44
|
+
{{ t("sectionIntro.ticketCoverage") }}
|
|
45
|
+
|
|
46
|
+
| {{ t("columns.recordMeta") }} | {{ t("columns.summary") }} |
|
|
47
|
+
|--------|------------|
|
|
48
|
+
{% for row in summary -%}
|
|
49
|
+
| **{{ row.id }}**<br>Ticket: `{{ row.ticketId }}`<br>{{ t("columns.source") }}: {{ row.source }} | {{ row.summary }} |
|
|
50
|
+
{% endfor %}
|
|
51
|
+
|
|
35
52
|
## Verdict Card
|
|
36
53
|
|
|
37
54
|
{{ t("sectionIntro.verdictCard") }}
|
|
@@ -599,23 +616,6 @@ Acceptance: {{ stage.acceptance }}
|
|
|
599
616
|
| {{ t("verdictCard.rationaleLabel") }} | {{ finalVerdict.rationaleRowIds | join(', ') }} |
|
|
600
617
|
| {{ t("verdictCard.nextStepLabel") }} | {{ finalVerdict.nextStep }} |
|
|
601
618
|
|
|
602
|
-
{% if clarificationCarryIn and clarificationCarryIn.sourceFile %}
|
|
603
|
-
## 0. Clarification Response Carried In From Previous Run
|
|
604
|
-
|
|
605
|
-
- Source file: `{{ clarificationCarryIn.sourceFile }}`
|
|
606
|
-
- {{ t("sectionIntro.clarificationCarryIn") }}
|
|
607
|
-
|
|
608
|
-
{% endif %}
|
|
609
|
-
## Summary of the Problem or Verification Target
|
|
610
|
-
|
|
611
|
-
{{ t("sectionIntro.ticketCoverage") }}
|
|
612
|
-
|
|
613
|
-
| {{ t("columns.recordMeta") }} | {{ t("columns.summary") }} |
|
|
614
|
-
|--------|------------|
|
|
615
|
-
{% for row in summary -%}
|
|
616
|
-
| **{{ row.id }}**<br>Ticket: `{{ row.ticketId }}`<br>{{ t("columns.source") }}: {{ row.source }} | {{ row.summary }} |
|
|
617
|
-
{% endfor %}
|
|
618
|
-
|
|
619
619
|
{% if ticketCoverage.omit %}
|
|
620
620
|
{# Ticket Coverage omitted entirely — release-handoff / final-verification #}
|
|
621
621
|
{%- else %}
|