copex-0.8.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- copex/__init__.py +69 -0
- copex/checkpoint.py +445 -0
- copex/cli.py +1106 -0
- copex/client.py +725 -0
- copex/config.py +311 -0
- copex/mcp.py +561 -0
- copex/metrics.py +383 -0
- copex/models.py +50 -0
- copex/persistence.py +324 -0
- copex/plan.py +358 -0
- copex/ralph.py +247 -0
- copex/tools.py +404 -0
- copex/ui.py +971 -0
- copex-0.8.4.dist-info/METADATA +511 -0
- copex-0.8.4.dist-info/RECORD +18 -0
- copex-0.8.4.dist-info/WHEEL +4 -0
- copex-0.8.4.dist-info/entry_points.txt +2 -0
- copex-0.8.4.dist-info/licenses/LICENSE +21 -0
copex/metrics.py
ADDED
@@ -0,0 +1,383 @@
+"""
+Metrics and Logging - Track token usage, timing, and success rates.
+
+Provides:
+- Token usage tracking
+- Timing metrics per request
+- Success/failure rates
+- Cost estimation
+- Export to various formats
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import time
+from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+
+@dataclass
+class RequestMetrics:
+    """Metrics for a single request."""
+
+    request_id: str
+    timestamp: str
+    model: str
+    reasoning_effort: str
+
+    # Timing
+    start_time: float
+    end_time: float | None = None
+    duration_ms: float | None = None
+
+    # Tokens (estimated or from API)
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
+
+    # Status
+    success: bool = False
+    error: str | None = None
+    retries: int = 0
+
+    # Content
+    prompt_preview: str = ""
+    response_preview: str = ""
+
+    def finalize(self, end_time: float | None = None) -> None:
+        """Finalize timing metrics."""
+        self.end_time = end_time or time.time()
+        self.duration_ms = (self.end_time - self.start_time) * 1000
+
+
+@dataclass
+class SessionMetrics:
+    """Aggregated metrics for a session."""
+
+    session_id: str
+    started_at: str
+    requests: list[RequestMetrics] = field(default_factory=list)
+
+    @property
+    def total_requests(self) -> int:
+        return len(self.requests)
+
+    @property
+    def successful_requests(self) -> int:
+        return sum(1 for r in self.requests if r.success)
+
+    @property
+    def failed_requests(self) -> int:
+        return sum(1 for r in self.requests if not r.success)
+
+    @property
+    def success_rate(self) -> float:
+        if not self.requests:
+            return 0.0
+        return self.successful_requests / self.total_requests
+
+    @property
+    def total_retries(self) -> int:
+        return sum(r.retries for r in self.requests)
+
+    @property
+    def total_tokens(self) -> int:
+        return sum(r.total_tokens or 0 for r in self.requests)
+
+    @property
+    def total_duration_ms(self) -> float:
+        return sum(r.duration_ms or 0 for r in self.requests)
+
+    @property
+    def avg_duration_ms(self) -> float:
+        if not self.requests:
+            return 0.0
+        return self.total_duration_ms / self.total_requests
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "session_id": self.session_id,
+            "started_at": self.started_at,
+            "total_requests": self.total_requests,
+            "successful_requests": self.successful_requests,
+            "failed_requests": self.failed_requests,
+            "success_rate": self.success_rate,
+            "total_retries": self.total_retries,
+            "total_tokens": self.total_tokens,
+            "total_duration_ms": self.total_duration_ms,
+            "avg_duration_ms": self.avg_duration_ms,
+            "requests": [
+                {
+                    "request_id": r.request_id,
+                    "timestamp": r.timestamp,
+                    "model": r.model,
+                    "duration_ms": r.duration_ms,
+                    "tokens": r.total_tokens,
+                    "success": r.success,
+                    "retries": r.retries,
+                }
+                for r in self.requests
+            ],
+        }
+
+
+# Token cost estimates per 1M tokens (as of 2026)
+TOKEN_COSTS = {
+    "gpt-5.2-codex": {"input": 3.00, "output": 15.00},
+    "gpt-5.1-codex": {"input": 2.50, "output": 10.00},
+    "gpt-5.1-codex-max": {"input": 5.00, "output": 20.00},
+    "gpt-5.1-codex-mini": {"input": 0.50, "output": 2.00},
+    "gpt-5.2": {"input": 2.50, "output": 10.00},
+    "gpt-5.1": {"input": 2.00, "output": 8.00},
+    "gpt-5": {"input": 2.00, "output": 8.00},
+    "gpt-5-mini": {"input": 0.40, "output": 1.60},
+    "gpt-4.1": {"input": 0.30, "output": 1.20},
+    "claude-sonnet-4.5": {"input": 3.00, "output": 15.00},
+    "claude-sonnet-4": {"input": 3.00, "output": 15.00},
+    "claude-haiku-4.5": {"input": 0.80, "output": 4.00},
+    "claude-opus-4.5": {"input": 15.00, "output": 75.00},
+    "gemini-3-pro-preview": {"input": 1.25, "output": 5.00},
+}
+
+
+def estimate_tokens(text: str) -> int:
+    """Rough token estimation (4 chars ≈ 1 token)."""
+    return len(text) // 4
+
+
+def estimate_cost(
+    model: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+) -> float:
+    """
+    Estimate cost in USD.
+
+    Args:
+        model: Model name
+        prompt_tokens: Input tokens
+        completion_tokens: Output tokens
+
+    Returns:
+        Estimated cost in USD
+    """
+    costs = TOKEN_COSTS.get(model, {"input": 2.0, "output": 10.0})
+    input_cost = (prompt_tokens / 1_000_000) * costs["input"]
+    output_cost = (completion_tokens / 1_000_000) * costs["output"]
+    return input_cost + output_cost
+
+
+class MetricsCollector:
+    """
+    Collects and manages metrics for Copex requests.
+
+    Usage:
+        collector = MetricsCollector()
+
+        # Start tracking a request
+        req = collector.start_request(model="gpt-5.2-codex", prompt="Hello")
+
+        # ... make request ...
+
+        # Complete tracking
+        collector.complete_request(req.request_id, success=True, response="Hi!")
+
+        # Get summary
+        print(collector.summary())
+    """
+
+    def __init__(self, session_id: str | None = None):
+        """Initialize collector."""
+        self.session = SessionMetrics(
+            session_id=session_id or datetime.now().strftime("%Y%m%d_%H%M%S"),
+            started_at=datetime.now().isoformat(),
+        )
+        self._pending: dict[str, RequestMetrics] = {}
+        self._lock = threading.Lock()
+        self._request_counter = 0
+
+    def start_request(
+        self,
+        model: str,
+        reasoning_effort: str = "xhigh",
+        prompt: str = "",
+    ) -> RequestMetrics:
+        """
+        Start tracking a new request.
+
+        Returns:
+            RequestMetrics object to track this request
+        """
+        with self._lock:
+            self._request_counter += 1
+            request_id = f"req_{self._request_counter}_{int(time.time() * 1000)}"
+
+        metrics = RequestMetrics(
+            request_id=request_id,
+            timestamp=datetime.now().isoformat(),
+            model=model,
+            reasoning_effort=reasoning_effort,
+            start_time=time.time(),
+            prompt_preview=prompt[:100] if prompt else "",
+            prompt_tokens=estimate_tokens(prompt) if prompt else None,
+        )
+
+        with self._lock:
+            self._pending[request_id] = metrics
+
+        return metrics
+
+    def complete_request(
+        self,
+        request_id: str,
+        success: bool = True,
+        response: str = "",
+        error: str | None = None,
+        retries: int = 0,
+        tokens: dict[str, int] | None = None,
+    ) -> RequestMetrics | None:
+        """
+        Complete tracking for a request.
+
+        Args:
+            request_id: Request ID from start_request
+            success: Whether request succeeded
+            response: Response content
+            error: Error message if failed
+            retries: Number of retries needed
+            tokens: Optional token counts {"prompt": N, "completion": N}
+
+        Returns:
+            Completed RequestMetrics
+        """
+        with self._lock:
+            metrics = self._pending.pop(request_id, None)
+
+        if not metrics:
+            return None
+
+        metrics.finalize()
+        metrics.success = success
+        metrics.error = error
+        metrics.retries = retries
+        metrics.response_preview = response[:100] if response else ""
+
+        # Token counts
+        if tokens:
+            metrics.prompt_tokens = tokens.get("prompt", metrics.prompt_tokens)
+            metrics.completion_tokens = tokens.get("completion")
+            metrics.total_tokens = (metrics.prompt_tokens or 0) + (metrics.completion_tokens or 0)
+        elif response:
+            metrics.completion_tokens = estimate_tokens(response)
+            metrics.total_tokens = (metrics.prompt_tokens or 0) + metrics.completion_tokens
+
+        with self._lock:
+            self.session.requests.append(metrics)
+
+        return metrics
+
+    def summary(self) -> dict[str, Any]:
+        """Get session summary."""
+        return self.session.to_dict()
+
+    def cost_estimate(self) -> float:
+        """Estimate total cost in USD."""
+        total = 0.0
+        for req in self.session.requests:
+            if req.prompt_tokens and req.completion_tokens:
+                total += estimate_cost(req.model, req.prompt_tokens, req.completion_tokens)
+        return total
+
+    def by_model(self) -> dict[str, dict[str, Any]]:
+        """Get metrics grouped by model."""
+        by_model: dict[str, list[RequestMetrics]] = defaultdict(list)
+        for req in self.session.requests:
+            by_model[req.model].append(req)
+
+        result = {}
+        for model, requests in by_model.items():
+            result[model] = {
+                "requests": len(requests),
+                "success_rate": sum(1 for r in requests if r.success) / len(requests),
+                "total_tokens": sum(r.total_tokens or 0 for r in requests),
+                "avg_duration_ms": sum(r.duration_ms or 0 for r in requests) / len(requests),
+            }
+
+        return result
+
+    def export_json(self, path: Path | str) -> None:
+        """Export metrics to JSON file."""
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(self.summary(), f, indent=2)
+
+    def export_csv(self, path: Path | str) -> None:
+        """Export metrics to CSV file."""
+        import csv
+
+        with open(path, "w", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f)
+            writer.writerow([
+                "request_id", "timestamp", "model", "reasoning_effort",
+                "duration_ms", "prompt_tokens", "completion_tokens", "total_tokens",
+                "success", "retries", "error",
+            ])
+
+            for req in self.session.requests:
+                writer.writerow([
+                    req.request_id, req.timestamp, req.model, req.reasoning_effort,
+                    req.duration_ms, req.prompt_tokens, req.completion_tokens, req.total_tokens,
+                    req.success, req.retries, req.error or "",
+                ])
+
+    def print_summary(self) -> str:
+        """Get printable summary string."""
+        s = self.session
+        lines = [
+            "═══ Copex Metrics Summary ═══",
+            f"Session: {s.session_id}",
+            f"Started: {s.started_at}",
+            "",
+            f"Requests: {s.total_requests} ({s.successful_requests} ok, {s.failed_requests} failed)",
+            f"Success Rate: {s.success_rate:.1%}",
+            f"Total Retries: {s.total_retries}",
+            "",
+            f"Total Tokens: {s.total_tokens:,}",
+            f"Estimated Cost: ${self.cost_estimate():.4f}",
+            "",
+            f"Total Time: {s.total_duration_ms / 1000:.1f}s",
+            f"Avg Time/Request: {s.avg_duration_ms:.0f}ms",
+        ]
+
+        # By model breakdown
+        by_model = self.by_model()
+        if len(by_model) > 1:
+            lines.append("")
+            lines.append("By Model:")
+            for model, stats in by_model.items():
+                lines.append(f" {model}: {stats['requests']} requests, {stats['success_rate']:.0%} success")
+
+        return "\n".join(lines)
+
+
+# Global collector for convenience
+_global_collector: MetricsCollector | None = None
+
+
+def get_collector() -> MetricsCollector:
+    """Get or create global metrics collector."""
+    global _global_collector
+    if _global_collector is None:
+        _global_collector = MetricsCollector()
+    return _global_collector
+
+
+def reset_collector() -> None:
+    """Reset global metrics collector."""
+    global _global_collector
+    _global_collector = None
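For readers auditing the cost path in this file: estimate_tokens() approximates tokens at roughly four characters each, and estimate_cost() scales the per-million-token rates in TOKEN_COSTS. The sketch below is not part of the package; the session id, prompt/response strings, and output filename are illustrative, and it relies only on the public names defined in copex/metrics.py above.

from copex.metrics import MetricsCollector, estimate_cost

# Worked example: 1,000 prompt tokens and 500 completion tokens on "gpt-5.2-codex"
# at $3.00/M input and $15.00/M output:
#   (1_000 / 1_000_000) * 3.00 + (500 / 1_000_000) * 15.00 = 0.003 + 0.0075 = 0.0105
assert abs(estimate_cost("gpt-5.2-codex", 1_000, 500) - 0.0105) < 1e-9

collector = MetricsCollector(session_id="demo")  # "demo" is an arbitrary session id

# Track one request; without explicit token counts, the 4-chars-per-token estimate is used.
req = collector.start_request(model="gpt-5.2-codex", prompt="Summarize this diff")
collector.complete_request(req.request_id, success=True, response="It adds metrics and model enums.")

print(collector.print_summary())       # human-readable report
collector.export_json("metrics.json")  # same data as collector.summary(), written to disk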
copex/models.py
ADDED
@@ -0,0 +1,50 @@
+"""Model and configuration enums."""
+
+from enum import Enum
+
+
+class Model(str, Enum):
+    """Available Copilot models."""
+
+    GPT_5_2_CODEX = "gpt-5.2-codex"
+    GPT_5_1_CODEX = "gpt-5.1-codex"
+    GPT_5_1_CODEX_MAX = "gpt-5.1-codex-max"
+    GPT_5_1_CODEX_MINI = "gpt-5.1-codex-mini"
+    GPT_5_2 = "gpt-5.2"
+    GPT_5_1 = "gpt-5.1"
+    GPT_5 = "gpt-5"
+    GPT_5_MINI = "gpt-5-mini"
+    GPT_4_1 = "gpt-4.1"
+    CLAUDE_SONNET_4_5 = "claude-sonnet-4.5"
+    CLAUDE_SONNET_4 = "claude-sonnet-4"
+    CLAUDE_HAIKU_4_5 = "claude-haiku-4.5"
+    CLAUDE_OPUS_4_5 = "claude-opus-4.5"
+    GEMINI_3_PRO = "gemini-3-pro-preview"
+
+
+class ReasoningEffort(str, Enum):
+    """Reasoning effort levels for supported models."""
+
+    NONE = "none"
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+    XHIGH = "xhigh"
+
+
+class EventType(str, Enum):
+    """Copilot session event types."""
+
+    USER_MESSAGE = "user.message"
+    ASSISTANT_MESSAGE = "assistant.message"
+    ASSISTANT_MESSAGE_DELTA = "assistant.message_delta"
+    ASSISTANT_REASONING = "assistant.reasoning"
+    ASSISTANT_REASONING_DELTA = "assistant.reasoning_delta"
+    ASSISTANT_TURN_END = "assistant.turn_end"
+    SESSION_IDLE = "session.idle"
+    SESSION_ERROR = "session.error"
+    ERROR = "error"
+    TOOL_CALL = "tool.call"
+    TOOL_EXECUTION_START = "tool.execution_start"
+    TOOL_EXECUTION_PARTIAL_RESULT = "tool.execution_partial_result"
+    TOOL_EXECUTION_COMPLETE = "tool.execution_complete"
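Because Model, ReasoningEffort, and EventType are str-valued enums, their members compare equal to the raw strings used elsewhere in the package, for example the keys of TOKEN_COSTS in copex/metrics.py. A minimal sketch under that assumption; the 10k/2k token counts are arbitrary illustration values:

from copex.metrics import TOKEN_COSTS, estimate_cost
from copex.models import Model, ReasoningEffort

# str-valued enum members interoperate with plain strings.
assert Model.GPT_5_2_CODEX == "gpt-5.2-codex"
assert ReasoningEffort.XHIGH.value == "xhigh"

# Any model with an entry in the price table can be passed straight to estimate_cost().
for model in Model:
    if model.value in TOKEN_COSTS:
        cost = estimate_cost(model.value, prompt_tokens=10_000, completion_tokens=2_000)
        print(f"{model.value}: ~${cost:.4f} for 10k input / 2k output tokens")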