copex 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
copex/metrics.py ADDED
@@ -0,0 +1,383 @@
1
+ """
2
+ Metrics and Logging - Track token usage, timing, and success rates.
3
+
4
+ Provides:
5
+ - Token usage tracking
6
+ - Timing metrics per request
7
+ - Success/failure rates
8
+ - Cost estimation
9
+ - Export to various formats
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import threading
16
+ import time
17
+ from collections import defaultdict
18
+ from dataclasses import dataclass, field
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+
24
@dataclass
class RequestMetrics:
    """Metrics for a single request (timing, token counts, status, previews)."""

    request_id: str
    timestamp: str
    model: str
    reasoning_effort: str

    # Timing: epoch seconds for start/end, derived duration in milliseconds.
    start_time: float
    end_time: float | None = None
    duration_ms: float | None = None

    # Tokens (estimated or from API)
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
    total_tokens: int | None = None

    # Status
    success: bool = False
    error: str | None = None
    retries: int = 0

    # Content previews (truncated by the collector before storage)
    prompt_preview: str = ""
    response_preview: str = ""

    def finalize(self, end_time: float | None = None) -> None:
        """Record the end timestamp and derive duration_ms.

        Args:
            end_time: Explicit end timestamp in epoch seconds; defaults to now.
        """
        # BUGFIX: use an explicit None check instead of `end_time or time.time()`
        # so a falsy timestamp such as 0.0 is honored rather than silently
        # replaced with the current time.
        self.end_time = time.time() if end_time is None else end_time
        self.duration_ms = (self.end_time - self.start_time) * 1000
56
+
57
+
58
@dataclass
class SessionMetrics:
    """Aggregated metrics for a session of tracked requests."""

    session_id: str
    started_at: str
    requests: list[RequestMetrics] = field(default_factory=list)

    @property
    def total_requests(self) -> int:
        """Number of completed requests recorded so far."""
        return len(self.requests)

    @property
    def successful_requests(self) -> int:
        """How many recorded requests succeeded."""
        return sum(req.success for req in self.requests)

    @property
    def failed_requests(self) -> int:
        """How many recorded requests failed."""
        return self.total_requests - self.successful_requests

    @property
    def success_rate(self) -> float:
        """Fraction of successful requests; 0.0 when nothing is recorded."""
        total = self.total_requests
        return self.successful_requests / total if total else 0.0

    @property
    def total_retries(self) -> int:
        """Sum of retry counts across all requests."""
        return sum(req.retries for req in self.requests)

    @property
    def total_tokens(self) -> int:
        """Sum of token counts, treating unknown counts as zero."""
        return sum(req.total_tokens or 0 for req in self.requests)

    @property
    def total_duration_ms(self) -> float:
        """Sum of request durations, treating unknown durations as zero."""
        return sum(req.duration_ms or 0 for req in self.requests)

    @property
    def avg_duration_ms(self) -> float:
        """Mean request duration; 0.0 when nothing is recorded."""
        total = self.total_requests
        return self.total_duration_ms / total if total else 0.0

    def to_dict(self) -> dict[str, Any]:
        """Serialize the session aggregates plus a per-request summary."""
        per_request = [
            {
                "request_id": req.request_id,
                "timestamp": req.timestamp,
                "model": req.model,
                "duration_ms": req.duration_ms,
                "tokens": req.total_tokens,
                "success": req.success,
                "retries": req.retries,
            }
            for req in self.requests
        ]
        return {
            "session_id": self.session_id,
            "started_at": self.started_at,
            "total_requests": self.total_requests,
            "successful_requests": self.successful_requests,
            "failed_requests": self.failed_requests,
            "success_rate": self.success_rate,
            "total_retries": self.total_retries,
            "total_tokens": self.total_tokens,
            "total_duration_ms": self.total_duration_ms,
            "avg_duration_ms": self.avg_duration_ms,
            "requests": per_request,
        }
128
+
129
+
130
# Token cost estimates per 1M tokens (as of 2026)
# USD per 1M tokens; "input" = prompt tokens, "output" = completion tokens.
# Models not listed here fall back to a default rate in estimate_cost().
TOKEN_COSTS = {
    # OpenAI GPT / Codex family
    "gpt-5.2-codex": {"input": 3.00, "output": 15.00},
    "gpt-5.1-codex": {"input": 2.50, "output": 10.00},
    "gpt-5.1-codex-max": {"input": 5.00, "output": 20.00},
    "gpt-5.1-codex-mini": {"input": 0.50, "output": 2.00},
    "gpt-5.2": {"input": 2.50, "output": 10.00},
    "gpt-5.1": {"input": 2.00, "output": 8.00},
    "gpt-5": {"input": 2.00, "output": 8.00},
    "gpt-5-mini": {"input": 0.40, "output": 1.60},
    "gpt-4.1": {"input": 0.30, "output": 1.20},
    # Anthropic Claude family
    "claude-sonnet-4.5": {"input": 3.00, "output": 15.00},
    "claude-sonnet-4": {"input": 3.00, "output": 15.00},
    "claude-haiku-4.5": {"input": 0.80, "output": 4.00},
    "claude-opus-4.5": {"input": 15.00, "output": 75.00},
    # Google Gemini
    "gemini-3-pro-preview": {"input": 1.25, "output": 5.00},
}
147
+
148
+
149
def estimate_tokens(text: str) -> int:
    """Rough token estimate using the ~4-characters-per-token heuristic."""
    char_count = len(text)
    return char_count // 4
152
+
153
+
154
def estimate_cost(
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
) -> float:
    """
    Estimate request cost in USD from per-1M-token rates.

    Args:
        model: Model name (looked up in TOKEN_COSTS; unknown models use a
            default of $2/M input, $10/M output)
        prompt_tokens: Input tokens
        completion_tokens: Output tokens

    Returns:
        Estimated cost in USD
    """
    rates = TOKEN_COSTS.get(model, {"input": 2.0, "output": 10.0})
    # Keep the original operation order (divide first, then multiply) so
    # float results are bit-for-bit identical.
    cost_in = (prompt_tokens / 1_000_000) * rates["input"]
    cost_out = (completion_tokens / 1_000_000) * rates["output"]
    return cost_in + cost_out
174
+
175
+
176
class MetricsCollector:
    """
    Collects and manages metrics for Copex requests.

    A single lock guards the request counter, the pending-request map, and
    the session's completed-request list, so the collector can be shared
    across threads.

    Usage:
        collector = MetricsCollector()

        # Start tracking a request
        req = collector.start_request(model="gpt-5.2-codex", prompt="Hello")

        # ... make request ...

        # Complete tracking
        collector.complete_request(req.request_id, success=True, response="Hi!")

        # Get summary
        print(collector.summary())
    """

    def __init__(self, session_id: str | None = None):
        """Initialize collector.

        Args:
            session_id: Optional explicit id; defaults to a timestamp-derived one.
        """
        self.session = SessionMetrics(
            session_id=session_id or datetime.now().strftime("%Y%m%d_%H%M%S"),
            started_at=datetime.now().isoformat(),
        )
        # Requests started but not yet completed, keyed by request_id.
        self._pending: dict[str, RequestMetrics] = {}
        self._lock = threading.Lock()
        self._request_counter = 0

    def start_request(
        self,
        model: str,
        reasoning_effort: str = "xhigh",
        prompt: str = "",
    ) -> RequestMetrics:
        """
        Start tracking a new request.

        Args:
            model: Model name the request targets
            reasoning_effort: Reasoning effort level
            prompt: Prompt text (used for the preview and token estimate)

        Returns:
            RequestMetrics object to track this request
        """
        # Allocate the id and register the pending entry under one lock
        # acquisition (previously two separate acquisitions) so the counter
        # and the pending map cannot get out of step under concurrency.
        with self._lock:
            self._request_counter += 1
            request_id = f"req_{self._request_counter}_{int(time.time() * 1000)}"

            metrics = RequestMetrics(
                request_id=request_id,
                timestamp=datetime.now().isoformat(),
                model=model,
                reasoning_effort=reasoning_effort,
                start_time=time.time(),
                prompt_preview=prompt[:100] if prompt else "",
                prompt_tokens=estimate_tokens(prompt) if prompt else None,
            )
            self._pending[request_id] = metrics

        return metrics

    def complete_request(
        self,
        request_id: str,
        success: bool = True,
        response: str = "",
        error: str | None = None,
        retries: int = 0,
        tokens: dict[str, int] | None = None,
    ) -> RequestMetrics | None:
        """
        Complete tracking for a request.

        Args:
            request_id: Request ID from start_request
            success: Whether request succeeded
            response: Response content
            error: Error message if failed
            retries: Number of retries needed
            tokens: Optional token counts {"prompt": N, "completion": N}

        Returns:
            Completed RequestMetrics, or None if request_id is unknown
        """
        with self._lock:
            metrics = self._pending.pop(request_id, None)

        if not metrics:
            return None

        # After the pop, this thread owns `metrics` exclusively.
        metrics.finalize()
        metrics.success = success
        metrics.error = error
        metrics.retries = retries
        metrics.response_preview = response[:100] if response else ""

        # Token counts: prefer explicit API-provided counts, then fall back
        # to a character-based estimate from the response.
        if tokens:
            metrics.prompt_tokens = tokens.get("prompt", metrics.prompt_tokens)
            # BUGFIX: previously `tokens.get("completion")` unconditionally
            # overwrote completion_tokens with None when only a prompt count
            # was supplied, and the response-based estimate was skipped,
            # silently undercounting total_tokens.
            metrics.completion_tokens = tokens.get(
                "completion", metrics.completion_tokens
            )
        if metrics.completion_tokens is None and response:
            metrics.completion_tokens = estimate_tokens(response)
        if tokens or response:
            metrics.total_tokens = (metrics.prompt_tokens or 0) + (
                metrics.completion_tokens or 0
            )

        with self._lock:
            self.session.requests.append(metrics)

        return metrics

    def summary(self) -> dict[str, Any]:
        """Get session summary as a plain dictionary."""
        return self.session.to_dict()

    def cost_estimate(self) -> float:
        """Estimate total cost in USD across all completed requests.

        Requests missing either token count are skipped.
        """
        total = 0.0
        for req in self.session.requests:
            if req.prompt_tokens and req.completion_tokens:
                total += estimate_cost(req.model, req.prompt_tokens, req.completion_tokens)
        return total

    def by_model(self) -> dict[str, dict[str, Any]]:
        """Get per-model request count, success rate, tokens, and avg duration."""
        grouped: dict[str, list[RequestMetrics]] = defaultdict(list)
        for req in self.session.requests:
            grouped[req.model].append(req)

        result = {}
        for model, requests in grouped.items():
            result[model] = {
                "requests": len(requests),
                "success_rate": sum(1 for r in requests if r.success) / len(requests),
                "total_tokens": sum(r.total_tokens or 0 for r in requests),
                "avg_duration_ms": sum(r.duration_ms or 0 for r in requests) / len(requests),
            }

        return result

    def export_json(self, path: Path | str) -> None:
        """Export the session summary to a JSON file at `path`."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.summary(), f, indent=2)

    def export_csv(self, path: Path | str) -> None:
        """Export one CSV row per completed request to `path`."""
        import csv

        with open(path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([
                "request_id", "timestamp", "model", "reasoning_effort",
                "duration_ms", "prompt_tokens", "completion_tokens", "total_tokens",
                "success", "retries", "error",
            ])

            for req in self.session.requests:
                writer.writerow([
                    req.request_id, req.timestamp, req.model, req.reasoning_effort,
                    req.duration_ms, req.prompt_tokens, req.completion_tokens, req.total_tokens,
                    req.success, req.retries, req.error or "",
                ])

    def print_summary(self) -> str:
        """Build a human-readable summary string (returned, not printed)."""
        s = self.session
        lines = [
            "═══ Copex Metrics Summary ═══",
            f"Session: {s.session_id}",
            f"Started: {s.started_at}",
            "",
            f"Requests: {s.total_requests} ({s.successful_requests} ok, {s.failed_requests} failed)",
            f"Success Rate: {s.success_rate:.1%}",
            f"Total Retries: {s.total_retries}",
            "",
            f"Total Tokens: {s.total_tokens:,}",
            f"Estimated Cost: ${self.cost_estimate():.4f}",
            "",
            f"Total Time: {s.total_duration_ms / 1000:.1f}s",
            f"Avg Time/Request: {s.avg_duration_ms:.0f}ms",
        ]

        # Per-model breakdown only when more than one model was used.
        by_model = self.by_model()
        if len(by_model) > 1:
            lines.append("")
            lines.append("By Model:")
            for model, stats in by_model.items():
                lines.append(f"  {model}: {stats['requests']} requests, {stats['success_rate']:.0%} success")

        return "\n".join(lines)
366
+
367
+
368
# Global collector for convenience
# Lazily created by get_collector(); cleared by reset_collector().
_global_collector: MetricsCollector | None = None
370
+
371
+
372
def get_collector() -> MetricsCollector:
    """Return the process-wide metrics collector, creating it on first use."""
    global _global_collector
    collector = _global_collector
    if collector is None:
        collector = MetricsCollector()
        _global_collector = collector
    return collector
378
+
379
+
380
def reset_collector() -> None:
    """Discard the global collector; the next get_collector() starts fresh."""
    global _global_collector
    _global_collector = None
copex/models.py ADDED
@@ -0,0 +1,50 @@
1
+ """Model and configuration enums."""
2
+
3
+ from enum import Enum
4
+
5
+
6
class Model(str, Enum):
    """Available Copilot models.

    Each value is the model identifier string sent to the API; the
    str mixin lets members be used anywhere a plain string is expected.
    """

    # OpenAI GPT / Codex family
    GPT_5_2_CODEX = "gpt-5.2-codex"
    GPT_5_1_CODEX = "gpt-5.1-codex"
    GPT_5_1_CODEX_MAX = "gpt-5.1-codex-max"
    GPT_5_1_CODEX_MINI = "gpt-5.1-codex-mini"
    GPT_5_2 = "gpt-5.2"
    GPT_5_1 = "gpt-5.1"
    GPT_5 = "gpt-5"
    GPT_5_MINI = "gpt-5-mini"
    GPT_4_1 = "gpt-4.1"
    # Anthropic Claude family
    CLAUDE_SONNET_4_5 = "claude-sonnet-4.5"
    CLAUDE_SONNET_4 = "claude-sonnet-4"
    CLAUDE_HAIKU_4_5 = "claude-haiku-4.5"
    CLAUDE_OPUS_4_5 = "claude-opus-4.5"
    # Google Gemini
    GEMINI_3_PRO = "gemini-3-pro-preview"
23
+
24
+
25
class ReasoningEffort(str, Enum):
    """Reasoning effort levels for supported models.

    Listed from least to most effort; values are the wire-format strings.
    """

    NONE = "none"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    XHIGH = "xhigh"
33
+
34
+
35
class EventType(str, Enum):
    """Copilot session event types.

    Values are the dotted event-name strings as they appear in the event
    stream; grouped below by the event's origin.
    """

    # Conversation messages
    USER_MESSAGE = "user.message"
    ASSISTANT_MESSAGE = "assistant.message"
    ASSISTANT_MESSAGE_DELTA = "assistant.message_delta"
    # Assistant reasoning (full payload and streaming delta)
    ASSISTANT_REASONING = "assistant.reasoning"
    ASSISTANT_REASONING_DELTA = "assistant.reasoning_delta"
    ASSISTANT_TURN_END = "assistant.turn_end"
    # Session lifecycle / errors
    SESSION_IDLE = "session.idle"
    SESSION_ERROR = "session.error"
    ERROR = "error"
    # Tool invocation lifecycle
    TOOL_CALL = "tool.call"
    TOOL_EXECUTION_START = "tool.execution_start"
    TOOL_EXECUTION_PARTIAL_RESULT = "tool.execution_partial_result"
    TOOL_EXECUTION_COMPLETE = "tool.execution_complete"