flashlite 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flashlite/observability/__init__.py +7 -1
- flashlite/observability/inspect_compat.py +263 -2
- flashlite/types.py +3 -1
- {flashlite-0.1.0.dist-info → flashlite-0.1.2.dist-info}/METADATA +3 -1
- {flashlite-0.1.0.dist-info → flashlite-0.1.2.dist-info}/RECORD +7 -7
- {flashlite-0.1.0.dist-info → flashlite-0.1.2.dist-info}/WHEEL +0 -0
- {flashlite-0.1.0.dist-info → flashlite-0.1.2.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -7,7 +7,12 @@ from .callbacks import (
|
|
|
7
7
|
OnResponseCallback,
|
|
8
8
|
create_logging_callbacks,
|
|
9
9
|
)
|
|
10
|
-
from .inspect_compat import FlashliteModelAPI, InspectLogEntry, InspectLogger
|
|
10
|
+
from .inspect_compat import (
|
|
11
|
+
FlashliteModelAPI,
|
|
12
|
+
InspectLogEntry,
|
|
13
|
+
InspectLogger,
|
|
14
|
+
convert_flashlite_logs_to_inspect,
|
|
15
|
+
)
|
|
11
16
|
from .logging import RequestContext, RequestLogEntry, ResponseLogEntry, StructuredLogger
|
|
12
17
|
from .metrics import BudgetExceededError, CostMetrics, CostTracker
|
|
13
18
|
|
|
@@ -31,4 +36,5 @@ __all__ = [
|
|
|
31
36
|
"InspectLogger",
|
|
32
37
|
"InspectLogEntry",
|
|
33
38
|
"FlashliteModelAPI",
|
|
39
|
+
"convert_flashlite_logs_to_inspect",
|
|
34
40
|
]
|
|
@@ -4,13 +4,14 @@ This module provides interoperability with the UK AISI's Inspect framework
|
|
|
4
4
|
(https://inspect.ai-safety-institute.org.uk/).
|
|
5
5
|
|
|
6
6
|
It includes:
|
|
7
|
-
- Log format
|
|
7
|
+
- Log format conversion to Inspect's native eval log format
|
|
8
8
|
- ModelAPI protocol implementation for use as an Inspect solver backend
|
|
9
|
-
-
|
|
9
|
+
- Functions to convert flashlite JSONL logs to Inspect-viewable format
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
import json
|
|
13
13
|
import logging
|
|
14
|
+
import uuid
|
|
14
15
|
from dataclasses import dataclass, field
|
|
15
16
|
from datetime import UTC, datetime
|
|
16
17
|
from pathlib import Path
|
|
@@ -24,6 +25,231 @@ if TYPE_CHECKING:
|
|
|
24
25
|
logger = logging.getLogger(__name__)
|
|
25
26
|
|
|
26
27
|
|
|
28
|
+
def convert_flashlite_logs_to_inspect(
|
|
29
|
+
input_path: str | Path,
|
|
30
|
+
output_path: str | Path | None = None,
|
|
31
|
+
task_name: str | None = None,
|
|
32
|
+
) -> Path:
|
|
33
|
+
"""
|
|
34
|
+
Convert flashlite JSONL logs to Inspect-compatible format.
|
|
35
|
+
|
|
36
|
+
This allows logs generated by flashlite's InspectLogger to be viewed
|
|
37
|
+
in Inspect's log viewer (`inspect view`).
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
input_path: Path to flashlite JSONL log file
|
|
41
|
+
output_path: Output path for Inspect log file (defaults to same dir with proper naming)
|
|
42
|
+
task_name: Task name for the evaluation (defaults to eval_id from logs)
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Path to the generated Inspect log file
|
|
46
|
+
|
|
47
|
+
Raises:
|
|
48
|
+
FileNotFoundError: If input file doesn't exist
|
|
49
|
+
|
|
50
|
+
Example:
|
|
51
|
+
>>> from flashlite.observability import convert_flashlite_logs_to_inspect
|
|
52
|
+
>>> convert_flashlite_logs_to_inspect("logs/my_eval.jsonl")
|
|
53
|
+
PosixPath('logs/2026-02-05T12-00-00_my_eval_abc123.json')
|
|
54
|
+
"""
|
|
55
|
+
input_path = Path(input_path)
|
|
56
|
+
if not input_path.exists():
|
|
57
|
+
raise FileNotFoundError(f"Log file not found: {input_path}")
|
|
58
|
+
|
|
59
|
+
# Read all entries from JSONL
|
|
60
|
+
entries: list[dict[str, Any]] = []
|
|
61
|
+
with open(input_path) as f:
|
|
62
|
+
for line in f:
|
|
63
|
+
line = line.strip()
|
|
64
|
+
if line:
|
|
65
|
+
entries.append(json.loads(line))
|
|
66
|
+
|
|
67
|
+
if not entries:
|
|
68
|
+
raise ValueError(f"No log entries found in {input_path}")
|
|
69
|
+
|
|
70
|
+
# Extract metadata from first entry
|
|
71
|
+
first_entry = entries[0]
|
|
72
|
+
eval_id = first_entry.get("eval_id", "flashlite_eval")
|
|
73
|
+
model_name = first_entry.get("model", "unknown")
|
|
74
|
+
task = task_name or eval_id
|
|
75
|
+
|
|
76
|
+
# Get timestamp from entries or generate one
|
|
77
|
+
timestamps = [e.get("timestamp", "") for e in entries if e.get("timestamp")]
|
|
78
|
+
if timestamps:
|
|
79
|
+
# Parse and format for filename (Inspect uses format like 2024-05-29T12-38-43)
|
|
80
|
+
started_at = min(timestamps)
|
|
81
|
+
# Convert ISO format to Inspect's filename format
|
|
82
|
+
ts_for_filename = started_at.replace(":", "-").split(".")[0]
|
|
83
|
+
else:
|
|
84
|
+
ts_for_filename = datetime.now(UTC).strftime("%Y-%m-%dT%H-%M-%S")
|
|
85
|
+
|
|
86
|
+
# Generate a short unique ID
|
|
87
|
+
short_id = uuid.uuid4().hex[:8]
|
|
88
|
+
|
|
89
|
+
# Determine output path with Inspect's naming convention: {timestamp}_{task}_{id}.json
|
|
90
|
+
if output_path is None:
|
|
91
|
+
output_dir = input_path.parent
|
|
92
|
+
output_filename = f"{ts_for_filename}_{task}_{short_id}.json"
|
|
93
|
+
output_path = output_dir / output_filename
|
|
94
|
+
else:
|
|
95
|
+
output_path = Path(output_path)
|
|
96
|
+
|
|
97
|
+
# Build EvalLog structure as dict (Inspect's JSON format)
|
|
98
|
+
eval_log = _build_eval_log_dict(
|
|
99
|
+
entries=entries,
|
|
100
|
+
eval_id=eval_id,
|
|
101
|
+
task_name=task,
|
|
102
|
+
model_name=model_name,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Write JSON directly
|
|
106
|
+
with open(output_path, "w") as f:
|
|
107
|
+
json.dump(eval_log, f, indent=2)
|
|
108
|
+
|
|
109
|
+
logger.info(f"Converted {len(entries)} entries to Inspect format: {output_path}")
|
|
110
|
+
return output_path
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _build_eval_log_dict(
|
|
114
|
+
entries: list[dict[str, Any]],
|
|
115
|
+
eval_id: str,
|
|
116
|
+
task_name: str,
|
|
117
|
+
model_name: str,
|
|
118
|
+
) -> dict[str, Any]:
|
|
119
|
+
"""Build an Inspect-compatible EvalLog dict from flashlite log entries."""
|
|
120
|
+
# Calculate timestamps
|
|
121
|
+
timestamps = [e.get("timestamp", "") for e in entries if e.get("timestamp")]
|
|
122
|
+
started_at = min(timestamps) if timestamps else datetime.now(UTC).isoformat()
|
|
123
|
+
completed_at = max(timestamps) if timestamps else datetime.now(UTC).isoformat()
|
|
124
|
+
|
|
125
|
+
# Calculate total token usage
|
|
126
|
+
total_input_tokens = sum(e.get("tokens", {}).get("input", 0) for e in entries)
|
|
127
|
+
total_output_tokens = sum(e.get("tokens", {}).get("output", 0) for e in entries)
|
|
128
|
+
|
|
129
|
+
# Build samples
|
|
130
|
+
samples = [_build_eval_sample_dict(entry) for entry in entries]
|
|
131
|
+
|
|
132
|
+
# Get unique epochs
|
|
133
|
+
epochs = len(set(e.get("epoch", 0) for e in entries))
|
|
134
|
+
|
|
135
|
+
return {
|
|
136
|
+
"version": 2,
|
|
137
|
+
"status": "success",
|
|
138
|
+
"eval": {
|
|
139
|
+
"eval_id": eval_id,
|
|
140
|
+
"run_id": str(uuid.uuid4()),
|
|
141
|
+
"created": started_at,
|
|
142
|
+
"task": task_name,
|
|
143
|
+
"task_id": f"{task_name}_{eval_id}",
|
|
144
|
+
"task_version": 1,
|
|
145
|
+
"task_file": None,
|
|
146
|
+
"task_attribs": {},
|
|
147
|
+
"task_args": {},
|
|
148
|
+
"task_args_passed": {},
|
|
149
|
+
"solver": None,
|
|
150
|
+
"solver_args": None,
|
|
151
|
+
"dataset": {
|
|
152
|
+
"name": task_name,
|
|
153
|
+
"location": None,
|
|
154
|
+
"samples": len(entries),
|
|
155
|
+
"shuffled": False,
|
|
156
|
+
},
|
|
157
|
+
"sandbox": None,
|
|
158
|
+
"model": model_name,
|
|
159
|
+
"model_generate_config": {},
|
|
160
|
+
"model_base_url": None,
|
|
161
|
+
"model_args": {},
|
|
162
|
+
"config": {
|
|
163
|
+
"epochs": epochs,
|
|
164
|
+
"log_samples": True,
|
|
165
|
+
},
|
|
166
|
+
"revision": None,
|
|
167
|
+
"packages": {"flashlite": "0.1.0"},
|
|
168
|
+
"metadata": {"source": "flashlite"},
|
|
169
|
+
},
|
|
170
|
+
"plan": {
|
|
171
|
+
"name": "flashlite",
|
|
172
|
+
"steps": [],
|
|
173
|
+
"finish": None,
|
|
174
|
+
"config": {},
|
|
175
|
+
},
|
|
176
|
+
"results": {
|
|
177
|
+
"total_samples": len(samples),
|
|
178
|
+
"completed_samples": len(samples),
|
|
179
|
+
"scores": [],
|
|
180
|
+
},
|
|
181
|
+
"stats": {
|
|
182
|
+
"started_at": started_at,
|
|
183
|
+
"completed_at": completed_at,
|
|
184
|
+
"model_usage": {
|
|
185
|
+
model_name: {
|
|
186
|
+
"input_tokens": total_input_tokens,
|
|
187
|
+
"output_tokens": total_output_tokens,
|
|
188
|
+
"total_tokens": total_input_tokens + total_output_tokens,
|
|
189
|
+
}
|
|
190
|
+
},
|
|
191
|
+
},
|
|
192
|
+
"error": None,
|
|
193
|
+
"samples": samples,
|
|
194
|
+
"reductions": None,
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _build_eval_sample_dict(entry: dict[str, Any]) -> dict[str, Any]:
    """Build an Inspect-compatible EvalSample dict from a flashlite log entry.

    Args:
        entry: One parsed flashlite log entry. The keys read are ``sample_id``,
            ``epoch``, ``input``, ``output``, ``model``, ``tokens`` and
            ``metadata``; every one of them is optional.

    Returns:
        A JSON-serialisable sample dict. ``messages`` is the input conversation
        followed by one assistant message holding the entry's output.
    """
    raw_input = entry.get("input")
    if raw_input is None:
        raw_input = []

    # NOTE(review): flashlite presumably logs "input" as a list of chat
    # messages; a plain string prompt is also accepted here so that list()
    # does not split it into single characters.
    if isinstance(raw_input, str):
        history = [{"role": "user", "content": raw_input}]
    else:
        history = list(raw_input)

    output_text = entry.get("output", "")
    model_name = entry.get("model", "unknown")

    # Tolerate a missing/null "tokens" mapping and null counts.
    tokens = entry.get("tokens") or {}
    input_tokens = tokens.get("input", 0) or 0
    output_tokens = tokens.get("output", 0) or 0
    total_tokens = tokens.get("total")
    if total_tokens is None:
        # Fall back to the sum so the sample agrees with the log-level total.
        total_tokens = input_tokens + output_tokens

    usage = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_tokens": total_tokens,
    }

    # Build messages list (input + assistant response)
    messages = history + [{"role": "assistant", "content": output_text}]

    return {
        "id": entry.get("sample_id", 0),
        "epoch": (entry.get("epoch") or 0) + 1,  # Inspect uses 1-based epochs
        "input": raw_input,
        "choices": None,
        "target": "",  # flashlite logs don't have targets
        "sandbox": None,
        "files": None,
        "setup": None,
        "messages": messages,
        "output": {
            "model": model_name,
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": output_text,
                    },
                    "stop_reason": "stop",
                }
            ],
            "usage": dict(usage),
        },
        "scores": None,
        "metadata": entry.get("metadata") or {},
        "store": {},
        "events": [],
        "model_usage": {model_name: dict(usage)},
    }
|
|
251
|
+
|
|
252
|
+
|
|
27
253
|
@dataclass
|
|
28
254
|
class InspectLogEntry:
|
|
29
255
|
"""A log entry in Inspect-compatible format."""
|
|
@@ -264,3 +490,38 @@ class FlashliteModelAPI:
|
|
|
264
490
|
def model_name(self) -> str | None:
|
|
265
491
|
"""Get the default model name."""
|
|
266
492
|
return self._model
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def convert_logs_cli() -> None:
    """CLI entry point for converting flashlite logs to Inspect format.

    Reads the input path (required) and output path (optional) from
    ``sys.argv``. Exits with status 1 when no input path is given or when the
    conversion raises; diagnostics are written to stderr so that stdout only
    carries the success report.

    Usage:
        python -m flashlite.observability.inspect_compat input.jsonl [output.json]
    """
    import sys

    if len(sys.argv) < 2:
        print(
            "Usage: python -m flashlite.observability.inspect_compat <input.jsonl> [output.json]",
            file=sys.stderr,
        )
        print("\nConverts flashlite JSONL logs to Inspect-viewable format.", file=sys.stderr)
        sys.exit(1)

    input_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else None

    try:
        result = convert_flashlite_logs_to_inspect(input_path, output_path)
    except (ImportError, FileNotFoundError) as e:
        # These messages are already self-explanatory; print them as-is.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report any other failure and exit non-zero.
        print(f"Error converting logs: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        print(f"Successfully converted to: {result}")
        print(f"\nView with: inspect view --log-dir {result.parent}")


if __name__ == "__main__":
    convert_logs_cli()
|
flashlite/types.py
CHANGED
|
@@ -58,7 +58,9 @@ class CompletionRequest:
|
|
|
58
58
|
"""A request to complete a chat conversation."""
|
|
59
59
|
|
|
60
60
|
model: str
|
|
61
|
-
messages: Messages
|
|
61
|
+
messages: Messages = field(default_factory=list)
|
|
62
|
+
template: str | None = None
|
|
63
|
+
variables: dict[str, Any] | None = None
|
|
62
64
|
temperature: float | None = None
|
|
63
65
|
max_tokens: int | None = None
|
|
64
66
|
max_completion_tokens: int | None = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flashlite
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Batteries-included wrapper for litellm with rate limiting, retries, templating, and more
|
|
5
5
|
Author-email: ndalton12 <niall.dalton12@gmail.com>
|
|
6
6
|
License-File: LICENSE.md
|
|
@@ -16,6 +16,8 @@ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
|
|
|
16
16
|
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
17
17
|
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
18
18
|
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
19
|
+
Provides-Extra: evals
|
|
20
|
+
Requires-Dist: inspect-ai>=0.3.0; extra == 'evals'
|
|
19
21
|
Description-Content-Type: text/markdown
|
|
20
22
|
|
|
21
23
|
# Flashlite
|
|
@@ -2,7 +2,7 @@ flashlite/__init__.py,sha256=RlXjsK7zvZXStMvfz4FGqBxTWHev9VkyHYy-35TuTuM,3585
|
|
|
2
2
|
flashlite/client.py,sha256=zQH_eLWZxnkX9acwI-y9c3uxeGybA-C0I9UPU6HrzvI,25081
|
|
3
3
|
flashlite/config.py,sha256=3RMEIAejBPlBG_VOgD8mpZKEDNZvK0k0cVv3vMM9kW8,4818
|
|
4
4
|
flashlite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
flashlite/types.py,sha256=
|
|
5
|
+
flashlite/types.py,sha256=OqtgtnZaEWRYfaI6esqzv4-HKwu76Y-RDu3-Mhsae9w,6498
|
|
6
6
|
flashlite/cache/__init__.py,sha256=T8O7oiZ0U181_tacJzfK6IGEAt1m3NdaIlBjq9wmB44,325
|
|
7
7
|
flashlite/cache/base.py,sha256=IaDAI4EzewhJe0quh2JQK9-BxQxGxUDwrsd9BCaHFFc,5663
|
|
8
8
|
flashlite/cache/disk.py,sha256=pGPI7eJW6RqVCQC4laTYhQr0iU-AkjA4aFFYt-wg8ls,8777
|
|
@@ -20,9 +20,9 @@ flashlite/middleware/cache.py,sha256=R1YwAZBg5YJGTiqgNWdkl7VSN1xpmqmupTSBQnpyH-s
|
|
|
20
20
|
flashlite/middleware/logging.py,sha256=D3x8X1l1LN1Um_qOWuELyO8Fgo9WulFJTIx6s94Ure4,4919
|
|
21
21
|
flashlite/middleware/rate_limit.py,sha256=nf0-Ul0CGnX0VRKtxB2dfoplkBin3P2cMLrbks76lcg,7059
|
|
22
22
|
flashlite/middleware/retry.py,sha256=_3Lz9Gmes2sNk6rO10WamH6yrwJy8TQi-esIl8NIMag,4832
|
|
23
|
-
flashlite/observability/__init__.py,sha256=
|
|
23
|
+
flashlite/observability/__init__.py,sha256=VHdYteU9KmVkgSHrkA-Ssz6_qoi9uL-2JFDhSH5sgwI,949
|
|
24
24
|
flashlite/observability/callbacks.py,sha256=yz1oZh7f7WVxvKmt7XyHbj4WDC2xnvM3SJiTSxfAkoQ,4897
|
|
25
|
-
flashlite/observability/inspect_compat.py,sha256=
|
|
25
|
+
flashlite/observability/inspect_compat.py,sha256=IrsdEiV-qn_wOlgAvWLcIJ_7WxU0Bpq7DcHaS_KWXPw,16366
|
|
26
26
|
flashlite/observability/logging.py,sha256=UxBH2RN8rNcGZHYgC_QYiuEpaIRXEQFs1OjiKjxbuf0,9273
|
|
27
27
|
flashlite/observability/metrics.py,sha256=blRx5N3uN4ilnPpxBe7k_uDhYV3GmQWXoKPLVxnk8_s,7466
|
|
28
28
|
flashlite/structured/__init__.py,sha256=9k5bwkzFo_JD3WZ1Tm4iyZqoZ1A51EIINI8N1H2_2ew,750
|
|
@@ -35,7 +35,7 @@ flashlite/templating/registry.py,sha256=wp8RaibHKNyu5q4tCdOXJ0B4tey7bv-c0qb9h1a7
|
|
|
35
35
|
flashlite/tools/__init__.py,sha256=zpQ5KyvZwZaVvaulnpMmL_JjCnMfD08nD_foI95TjVg,1791
|
|
36
36
|
flashlite/tools/definitions.py,sha256=cqyk6GR1qeMkTPFqsadnJc-YkCG15QVafiaf-OjGYNU,11519
|
|
37
37
|
flashlite/tools/execution.py,sha256=iQC7V3R5Tx19suISnnuaDpjpgl8wURwOHmKZbsHL16s,10814
|
|
38
|
-
flashlite-0.1.
|
|
39
|
-
flashlite-0.1.
|
|
40
|
-
flashlite-0.1.
|
|
41
|
-
flashlite-0.1.
|
|
38
|
+
flashlite-0.1.2.dist-info/METADATA,sha256=vWQl0DuuE16hbq9n1lLRL8ASCgxwBrHZsuibi8YD-u4,4293
|
|
39
|
+
flashlite-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
40
|
+
flashlite-0.1.2.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
|
|
41
|
+
flashlite-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|