flashlite 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flashlite-0.1.0 → flashlite-0.1.2}/.gitignore +3 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/PKG-INFO +3 -1
- {flashlite-0.1.0 → flashlite-0.1.2}/pyproject.toml +4 -1
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/observability/__init__.py +7 -1
- flashlite-0.1.2/src/flashlite/observability/inspect_compat.py +527 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/types.py +3 -1
- {flashlite-0.1.0 → flashlite-0.1.2}/uv.lock +695 -6
- flashlite-0.1.0/examples/stag_hunt_game.py +0 -333
- flashlite-0.1.0/plan.md +0 -854
- flashlite-0.1.0/src/flashlite/observability/inspect_compat.py +0 -266
- {flashlite-0.1.0 → flashlite-0.1.2}/.python-version +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/.vscode/settings.json +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/DEV.md +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/LICENSE.md +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/QUICK_START.md +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/README.md +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/examples/basic_example.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/examples/multi_agent_chat.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/cache/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/cache/base.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/cache/disk.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/cache/memory.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/client.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/config.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/conversation/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/conversation/context.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/conversation/manager.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/conversation/multi_agent.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/core/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/core/completion.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/core/messages.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/middleware/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/middleware/base.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/middleware/cache.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/middleware/logging.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/middleware/rate_limit.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/middleware/retry.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/observability/callbacks.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/observability/logging.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/observability/metrics.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/py.typed +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/structured/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/structured/outputs.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/structured/schema.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/templating/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/templating/engine.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/templating/filters.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/templating/registry.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/tools/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/tools/definitions.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/src/flashlite/tools/execution.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/__init__.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/conftest.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_cache.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_cache_integration.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_client.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_config.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_conversation.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_integration.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_messages.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_middleware.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_multi_agent.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_observability.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_structured.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_templating.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_tools.py +0 -0
- {flashlite-0.1.0 → flashlite-0.1.2}/tests/test_types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flashlite
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Batteries-included wrapper for litellm with rate limiting, retries, templating, and more
|
|
5
5
|
Author-email: ndalton12 <niall.dalton12@gmail.com>
|
|
6
6
|
License-File: LICENSE.md
|
|
@@ -16,6 +16,8 @@ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
|
|
|
16
16
|
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
17
17
|
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
18
18
|
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
19
|
+
Provides-Extra: evals
|
|
20
|
+
Requires-Dist: inspect-ai>=0.3.0; extra == 'evals'
|
|
19
21
|
Description-Content-Type: text/markdown
|
|
20
22
|
|
|
21
23
|
# Flashlite
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "flashlite"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.1.2"
|
|
4
4
|
description = "Batteries-included wrapper for litellm with rate limiting, retries, templating, and more"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -23,6 +23,9 @@ dev = [
|
|
|
23
23
|
"mypy>=1.0.0",
|
|
24
24
|
"ruff>=0.1.0",
|
|
25
25
|
]
|
|
26
|
+
evals = [
|
|
27
|
+
"inspect-ai>=0.3.0",
|
|
28
|
+
]
|
|
26
29
|
|
|
27
30
|
[build-system]
|
|
28
31
|
requires = ["hatchling"]
|
|
@@ -7,7 +7,12 @@ from .callbacks import (
|
|
|
7
7
|
OnResponseCallback,
|
|
8
8
|
create_logging_callbacks,
|
|
9
9
|
)
|
|
10
|
-
from .inspect_compat import FlashliteModelAPI, InspectLogEntry, InspectLogger
|
|
10
|
+
from .inspect_compat import (
|
|
11
|
+
FlashliteModelAPI,
|
|
12
|
+
InspectLogEntry,
|
|
13
|
+
InspectLogger,
|
|
14
|
+
convert_flashlite_logs_to_inspect,
|
|
15
|
+
)
|
|
11
16
|
from .logging import RequestContext, RequestLogEntry, ResponseLogEntry, StructuredLogger
|
|
12
17
|
from .metrics import BudgetExceededError, CostMetrics, CostTracker
|
|
13
18
|
|
|
@@ -31,4 +36,5 @@ __all__ = [
|
|
|
31
36
|
"InspectLogger",
|
|
32
37
|
"InspectLogEntry",
|
|
33
38
|
"FlashliteModelAPI",
|
|
39
|
+
"convert_flashlite_logs_to_inspect",
|
|
34
40
|
]
|
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
"""Inspect framework compatibility layer for flashlite.
|
|
2
|
+
|
|
3
|
+
This module provides interoperability with the UK AISI's Inspect framework
|
|
4
|
+
(https://inspect.ai-safety-institute.org.uk/).
|
|
5
|
+
|
|
6
|
+
It includes:
|
|
7
|
+
- Log format conversion to Inspect's native eval log format
|
|
8
|
+
- ModelAPI protocol implementation for use as an Inspect solver backend
|
|
9
|
+
- Functions to convert flashlite JSONL logs to Inspect-viewable format
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import uuid
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING, Any
|
|
19
|
+
|
|
20
|
+
from ..types import CompletionRequest, CompletionResponse
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from ..client import Flashlite
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def convert_flashlite_logs_to_inspect(
|
|
29
|
+
input_path: str | Path,
|
|
30
|
+
output_path: str | Path | None = None,
|
|
31
|
+
task_name: str | None = None,
|
|
32
|
+
) -> Path:
|
|
33
|
+
"""
|
|
34
|
+
Convert flashlite JSONL logs to Inspect-compatible format.
|
|
35
|
+
|
|
36
|
+
This allows logs generated by flashlite's InspectLogger to be viewed
|
|
37
|
+
in Inspect's log viewer (`inspect view`).
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
input_path: Path to flashlite JSONL log file
|
|
41
|
+
output_path: Output path for Inspect log file (defaults to same dir with proper naming)
|
|
42
|
+
task_name: Task name for the evaluation (defaults to eval_id from logs)
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Path to the generated Inspect log file
|
|
46
|
+
|
|
47
|
+
Raises:
|
|
48
|
+
FileNotFoundError: If input file doesn't exist
|
|
49
|
+
|
|
50
|
+
Example:
|
|
51
|
+
>>> from flashlite.observability import convert_flashlite_logs_to_inspect
|
|
52
|
+
>>> convert_flashlite_logs_to_inspect("logs/my_eval.jsonl")
|
|
53
|
+
PosixPath('logs/2026-02-05T12-00-00_my_eval_abc123.json')
|
|
54
|
+
"""
|
|
55
|
+
input_path = Path(input_path)
|
|
56
|
+
if not input_path.exists():
|
|
57
|
+
raise FileNotFoundError(f"Log file not found: {input_path}")
|
|
58
|
+
|
|
59
|
+
# Read all entries from JSONL
|
|
60
|
+
entries: list[dict[str, Any]] = []
|
|
61
|
+
with open(input_path) as f:
|
|
62
|
+
for line in f:
|
|
63
|
+
line = line.strip()
|
|
64
|
+
if line:
|
|
65
|
+
entries.append(json.loads(line))
|
|
66
|
+
|
|
67
|
+
if not entries:
|
|
68
|
+
raise ValueError(f"No log entries found in {input_path}")
|
|
69
|
+
|
|
70
|
+
# Extract metadata from first entry
|
|
71
|
+
first_entry = entries[0]
|
|
72
|
+
eval_id = first_entry.get("eval_id", "flashlite_eval")
|
|
73
|
+
model_name = first_entry.get("model", "unknown")
|
|
74
|
+
task = task_name or eval_id
|
|
75
|
+
|
|
76
|
+
# Get timestamp from entries or generate one
|
|
77
|
+
timestamps = [e.get("timestamp", "") for e in entries if e.get("timestamp")]
|
|
78
|
+
if timestamps:
|
|
79
|
+
# Parse and format for filename (Inspect uses format like 2024-05-29T12-38-43)
|
|
80
|
+
started_at = min(timestamps)
|
|
81
|
+
# Convert ISO format to Inspect's filename format
|
|
82
|
+
ts_for_filename = started_at.replace(":", "-").split(".")[0]
|
|
83
|
+
else:
|
|
84
|
+
ts_for_filename = datetime.now(UTC).strftime("%Y-%m-%dT%H-%M-%S")
|
|
85
|
+
|
|
86
|
+
# Generate a short unique ID
|
|
87
|
+
short_id = uuid.uuid4().hex[:8]
|
|
88
|
+
|
|
89
|
+
# Determine output path with Inspect's naming convention: {timestamp}_{task}_{id}.json
|
|
90
|
+
if output_path is None:
|
|
91
|
+
output_dir = input_path.parent
|
|
92
|
+
output_filename = f"{ts_for_filename}_{task}_{short_id}.json"
|
|
93
|
+
output_path = output_dir / output_filename
|
|
94
|
+
else:
|
|
95
|
+
output_path = Path(output_path)
|
|
96
|
+
|
|
97
|
+
# Build EvalLog structure as dict (Inspect's JSON format)
|
|
98
|
+
eval_log = _build_eval_log_dict(
|
|
99
|
+
entries=entries,
|
|
100
|
+
eval_id=eval_id,
|
|
101
|
+
task_name=task,
|
|
102
|
+
model_name=model_name,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Write JSON directly
|
|
106
|
+
with open(output_path, "w") as f:
|
|
107
|
+
json.dump(eval_log, f, indent=2)
|
|
108
|
+
|
|
109
|
+
logger.info(f"Converted {len(entries)} entries to Inspect format: {output_path}")
|
|
110
|
+
return output_path
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _build_eval_log_dict(
|
|
114
|
+
entries: list[dict[str, Any]],
|
|
115
|
+
eval_id: str,
|
|
116
|
+
task_name: str,
|
|
117
|
+
model_name: str,
|
|
118
|
+
) -> dict[str, Any]:
|
|
119
|
+
"""Build an Inspect-compatible EvalLog dict from flashlite log entries."""
|
|
120
|
+
# Calculate timestamps
|
|
121
|
+
timestamps = [e.get("timestamp", "") for e in entries if e.get("timestamp")]
|
|
122
|
+
started_at = min(timestamps) if timestamps else datetime.now(UTC).isoformat()
|
|
123
|
+
completed_at = max(timestamps) if timestamps else datetime.now(UTC).isoformat()
|
|
124
|
+
|
|
125
|
+
# Calculate total token usage
|
|
126
|
+
total_input_tokens = sum(e.get("tokens", {}).get("input", 0) for e in entries)
|
|
127
|
+
total_output_tokens = sum(e.get("tokens", {}).get("output", 0) for e in entries)
|
|
128
|
+
|
|
129
|
+
# Build samples
|
|
130
|
+
samples = [_build_eval_sample_dict(entry) for entry in entries]
|
|
131
|
+
|
|
132
|
+
# Get unique epochs
|
|
133
|
+
epochs = len(set(e.get("epoch", 0) for e in entries))
|
|
134
|
+
|
|
135
|
+
return {
|
|
136
|
+
"version": 2,
|
|
137
|
+
"status": "success",
|
|
138
|
+
"eval": {
|
|
139
|
+
"eval_id": eval_id,
|
|
140
|
+
"run_id": str(uuid.uuid4()),
|
|
141
|
+
"created": started_at,
|
|
142
|
+
"task": task_name,
|
|
143
|
+
"task_id": f"{task_name}_{eval_id}",
|
|
144
|
+
"task_version": 1,
|
|
145
|
+
"task_file": None,
|
|
146
|
+
"task_attribs": {},
|
|
147
|
+
"task_args": {},
|
|
148
|
+
"task_args_passed": {},
|
|
149
|
+
"solver": None,
|
|
150
|
+
"solver_args": None,
|
|
151
|
+
"dataset": {
|
|
152
|
+
"name": task_name,
|
|
153
|
+
"location": None,
|
|
154
|
+
"samples": len(entries),
|
|
155
|
+
"shuffled": False,
|
|
156
|
+
},
|
|
157
|
+
"sandbox": None,
|
|
158
|
+
"model": model_name,
|
|
159
|
+
"model_generate_config": {},
|
|
160
|
+
"model_base_url": None,
|
|
161
|
+
"model_args": {},
|
|
162
|
+
"config": {
|
|
163
|
+
"epochs": epochs,
|
|
164
|
+
"log_samples": True,
|
|
165
|
+
},
|
|
166
|
+
"revision": None,
|
|
167
|
+
"packages": {"flashlite": "0.1.0"},
|
|
168
|
+
"metadata": {"source": "flashlite"},
|
|
169
|
+
},
|
|
170
|
+
"plan": {
|
|
171
|
+
"name": "flashlite",
|
|
172
|
+
"steps": [],
|
|
173
|
+
"finish": None,
|
|
174
|
+
"config": {},
|
|
175
|
+
},
|
|
176
|
+
"results": {
|
|
177
|
+
"total_samples": len(samples),
|
|
178
|
+
"completed_samples": len(samples),
|
|
179
|
+
"scores": [],
|
|
180
|
+
},
|
|
181
|
+
"stats": {
|
|
182
|
+
"started_at": started_at,
|
|
183
|
+
"completed_at": completed_at,
|
|
184
|
+
"model_usage": {
|
|
185
|
+
model_name: {
|
|
186
|
+
"input_tokens": total_input_tokens,
|
|
187
|
+
"output_tokens": total_output_tokens,
|
|
188
|
+
"total_tokens": total_input_tokens + total_output_tokens,
|
|
189
|
+
}
|
|
190
|
+
},
|
|
191
|
+
},
|
|
192
|
+
"error": None,
|
|
193
|
+
"samples": samples,
|
|
194
|
+
"reductions": None,
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _build_eval_sample_dict(entry: dict[str, Any]) -> dict[str, Any]:
    """Build an Inspect-compatible EvalSample dict from a flashlite log entry.

    Args:
        entry: One parsed flashlite JSONL record. Every key is optional; missing
            or null ``input``, ``tokens`` and ``metadata`` are treated as empty.

    Returns:
        A JSON-serializable sample dict holding the conversation (input plus the
        assistant reply), the model output and the per-sample token usage.
    """
    raw_input = entry.get("input") or []
    if isinstance(raw_input, str):
        # A plain-string prompt is a single user turn; list() would split it into chars
        history = [{"role": "user", "content": raw_input}]
    else:
        history = list(raw_input)

    tokens = entry.get("tokens") or {}
    model_name = entry.get("model", "unknown")
    output_text = entry.get("output", "")

    input_tokens = tokens.get("input", 0)
    output_tokens = tokens.get("output", 0)
    usage = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        # Without a logged total, derive it so it agrees with the log-level sum
        "total_tokens": tokens.get("total", input_tokens + output_tokens),
    }

    return {
        "id": entry.get("sample_id", 0),
        "epoch": entry.get("epoch", 0) + 1,  # Inspect uses 1-based epochs
        "input": raw_input,
        "choices": None,
        "target": "",  # flashlite logs don't have targets
        "sandbox": None,
        "files": None,
        "setup": None,
        # Full transcript: input messages followed by the assistant response
        "messages": [*history, {"role": "assistant", "content": output_text}],
        "output": {
            "model": model_name,
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": output_text,
                    },
                    "stop_reason": "stop",
                }
            ],
            "usage": dict(usage),
        },
        "scores": None,
        "metadata": entry.get("metadata") or {},
        "store": {},
        "events": [],
        "model_usage": {model_name: dict(usage)},
    }
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
@dataclass
|
|
254
|
+
class InspectLogEntry:
|
|
255
|
+
"""A log entry in Inspect-compatible format."""
|
|
256
|
+
|
|
257
|
+
eval_id: str
|
|
258
|
+
sample_id: str | int
|
|
259
|
+
epoch: int
|
|
260
|
+
model: str
|
|
261
|
+
input: list[dict[str, Any]]
|
|
262
|
+
output: str
|
|
263
|
+
tokens: dict[str, int]
|
|
264
|
+
timestamp: str
|
|
265
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
266
|
+
|
|
267
|
+
def to_dict(self) -> dict[str, Any]:
|
|
268
|
+
"""Convert to Inspect log format."""
|
|
269
|
+
return {
|
|
270
|
+
"eval_id": self.eval_id,
|
|
271
|
+
"sample_id": self.sample_id,
|
|
272
|
+
"epoch": self.epoch,
|
|
273
|
+
"model": self.model,
|
|
274
|
+
"input": self.input,
|
|
275
|
+
"output": self.output,
|
|
276
|
+
"tokens": self.tokens,
|
|
277
|
+
"timestamp": self.timestamp,
|
|
278
|
+
"metadata": self.metadata,
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
class InspectLogger:
|
|
283
|
+
"""
|
|
284
|
+
A logger that outputs in Inspect-compatible format.
|
|
285
|
+
|
|
286
|
+
This allows Flashlite logs to be analyzed alongside Inspect eval logs,
|
|
287
|
+
enabling unified observability across evaluation runs.
|
|
288
|
+
|
|
289
|
+
Example:
|
|
290
|
+
inspect_logger = InspectLogger(
|
|
291
|
+
log_dir="./logs",
|
|
292
|
+
eval_id="my-eval-001",
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
# Log a completion
|
|
296
|
+
inspect_logger.log(
|
|
297
|
+
request=request,
|
|
298
|
+
response=response,
|
|
299
|
+
sample_id="sample_123",
|
|
300
|
+
epoch=0,
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
# Close when done
|
|
304
|
+
inspect_logger.close()
|
|
305
|
+
"""
|
|
306
|
+
|
|
307
|
+
def __init__(
|
|
308
|
+
self,
|
|
309
|
+
log_dir: str | Path,
|
|
310
|
+
eval_id: str | None = None,
|
|
311
|
+
append: bool = True,
|
|
312
|
+
):
|
|
313
|
+
"""
|
|
314
|
+
Initialize the Inspect logger.
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
log_dir: Directory to write log files
|
|
318
|
+
eval_id: Evaluation ID (auto-generated if not provided)
|
|
319
|
+
append: Whether to append to existing log file
|
|
320
|
+
"""
|
|
321
|
+
self._log_dir = Path(log_dir)
|
|
322
|
+
self._log_dir.mkdir(parents=True, exist_ok=True)
|
|
323
|
+
|
|
324
|
+
self._eval_id = eval_id or self._generate_eval_id()
|
|
325
|
+
self._log_file = self._log_dir / f"{self._eval_id}.jsonl"
|
|
326
|
+
self._mode = "a" if append else "w"
|
|
327
|
+
self._file_handle = open(self._log_file, self._mode)
|
|
328
|
+
self._sample_count = 0
|
|
329
|
+
|
|
330
|
+
def _generate_eval_id(self) -> str:
|
|
331
|
+
"""Generate a unique evaluation ID."""
|
|
332
|
+
timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
|
|
333
|
+
return f"flashlite_eval_{timestamp}"
|
|
334
|
+
|
|
335
|
+
def log(
|
|
336
|
+
self,
|
|
337
|
+
request: CompletionRequest,
|
|
338
|
+
response: CompletionResponse,
|
|
339
|
+
sample_id: str | int | None = None,
|
|
340
|
+
epoch: int = 0,
|
|
341
|
+
metadata: dict[str, Any] | None = None,
|
|
342
|
+
) -> None:
|
|
343
|
+
"""
|
|
344
|
+
Log a request/response pair in Inspect format.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
request: The completion request
|
|
348
|
+
response: The completion response
|
|
349
|
+
sample_id: Sample identifier (auto-incremented if not provided)
|
|
350
|
+
epoch: Epoch number for multi-epoch evals
|
|
351
|
+
metadata: Additional metadata to include
|
|
352
|
+
"""
|
|
353
|
+
if sample_id is None:
|
|
354
|
+
sample_id = self._sample_count
|
|
355
|
+
self._sample_count += 1
|
|
356
|
+
|
|
357
|
+
# Convert messages to Inspect format
|
|
358
|
+
input_messages = [
|
|
359
|
+
{"role": msg.get("role", "user"), "content": msg.get("content", "")}
|
|
360
|
+
for msg in request.messages
|
|
361
|
+
]
|
|
362
|
+
|
|
363
|
+
entry = InspectLogEntry(
|
|
364
|
+
eval_id=self._eval_id,
|
|
365
|
+
sample_id=sample_id,
|
|
366
|
+
epoch=epoch,
|
|
367
|
+
model=response.model,
|
|
368
|
+
input=input_messages,
|
|
369
|
+
output=response.content,
|
|
370
|
+
tokens={
|
|
371
|
+
"input": response.usage.input_tokens if response.usage else 0,
|
|
372
|
+
"output": response.usage.output_tokens if response.usage else 0,
|
|
373
|
+
"total": response.usage.total_tokens if response.usage else 0,
|
|
374
|
+
},
|
|
375
|
+
timestamp=datetime.now(UTC).isoformat(),
|
|
376
|
+
metadata=metadata or {},
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
json_str = json.dumps(entry.to_dict())
|
|
380
|
+
self._file_handle.write(json_str + "\n")
|
|
381
|
+
self._file_handle.flush()
|
|
382
|
+
|
|
383
|
+
def close(self) -> None:
|
|
384
|
+
"""Close the log file."""
|
|
385
|
+
if self._file_handle:
|
|
386
|
+
self._file_handle.close()
|
|
387
|
+
|
|
388
|
+
@property
|
|
389
|
+
def eval_id(self) -> str:
|
|
390
|
+
"""Get the evaluation ID."""
|
|
391
|
+
return self._eval_id
|
|
392
|
+
|
|
393
|
+
@property
|
|
394
|
+
def log_file(self) -> Path:
|
|
395
|
+
"""Get the log file path."""
|
|
396
|
+
return self._log_file
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
class FlashliteModelAPI:
|
|
400
|
+
"""
|
|
401
|
+
An adapter that implements a ModelAPI-like interface for Inspect integration.
|
|
402
|
+
|
|
403
|
+
This allows Flashlite to be used as a model backend in Inspect evaluations.
|
|
404
|
+
|
|
405
|
+
Example:
|
|
406
|
+
from flashlite import Flashlite
|
|
407
|
+
from flashlite.observability import FlashliteModelAPI
|
|
408
|
+
|
|
409
|
+
# Create Flashlite client
|
|
410
|
+
client = Flashlite(rate_limit=RateLimitConfig(requests_per_minute=60))
|
|
411
|
+
|
|
412
|
+
# Wrap for Inspect
|
|
413
|
+
model_api = FlashliteModelAPI(client, model="gpt-4o")
|
|
414
|
+
|
|
415
|
+
# Use in Inspect eval (pseudocode)
|
|
416
|
+
# @task
|
|
417
|
+
# def my_eval():
|
|
418
|
+
# return Task(
|
|
419
|
+
# dataset=my_dataset,
|
|
420
|
+
# solver=my_solver,
|
|
421
|
+
# model=model_api,
|
|
422
|
+
# )
|
|
423
|
+
"""
|
|
424
|
+
|
|
425
|
+
def __init__(
|
|
426
|
+
self,
|
|
427
|
+
client: "Flashlite",
|
|
428
|
+
model: str | None = None,
|
|
429
|
+
**default_kwargs: Any,
|
|
430
|
+
):
|
|
431
|
+
"""
|
|
432
|
+
Initialize the Inspect model adapter.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
client: The Flashlite client to use
|
|
436
|
+
model: Default model to use (can be overridden per-request)
|
|
437
|
+
**default_kwargs: Default parameters for completions
|
|
438
|
+
"""
|
|
439
|
+
self._client = client
|
|
440
|
+
self._model = model
|
|
441
|
+
self._default_kwargs = default_kwargs
|
|
442
|
+
|
|
443
|
+
async def generate(
|
|
444
|
+
self,
|
|
445
|
+
messages: list[dict[str, Any]],
|
|
446
|
+
model: str | None = None,
|
|
447
|
+
**kwargs: Any,
|
|
448
|
+
) -> dict[str, Any]:
|
|
449
|
+
"""
|
|
450
|
+
Generate a completion (Inspect-compatible interface).
|
|
451
|
+
|
|
452
|
+
Args:
|
|
453
|
+
messages: List of messages
|
|
454
|
+
model: Model to use (overrides default)
|
|
455
|
+
**kwargs: Additional parameters
|
|
456
|
+
|
|
457
|
+
Returns:
|
|
458
|
+
Inspect-compatible response dict
|
|
459
|
+
"""
|
|
460
|
+
# Merge kwargs
|
|
461
|
+
call_kwargs = {**self._default_kwargs, **kwargs}
|
|
462
|
+
|
|
463
|
+
# Call Flashlite
|
|
464
|
+
response = await self._client.complete(
|
|
465
|
+
model=model or self._model,
|
|
466
|
+
messages=messages,
|
|
467
|
+
**call_kwargs,
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
# Return in Inspect-compatible format
|
|
471
|
+
return {
|
|
472
|
+
"choices": [
|
|
473
|
+
{
|
|
474
|
+
"message": {
|
|
475
|
+
"role": "assistant",
|
|
476
|
+
"content": response.content,
|
|
477
|
+
},
|
|
478
|
+
"finish_reason": response.finish_reason,
|
|
479
|
+
}
|
|
480
|
+
],
|
|
481
|
+
"usage": {
|
|
482
|
+
"prompt_tokens": response.usage.input_tokens if response.usage else 0,
|
|
483
|
+
"completion_tokens": response.usage.output_tokens if response.usage else 0,
|
|
484
|
+
"total_tokens": response.usage.total_tokens if response.usage else 0,
|
|
485
|
+
},
|
|
486
|
+
"model": response.model,
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
@property
|
|
490
|
+
def model_name(self) -> str | None:
|
|
491
|
+
"""Get the default model name."""
|
|
492
|
+
return self._model
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def convert_logs_cli() -> None:
|
|
496
|
+
"""CLI entry point for converting flashlite logs to Inspect format.
|
|
497
|
+
|
|
498
|
+
Usage:
|
|
499
|
+
python -m flashlite.observability.inspect_compat input.jsonl [output.json]
|
|
500
|
+
"""
|
|
501
|
+
import sys
|
|
502
|
+
|
|
503
|
+
if len(sys.argv) < 2:
|
|
504
|
+
print("Usage: python -m flashlite.observability.inspect_compat <input.jsonl> [output.json]")
|
|
505
|
+
print("\nConverts flashlite JSONL logs to Inspect-viewable format.")
|
|
506
|
+
sys.exit(1)
|
|
507
|
+
|
|
508
|
+
input_path = sys.argv[1]
|
|
509
|
+
output_path = sys.argv[2] if len(sys.argv) > 2 else None
|
|
510
|
+
|
|
511
|
+
try:
|
|
512
|
+
result = convert_flashlite_logs_to_inspect(input_path, output_path)
|
|
513
|
+
print(f"Successfully converted to: {result}")
|
|
514
|
+
print(f"\nView with: inspect view --log-dir {result.parent}")
|
|
515
|
+
except ImportError as e:
|
|
516
|
+
print(f"Error: {e}")
|
|
517
|
+
sys.exit(1)
|
|
518
|
+
except FileNotFoundError as e:
|
|
519
|
+
print(f"Error: {e}")
|
|
520
|
+
sys.exit(1)
|
|
521
|
+
except Exception as e:
|
|
522
|
+
print(f"Error converting logs: {e}")
|
|
523
|
+
sys.exit(1)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
# Run the converter when the module is executed directly, e.g.
# `python -m flashlite.observability.inspect_compat input.jsonl`.
if __name__ == "__main__":
    convert_logs_cli()
|
|
@@ -58,7 +58,9 @@ class CompletionRequest:
|
|
|
58
58
|
"""A request to complete a chat conversation."""
|
|
59
59
|
|
|
60
60
|
model: str
|
|
61
|
-
messages: Messages
|
|
61
|
+
messages: Messages = field(default_factory=list)
|
|
62
|
+
template: str | None = None
|
|
63
|
+
variables: dict[str, Any] | None = None
|
|
62
64
|
temperature: float | None = None
|
|
63
65
|
max_tokens: int | None = None
|
|
64
66
|
max_completion_tokens: int | None = None
|