fugusashi-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fugusashi/__init__.py +1 -0
- fugusashi/__main__.py +66 -0
- fugusashi/api/__init__.py +0 -0
- fugusashi/api/routes.py +358 -0
- fugusashi/benchmark.py +294 -0
- fugusashi/config.py +67 -0
- fugusashi/feedback.py +226 -0
- fugusashi/providers.py +119 -0
- fugusashi/router/__init__.py +12 -0
- fugusashi/router/ensemble.py +79 -0
- fugusashi/router/interface.py +39 -0
- fugusashi/router/strategies.py +186 -0
- fugusashi/server.py +66 -0
- fugusashi/static/dashboard.html +210 -0
- fugusashi/tracker.py +140 -0
- fugusashi-0.3.0.dist-info/METADATA +402 -0
- fugusashi-0.3.0.dist-info/RECORD +20 -0
- fugusashi-0.3.0.dist-info/WHEEL +5 -0
- fugusashi-0.3.0.dist-info/entry_points.txt +2 -0
- fugusashi-0.3.0.dist-info/top_level.txt +1 -0
fugusashi/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.0"
|
fugusashi/__main__.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
import uvicorn
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@click.group()
def cli() -> None:
    # Root command group for the `fugusashi` command line; the subcommands
    # below attach themselves through `@cli.command()`.
    # Documented with a comment rather than a docstring because click would
    # surface a docstring as the group's --help text.
    return None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@cli.command()
@click.option("--dataset", "-d", type=click.Path(exists=True), help="JSONL dataset file")
@click.option("--threshold", "-t", default=0.4, type=float, help="Confidence threshold")
@click.option("--verbose", "-v", is_flag=True, help="Show per-sample results")
@click.option("--json", "json_out", is_flag=True, help="Output as JSON")
@click.option("--train", is_flag=True, help="Seed training data for similarity routing")
def benchmark(dataset, threshold, verbose, json_out, train):
    # `benchmark` subcommand: hands the parsed CLI options to
    # `run_benchmark_cli` unchanged. `dataset` is None when --dataset is not
    # given, because the option is not marked required.
    # The benchmark module is imported lazily, only when this subcommand runs.
    from .benchmark import run_benchmark_cli

    cli_options = {
        "dataset_path": dataset,
        "threshold": threshold,
        "verbose": verbose,
        "json_out": json_out,
        "train": train,
    }
    run_benchmark_cli(**cli_options)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@cli.command()
@click.option("--config", "-c", default="config.yaml", help="Path to config file")
@click.option("--host", default=None, help="Bind address")
@click.option("--port", default=None, type=int, help="Bind port")
@click.option("--reload", is_flag=True, help="Auto-reload on file changes")
def serve(config: str, host: str | None, port: int | None, reload: bool):
    # `serve` subcommand: load the YAML config, build the app and run it
    # under uvicorn.
    #
    #   config: path of the YAML config file; the process exits with status 1
    #           (message on stderr) when the file does not exist.
    #   host:   bind address; falls back to `cfg.host` when not given.
    #   port:   bind port; falls back to `cfg.port` when not given.
    #   reload: request for uvicorn auto-reload (see the note further down).
    #
    # Documented with comments rather than a docstring so the command's
    # --help output stays unchanged.
    from .config import AppConfig

    config_path = Path(config)
    if not config_path.exists():
        click.echo(f"Config file not found: {config}", err=True)
        sys.exit(1)

    cfg = AppConfig.from_yaml(str(config_path))

    from .server import create_app

    app = create_app(cfg)

    # Resolve the bind address once so the banner and uvicorn always agree.
    bind_host = host or cfg.host
    # Compare against None: `port or cfg.port` would silently discard an
    # explicit `--port 0` (ask the OS for a free port) because 0 is falsy.
    bind_port = port if port is not None else cfg.port

    if reload:
        # uvicorn only supports reload when the application is passed as an
        # import string; given an app object it logs a warning and exits.
        # Tell the user and serve without reload instead of dying at startup.
        click.echo(
            "--reload is not supported when serving an in-process app; "
            "starting without auto-reload.",
            err=True,
        )
        reload = False

    click.echo(
        f" Fugusashi router listening on "
        f"{bind_host}:{bind_port}"
    )

    uvicorn.run(
        app,
        host=bind_host,
        port=bind_port,
        reload=reload,
        log_level=cfg.observability.log_level.lower(),
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@cli.command()
def version():
    # `version` subcommand: print the installed package version.
    # The version string is read from the package's own __init__ at call time.
    from . import __version__ as package_version

    banner = f"fugusashi v{package_version}"
    click.echo(banner)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Script entry point: dispatch to the click command group when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    cli()
|
|
File without changes
|
fugusashi/api/routes.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from ..tracker import RoutingDecision
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ChatMessage(BaseModel):
    # One entry of a chat conversation as sent in a completion request.
    # Both fields are required.
    role: str = Field(...)
    content: str = Field(...)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TrainingExample(BaseModel):
    # One labelled example accepted by the routing-training endpoint:
    # a prompt, the model associated with it, and a score (1.0 when omitted).
    prompt: str = Field(...)
    model: str = Field(...)
    score: float = Field(default=1.0)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ChatCompletionRequest(BaseModel):
    # Request body of the chat-completion endpoint.
    # `model` defaults to "auto"; only `messages` is required.
    model: str = Field(default="auto")
    messages: List[ChatMessage] = Field(...)
    temperature: float = Field(default=0.7)
    max_tokens: Optional[int] = Field(default=None)
    stream: bool = Field(default=False)
    user: Optional[str] = Field(default=None)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ChatCompletionResponse(BaseModel):
    # Response body of the non-streaming chat-completion endpoint.
    # `routing_decision` is an optional extra that defaults to None.
    id: str = Field(...)
    object: str = Field(default="chat.completion")
    created: int = Field(...)
    model: str = Field(...)
    choices: List[Dict[str, Any]] = Field(...)
    usage: Dict[str, Any] = Field(...)
    routing_decision: Optional[Dict[str, Any]] = Field(default=None)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ModelInfo(BaseModel):
    # One entry of the model listing: identity fields plus descriptive and
    # per-token cost metadata, all of which have defaults except `id` and
    # `created`.
    id: str = Field(...)
    object: str = Field(default="model")
    created: int = Field(...)
    owned_by: str = Field(default="fugusashi")
    description: str = Field(default="")
    capabilities: List[str] = Field(default=[])
    cost_per_input_token: float = Field(default=0.0)
    cost_per_output_token: float = Field(default=0.0)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def create_router(deps) -> APIRouter:
    """Build the API router and register every HTTP endpoint on it.

    Args:
        deps: Mapping of shared service objects. The handlers read
            ``deps["model_client"]``, ``deps["tracker"]``, ``deps["router"]``
            and ``deps["config"]``; ``deps.get("feedback")`` is optional and
            the feedback endpoints answer with an error payload without it.

    Returns:
        The populated ``APIRouter``.

    Route handlers deliberately carry no return annotations: FastAPI would
    treat one as a response model and start validating/filtering the payload.
    """
    # Function-scope imports keep the new dependencies local to this block.
    import time
    from datetime import timezone

    from .. import __version__ as package_version

    router = APIRouter()

    def _epoch_seconds() -> int:
        # time.time() is the real Unix epoch. datetime.utcnow().timestamp()
        # interprets the naive UTC value as *local* time, so it is wrong by
        # the host's UTC offset on any machine not running in UTC.
        return int(time.time())

    def _utc_iso_now() -> str:
        # Naive-UTC ISO string, the same format utcnow().isoformat() produced,
        # without calling the deprecated utcnow().
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _routing_summary(selected_model, decision) -> Dict[str, Any]:
        # Compact view of a routing decision, shared by the streaming and
        # non-streaming responses.
        return {
            "model": selected_model,
            "confidence": decision.confidence,
            "strategy": decision.strategy,
            "latency_ms": decision.latency_ms,
            "explanation": decision.explanation,
        }

    @router.get("/health")
    async def health():
        # Report the real package version instead of a stale literal.
        return {"status": "ok", "version": package_version}

    @router.get("/v1/models")
    async def list_models():
        available = deps["model_client"].get_available_models()
        created = _epoch_seconds()
        models = [
            ModelInfo(
                id=name,
                created=created,
                description=cfg.get("description", ""),
                capabilities=cfg.get("capabilities", []),
                cost_per_input_token=cfg.get("cost_per_input_token", 0.0),
                cost_per_output_token=cfg.get("cost_per_output_token", 0.0),
            )
            for name, cfg in available.items()
        ]
        return {"object": "list", "data": models}

    @router.post("/v1/chat/completions")
    async def chat_completion(body: ChatCompletionRequest, raw_request: Request):
        request_id = f"fugu-{uuid.uuid4().hex[:12]}"
        tracker = deps["tracker"]
        model_client = deps["model_client"]
        router_engine = deps["router"]
        config = deps["config"]

        tracker.start_trace(request_id)
        # Routing looks only at the last message of the conversation.
        prompt = body.messages[-1].content if body.messages else ""
        prompt_preview = prompt[:200]

        # NOTE(review): hash() of a str is salted per process, so prompt_hash
        # is not comparable across restarts. Left as-is because other modules
        # may compute the same value; confirm before switching to hashlib.
        if body.model and body.model != "auto":
            # The caller pinned a model: skip the router entirely.
            selected_model = body.model
            routing_result = RoutingDecision(
                request_id=request_id,
                timestamp=_utc_iso_now(),
                prompt_hash=str(hash(prompt)),
                prompt_preview=prompt_preview,
                routed_to=selected_model,
                confidence=1.0,
                strategy="user-specified",
                model_scores={selected_model: 1.0},
                latency_ms=0.0,
                explanation="User explicitly specified the model",
                needs_escalation=False,
            )
        else:
            available = model_client.get_available_models()
            threshold = config.tier1.router.confidence_threshold
            result = router_engine.route(
                prompt=prompt,
                messages=[m.model_dump() for m in body.messages],
                available_models=available,
                threshold=threshold,
            )

            if result.needs_escalation and config.tier2.enabled:
                # Placeholder: tier-2 escalation is not implemented here.
                pass

            selected_model = result.model
            routing_result = RoutingDecision(
                request_id=request_id,
                timestamp=_utc_iso_now(),
                prompt_hash=str(hash(prompt)),
                prompt_preview=prompt_preview,
                routed_to=selected_model,
                confidence=result.confidence,
                strategy=result.strategy,
                model_scores=result.scores,
                latency_ms=result.latency_ms,
                explanation=result.explanation,
                needs_escalation=result.needs_escalation,
            )

        tracker.log_routing(request_id, routing_result)

        if body.stream:
            from fastapi.responses import StreamingResponse

            async def stream_generator():
                # Holds the error text when the stream fails; None on success.
                stream_error = None

                try:
                    async for chunk in model_client.call_model_stream(
                        model_name=selected_model,
                        messages=[m.model_dump() for m in body.messages],
                        temperature=body.temperature,
                        max_tokens=body.max_tokens,
                    ):
                        chunk_data = chunk.model_dump() if hasattr(chunk, "model_dump") else chunk
                        yield f"data: {json.dumps(chunk_data)}\n\n"

                    # Build the payload outside the f-string: a single-quoted
                    # f-string spanning several lines only parses on 3.12+.
                    summary_event = {
                        "routing_decision": _routing_summary(selected_model, routing_result)
                    }
                    yield f"data: {json.dumps(summary_event)}\n\n"

                    yield "data: [DONE]\n\n"
                except Exception as e:
                    stream_error = str(e)
                    error_event = {"error": stream_error}
                    yield f"data: {json.dumps(error_event)}\n\n"

                if stream_error is None:
                    # Token counts are not collected on the streaming path.
                    tracker.log_model_call(
                        request_id=request_id,
                        model=selected_model,
                        provider="",
                        prompt_tokens=0,
                        completion_tokens=0,
                        status="success",
                    )
                else:
                    # A failed stream is logged as an error, with the same
                    # arguments the non-streaming error path uses.
                    tracker.log_model_call(
                        request_id=request_id,
                        model=selected_model,
                        provider="",
                        status="error",
                        error=stream_error,
                    )
                tracker.finish_trace(request_id)

            return StreamingResponse(stream_generator(), media_type="text/event-stream")

        # Non-streaming: try the routed model, then the configured default.
        models_to_try = [selected_model]
        if selected_model != config.default_model:
            models_to_try.append(config.default_model)

        last_error = None
        response = None
        served_model = selected_model
        # NOTE(review): the try body also covers the tracker/feedback
        # bookkeeping, so a failure there is handled like a model failure and
        # triggers the fallback. Kept as in the original; confirm intent.
        for fallback_idx, model_to_try in enumerate(models_to_try):
            served_model = model_to_try
            try:
                response, latency, prompt_tokens, completion_tokens, provider = (
                    await model_client.call_model(
                        model_name=model_to_try,
                        messages=[m.model_dump() for m in body.messages],
                        temperature=body.temperature,
                        max_tokens=body.max_tokens,
                    )
                )

                tracker.log_model_call(
                    request_id=request_id,
                    model=model_to_try,
                    provider=provider,
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    cost=0.0,
                    latency_ms=latency,
                    status="success" if fallback_idx == 0 else "fallback_success",
                )

                feedback = deps.get("feedback")
                if feedback:
                    feedback.record_routing(
                        prompt=prompt,
                        routed_to=model_to_try,
                        confidence=routing_result.confidence,
                        strategy=routing_result.strategy,
                        prompt_tokens=prompt_tokens,
                        completion_tokens=completion_tokens,
                        cost=0.0,
                        latency_ms=latency,
                        error=False,
                        auto_retrain=True,
                        retrain_interval=10,
                        router=router_engine,
                    )

                tracker.finish_trace(request_id)
                break
            except Exception as e:
                last_error = e
                tracker.log_model_call(
                    request_id=request_id,
                    model=model_to_try,
                    provider="",
                    status="error" if fallback_idx == 0 else "fallback_error",
                    error=str(e),
                )
                feedback = deps.get("feedback")
                if feedback:
                    feedback.record_routing(
                        prompt=prompt,
                        routed_to=model_to_try,
                        confidence=routing_result.confidence,
                        strategy=routing_result.strategy,
                        error=True,
                        auto_retrain=True,
                        retrain_interval=10,
                        router=router_engine,
                    )
                continue

        if response is None:
            # Close the trace on total failure too, as the streaming path
            # does after an error, so it is not left open.
            tracker.finish_trace(request_id)
            raise HTTPException(status_code=502, detail=f"All models failed. Last error: {last_error}")

        response_dict = response.model_dump() if hasattr(response, "model_dump") else response

        raw_usage = response_dict.get("usage", {})
        if raw_usage is None:
            raw_usage = {}
        # Replace None usage values with 0.
        sanitized_usage = {
            k: (v if v is not None else 0)
            for k, v in raw_usage.items()
        }

        return ChatCompletionResponse(
            id=request_id,
            created=_epoch_seconds(),
            model=served_model,
            choices=response_dict.get("choices", []),
            usage=sanitized_usage,
            routing_decision=_routing_summary(selected_model, routing_result),
        )

    @router.get("/v1/routing/decisions")
    async def get_routing_decisions(limit: int = 20):
        routing_log = deps["tracker"].routing_log
        # A plain [-limit:] returns the whole log for limit == 0 and drops
        # the *first* entries for a negative limit; treat both as "nothing".
        decisions = routing_log[-limit:] if limit > 0 else []
        return {
            "object": "list",
            "data": [
                {
                    "request_id": d.request_id,
                    "timestamp": d.timestamp,
                    "prompt_preview": d.prompt_preview,
                    "routed_to": d.routed_to,
                    "confidence": d.confidence,
                    "strategy": d.strategy,
                    "latency_ms": d.latency_ms,
                    "explanation": d.explanation,
                }
                for d in decisions
            ],
        }

    @router.get("/v1/stats")
    async def get_stats():
        return deps["tracker"].get_stats()

    @router.get("/v1/trace/{request_id}")
    async def get_trace(request_id: str):
        trace = deps["tracker"].get_trace(request_id)
        if not trace:
            raise HTTPException(status_code=404, detail="Trace not found")
        return trace

    @router.post("/v1/routing/training")
    async def add_training_data(examples: List[TrainingExample]):
        router_engine = deps["router"]
        history = [
            {"prompt": ex.prompt, "model": ex.model, "score": ex.score}
            for ex in examples
        ]
        router_engine.similarity_router.build_index(history)
        return {"status": "ok", "indexed": len(history)}

    @router.post("/v1/feedback/rate")
    async def rate_outcome(request: Request):
        body = await request.json()
        # Reject malformed input with a 400 instead of an unhandled 500.
        if not isinstance(body, dict):
            raise HTTPException(status_code=400, detail="Request body must be a JSON object")
        request_id = str(body.get("request_id") or "")
        try:
            rating = int(body.get("rating", 3))
        except (TypeError, ValueError):
            raise HTTPException(status_code=400, detail="rating must be an integer") from None

        feedback: Any = deps.get("feedback")
        if not feedback:
            return {"status": "error", "message": "feedback not enabled"}

        # NOTE(review): outcomes are matched by comparing the end of their
        # timestamp with the last six characters of the request id; confirm
        # this is the intended lookup key.
        suffix = request_id[-6:]
        if not suffix:
            # str.endswith("") is always True, so an empty id would rate
            # whichever outcome happens to be most recent.
            return {"status": "not_found", "request_id": request_id}

        for outcome in reversed(feedback.outcomes):
            if outcome.timestamp and outcome.timestamp.endswith(suffix):
                feedback.record_user_rating(outcome, rating)
                return {"status": "ok", "rating": rating}
        return {"status": "not_found", "request_id": request_id}

    @router.post("/v1/feedback/retrain")
    async def retrain():
        feedback: Any = deps.get("feedback")
        router_engine = deps["router"]
        if not feedback:
            return {"status": "error", "message": "feedback not enabled"}
        feedback.build_similarity_index(router_engine.similarity_router)
        data = feedback.get_retraining_data()
        return {"status": "ok", "retrained_on": len(data)}

    @router.get("/v1/feedback/stats")
    async def feedback_stats():
        feedback: Any = deps.get("feedback")
        if not feedback:
            return {"status": "error", "message": "feedback not enabled"}
        return feedback.get_stats()

    @router.get("/v1/feedback/rankings")
    async def model_rankings():
        feedback: Any = deps.get("feedback")
        if not feedback:
            return {"status": "error", "message": "feedback not enabled"}
        return feedback.get_model_rankings()

    return router
|