fugusashi 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fugusashi/__init__.py ADDED
@@ -0,0 +1 @@
1
# Package version string; the `version` CLI command prints this value.
__version__ = "0.3.0"
fugusashi/__main__.py ADDED
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import click
7
+ import uvicorn
8
+
9
+
10
@click.group()
def cli():
    # Root command group: it does no work itself; subcommands are attached
    # to it with @cli.command().
    return None
13
+
14
+
15
@cli.command()
@click.option("--dataset", "-d", type=click.Path(exists=True), help="JSONL dataset file")
@click.option("--threshold", "-t", default=0.4, type=float, help="Confidence threshold")
@click.option("--verbose", "-v", is_flag=True, help="Show per-sample results")
@click.option("--json", "json_out", is_flag=True, help="Output as JSON")
@click.option("--train", is_flag=True, help="Seed training data for similarity routing")
def benchmark(dataset, threshold, verbose, json_out, train):
    # Thin CLI wrapper: every parsed option is forwarded unchanged to the
    # benchmark runner, which is imported when the command is invoked.
    from .benchmark import run_benchmark_cli as run_cli

    runner_options = {
        "dataset_path": dataset,  # may be None: --dataset is not marked required
        "threshold": threshold,
        "verbose": verbose,
        "json_out": json_out,
        "train": train,
    }
    run_cli(**runner_options)
24
+
25
+
26
@cli.command()
@click.option("--config", "-c", default="config.yaml", help="Path to config file")
@click.option("--host", default=None, help="Bind address")
@click.option("--port", default=None, type=int, help="Bind port")
@click.option("--reload", is_flag=True, help="Auto-reload on file changes")
def serve(config: str, host: str | None, port: int | None, reload: bool):
    # Load the YAML config, build the app and run it under uvicorn.
    #
    # config: path to the YAML config file; the process exits with status 1
    #         when the path does not exist.
    # host:   bind address override; falls back to cfg.host when not given.
    # port:   bind port override; falls back to cfg.port when not given.
    # reload: forwarded to uvicorn.run(reload=...).
    from .config import AppConfig

    config_path = Path(config)
    if not config_path.exists():
        click.echo(f"Config file not found: {config}", err=True)
        sys.exit(1)

    cfg = AppConfig.from_yaml(str(config_path))

    from .server import create_app

    app = create_app(cfg)

    # Resolve the bind address once so the banner and the server can never
    # disagree. The port uses an explicit None check: `port or cfg.port`
    # would silently discard an explicit `--port 0`.
    bind_host = host or cfg.host
    bind_port = cfg.port if port is None else port

    click.echo(
        f" Fugusashi router listening on "
        f"{bind_host}:{bind_port}"
    )

    # NOTE(review): uvicorn documents that `reload` needs the application as
    # an import string; an app object is passed here, so confirm that
    # `--reload` actually works as intended.
    uvicorn.run(
        app,
        host=bind_host,
        port=bind_port,
        reload=reload,
        log_level=cfg.observability.log_level.lower(),
    )
57
+
58
+
59
@cli.command()
def version():
    # Print the package version read from the package's __version__ attribute.
    from . import __version__ as package_version

    banner = f"fugusashi v{package_version}"
    click.echo(banner)
63
+
64
+
65
# Run the click command group only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    cli()
File without changes
@@ -0,0 +1,358 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import uuid
5
+ from datetime import datetime
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from fastapi import APIRouter, Depends, HTTPException, Request
9
+ from pydantic import BaseModel, Field
10
+
11
+ from ..tracker import RoutingDecision
12
+
13
+
14
class ChatMessage(BaseModel):
    # One chat message: the speaker role and the message text.
    # Both fields are required.
    role: str = Field(...)
    content: str = Field(...)
17
+
18
+
19
class TrainingExample(BaseModel):
    # One labelled routing example: a prompt, the model it maps to, and a
    # score that defaults to 1.0.
    prompt: str = Field(...)
    model: str = Field(...)
    score: float = Field(default=1.0)
23
+
24
+
25
class ChatCompletionRequest(BaseModel):
    # Request body of the chat completion endpoint.
    # `model` defaults to "auto"; only `messages` is required.
    model: str = Field(default="auto")
    messages: List[ChatMessage] = Field(...)
    temperature: float = Field(default=0.7)
    max_tokens: Optional[int] = Field(default=None)
    stream: bool = Field(default=False)
    user: Optional[str] = Field(default=None)
32
+
33
+
34
class ChatCompletionResponse(BaseModel):
    # Response body of the non-streaming chat completion endpoint.
    # `routing_decision` is an optional dict and defaults to None.
    id: str = Field(...)
    object: str = Field(default="chat.completion")
    created: int = Field(...)
    model: str = Field(...)
    choices: List[Dict[str, Any]] = Field(...)
    usage: Dict[str, Any] = Field(...)
    routing_decision: Optional[Dict[str, Any]] = Field(default=None)
42
+
43
+
44
class ModelInfo(BaseModel):
    # One entry of the model listing: identifier, creation timestamp,
    # description, capabilities and per-token costs. Only `id` and `created`
    # are required; everything else has a default.
    id: str = Field(...)
    object: str = Field(default="model")
    created: int = Field(...)
    owned_by: str = Field(default="fugusashi")
    description: str = Field(default="")
    capabilities: List[str] = Field(default=[])
    cost_per_input_token: float = Field(default=0.0)
    cost_per_output_token: float = Field(default=0.0)
53
+
54
+
55
def create_router(deps) -> APIRouter:
    """Build the FastAPI router exposing the Fugusashi HTTP API.

    Args:
        deps: Mapping of shared services. Handlers read the keys
            ``"model_client"``, ``"tracker"``, ``"router"`` and ``"config"``
            via ``deps[...]`` and the optional key ``"feedback"`` via
            ``deps.get(...)``.

    Returns:
        An ``APIRouter`` with the health, model listing, chat completion,
        routing introspection, training and feedback endpoints registered.
    """
    # Function-scope imports: these names are not imported at module level.
    import hashlib
    import time
    from datetime import timezone

    try:
        # Report the real package version instead of a hard-coded literal.
        from .. import __version__ as package_version
    except ImportError:
        package_version = "unknown"

    router = APIRouter()

    def utc_now_iso() -> str:
        # Naive-UTC ISO string, i.e. the same format utcnow().isoformat()
        # produced, without relying on the deprecated utcnow().
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def epoch_seconds() -> int:
        # time.time() is a true Unix timestamp. Calling .timestamp() on a naive
        # utcnow() value interprets it as local time and is off by the host's
        # UTC offset.
        return int(time.time())

    def prompt_digest(prompt: str) -> str:
        # Built-in hash() of a str is salted per process, so it cannot identify
        # the same prompt across restarts; a SHA-256 digest can.
        return hashlib.sha256(prompt.encode("utf-8", "surrogatepass")).hexdigest()

    def dump_messages(body) -> List[Dict[str, Any]]:
        # Fresh list on every call so no callee shares a mutable payload.
        return [message.model_dump() for message in body.messages]

    def routing_summary(selected_model, decision) -> Dict[str, Any]:
        # Public subset of a routing decision that is returned to API clients.
        return {
            "model": selected_model,
            "confidence": decision.confidence,
            "strategy": decision.strategy,
            "latency_ms": decision.latency_ms,
            "explanation": decision.explanation,
        }

    def record_feedback(prompt, model_name, decision, router_engine, *, error, **metrics):
        # Forward one routing outcome to the optional feedback service.
        feedback = deps.get("feedback")
        if feedback:
            feedback.record_routing(
                prompt=prompt,
                routed_to=model_name,
                confidence=decision.confidence,
                strategy=decision.strategy,
                error=error,
                auto_retrain=True,
                retrain_interval=10,
                router=router_engine,
                **metrics,
            )

    def feedback_disabled() -> Dict[str, str]:
        # Uniform payload returned when no feedback service is configured.
        return {"status": "error", "message": "feedback not enabled"}

    @router.get("/health")
    async def health():
        """Report liveness together with the package version."""
        return {"status": "ok", "version": package_version}

    @router.get("/v1/models")
    async def list_models():
        """List the models known to the model client."""
        available = deps["model_client"].get_available_models()
        created = epoch_seconds()
        models = [
            ModelInfo(
                id=name,
                created=created,
                description=cfg.get("description", ""),
                capabilities=cfg.get("capabilities", []),
                cost_per_input_token=cfg.get("cost_per_input_token", 0.0),
                cost_per_output_token=cfg.get("cost_per_output_token", 0.0),
            )
            for name, cfg in available.items()
        ]
        return {"object": "list", "data": models}

    @router.post("/v1/chat/completions")
    async def chat_completion(body: ChatCompletionRequest, raw_request: Request):
        """Route a chat request to a model and return or stream its reply.

        Raises:
            HTTPException: 502 when the selected model and the configured
                default model both fail (non-streaming requests only).
        """
        request_id = f"fugu-{uuid.uuid4().hex[:12]}"
        tracker = deps["tracker"]
        model_client = deps["model_client"]
        router_engine = deps["router"]
        config = deps["config"]

        tracker.start_trace(request_id)
        # Routing only looks at the most recent message.
        prompt = body.messages[-1].content if body.messages else ""

        if body.model and body.model != "auto":
            # The caller pinned a model: bypass the router entirely.
            selected_model = body.model
            decision_fields = {
                "confidence": 1.0,
                "strategy": "user-specified",
                "model_scores": {selected_model: 1.0},
                "latency_ms": 0.0,
                "explanation": "User explicitly specified the model",
                "needs_escalation": False,
            }
        else:
            available = model_client.get_available_models()
            threshold = config.tier1.router.confidence_threshold
            result = router_engine.route(
                prompt=prompt,
                messages=dump_messages(body),
                available_models=available,
                threshold=threshold,
            )
            # NOTE: tier-2 escalation is not implemented; `needs_escalation`
            # is only recorded on the routing decision below.
            selected_model = result.model
            decision_fields = {
                "confidence": result.confidence,
                "strategy": result.strategy,
                "model_scores": result.scores,
                "latency_ms": result.latency_ms,
                "explanation": result.explanation,
                "needs_escalation": result.needs_escalation,
            }

        routing_result = RoutingDecision(
            request_id=request_id,
            timestamp=utc_now_iso(),
            prompt_hash=prompt_digest(prompt),
            prompt_preview=prompt[:200],
            routed_to=selected_model,
            **decision_fields,
        )
        tracker.log_routing(request_id, routing_result)
        summary = routing_summary(selected_model, routing_result)

        if body.stream:
            from fastapi.responses import StreamingResponse

            async def stream_generator():
                # Outcome logged once the stream ends; replaced on failure so
                # an errored stream is no longer recorded as a success.
                outcome: Dict[str, Any] = {"status": "success"}
                try:
                    async for chunk in model_client.call_model_stream(
                        model_name=selected_model,
                        messages=dump_messages(body),
                        temperature=body.temperature,
                        max_tokens=body.max_tokens,
                    ):
                        chunk_data = chunk.model_dump() if hasattr(chunk, "model_dump") else chunk
                        yield f"data: {json.dumps(chunk_data)}\n\n"

                    # Payload is built outside the f-string: a multi-line
                    # replacement field only parses on Python 3.12+.
                    routing_event = {"routing_decision": summary}
                    yield f"data: {json.dumps(routing_event)}\n\n"
                    yield "data: [DONE]\n\n"
                except Exception as exc:
                    outcome = {"status": "error", "error": str(exc)}
                    yield f"data: {json.dumps({'error': str(exc)})}\n\n"
                finally:
                    # Runs on normal completion, on error, and when the
                    # generator is closed early, so the trace is always
                    # finished.
                    # NOTE(review): token usage is not extracted from stream
                    # chunks, so zero counts are logged for streamed calls.
                    tracker.log_model_call(
                        request_id=request_id,
                        model=selected_model,
                        provider="",
                        prompt_tokens=0,
                        completion_tokens=0,
                        **outcome,
                    )
                    tracker.finish_trace(request_id)

            return StreamingResponse(stream_generator(), media_type="text/event-stream")

        # Non-streaming: try the routed model, then the configured default.
        models_to_try = [selected_model]
        if selected_model != config.default_model:
            models_to_try.append(config.default_model)

        last_error = None
        response = None
        served_by = selected_model
        for attempt, candidate in enumerate(models_to_try):
            is_primary = attempt == 0
            try:
                response, latency, prompt_tokens, completion_tokens, provider = (
                    await model_client.call_model(
                        model_name=candidate,
                        messages=dump_messages(body),
                        temperature=body.temperature,
                        max_tokens=body.max_tokens,
                    )
                )
                served_by = candidate

                tracker.log_model_call(
                    request_id=request_id,
                    model=candidate,
                    provider=provider,
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    cost=0.0,
                    latency_ms=latency,
                    status="success" if is_primary else "fallback_success",
                )
                record_feedback(
                    prompt,
                    candidate,
                    routing_result,
                    router_engine,
                    error=False,
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    cost=0.0,
                    latency_ms=latency,
                )
                tracker.finish_trace(request_id)
                break
            except Exception as exc:
                last_error = exc
                tracker.log_model_call(
                    request_id=request_id,
                    model=candidate,
                    provider="",
                    status="error" if is_primary else "fallback_error",
                    error=str(exc),
                )
                record_feedback(prompt, candidate, routing_result, router_engine, error=True)

        if response is None:
            # Close the trace on total failure too, as the streaming path does.
            tracker.finish_trace(request_id)
            raise HTTPException(status_code=502, detail=f"All models failed. Last error: {last_error}")

        response_dict = response.model_dump() if hasattr(response, "model_dump") else response

        # Providers may return `usage: null` or null counters; normalise to 0.
        raw_usage = response_dict.get("usage") or {}
        sanitized_usage = {
            key: (value if value is not None else 0)
            for key, value in raw_usage.items()
        }

        return ChatCompletionResponse(
            id=request_id,
            created=epoch_seconds(),
            model=served_by,
            choices=response_dict.get("choices", []),
            usage=sanitized_usage,
            routing_decision=summary,
        )

    @router.get("/v1/routing/decisions")
    async def get_routing_decisions(limit: int = 20):
        """Return the most recent routing decisions, at most ``limit`` of them."""
        routing_log = deps["tracker"].routing_log
        # `log[-0:]` is the whole list and a negative limit slices from the
        # front, so non-positive limits are mapped to an empty result.
        decisions = routing_log[-limit:] if limit > 0 else []
        return {
            "object": "list",
            "data": [
                {
                    "request_id": d.request_id,
                    "timestamp": d.timestamp,
                    "prompt_preview": d.prompt_preview,
                    "routed_to": d.routed_to,
                    "confidence": d.confidence,
                    "strategy": d.strategy,
                    "latency_ms": d.latency_ms,
                    "explanation": d.explanation,
                }
                for d in decisions
            ],
        }

    @router.get("/v1/stats")
    async def get_stats():
        """Return the tracker's aggregate statistics."""
        return deps["tracker"].get_stats()

    @router.get("/v1/trace/{request_id}")
    async def get_trace(request_id: str):
        """Return the stored trace for ``request_id`` or respond with 404."""
        trace = deps["tracker"].get_trace(request_id)
        if not trace:
            raise HTTPException(status_code=404, detail="Trace not found")
        return trace

    @router.post("/v1/routing/training")
    async def add_training_data(examples: List[TrainingExample]):
        """Build the similarity router's index from the posted examples."""
        router_engine = deps["router"]
        history = [
            {"prompt": ex.prompt, "model": ex.model, "score": ex.score}
            for ex in examples
        ]
        router_engine.similarity_router.build_index(history)
        return {"status": "ok", "indexed": len(history)}

    @router.post("/v1/feedback/rate")
    async def rate_outcome(request: Request):
        """Attach a user rating to a previously recorded outcome.

        Raises:
            HTTPException: 400 when the body is not a JSON object, when
                ``request_id`` is missing or empty, or when ``rating`` is not
                an integer.
        """
        try:
            body = await request.json()
        except ValueError:
            raise HTTPException(status_code=400, detail="Request body must be valid JSON") from None
        if not isinstance(body, dict):
            raise HTTPException(status_code=400, detail="Request body must be a JSON object")

        request_id = body.get("request_id", "")
        # An empty id would make the suffix match below succeed for every
        # outcome (`endswith("")` is always true) and rate the latest one.
        if not isinstance(request_id, str) or not request_id:
            raise HTTPException(status_code=400, detail="request_id is required")

        try:
            rating = int(body.get("rating", 3))
        except (TypeError, ValueError):
            raise HTTPException(status_code=400, detail="rating must be an integer") from None

        feedback: Any = deps.get("feedback")
        if not feedback:
            return feedback_disabled()

        # NOTE(review): outcomes are matched by comparing the end of their
        # `timestamp` with the last six characters of the request id; confirm
        # this is the intended correlation key.
        id_suffix = request_id[-6:]
        for outcome in reversed(feedback.outcomes):
            if outcome.timestamp and outcome.timestamp.endswith(id_suffix):
                feedback.record_user_rating(outcome, rating)
                return {"status": "ok", "rating": rating}
        return {"status": "not_found", "request_id": request_id}

    @router.post("/v1/feedback/retrain")
    async def retrain():
        """Rebuild the similarity index from the recorded feedback."""
        feedback: Any = deps.get("feedback")
        router_engine = deps["router"]
        if not feedback:
            return feedback_disabled()
        feedback.build_similarity_index(router_engine.similarity_router)
        data = feedback.get_retraining_data()
        return {"status": "ok", "retrained_on": len(data)}

    @router.get("/v1/feedback/stats")
    async def feedback_stats():
        """Return the feedback service's statistics."""
        feedback: Any = deps.get("feedback")
        if not feedback:
            return feedback_disabled()
        return feedback.get_stats()

    @router.get("/v1/feedback/rankings")
    async def model_rankings():
        """Return the feedback service's model rankings."""
        feedback: Any = deps.get("feedback")
        if not feedback:
            return feedback_disabled()
        return feedback.get_model_rankings()

    return router