fusefable 0.1.9__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {fusefable-0.1.9 → fusefable-0.3.0}/PKG-INFO +41 -1
  2. {fusefable-0.1.9 → fusefable-0.3.0}/README.md +40 -0
  3. fusefable-0.3.0/fusefable/__init__.py +1 -0
  4. fusefable-0.3.0/fusefable/cache.py +74 -0
  5. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/cli.py +33 -5
  6. fusefable-0.3.0/fusefable/compressor.py +74 -0
  7. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/config.py +7 -0
  8. fusefable-0.3.0/fusefable/core.py +88 -0
  9. fusefable-0.3.0/fusefable/cost.py +25 -0
  10. fusefable-0.3.0/fusefable/ensemble.py +35 -0
  11. fusefable-0.3.0/fusefable/fusion.py +40 -0
  12. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/models.py +3 -0
  13. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/wizard.py +19 -2
  14. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable.egg-info/PKG-INFO +41 -1
  15. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable.egg-info/SOURCES.txt +6 -0
  16. {fusefable-0.1.9 → fusefable-0.3.0}/pyproject.toml +1 -1
  17. fusefable-0.3.0/tests/test_cache.py +48 -0
  18. fusefable-0.3.0/tests/test_compressor.py +69 -0
  19. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_cost.py +8 -1
  20. fusefable-0.3.0/tests/test_ensemble.py +36 -0
  21. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_fusion.py +20 -0
  22. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_wizard.py +15 -2
  23. fusefable-0.1.9/fusefable/__init__.py +0 -1
  24. fusefable-0.1.9/fusefable/core.py +0 -37
  25. fusefable-0.1.9/fusefable/cost.py +0 -11
  26. fusefable-0.1.9/fusefable/fusion.py +0 -23
  27. {fusefable-0.1.9 → fusefable-0.3.0}/LICENSE +0 -0
  28. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/client.py +0 -0
  29. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/fanout.py +0 -0
  30. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/judge.py +0 -0
  31. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/mcp_server.py +0 -0
  32. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/providers/__init__.py +0 -0
  33. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/providers/anthropic.py +0 -0
  34. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/providers/base.py +0 -0
  35. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/providers/factory.py +0 -0
  36. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/providers/google.py +0 -0
  37. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/providers/openai_compat.py +0 -0
  38. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable/routing.py +0 -0
  39. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable.egg-info/dependency_links.txt +0 -0
  40. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable.egg-info/entry_points.txt +0 -0
  41. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable.egg-info/requires.txt +0 -0
  42. {fusefable-0.1.9 → fusefable-0.3.0}/fusefable.egg-info/top_level.txt +0 -0
  43. {fusefable-0.1.9 → fusefable-0.3.0}/setup.cfg +0 -0
  44. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_cli.py +0 -0
  45. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_client.py +0 -0
  46. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_config.py +0 -0
  47. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_core.py +0 -0
  48. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_fanout.py +0 -0
  49. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_judge.py +0 -0
  50. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_mcp_server.py +0 -0
  51. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_models.py +0 -0
  52. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_native_providers.py +0 -0
  53. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_openai_compat.py +0 -0
  54. {fusefable-0.1.9 → fusefable-0.3.0}/tests/test_routing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fusefable
3
- Version: 0.1.9
3
+ Version: 0.3.0
4
4
  Summary: Fuse multiple AI models and judge the best answer for coding
5
5
  Author: proultrax9
6
6
  License: MIT
@@ -145,6 +145,46 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
145
145
  > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
146
146
  > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
147
147
 
148
+ ## Ensemble, cache & budget
149
+
150
+ ```bash
151
+ fusefable ask --ensemble "..." # merge all answers into one (vs picking one)
152
+ fusefable ask --cache "..." # reuse the answer for an identical question
153
+ fusefable ask --no-cache "..." # force a fresh run
154
+ ```
155
+
156
+ - **Ensemble mode** (`--ensemble`, config `fusion_mode: ensemble`): instead of the judge
157
+ picking one answer, a model synthesizes a single answer combining the strengths of all
158
+ candidates (anonymized). Falls back to the first answer if synthesis fails.
159
+ - **Cache** (`--cache`, config `cache: true`, `cache_ttl_seconds`): identical question +
160
+ same models/mode/compression returns the stored answer instantly with no API calls
161
+ (`cached, $0`). Stored in `~/.fusefable/cache/`. `cache_ttl_seconds: 0` = never expires.
162
+ - **Budget cap** (config `budget_cap_usd`, `budget_action: warn|stop`): before firing,
163
+ the run estimates cost. If it exceeds the cap — `warn` prints a warning and continues,
164
+ `stop` aborts before spending anything.
165
+
166
+ ## Prompt compression (save tokens)
167
+
168
+ Reduce token usage while keeping answer quality — useful when you pay per-provider
169
+ directly. Two tiers, opt-in via `--compress`:
170
+
171
+ ```bash
172
+ fusefable ask --compress "<long prompt or pasted code>"
173
+ # [compressed: 5200→1800 chars, ~65% saved via llm]
174
+ ```
175
+
176
+ - **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
177
+ zero-width chars — keeps indentation and inner spacing intact (safe for code).
178
+ - **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
179
+ model compresses semantically — **once**, then the compressed prompt is sent to all
180
+ models, so you save `tokens × number-of-models`.
181
+ - **Quality guards:** prompts under the threshold skip the LLM; if the compressed
182
+ result is empty, longer, or under 30% of the original, it falls back to the lossless
183
+ text. The judge always sees the **original** question.
184
+
185
+ Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
186
+ (empty = reuse the judge model).
187
+
148
188
  ## Architecture
149
189
 
150
190
  ```
@@ -117,6 +117,46 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
117
117
  > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
118
118
  > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
119
119
 
120
+ ## Ensemble, cache & budget
121
+
122
+ ```bash
123
+ fusefable ask --ensemble "..." # merge all answers into one (vs picking one)
124
+ fusefable ask --cache "..." # reuse the answer for an identical question
125
+ fusefable ask --no-cache "..." # force a fresh run
126
+ ```
127
+
128
+ - **Ensemble mode** (`--ensemble`, config `fusion_mode: ensemble`): instead of the judge
129
+ picking one answer, a model synthesizes a single answer combining the strengths of all
130
+ candidates (anonymized). Falls back to the first answer if synthesis fails.
131
+ - **Cache** (`--cache`, config `cache: true`, `cache_ttl_seconds`): identical question +
132
+ same models/mode/compression returns the stored answer instantly with no API calls
133
+ (`cached, $0`). Stored in `~/.fusefable/cache/`. `cache_ttl_seconds: 0` = never expires.
134
+ - **Budget cap** (config `budget_cap_usd`, `budget_action: warn|stop`): before firing,
135
+ the run estimates cost. If it exceeds the cap — `warn` prints a warning and continues,
136
+ `stop` aborts before spending anything.
137
+
138
+ ## Prompt compression (save tokens)
139
+
140
+ Reduce token usage while keeping answer quality — useful when you pay per-provider
141
+ directly. Two tiers, opt-in via `--compress`:
142
+
143
+ ```bash
144
+ fusefable ask --compress "<long prompt or pasted code>"
145
+ # [compressed: 5200→1800 chars, ~65% saved via llm]
146
+ ```
147
+
148
+ - **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
149
+ zero-width chars — keeps indentation and inner spacing intact (safe for code).
150
+ - **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
151
+ model compresses semantically — **once**, then the compressed prompt is sent to all
152
+ models, so you save `tokens × number-of-models`.
153
+ - **Quality guards:** prompts under the threshold skip the LLM; if the compressed
154
+ result is empty, longer, or under 30% of the original, it falls back to the lossless
155
+ text. The judge always sees the **original** question.
156
+
157
+ Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
158
+ (empty = reuse the judge model).
159
+
120
160
  ## Architecture
121
161
 
122
162
  ```
@@ -0,0 +1 @@
1
+ __version__ = "0.3.0"
@@ -0,0 +1,74 @@
1
+ """Cache คำตอบ — คำถามซ้ำ (config เดิม) ไม่ต้องยิงใหม่.
2
+
3
+ เก็บเป็นไฟล์ JSON ใน ~/.fusefable/cache/<sha256>.json
4
+ key มาจาก question + รายชื่อโมเดล + flags ที่กระทบผลลัพธ์
5
+ """
6
+ from __future__ import annotations
7
+ import hashlib
8
+ import json
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Optional, Sequence
12
+ from fusefable.models import Completion, FinalAnswer
13
+
14
+
15
def cache_dir() -> Path:
    """Return the directory holding cached answers: ``~/.fusefable/cache``."""
    return Path.home().joinpath(".fusefable", "cache")
17
+
18
+
19
def make_key(question: str, models: Sequence[str], *, compress: bool,
             mode: str, judge_model: str) -> str:
    """Return the SHA-256 hex digest that identifies one cacheable request.

    The digest covers the question text, the model names (sorted, so their
    order does not matter), the compression flag, the fusion mode and the
    judge model.

    Args:
        question: The question text.
        models: Names of the models that will be queried.
        compress: Whether prompt compression is enabled.
        mode: Fusion mode string.
        judge_model: Name of the judge model.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    payload = json.dumps({
        "q": question,
        "models": sorted(models),
        "compress": compress,
        "mode": mode,
        "judge": judge_model,
    }, ensure_ascii=False, sort_keys=True)
    # "surrogatepass": text that arrived through argv/stdin may contain lone
    # surrogates, which strict UTF-8 encoding rejects with UnicodeEncodeError.
    # For well-formed text the produced bytes (and thus the key) are unchanged.
    return hashlib.sha256(payload.encode("utf-8", "surrogatepass")).hexdigest()
29
+
30
+
31
def _path(key: str) -> Path:
    """Return the JSON file path for cache entry *key* inside ``cache_dir()``."""
    filename = f"{key}.json"
    return cache_dir() / filename
33
+
34
+
35
def load_cached(key: str, ttl_seconds: int, *, now: float) -> Optional[FinalAnswer]:
    """Return the cached answer for *key*, or ``None`` on any kind of miss.

    A miss is: no file, an unreadable file, invalid JSON, an entry older than
    ``ttl_seconds``, or an entry that lacks the expected fields. A damaged
    cache file therefore never aborts the run; it is simply ignored.

    Args:
        key: Cache key as produced by ``make_key``.
        ttl_seconds: Maximum entry age in seconds; a value <= 0 disables expiry.
        now: Current time in the same unit as the stored ``ts`` field.

    Returns:
        A ``FinalAnswer`` with ``cached=True``, or ``None``.
    """
    try:
        # A missing file raises FileNotFoundError (an OSError), so no
        # separate exists() check is needed.
        data = json.loads(_path(key).read_text(encoding="utf-8"))
    except (ValueError, OSError):
        return None

    try:
        if ttl_seconds > 0 and now - data.get("ts", 0) > ttl_seconds:
            return None
        stored = data["answer"]
        text = stored["text"]
        chosen_model = stored["chosen_model"]
        reason = stored.get("reason", "")
        cost_usd = stored.get("cost_usd", 0.0)
        candidates = [Completion(model=c["model"], text=c["text"])
                      for c in stored.get("candidates", [])]
    except (AttributeError, KeyError, TypeError):
        # Valid JSON but not the structure save_cached writes: treat as a miss.
        return None

    return FinalAnswer(
        text=text,
        chosen_model=chosen_model,
        reason=reason,
        cost_usd=cost_usd,
        all_completions=candidates,
        cached=True,
    )
59
+
60
+
61
def save_cached(key: str, answer: FinalAnswer, *, now: float) -> None:
    """Write *answer* to the cache file for *key*, stamped with time *now*.

    The stored record has a ``ts`` timestamp and an ``answer`` object holding
    the text, chosen model, reason, cost and the (model, text) pair of every
    candidate completion. The cache directory is created when missing.
    """
    candidates = []
    for completion in answer.all_completions:
        candidates.append({"model": completion.model, "text": completion.text})

    record = {
        "ts": now,
        "answer": {
            "text": answer.text,
            "chosen_model": answer.chosen_model,
            "reason": answer.reason,
            "cost_usd": answer.cost_usd,
            "candidates": candidates,
        },
    }

    cache_dir().mkdir(parents=True, exist_ok=True)
    target = _path(key)
    serialized = json.dumps(record, ensure_ascii=False)
    target.write_text(serialized, encoding="utf-8")
@@ -48,6 +48,12 @@ def ask(
48
48
  models: Optional[str] = typer.Option(None, "--models",
49
49
  help="จำกัดเฉพาะโมเดลที่ระบุ คั่นด้วย comma"),
50
50
  cheap: bool = typer.Option(False, "--cheap", help="ใช้ cheap_models ใน config"),
51
+ compress: Optional[bool] = typer.Option(None, "--compress/--no-compress",
52
+ help="บีบ prompt ก่อนส่งเพื่อลด token (default ตาม config)"),
53
+ ensemble: Optional[bool] = typer.Option(None, "--ensemble/--judge",
54
+ help="รวมคำตอบหลายตัวเป็นหนึ่ง (ensemble) แทนเลือกตัวเดียว (judge)"),
55
+ use_cache: Optional[bool] = typer.Option(None, "--cache/--no-cache",
56
+ help="ใช้ cache คำตอบ (default ตาม config)"),
51
57
  json_out: bool = typer.Option(False, "--json", help="output เป็น JSON"),
52
58
  quiet: bool = typer.Option(False, "--quiet", "-q",
53
59
  help="พิมพ์เฉพาะคำตอบ (เหมาะกับ pipe/subagent)"),
@@ -58,33 +64,55 @@ def ask(
58
64
  model_list = [m.strip() for m in models.split(",")] if models else None
59
65
 
60
66
  try:
61
- result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap))
67
+ result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap,
68
+ compress=compress, ensemble=ensemble,
69
+ use_cache=use_cache))
62
70
  except RuntimeError as e:
63
71
  typer.echo(f"Error: {e}", err=True)
64
72
  raise typer.Exit(1)
65
73
 
74
+ comp = result.compression
75
+
66
76
  if json_out:
67
- typer.echo(json.dumps({
77
+ out = {
68
78
  "answer": result.text,
69
79
  "chosen_model": result.chosen_model,
70
80
  "reason": result.reason,
71
81
  "cost_usd": result.cost_usd,
72
82
  "candidates": [{"model": c.model, "text": c.text}
73
83
  for c in result.all_completions],
74
- }, ensure_ascii=False, indent=2))
84
+ }
85
+ if comp is not None:
86
+ out["compression"] = {
87
+ "original_chars": comp.original_chars,
88
+ "final_chars": comp.final_chars,
89
+ "saved_pct": round(comp.saved_pct, 1),
90
+ "method": comp.method,
91
+ }
92
+ out["cached"] = result.cached
93
+ if result.budget_warning:
94
+ out["budget_warning"] = result.budget_warning
95
+ typer.echo(json.dumps(out, ensure_ascii=False, indent=2))
75
96
  return
76
97
 
77
98
  if quiet:
78
99
  typer.echo(result.text)
79
100
  return
80
101
 
102
+ if result.budget_warning:
103
+ typer.echo(f"⚠️ {result.budget_warning}", err=True)
81
104
  if show_all:
82
105
  for c in result.all_completions:
83
106
  typer.echo(f"\n--- {c.model} ---\n{c.text}")
84
107
  typer.echo(f"\n=== Judge reason ===\n{result.reason}")
85
- typer.echo(f"\n=== Best answer (from {result.chosen_model}) ===")
108
+ label = "ensemble" if result.chosen_model == "ensemble" else f"from {result.chosen_model}"
109
+ typer.echo(f"\n=== Best answer ({label}) ===")
86
110
  typer.echo(result.text)
87
- typer.echo(f"\n[estimated cost: ${result.cost_usd:.4f}]")
111
+ if comp is not None:
112
+ typer.echo(f"\n[compressed: {comp.original_chars}→{comp.final_chars} chars, "
113
+ f"~{comp.saved_pct:.0f}% saved via {comp.method}]")
114
+ cost_note = "cached, $0" if result.cached else f"${result.cost_usd:.4f}"
115
+ typer.echo(f"[estimated cost: {cost_note}]")
88
116
 
89
117
 
90
118
  @app.command()
@@ -0,0 +1,74 @@
1
+ """Prompt compressor — ลด token แต่คงความหมาย (2 ชั้น).
2
+
3
+ ชั้น 1 (lossless): normalize whitespace/บรรทัดว่าง/zero-width — ปลอดภัย ไม่เสียความหมาย
4
+ ชั้น 2 (LLM): ให้โมเดลถูกบีบเชิงความหมาย เฉพาะ prompt ยาวเกิน threshold
5
+ มี guard: ถ้าผลบีบ ว่าง/ยาวกว่าเดิม/สั้นเกินไป → fallback ใช้ lossless
6
+ """
7
+ from __future__ import annotations
8
+ import re
9
+ from dataclasses import dataclass
10
+ from fusefable.client import call_model
11
+ from fusefable.providers.base import Provider
12
+
13
+ _BLANKS = re.compile(r"\n{3,}")
14
+ _ZEROWIDTH = re.compile(r"[​‌‍]")
15
+
16
+ COMPRESS_SYSTEM = (
17
+ "You compress prompts to save tokens while preserving meaning EXACTLY. "
18
+ "Keep ALL technical details, code, numbers, names, constraints, and requirements. "
19
+ "Remove only filler words, redundancy, and repetition. "
20
+ "Output ONLY the compressed prompt itself — no preamble, no explanation, no quotes."
21
+ )
22
+
23
+
24
@dataclass
class CompressionResult:
    """Outcome of one compression attempt, including the text to actually use."""

    text: str            # the prompt text to send
    original_chars: int  # character count before compression
    final_chars: int     # character count of ``text``
    method: str          # "lossless" | "llm"

    @property
    def saved_pct(self) -> float:
        """Percentage of characters saved; 0.0 when the original was empty."""
        if self.original_chars != 0:
            return (1 - self.final_chars / self.original_chars) * 100
        return 0.0
36
+
37
+
38
def normalize_lossless(text: str) -> str:
    """Tier 1: remove whitespace noise without touching meaningful spacing.

    Strips zero-width characters, trailing whitespace on every line, runs of
    blank lines (collapsed to a single blank line) and leading/trailing blank
    lines. Indentation and spacing inside each line are kept, including the
    indentation of the first line, so pasted code keeps its relative layout.
    """
    text = _ZEROWIDTH.sub("", text)
    text = "\n".join(line.rstrip() for line in text.split("\n"))
    text = _BLANKS.sub("\n\n", text)
    # Strip newlines only. Every line is already right-trimmed above, and a
    # plain strip() would also eat the first line's indentation, which breaks
    # indentation-sensitive code that starts with an indented line.
    return text.strip("\n")
48
+
49
+
50
async def compress_prompt(provider: Provider, model: str, text: str, *,
                          min_chars: int, timeout_s: float,
                          min_ratio: float = 0.3) -> CompressionResult:
    """Compress *text* in two tiers and return the result to use.

    Tier 1 (lossless normalization) always runs. Tier 2 asks *model* to
    compress the normalized text, but only when that text has at least
    ``min_chars`` characters. The lossless text is returned whenever the model
    call reports an error or its output is empty, not shorter than the
    lossless text, or shorter than ``min_ratio`` times the lossless length.
    """
    original_len = len(text)
    cleaned = normalize_lossless(text)
    cleaned_len = len(cleaned)
    fallback = CompressionResult(cleaned, original_len, cleaned_len, "lossless")

    # Short prompt: skip tier 2.
    if cleaned_len < min_chars:
        return fallback

    # Tier 2: LLM compression.
    reply = await call_model(provider, model,
                             f"{COMPRESS_SYSTEM}\n\n---\n{cleaned}", timeout_s)
    if reply.is_error:
        return fallback

    candidate = reply.text.strip()
    # Quality guards: empty / not shorter / too short -> keep the lossless text.
    if not candidate:
        return fallback
    too_long = len(candidate) >= cleaned_len
    too_short = len(candidate) < cleaned_len * min_ratio
    if too_long or too_short:
        return fallback

    return CompressionResult(candidate, original_len, len(candidate), "llm")
@@ -27,6 +27,13 @@ class Config:
27
27
  min_responses: int = 1
28
28
  budget_cap_usd: float | None = None
29
29
  cheap_models: list[str] = field(default_factory=list)
30
+ compress: bool = False # บีบ prompt ก่อนส่ง (opt-in)
31
+ compress_min_chars: int = 2000 # ต่ำกว่านี้ไม่เรียก LLM บีบ
32
+ compress_model: str = "" # ว่าง = ใช้ judge_model
33
+ fusion_mode: str = "judge" # "judge" (เลือกตัวดีสุด) | "ensemble" (รวมคำตอบ)
34
+ cache: bool = False # cache คำตอบ (opt-in)
35
+ cache_ttl_seconds: int = 0 # 0 = ไม่หมดอายุ
36
+ budget_action: str = "warn" # "warn" | "stop" เมื่อประเมินเกิน budget_cap_usd
30
37
 
31
38
  def resolve_api_key(self) -> str:
32
39
  return os.environ.get(self.api_key_env, "")
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+ import time
3
+ from typing import Optional, Sequence
4
+ import httpx
5
+ from fusefable.config import Config
6
+ from fusefable.routing import build_routes, build_judge_provider
7
+ from fusefable.fusion import run_fusion
8
+ from fusefable.compressor import compress_prompt
9
+ from fusefable.cost import estimate_prefire_cost
10
+ from fusefable import cache as cache_mod
11
+ from fusefable.models import FinalAnswer
12
+
13
+
14
def select_models(cfg: Config, models: Optional[Sequence[str]] = None,
                  cheap: bool = False) -> Optional[set[str]]:
    """Decide which model names to restrict the run to.

    An explicit non-empty *models* wins; otherwise ``cfg.cheap_models`` is used
    when *cheap* is set and that list is non-empty. Returns ``None`` when no
    restriction applies (use every model from the config).
    """
    if models:
        chosen = models
    elif cheap and cfg.cheap_models:
        chosen = cfg.cheap_models
    else:
        return None
    return set(chosen)
22
+
23
+
24
async def fuse(cfg: Config, question: str,
               models: Optional[Sequence[str]] = None,
               cheap: bool = False,
               compress: Optional[bool] = None,
               ensemble: Optional[bool] = None,
               use_cache: Optional[bool] = None) -> FinalAnswer:
    """Central entry point, shared by the CLI and the MCP server.

    Args:
        cfg: Loaded configuration.
        question: The user's question.
        models: Restrict the run to these model names.
        cheap: Use ``cfg.cheap_models`` (ignored when *models* is given).
        compress: Compress the prompt first (``None`` = ``cfg.compress``).
        ensemble: Merge the answers instead of picking one
            (``None`` = ``cfg.fusion_mode``).
        use_cache: Read/write the answer cache (``None`` = ``cfg.cache``).

    Returns:
        The final answer. On a cache hit the stored answer is returned as-is
        and no provider is contacted.

    Raises:
        RuntimeError: No model is left after filtering, or the estimated cost
            exceeds ``cfg.budget_cap_usd`` while ``cfg.budget_action`` is
            ``"stop"``.
    """
    import logging

    only = select_models(cfg, models, cheap)
    do_compress = cfg.compress if compress is None else compress
    mode = cfg.fusion_mode if ensemble is None else ("ensemble" if ensemble else "judge")
    do_cache = cfg.cache if use_cache is None else use_cache

    # The key is only needed (and only computed) when caching is enabled.
    key = None
    if do_cache:
        effective_models = sorted(only) if only is not None else sorted(cfg.models)
        key = cache_mod.make_key(question, effective_models, compress=do_compress,
                                 mode=mode, judge_model=cfg.judge_model)
        hit = cache_mod.load_cached(key, cfg.cache_ttl_seconds, now=time.time())
        if hit is not None:
            return hit

    async with httpx.AsyncClient(timeout=None) as http:
        routes = build_routes(cfg, http)
        if only is not None:
            routes = [(p, m) for (p, m) in routes if m in only]
        if not routes:
            raise RuntimeError("ไม่มีโมเดลให้ใช้ (ตรวจ --models / config)")
        judge_prov = build_judge_provider(cfg, http)

        # Compress once and send the compressed prompt to every model; the
        # judge still receives the original question.
        model_prompt = question
        comp = None
        if do_compress:
            comp = await compress_prompt(
                judge_prov, cfg.compress_model or cfg.judge_model, question,
                min_chars=cfg.compress_min_chars, timeout_s=cfg.timeout_seconds)
            model_prompt = comp.text

        # Budget cap, estimated before fan-out: "stop" aborts, anything else
        # records a warning and continues.
        # NOTE: the compression step above, when enabled, has already run by
        # the time this check happens.
        budget_warning = ""
        if cfg.budget_cap_usd is not None:
            est = estimate_prefire_cost(model_prompt, len(routes))
            if est > cfg.budget_cap_usd:
                if cfg.budget_action == "stop":
                    raise RuntimeError(
                        f"ประเมินค่าใช้จ่าย ~${est:.4f} เกิน budget "
                        f"${cfg.budget_cap_usd} (budget_action=stop) — ยกเลิกก่อนยิง")
                budget_warning = (f"ประเมิน ~${est:.4f} เกิน budget "
                                  f"${cfg.budget_cap_usd} (budget_action=warn)")

        result = await run_fusion(routes, judge_prov, cfg.judge_model,
                                  model_prompt, cfg.timeout_seconds,
                                  judge_question=question, mode=mode)

    result.compression = comp
    result.budget_warning = budget_warning

    if do_cache:
        try:
            cache_mod.save_cached(key, result, now=time.time())
        except OSError as exc:
            # The answer has already been produced (and paid for); a cache
            # that cannot be written must not throw it away.
            logging.getLogger(__name__).warning(
                "could not write cache entry: %s", exc)
    return result
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+ from typing import Sequence
3
+ from fusefable.models import Completion
4
+
5
+
6
def estimate_cost(comps: Sequence[Completion],
                  default_in: float = 1.0, default_out: float = 3.0) -> float:
    """Estimate the total cost in USD from the completions' token usage.

    ``default_in`` and ``default_out`` are prices in USD per one million
    prompt and completion tokens respectively.
    """
    def _total(field: str):
        return sum(getattr(c, field) for c in comps)

    cost_in = _total("prompt_tokens") / 1_000_000 * default_in
    cost_out = _total("completion_tokens") / 1_000_000 * default_out
    return cost_in + cost_out
12
+
13
+
14
def estimate_prefire_cost(prompt: str, n_models: int,
                          default_in: float = 1.0, default_out: float = 3.0,
                          assumed_out_tokens: int = 600) -> float:
    """Roughly estimate the cost in USD before any request is sent.

    Input tokens are taken as ``len(prompt) / 4`` per call and each call is
    assumed to produce ``assumed_out_tokens`` output tokens. One extra call is
    counted on top of ``n_models`` for the judge/synthesis step. This is a
    coarse guard for the budget cap, not an exact figure.
    """
    in_tokens = len(prompt) / 4
    input_cost = in_tokens / 1_000_000 * default_in
    output_cost = assumed_out_tokens / 1_000_000 * default_out
    calls = n_models + 1
    return (input_cost + output_cost) * calls
@@ -0,0 +1,35 @@
1
+ """Ensemble mode — รวมจุดเด่นหลายคำตอบเป็นคำตอบเดียว (แทนการเลือกตัวเดียว).
2
+
3
+ ปกปิดชื่อโมเดลเหมือน judge เพื่อให้ synthesize ที่เนื้อหาล้วน.
4
+ """
5
+ from __future__ import annotations
6
+ from typing import Sequence
7
+ from fusefable.client import call_model
8
+ from fusefable.models import Completion
9
+ from fusefable.providers.base import Provider
10
+
11
_LABELS = "ABCDEFGHIJ"


def _answer_label(index: int) -> str:
    """Return the anonymous label for the candidate at 0-based *index*.

    The first ``len(_LABELS)`` candidates get a letter; later ones get their
    1-based position ("11", "12", ...) so any number of candidates works.
    """
    if index < len(_LABELS):
        return _LABELS[index]
    return str(index + 1)


def build_ensemble_prompt(question: str,
                          comps: Sequence[Completion]) -> str:
    """Build the prompt that asks a model to merge all candidate answers.

    Each candidate appears under an anonymous "Answer <label>" heading; only
    its ``text`` is included, never its model name.

    Args:
        question: The question the candidates were answering.
        comps: Candidate completions, in the order they should be listed.

    Returns:
        The full synthesis prompt.
    """
    blocks = [f"### Answer {_answer_label(i)}\n{c.text}"
              for i, c in enumerate(comps)]
    body = "\n\n".join(blocks)
    return (
        "You are merging multiple coding answers into ONE superior answer.\n"
        "Combine correct and complementary parts, fix mistakes, drop redundancy.\n"
        "Output ONLY the final merged answer — no commentary about the sources.\n\n"
        f"## Question\n{question}\n\n"
        f"## Candidate Answers\n{body}"
    )
26
+
27
+
28
async def synthesize(provider: Provider, model: str, question: str,
                     comps: Sequence[Completion], timeout_s: float) -> str:
    """Return one answer merged from *comps* by *model*.

    Falls back to the text of the first completion when the model call
    reports an error or returns only whitespace.
    """
    reply = await call_model(provider, model,
                             build_ensemble_prompt(question, comps), timeout_s)
    usable = not reply.is_error and bool(reply.text.strip())
    return reply.text if usable else comps[0].text
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+ from typing import Sequence, Tuple
3
+ from fusefable.fanout import fan_out
4
+ from fusefable.judge import judge
5
+ from fusefable.ensemble import synthesize
6
+ from fusefable.cost import estimate_cost
7
+ from fusefable.models import FinalAnswer
8
+ from fusefable.providers.base import Provider
9
+
10
Route = Tuple[Provider, str]


async def run_fusion(routes: Sequence[Route], judge_provider: Provider,
                     judge_model: str, prompt: str, timeout_s: float,
                     judge_question: str | None = None,
                     mode: str = "judge") -> FinalAnswer:
    """Fan out to every route, then judge or merge the answers.

    Args:
        routes: (provider, model name) pairs to query.
        judge_provider: Provider used for the judge/synthesis call.
        judge_model: Model used for the judge/synthesis call.
        prompt: Text sent to the models (possibly already compressed).
        timeout_s: Timeout passed to every model call.
        judge_question: Question shown to the judge/synthesizer; defaults to
            *prompt*. Pass the original question here when *prompt* was
            compressed.
        mode: "ensemble" merges all answers; any other value picks the best
            single answer via the judge.

    Raises:
        RuntimeError: No model returned a successful completion.
    """
    completions = await fan_out(routes, prompt, timeout_s)
    if not completions:
        raise RuntimeError("no successful completions from any model")

    q = prompt if judge_question is None else judge_question
    cost = estimate_cost(completions)

    if mode != "ensemble":
        chosen, reason = await judge(judge_provider, judge_model, q,
                                     completions, timeout_s)
        return FinalAnswer(text=chosen.text, chosen_model=chosen.model,
                           reason=reason, cost_usd=cost,
                           all_completions=list(completions))

    merged = await synthesize(judge_provider, judge_model, q,
                              completions, timeout_s)
    return FinalAnswer(text=merged, chosen_model="ensemble",
                       reason=f"synthesized from {len(completions)} answers",
                       cost_usd=cost, all_completions=list(completions))
@@ -37,3 +37,6 @@ class FinalAnswer:
37
37
  reason: str = ""
38
38
  cost_usd: float = 0.0
39
39
  all_completions: list = field(default_factory=list)
40
+ compression: object = None # CompressionResult | None (กัน import วน)
41
+ cached: bool = False # มาจาก cache หรือไม่
42
+ budget_warning: str = "" # ข้อความเตือนงบ (ถ้ามี)
@@ -21,6 +21,10 @@ KNOWN_GATEWAYS = {
21
21
 
22
22
  def build_config_from_answers(answers: dict) -> Config:
23
23
  """แปลงคำตอบจาก wizard เป็น Config (logic ล้วน — แยกจาก I/O เพื่อ test ได้)."""
24
+ extra = dict(
25
+ compress=answers.get("compress", False),
26
+ compress_min_chars=answers.get("compress_min_chars", 2000),
27
+ )
24
28
  if answers["mode"] == "gateway":
25
29
  return Config(
26
30
  mode="gateway",
@@ -30,6 +34,7 @@ def build_config_from_answers(answers: dict) -> Config:
30
34
  models=answers["models"],
31
35
  judge_model=answers["judge_model"],
32
36
  timeout_seconds=answers["timeout_seconds"],
37
+ **extra,
33
38
  )
34
39
  providers = [SingleProvider(**p) for p in answers["providers"]]
35
40
  all_models = [m for p in providers for m in p.models]
@@ -39,9 +44,19 @@ def build_config_from_answers(answers: dict) -> Config:
39
44
  models=all_models,
40
45
  judge_model=answers["judge_model"],
41
46
  timeout_seconds=answers["timeout_seconds"],
47
+ **extra,
42
48
  )
43
49
 
44
50
 
51
+ def _ask_compression(prompt) -> dict:
52
+ """ถามตั้งค่า compression — คืน dict ใส่ใน answers."""
53
+ ans = prompt("เปิดการบีบ prompt เพื่อลด token? [y/N]: ").strip().lower()
54
+ if ans not in ("y", "yes"):
55
+ return {"compress": False}
56
+ raw = prompt(" บีบเมื่อ prompt ยาวเกินกี่ตัวอักษร? [2000]: ").strip()
57
+ return {"compress": True, "compress_min_chars": int(raw) if raw else 2000}
58
+
59
+
45
60
  def run_wizard(prompt=input) -> Config:
46
61
  """ถาม interactive แล้วคืน Config. `prompt` ฉีดเข้าได้เพื่อ test."""
47
62
  print("=== Fuse Fable setup ===")
@@ -65,10 +80,11 @@ def run_wizard(prompt=input) -> Config:
65
80
  if m:
66
81
  models.append(m)
67
82
  judge = prompt("judge model: ").strip()
83
+ comp = _ask_compression(prompt)
68
84
  return build_config_from_answers({
69
85
  "mode": "gateway", "gateway_name": gw, "gateway_base_url": base,
70
86
  "api_key_env": key_env, "models": models, "judge_model": judge,
71
- "timeout_seconds": 90,
87
+ "timeout_seconds": 90, **comp,
72
88
  })
73
89
 
74
90
  n = int(prompt("จะใช้กี่เจ้า?: ").strip())
@@ -89,7 +105,8 @@ def run_wizard(prompt=input) -> Config:
89
105
  providers.append({"name": name, "base_url": base, "kind": kind,
90
106
  "api_key_env": key_env, "models": models})
91
107
  judge = prompt("judge model: ").strip()
108
+ comp = _ask_compression(prompt)
92
109
  return build_config_from_answers({
93
110
  "mode": "single", "providers": providers,
94
- "judge_model": judge, "timeout_seconds": 90,
111
+ "judge_model": judge, "timeout_seconds": 90, **comp,
95
112
  })
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fusefable
3
- Version: 0.1.9
3
+ Version: 0.3.0
4
4
  Summary: Fuse multiple AI models and judge the best answer for coding
5
5
  Author: proultrax9
6
6
  License: MIT
@@ -145,6 +145,46 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
145
145
  > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
146
146
  > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
147
147
 
148
+ ## Ensemble, cache & budget
149
+
150
+ ```bash
151
+ fusefable ask --ensemble "..." # merge all answers into one (vs picking one)
152
+ fusefable ask --cache "..." # reuse the answer for an identical question
153
+ fusefable ask --no-cache "..." # force a fresh run
154
+ ```
155
+
156
+ - **Ensemble mode** (`--ensemble`, config `fusion_mode: ensemble`): instead of the judge
157
+ picking one answer, a model synthesizes a single answer combining the strengths of all
158
+ candidates (anonymized). Falls back to the first answer if synthesis fails.
159
+ - **Cache** (`--cache`, config `cache: true`, `cache_ttl_seconds`): identical question +
160
+ same models/mode/compression returns the stored answer instantly with no API calls
161
+ (`cached, $0`). Stored in `~/.fusefable/cache/`. `cache_ttl_seconds: 0` = never expires.
162
+ - **Budget cap** (config `budget_cap_usd`, `budget_action: warn|stop`): before firing,
163
+ the run estimates cost. If it exceeds the cap — `warn` prints a warning and continues,
164
+ `stop` aborts before spending anything.
165
+
166
+ ## Prompt compression (save tokens)
167
+
168
+ Reduce token usage while keeping answer quality — useful when you pay per-provider
169
+ directly. Two tiers, opt-in via `--compress`:
170
+
171
+ ```bash
172
+ fusefable ask --compress "<long prompt or pasted code>"
173
+ # [compressed: 5200→1800 chars, ~65% saved via llm]
174
+ ```
175
+
176
+ - **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
177
+ zero-width chars — keeps indentation and inner spacing intact (safe for code).
178
+ - **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
179
+ model compresses semantically — **once**, then the compressed prompt is sent to all
180
+ models, so you save `tokens × number-of-models`.
181
+ - **Quality guards:** prompts under the threshold skip the LLM; if the compressed
182
+ result is empty, longer, or under 30% of the original, it falls back to the lossless
183
+ text. The judge always sees the **original** question.
184
+
185
+ Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
186
+ (empty = reuse the judge model).
187
+
148
188
  ## Architecture
149
189
 
150
190
  ```
@@ -2,11 +2,14 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  fusefable/__init__.py
5
+ fusefable/cache.py
5
6
  fusefable/cli.py
6
7
  fusefable/client.py
8
+ fusefable/compressor.py
7
9
  fusefable/config.py
8
10
  fusefable/core.py
9
11
  fusefable/cost.py
12
+ fusefable/ensemble.py
10
13
  fusefable/fanout.py
11
14
  fusefable/fusion.py
12
15
  fusefable/judge.py
@@ -26,11 +29,14 @@ fusefable/providers/base.py
26
29
  fusefable/providers/factory.py
27
30
  fusefable/providers/google.py
28
31
  fusefable/providers/openai_compat.py
32
+ tests/test_cache.py
29
33
  tests/test_cli.py
30
34
  tests/test_client.py
35
+ tests/test_compressor.py
31
36
  tests/test_config.py
32
37
  tests/test_core.py
33
38
  tests/test_cost.py
39
+ tests/test_ensemble.py
34
40
  tests/test_fanout.py
35
41
  tests/test_fusion.py
36
42
  tests/test_judge.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "fusefable"
3
- version = "0.1.9"
3
+ version = "0.3.0"
4
4
  description = "Fuse multiple AI models and judge the best answer for coding"
5
5
  readme = "README.md"
6
6
  license = { text = "MIT" }
@@ -0,0 +1,48 @@
1
+ import fusefable.cache as cache_mod
2
+ from fusefable.cache import make_key, load_cached, save_cached
3
+ from fusefable.models import FinalAnswer, Completion
4
+
5
+
6
+ def _use_tmp(monkeypatch, tmp_path):
7
+ monkeypatch.setattr(cache_mod, "cache_dir", lambda: tmp_path / "cache")
8
+
9
+
10
+ def test_make_key_stable_and_order_independent():
11
+ k1 = make_key("q", ["a", "b"], compress=False, mode="judge", judge_model="j")
12
+ k2 = make_key("q", ["b", "a"], compress=False, mode="judge", judge_model="j")
13
+ assert k1 == k2 # model order has no effect
14
+ k3 = make_key("q", ["a"], compress=False, mode="judge", judge_model="j")
15
+ assert k1 != k3 # different model set = different key
16
+
17
+
18
+ def test_make_key_differs_by_mode_and_compress():
19
+ base = dict(models=["a"], judge_model="j")
20
+ assert (make_key("q", compress=False, mode="judge", **base)
21
+ != make_key("q", compress=True, mode="judge", **base))
22
+ assert (make_key("q", compress=False, mode="judge", **base)
23
+ != make_key("q", compress=False, mode="ensemble", **base))
24
+
25
+
26
+ def test_save_and_load_roundtrip(monkeypatch, tmp_path):
27
+ _use_tmp(monkeypatch, tmp_path)
28
+ ans = FinalAnswer(text="best", chosen_model="gpt", reason="r", cost_usd=0.02,
29
+ all_completions=[Completion(model="gpt", text="best")])
30
+ save_cached("k1", ans, now=1000.0)
31
+ got = load_cached("k1", ttl_seconds=0, now=2000.0)
32
+ assert got is not None
33
+ assert got.text == "best"
34
+ assert got.cached is True # marked as coming from the cache
35
+ assert got.all_completions[0].model == "gpt"
36
+
37
+
38
+ def test_load_miss_returns_none(monkeypatch, tmp_path):
39
+ _use_tmp(monkeypatch, tmp_path)
40
+ assert load_cached("nope", ttl_seconds=0, now=1.0) is None
41
+
42
+
43
+ def test_ttl_expiry(monkeypatch, tmp_path):
44
+ _use_tmp(monkeypatch, tmp_path)
45
+ ans = FinalAnswer(text="x", chosen_model="m")
46
+ save_cached("k", ans, now=1000.0)
47
+ assert load_cached("k", ttl_seconds=60, now=1030.0) is not None # within TTL
48
+ assert load_cached("k", ttl_seconds=60, now=1100.0) is None # past TTL
@@ -0,0 +1,69 @@
1
+ import pytest
2
+ from fusefable.compressor import normalize_lossless, compress_prompt
3
+ from fusefable.models import Completion
4
+
5
+
6
+ def test_normalize_lossless_trims_safely_keeps_indent():
7
+ raw = "def f():\n\n\n\n return 1 \n"
8
+ out = normalize_lossless(raw)
9
+ assert "\n\n\n" not in out # repeated blank lines are collapsed
10
+ assert out == "def f():\n\n return 1" # keeps indent + inner spacing, trims only trailing whitespace
11
+
12
+
13
+ def test_normalize_strips_zero_width():
14
+ assert normalize_lossless("a​b‌") == "ab"
15
+
16
+
17
+ class FakeProvider:
18
+ def __init__(self, text=None, error=False):
19
+ self.text, self.error = text, error
20
+ self.called = False
21
+
22
+ async def complete(self, model, prompt):
23
+ self.called = True
24
+ if self.error:
25
+ raise RuntimeError("boom")
26
+ return Completion(model=model, text=self.text)
27
+
28
+
29
+ @pytest.mark.asyncio
30
+ async def test_short_prompt_skips_llm():
31
+ prov = FakeProvider(text="should not be used")
32
+ r = await compress_prompt(prov, "m", "short text", min_chars=2000, timeout_s=5)
33
+ assert r.method == "lossless"
34
+ assert prov.called is False # the LLM is not called
35
+
36
+
37
+ @pytest.mark.asyncio
38
+ async def test_long_prompt_uses_llm_when_shorter():
39
+ big = "word " * 1000 # ~5000 chars (lossless ~4999)
40
+ prov = FakeProvider(text="C" * 2000) # within the 30%-100% range → passes the guard
41
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
42
+ assert r.method == "llm"
43
+ assert r.final_chars == 2000
44
+ assert r.final_chars < r.original_chars
45
+ assert r.saved_pct > 0
46
+
47
+
48
+ @pytest.mark.asyncio
49
+ async def test_llm_failure_falls_back_to_lossless():
50
+ big = "word " * 1000
51
+ prov = FakeProvider(error=True)
52
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
53
+ assert r.method == "lossless" # LLM failed → fall back to lossless
54
+
55
+
56
+ @pytest.mark.asyncio
57
+ async def test_guard_rejects_too_short_compression():
58
+ big = "word " * 1000 # ~5000 chars
59
+ prov = FakeProvider(text="x") # too short (< 30%)
60
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
61
+ assert r.method == "lossless" # guards against the model discarding content
62
+
63
+
64
+ @pytest.mark.asyncio
65
+ async def test_guard_rejects_longer_result():
66
+ big = "word " * 1000
67
+ prov = FakeProvider(text="y" * 99999) # longer than the original
68
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
69
+ assert r.method == "lossless"
@@ -1,7 +1,14 @@
1
- from fusefable.cost import estimate_cost
1
+ from fusefable.cost import estimate_cost, estimate_prefire_cost
2
2
  from fusefable.models import Completion
3
3
 
4
4
 
5
+ def test_estimate_prefire_scales_with_models():
6
+ one = estimate_prefire_cost("x" * 4000, n_models=1)
7
+ five = estimate_prefire_cost("x" * 4000, n_models=5)
8
+ assert five > one # the more models, the higher the cost
9
+ assert one > 0
10
+
11
+
5
12
  def test_estimate_cost_sums_tokens():
6
13
  comps = [
7
14
  Completion(model="a", text="x", prompt_tokens=1000, completion_tokens=500),
@@ -0,0 +1,36 @@
1
+ import pytest
2
+ from fusefable.ensemble import build_ensemble_prompt, synthesize
3
+ from fusefable.models import Completion
4
+
5
+
6
+ def test_build_ensemble_prompt_anonymizes():
7
+ comps = [Completion(model="claude", text="a1"),
8
+ Completion(model="gpt", text="a2")]
9
+ p = build_ensemble_prompt("q?", comps)
10
+ assert "claude" not in p and "gpt" not in p
11
+ assert "Answer A" in p and "Answer B" in p
12
+ assert "merg" in p.lower()
13
+
14
+
15
+ @pytest.mark.asyncio
16
+ async def test_synthesize_returns_merged_text():
17
+ comps = [Completion(model="a", text="x"), Completion(model="b", text="y")]
18
+
19
+ class P:
20
+ async def complete(self, model, prompt):
21
+ return Completion(model=model, text="MERGED")
22
+
23
+ out = await synthesize(P(), "judge", "q?", comps, timeout_s=5)
24
+ assert out == "MERGED"
25
+
26
+
27
+ @pytest.mark.asyncio
28
+ async def test_synthesize_fallback_on_error():
29
+ comps = [Completion(model="a", text="first"), Completion(model="b", text="y")]
30
+
31
+ class P:
32
+ async def complete(self, model, prompt):
33
+ raise RuntimeError("boom")
34
+
35
+ out = await synthesize(P(), "judge", "q?", comps, timeout_s=5)
36
+ assert out == "first" # falls back to the first answer
@@ -26,6 +26,26 @@ async def test_run_fusion_end_to_end():
26
26
  assert len(result.all_completions) == 2
27
27
 
28
28
 
29
+ @pytest.mark.asyncio
30
+ async def test_run_fusion_uses_judge_question_for_judging():
31
+ seen = {}
32
+
33
+ class FakeProvider:
34
+ async def complete(self, model, prompt):
35
+ if model == "judge":
36
+ seen["judge_prompt"] = prompt
37
+ return Completion(model=model, text="I choose A")
38
+ return Completion(model=model, text="ans")
39
+
40
+ prov = FakeProvider()
41
+ routes = [(prov, "m1")]
42
+ await run_fusion(routes, prov, "judge", "COMPRESSED", timeout_s=5,
43
+ judge_question="ORIGINAL QUESTION")
44
+ # the judge must see the original question, not the compressed one
45
+ assert "ORIGINAL QUESTION" in seen["judge_prompt"]
46
+ assert "COMPRESSED" not in seen["judge_prompt"]
47
+
48
+
29
49
  @pytest.mark.asyncio
30
50
  async def test_run_fusion_raises_when_all_fail():
31
51
  class DeadProvider:
@@ -22,9 +22,11 @@ def test_run_wizard_gateway_asks_how_many_then_each_model():
22
22
  "anthropic/claude-opus-4.1", # model 2
23
23
  "qwen/qwen3-coder", # model 3
24
24
  "deepseek/deepseek-chat", # judge
25
+ "n", # compression not enabled
25
26
  ])
26
27
  cfg = run_wizard(prompt=answers)
27
28
  assert cfg.mode == "gateway"
29
+ assert cfg.compress is False
28
30
  assert cfg.gateway_name == "openrouter"
29
31
  assert cfg.gateway_base_url == "https://openrouter.ai/api/v1" # auto-filled
30
32
  assert len(cfg.models) == 3
@@ -36,16 +38,26 @@ def test_run_wizard_gateway_asks_how_many_then_each_model():
36
38
  def test_run_wizard_gateway_autofills_other_known_gateway():
37
39
  # groq is a known gateway → base_url is auto-filled, no URL prompt
38
40
  answers = _scripted([
39
- "1", "groq", "GROQ_API_KEY", "1", "llama-3.3-70b", "llama-3.3-70b",
41
+ "1", "groq", "GROQ_API_KEY", "1", "llama-3.3-70b", "llama-3.3-70b", "n",
40
42
  ])
41
43
  cfg = run_wizard(prompt=answers)
42
44
  assert cfg.gateway_base_url == "https://api.groq.com/openai/v1"
43
45
 
44
46
 
47
+ def test_run_wizard_enables_compression_when_yes():
48
+ answers = _scripted([
49
+ "1", "openrouter", "OR_KEY", "1", "m1", "judge",
50
+ "y", "3000", # enable compression, min 3000
51
+ ])
52
+ cfg = run_wizard(prompt=answers)
53
+ assert cfg.compress is True
54
+ assert cfg.compress_min_chars == 3000
55
+
56
+
45
57
  def test_run_wizard_gateway_unknown_asks_base_url():
46
58
  # unknown gateway → prompt for base_url explicitly (supports any provider)
47
59
  answers = _scripted([
48
- "1", "mygw", "https://my.gateway/v1", "MY_KEY", "1", "m1", "m1",
60
+ "1", "mygw", "https://my.gateway/v1", "MY_KEY", "1", "m1", "m1", "n",
49
61
  ])
50
62
  cfg = run_wizard(prompt=answers)
51
63
  assert cfg.gateway_name == "mygw"
@@ -79,6 +91,7 @@ def test_run_wizard_single_mode_native_autofills_base_url():
79
91
  # provider 2: openai_compat
80
92
  "ds", "openai_compat", "https://api.deepseek.com/v1", "DS_KEY", "deepseek-chat",
81
93
  "deepseek-chat", # judge
94
+ "n", # compression not enabled
82
95
  ])
83
96
  cfg = run_wizard(prompt=answers)
84
97
  assert cfg.mode == "single"
@@ -1 +0,0 @@
1
- __version__ = "0.1.9"
@@ -1,37 +0,0 @@
1
- from __future__ import annotations
2
- from typing import Optional, Sequence
3
- import httpx
4
- from fusefable.config import Config
5
- from fusefable.routing import build_routes, build_judge_provider
6
- from fusefable.fusion import run_fusion
7
- from fusefable.models import FinalAnswer
8
-
9
-
10
- def select_models(cfg: Config, models: Optional[Sequence[str]] = None,
11
- cheap: bool = False) -> Optional[set[str]]:
12
- """Decide which set of models to use. Returning None = use every model from the config."""
13
- if models:
14
- return set(models)
15
- if cheap and cfg.cheap_models:
16
- return set(cfg.cheap_models)
17
- return None
18
-
19
-
20
- async def fuse(cfg: Config, question: str,
21
- models: Optional[Sequence[str]] = None,
22
- cheap: bool = False) -> FinalAnswer:
23
- """Central entry point — shared by both the CLI and the MCP server.
24
-
25
- models: restrict to the specified models (e.g. from --models)
26
- cheap: use cfg.cheap_models if set
27
- """
28
- only = select_models(cfg, models, cheap)
29
- async with httpx.AsyncClient(timeout=None) as http:
30
- routes = build_routes(cfg, http)
31
- if only is not None:
32
- routes = [(p, m) for (p, m) in routes if m in only]
33
- if not routes:
34
- raise RuntimeError("ไม่มีโมเดลให้ใช้ (ตรวจ --models / config)")
35
- judge_prov = build_judge_provider(cfg, http)
36
- return await run_fusion(routes, judge_prov, cfg.judge_model,
37
- question, cfg.timeout_seconds)
@@ -1,11 +0,0 @@
1
- from __future__ import annotations
2
- from typing import Sequence
3
- from fusefable.models import Completion
4
-
5
-
6
- def estimate_cost(comps: Sequence[Completion],
7
- default_in: float = 1.0, default_out: float = 3.0) -> float:
8
- """Estimate the total cost (USD) from usage tokens. rate = $/1M tokens."""
9
- total_in = sum(c.prompt_tokens for c in comps)
10
- total_out = sum(c.completion_tokens for c in comps)
11
- return total_in / 1_000_000 * default_in + total_out / 1_000_000 * default_out
@@ -1,23 +0,0 @@
1
- from __future__ import annotations
2
- from typing import Sequence, Tuple
3
- from fusefable.fanout import fan_out
4
- from fusefable.judge import judge
5
- from fusefable.cost import estimate_cost
6
- from fusefable.models import FinalAnswer
7
- from fusefable.providers.base import Provider
8
-
9
- Route = Tuple[Provider, str]
10
-
11
-
12
- async def run_fusion(routes: Sequence[Route], judge_provider: Provider,
13
- judge_model: str, prompt: str, timeout_s: float) -> FinalAnswer:
14
- """fan-out → judge → FinalAnswer. Raises RuntimeError if no model succeeds."""
15
- completions = await fan_out(routes, prompt, timeout_s)
16
- if not completions:
17
- raise RuntimeError("no successful completions from any model")
18
- chosen, reason = await judge(judge_provider, judge_model, prompt,
19
- completions, timeout_s)
20
- cost = estimate_cost(completions)
21
- return FinalAnswer(text=chosen.text, chosen_model=chosen.model,
22
- reason=reason, cost_usd=cost,
23
- all_completions=list(completions))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes