fusefable 0.1.9__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fusefable-0.1.9 → fusefable-0.2.0}/PKG-INFO +23 -1
- {fusefable-0.1.9 → fusefable-0.2.0}/README.md +22 -0
- fusefable-0.2.0/fusefable/__init__.py +1 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/cli.py +20 -4
- fusefable-0.2.0/fusefable/compressor.py +74 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/config.py +3 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/core.py +20 -3
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/fusion.py +9 -3
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/models.py +1 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/PKG-INFO +23 -1
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/SOURCES.txt +2 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/pyproject.toml +1 -1
- fusefable-0.2.0/tests/test_compressor.py +69 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_fusion.py +20 -0
- fusefable-0.1.9/fusefable/__init__.py +0 -1
- {fusefable-0.1.9 → fusefable-0.2.0}/LICENSE +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/client.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/cost.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/fanout.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/judge.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/mcp_server.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/__init__.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/anthropic.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/base.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/factory.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/google.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/openai_compat.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/routing.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/wizard.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/dependency_links.txt +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/entry_points.txt +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/requires.txt +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/top_level.txt +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/setup.cfg +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_cli.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_client.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_config.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_core.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_cost.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_fanout.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_judge.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_mcp_server.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_models.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_native_providers.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_openai_compat.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_routing.py +0 -0
- {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_wizard.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fusefable
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Fuse multiple AI models and judge the best answer for coding
|
|
5
5
|
Author: proultrax9
|
|
6
6
|
License: MIT
|
|
@@ -145,6 +145,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
|
|
|
145
145
|
> Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
|
|
146
146
|
> If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
|
|
147
147
|
|
|
148
|
+
## Prompt compression (save tokens)
|
|
149
|
+
|
|
150
|
+
Reduce token usage while keeping answer quality — useful when you pay per-provider
|
|
151
|
+
directly. Two tiers, opt-in via `--compress`:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
fusefable ask --compress "<long prompt or pasted code>"
|
|
155
|
+
# [compressed: 5200→1800 chars, ~65% saved via llm]
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
- **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
|
|
159
|
+
zero-width chars — keeps indentation and inner spacing intact (safe for code).
|
|
160
|
+
- **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
|
|
161
|
+
model compresses semantically — **once**, then the compressed prompt is sent to all
|
|
162
|
+
models, so you save `tokens × number-of-models`.
|
|
163
|
+
- **Quality guards:** prompts under the threshold skip the LLM; if the compressed
|
|
164
|
+
result is empty, longer, or under 30% of the original, it falls back to the lossless
|
|
165
|
+
text. The judge always sees the **original** question.
|
|
166
|
+
|
|
167
|
+
Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
|
|
168
|
+
(empty = reuse the judge model).
|
|
169
|
+
|
|
148
170
|
## Architecture
|
|
149
171
|
|
|
150
172
|
```
|
|
@@ -117,6 +117,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
|
|
|
117
117
|
> Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
|
|
118
118
|
> If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
|
|
119
119
|
|
|
120
|
+
## Prompt compression (save tokens)
|
|
121
|
+
|
|
122
|
+
Reduce token usage while keeping answer quality — useful when you pay per-provider
|
|
123
|
+
directly. Two tiers, opt-in via `--compress`:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
fusefable ask --compress "<long prompt or pasted code>"
|
|
127
|
+
# [compressed: 5200→1800 chars, ~65% saved via llm]
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
- **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
|
|
131
|
+
zero-width chars — keeps indentation and inner spacing intact (safe for code).
|
|
132
|
+
- **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
|
|
133
|
+
model compresses semantically — **once**, then the compressed prompt is sent to all
|
|
134
|
+
models, so you save `tokens × number-of-models`.
|
|
135
|
+
- **Quality guards:** prompts under the threshold skip the LLM; if the compressed
|
|
136
|
+
result is empty, longer, or under 30% of the original, it falls back to the lossless
|
|
137
|
+
text. The judge always sees the **original** question.
|
|
138
|
+
|
|
139
|
+
Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
|
|
140
|
+
(empty = reuse the judge model).
|
|
141
|
+
|
|
120
142
|
## Architecture
|
|
121
143
|
|
|
122
144
|
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0"
|
|
@@ -48,6 +48,8 @@ def ask(
|
|
|
48
48
|
models: Optional[str] = typer.Option(None, "--models",
|
|
49
49
|
help="จำกัดเฉพาะโมเดลที่ระบุ คั่นด้วย comma"),
|
|
50
50
|
cheap: bool = typer.Option(False, "--cheap", help="ใช้ cheap_models ใน config"),
|
|
51
|
+
compress: Optional[bool] = typer.Option(None, "--compress/--no-compress",
|
|
52
|
+
help="บีบ prompt ก่อนส่งเพื่อลด token (default ตาม config)"),
|
|
51
53
|
json_out: bool = typer.Option(False, "--json", help="output เป็น JSON"),
|
|
52
54
|
quiet: bool = typer.Option(False, "--quiet", "-q",
|
|
53
55
|
help="พิมพ์เฉพาะคำตอบ (เหมาะกับ pipe/subagent)"),
|
|
@@ -58,20 +60,31 @@ def ask(
|
|
|
58
60
|
model_list = [m.strip() for m in models.split(",")] if models else None
|
|
59
61
|
|
|
60
62
|
try:
|
|
61
|
-
result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap))
|
|
63
|
+
result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap,
|
|
64
|
+
compress=compress))
|
|
62
65
|
except RuntimeError as e:
|
|
63
66
|
typer.echo(f"Error: {e}", err=True)
|
|
64
67
|
raise typer.Exit(1)
|
|
65
68
|
|
|
69
|
+
comp = result.compression
|
|
70
|
+
|
|
66
71
|
if json_out:
|
|
67
|
-
|
|
72
|
+
out = {
|
|
68
73
|
"answer": result.text,
|
|
69
74
|
"chosen_model": result.chosen_model,
|
|
70
75
|
"reason": result.reason,
|
|
71
76
|
"cost_usd": result.cost_usd,
|
|
72
77
|
"candidates": [{"model": c.model, "text": c.text}
|
|
73
78
|
for c in result.all_completions],
|
|
74
|
-
}
|
|
79
|
+
}
|
|
80
|
+
if comp is not None:
|
|
81
|
+
out["compression"] = {
|
|
82
|
+
"original_chars": comp.original_chars,
|
|
83
|
+
"final_chars": comp.final_chars,
|
|
84
|
+
"saved_pct": round(comp.saved_pct, 1),
|
|
85
|
+
"method": comp.method,
|
|
86
|
+
}
|
|
87
|
+
typer.echo(json.dumps(out, ensure_ascii=False, indent=2))
|
|
75
88
|
return
|
|
76
89
|
|
|
77
90
|
if quiet:
|
|
@@ -84,7 +97,10 @@ def ask(
|
|
|
84
97
|
typer.echo(f"\n=== Judge reason ===\n{result.reason}")
|
|
85
98
|
typer.echo(f"\n=== Best answer (from {result.chosen_model}) ===")
|
|
86
99
|
typer.echo(result.text)
|
|
87
|
-
|
|
100
|
+
if comp is not None:
|
|
101
|
+
typer.echo(f"\n[compressed: {comp.original_chars}→{comp.final_chars} chars, "
|
|
102
|
+
f"~{comp.saved_pct:.0f}% saved via {comp.method}]")
|
|
103
|
+
typer.echo(f"[estimated cost: ${result.cost_usd:.4f}]")
|
|
88
104
|
|
|
89
105
|
|
|
90
106
|
@app.command()
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Prompt compressor — ลด token แต่คงความหมาย (2 ชั้น).
|
|
2
|
+
|
|
3
|
+
ชั้น 1 (lossless): normalize whitespace/บรรทัดว่าง/zero-width — ปลอดภัย ไม่เสียความหมาย
|
|
4
|
+
ชั้น 2 (LLM): ให้โมเดลถูกบีบเชิงความหมาย เฉพาะ prompt ยาวเกิน threshold
|
|
5
|
+
มี guard: ถ้าผลบีบ ว่าง/ยาวกว่าเดิม/สั้นเกินไป → fallback ใช้ lossless
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from fusefable.client import call_model
|
|
11
|
+
from fusefable.providers.base import Provider
|
|
12
|
+
|
|
13
|
+
_BLANKS = re.compile(r"\n{3,}")
|
|
14
|
+
_ZEROWIDTH = re.compile(r"[\u200b\u200c\u200d\ufeff]")
|
|
15
|
+
|
|
16
|
+
COMPRESS_SYSTEM = (
|
|
17
|
+
"You compress prompts to save tokens while preserving meaning EXACTLY. "
|
|
18
|
+
"Keep ALL technical details, code, numbers, names, constraints, and requirements. "
|
|
19
|
+
"Remove only filler words, redundancy, and repetition. "
|
|
20
|
+
"Output ONLY the compressed prompt itself — no preamble, no explanation, no quotes."
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class CompressionResult:
|
|
26
|
+
text: str
|
|
27
|
+
original_chars: int
|
|
28
|
+
final_chars: int
|
|
29
|
+
method: str # "lossless" | "llm"
|
|
30
|
+
|
|
31
|
+
@property
|
|
32
|
+
def saved_pct(self) -> float:
|
|
33
|
+
if self.original_chars == 0:
|
|
34
|
+
return 0.0
|
|
35
|
+
return (1 - self.final_chars / self.original_chars) * 100
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def normalize_lossless(text: str) -> str:
|
|
39
|
+
"""ชั้น 1: ตัด trailing space + บรรทัดว่างซ้ำ + zero-width.
|
|
40
|
+
|
|
41
|
+
คง indentation และช่องว่างภายในบรรทัดไว้ครบ (ปลอดภัยสำหรับโค้ด).
|
|
42
|
+
"""
|
|
43
|
+
text = _ZEROWIDTH.sub("", text)
|
|
44
|
+
lines = [ln.rstrip() for ln in text.split("\n")]
|
|
45
|
+
text = "\n".join(lines)
|
|
46
|
+
text = _BLANKS.sub("\n\n", text)
|
|
47
|
+
return text.strip()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def compress_prompt(provider: Provider, model: str, text: str, *,
|
|
51
|
+
min_chars: int, timeout_s: float,
|
|
52
|
+
min_ratio: float = 0.3) -> CompressionResult:
|
|
53
|
+
"""บีบ prompt 2 ชั้น. คืน CompressionResult (มี text ที่จะใช้จริง)."""
|
|
54
|
+
original = len(text)
|
|
55
|
+
lossless = normalize_lossless(text)
|
|
56
|
+
|
|
57
|
+
# prompt สั้น → ข้ามชั้น 2
|
|
58
|
+
if len(lossless) < min_chars:
|
|
59
|
+
return CompressionResult(lossless, original, len(lossless), "lossless")
|
|
60
|
+
|
|
61
|
+
# ชั้น 2: LLM
|
|
62
|
+
instruction = f"{COMPRESS_SYSTEM}\n\n---\n{lossless}"
|
|
63
|
+
result = await call_model(provider, model, instruction, timeout_s)
|
|
64
|
+
if result.is_error:
|
|
65
|
+
return CompressionResult(lossless, original, len(lossless), "lossless")
|
|
66
|
+
|
|
67
|
+
compressed = result.text.strip()
|
|
68
|
+
# guard กันคุณภาพตก: ว่าง / ยาวกว่าเดิม / สั้นเกินไป → ใช้ lossless
|
|
69
|
+
if (not compressed
|
|
70
|
+
or len(compressed) >= len(lossless)
|
|
71
|
+
or len(compressed) < len(lossless) * min_ratio):
|
|
72
|
+
return CompressionResult(lossless, original, len(lossless), "lossless")
|
|
73
|
+
|
|
74
|
+
return CompressionResult(compressed, original, len(compressed), "llm")
|
|
@@ -27,6 +27,9 @@ class Config:
|
|
|
27
27
|
min_responses: int = 1
|
|
28
28
|
budget_cap_usd: float | None = None
|
|
29
29
|
cheap_models: list[str] = field(default_factory=list)
|
|
30
|
+
compress: bool = False # บีบ prompt ก่อนส่ง (opt-in)
|
|
31
|
+
compress_min_chars: int = 2000 # ต่ำกว่านี้ไม่เรียก LLM บีบ
|
|
32
|
+
compress_model: str = "" # ว่าง = ใช้ judge_model
|
|
30
33
|
|
|
31
34
|
def resolve_api_key(self) -> str:
|
|
32
35
|
return os.environ.get(self.api_key_env, "")
|
|
@@ -4,6 +4,7 @@ import httpx
|
|
|
4
4
|
from fusefable.config import Config
|
|
5
5
|
from fusefable.routing import build_routes, build_judge_provider
|
|
6
6
|
from fusefable.fusion import run_fusion
|
|
7
|
+
from fusefable.compressor import compress_prompt
|
|
7
8
|
from fusefable.models import FinalAnswer
|
|
8
9
|
|
|
9
10
|
|
|
@@ -19,13 +20,16 @@ def select_models(cfg: Config, models: Optional[Sequence[str]] = None,
|
|
|
19
20
|
|
|
20
21
|
async def fuse(cfg: Config, question: str,
|
|
21
22
|
models: Optional[Sequence[str]] = None,
|
|
22
|
-
cheap: bool = False) -> FinalAnswer:
|
|
23
|
+
cheap: bool = False,
|
|
24
|
+
compress: Optional[bool] = None) -> FinalAnswer:
|
|
23
25
|
"""entry point กลาง — ใช้ร่วมกันทั้ง CLI และ MCP server.
|
|
24
26
|
|
|
25
27
|
models: จำกัดเฉพาะโมเดลที่ระบุ (เช่นจาก --models)
|
|
26
28
|
cheap: ใช้ cfg.cheap_models ถ้ามี
|
|
29
|
+
compress: บีบ prompt ก่อนส่ง (None = ใช้ค่า cfg.compress)
|
|
27
30
|
"""
|
|
28
31
|
only = select_models(cfg, models, cheap)
|
|
32
|
+
do_compress = cfg.compress if compress is None else compress
|
|
29
33
|
async with httpx.AsyncClient(timeout=None) as http:
|
|
30
34
|
routes = build_routes(cfg, http)
|
|
31
35
|
if only is not None:
|
|
@@ -33,5 +37,18 @@ async def fuse(cfg: Config, question: str,
|
|
|
33
37
|
if not routes:
|
|
34
38
|
raise RuntimeError("ไม่มีโมเดลให้ใช้ (ตรวจ --models / config)")
|
|
35
39
|
judge_prov = build_judge_provider(cfg, http)
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
|
|
41
|
+
# บีบ prompt ครั้งเดียว แล้วส่งตัวที่บีบไปทุกโมเดล (judge ใช้คำถามเดิม)
|
|
42
|
+
model_prompt = question
|
|
43
|
+
comp = None
|
|
44
|
+
if do_compress:
|
|
45
|
+
comp = await compress_prompt(
|
|
46
|
+
judge_prov, cfg.compress_model or cfg.judge_model, question,
|
|
47
|
+
min_chars=cfg.compress_min_chars, timeout_s=cfg.timeout_seconds)
|
|
48
|
+
model_prompt = comp.text
|
|
49
|
+
|
|
50
|
+
result = await run_fusion(routes, judge_prov, cfg.judge_model,
|
|
51
|
+
model_prompt, cfg.timeout_seconds,
|
|
52
|
+
judge_question=question)
|
|
53
|
+
result.compression = comp
|
|
54
|
+
return result
|
|
@@ -10,12 +10,18 @@ Route = Tuple[Provider, str]
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
async def run_fusion(routes: Sequence[Route], judge_provider: Provider,
|
|
13
|
-
judge_model: str, prompt: str, timeout_s: float) -> FinalAnswer:
|
|
14
|
-
|
|
13
|
+
judge_model: str, prompt: str, timeout_s: float,
|
|
14
|
+
judge_question: str | None = None) -> FinalAnswer:
|
|
15
|
+
"""fan-out → judge → FinalAnswer. โยน RuntimeError ถ้าไม่มีตัวไหนสำเร็จ.
|
|
16
|
+
|
|
17
|
+
prompt = ข้อความที่ส่งให้โมเดล (อาจถูกบีบแล้ว)
|
|
18
|
+
judge_question = คำถามที่ใช้ให้ judge ตัดสิน (default = prompt; ส่งคำถามเดิมมาเพื่อคงคุณภาพการตัดสิน)
|
|
19
|
+
"""
|
|
15
20
|
completions = await fan_out(routes, prompt, timeout_s)
|
|
16
21
|
if not completions:
|
|
17
22
|
raise RuntimeError("no successful completions from any model")
|
|
18
|
-
|
|
23
|
+
q = judge_question if judge_question is not None else prompt
|
|
24
|
+
chosen, reason = await judge(judge_provider, judge_model, q,
|
|
19
25
|
completions, timeout_s)
|
|
20
26
|
cost = estimate_cost(completions)
|
|
21
27
|
return FinalAnswer(text=chosen.text, chosen_model=chosen.model,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fusefable
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Fuse multiple AI models and judge the best answer for coding
|
|
5
5
|
Author: proultrax9
|
|
6
6
|
License: MIT
|
|
@@ -145,6 +145,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
|
|
|
145
145
|
> Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
|
|
146
146
|
> If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
|
|
147
147
|
|
|
148
|
+
## Prompt compression (save tokens)
|
|
149
|
+
|
|
150
|
+
Reduce token usage while keeping answer quality — useful when you pay per-provider
|
|
151
|
+
directly. Two tiers, opt-in via `--compress`:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
fusefable ask --compress "<long prompt or pasted code>"
|
|
155
|
+
# [compressed: 5200→1800 chars, ~65% saved via llm]
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
- **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
|
|
159
|
+
zero-width chars — keeps indentation and inner spacing intact (safe for code).
|
|
160
|
+
- **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
|
|
161
|
+
model compresses semantically — **once**, then the compressed prompt is sent to all
|
|
162
|
+
models, so you save `tokens × number-of-models`.
|
|
163
|
+
- **Quality guards:** prompts under the threshold skip the LLM; if the compressed
|
|
164
|
+
result is empty, longer, or under 30% of the original, it falls back to the lossless
|
|
165
|
+
text. The judge always sees the **original** question.
|
|
166
|
+
|
|
167
|
+
Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
|
|
168
|
+
(empty = reuse the judge model).
|
|
169
|
+
|
|
148
170
|
## Architecture
|
|
149
171
|
|
|
150
172
|
```
|
|
@@ -4,6 +4,7 @@ pyproject.toml
|
|
|
4
4
|
fusefable/__init__.py
|
|
5
5
|
fusefable/cli.py
|
|
6
6
|
fusefable/client.py
|
|
7
|
+
fusefable/compressor.py
|
|
7
8
|
fusefable/config.py
|
|
8
9
|
fusefable/core.py
|
|
9
10
|
fusefable/cost.py
|
|
@@ -28,6 +29,7 @@ fusefable/providers/google.py
|
|
|
28
29
|
fusefable/providers/openai_compat.py
|
|
29
30
|
tests/test_cli.py
|
|
30
31
|
tests/test_client.py
|
|
32
|
+
tests/test_compressor.py
|
|
31
33
|
tests/test_config.py
|
|
32
34
|
tests/test_core.py
|
|
33
35
|
tests/test_cost.py
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from fusefable.compressor import normalize_lossless, compress_prompt
|
|
3
|
+
from fusefable.models import Completion
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_normalize_lossless_trims_safely_keeps_indent():
|
|
7
|
+
raw = "def f():\n\n\n\n return 1 \n"
|
|
8
|
+
out = normalize_lossless(raw)
|
|
9
|
+
assert "\n\n\n" not in out # บรรทัดว่างซ้ำถูกยุบ
|
|
10
|
+
assert out == "def f():\n\n return 1" # คง indent + ช่องว่างภายใน, ตัดแค่ trailing
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_normalize_strips_zero_width():
|
|
14
|
+
assert normalize_lossless("a\u200bb") == "ab"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FakeProvider:
|
|
18
|
+
def __init__(self, text=None, error=False):
|
|
19
|
+
self.text, self.error = text, error
|
|
20
|
+
self.called = False
|
|
21
|
+
|
|
22
|
+
async def complete(self, model, prompt):
|
|
23
|
+
self.called = True
|
|
24
|
+
if self.error:
|
|
25
|
+
raise RuntimeError("boom")
|
|
26
|
+
return Completion(model=model, text=self.text)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.asyncio
|
|
30
|
+
async def test_short_prompt_skips_llm():
|
|
31
|
+
prov = FakeProvider(text="should not be used")
|
|
32
|
+
r = await compress_prompt(prov, "m", "short text", min_chars=2000, timeout_s=5)
|
|
33
|
+
assert r.method == "lossless"
|
|
34
|
+
assert prov.called is False # ไม่เรียก LLM
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.mark.asyncio
|
|
38
|
+
async def test_long_prompt_uses_llm_when_shorter():
|
|
39
|
+
big = "word " * 1000 # ~5000 chars (lossless ~4999)
|
|
40
|
+
prov = FakeProvider(text="C" * 2000) # อยู่ในช่วง 30%-100% → ผ่าน guard
|
|
41
|
+
r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
|
|
42
|
+
assert r.method == "llm"
|
|
43
|
+
assert r.final_chars == 2000
|
|
44
|
+
assert r.final_chars < r.original_chars
|
|
45
|
+
assert r.saved_pct > 0
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@pytest.mark.asyncio
|
|
49
|
+
async def test_llm_failure_falls_back_to_lossless():
|
|
50
|
+
big = "word " * 1000
|
|
51
|
+
prov = FakeProvider(error=True)
|
|
52
|
+
r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
|
|
53
|
+
assert r.method == "lossless" # LLM พัง → ใช้ lossless
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pytest.mark.asyncio
|
|
57
|
+
async def test_guard_rejects_too_short_compression():
|
|
58
|
+
big = "word " * 1000 # ~5000 chars
|
|
59
|
+
prov = FakeProvider(text="x") # สั้นเกินไป (< 30%)
|
|
60
|
+
r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
|
|
61
|
+
assert r.method == "lossless" # ป้องกันโมเดลตัดเนื้อหาทิ้ง
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@pytest.mark.asyncio
|
|
65
|
+
async def test_guard_rejects_longer_result():
|
|
66
|
+
big = "word " * 1000
|
|
67
|
+
prov = FakeProvider(text="y" * 99999) # ยาวกว่าเดิม
|
|
68
|
+
r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
|
|
69
|
+
assert r.method == "lossless"
|
|
@@ -26,6 +26,26 @@ async def test_run_fusion_end_to_end():
|
|
|
26
26
|
assert len(result.all_completions) == 2
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
@pytest.mark.asyncio
|
|
30
|
+
async def test_run_fusion_uses_judge_question_for_judging():
|
|
31
|
+
seen = {}
|
|
32
|
+
|
|
33
|
+
class FakeProvider:
|
|
34
|
+
async def complete(self, model, prompt):
|
|
35
|
+
if model == "judge":
|
|
36
|
+
seen["judge_prompt"] = prompt
|
|
37
|
+
return Completion(model=model, text="I choose A")
|
|
38
|
+
return Completion(model=model, text="ans")
|
|
39
|
+
|
|
40
|
+
prov = FakeProvider()
|
|
41
|
+
routes = [(prov, "m1")]
|
|
42
|
+
await run_fusion(routes, prov, "judge", "COMPRESSED", timeout_s=5,
|
|
43
|
+
judge_question="ORIGINAL QUESTION")
|
|
44
|
+
# judge ต้องเห็นคำถามเดิม ไม่ใช่ตัวที่บีบ
|
|
45
|
+
assert "ORIGINAL QUESTION" in seen["judge_prompt"]
|
|
46
|
+
assert "COMPRESSED" not in seen["judge_prompt"]
|
|
47
|
+
|
|
48
|
+
|
|
29
49
|
@pytest.mark.asyncio
|
|
30
50
|
async def test_run_fusion_raises_when_all_fail():
|
|
31
51
|
class DeadProvider:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.1.9"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|