fusefable 0.1.9__tar.gz → 0.2.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {fusefable-0.1.9 → fusefable-0.2.0}/PKG-INFO +23 -1
  2. {fusefable-0.1.9 → fusefable-0.2.0}/README.md +22 -0
  3. fusefable-0.2.0/fusefable/__init__.py +1 -0
  4. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/cli.py +20 -4
  5. fusefable-0.2.0/fusefable/compressor.py +74 -0
  6. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/config.py +3 -0
  7. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/core.py +20 -3
  8. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/fusion.py +9 -3
  9. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/models.py +1 -0
  10. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/PKG-INFO +23 -1
  11. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/SOURCES.txt +2 -0
  12. {fusefable-0.1.9 → fusefable-0.2.0}/pyproject.toml +1 -1
  13. fusefable-0.2.0/tests/test_compressor.py +69 -0
  14. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_fusion.py +20 -0
  15. fusefable-0.1.9/fusefable/__init__.py +0 -1
  16. {fusefable-0.1.9 → fusefable-0.2.0}/LICENSE +0 -0
  17. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/client.py +0 -0
  18. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/cost.py +0 -0
  19. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/fanout.py +0 -0
  20. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/judge.py +0 -0
  21. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/mcp_server.py +0 -0
  22. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/__init__.py +0 -0
  23. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/anthropic.py +0 -0
  24. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/base.py +0 -0
  25. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/factory.py +0 -0
  26. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/google.py +0 -0
  27. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/providers/openai_compat.py +0 -0
  28. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/routing.py +0 -0
  29. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable/wizard.py +0 -0
  30. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/dependency_links.txt +0 -0
  31. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/entry_points.txt +0 -0
  32. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/requires.txt +0 -0
  33. {fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/top_level.txt +0 -0
  34. {fusefable-0.1.9 → fusefable-0.2.0}/setup.cfg +0 -0
  35. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_cli.py +0 -0
  36. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_client.py +0 -0
  37. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_config.py +0 -0
  38. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_core.py +0 -0
  39. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_cost.py +0 -0
  40. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_fanout.py +0 -0
  41. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_judge.py +0 -0
  42. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_mcp_server.py +0 -0
  43. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_models.py +0 -0
  44. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_native_providers.py +0 -0
  45. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_openai_compat.py +0 -0
  46. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_routing.py +0 -0
  47. {fusefable-0.1.9 → fusefable-0.2.0}/tests/test_wizard.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fusefable
3
- Version: 0.1.9
3
+ Version: 0.2.0
4
4
  Summary: Fuse multiple AI models and judge the best answer for coding
5
5
  Author: proultrax9
6
6
  License: MIT
@@ -145,6 +145,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
145
145
  > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
146
146
  > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
147
147
 
148
+ ## Prompt compression (save tokens)
149
+
150
+ Reduce token usage while keeping answer quality — useful when you pay per-provider
151
+ directly. Two tiers, opt-in via `--compress`:
152
+
153
+ ```bash
154
+ fusefable ask --compress "<long prompt or pasted code>"
155
+ # [compressed: 5200→1800 chars, ~65% saved via llm]
156
+ ```
157
+
158
+ - **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
159
+ zero-width chars — keeps indentation and inner spacing intact (safe for code).
160
+ - **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
161
+ model compresses semantically — **once**, then the compressed prompt is sent to all
162
+ models, so you save `tokens × number-of-models`.
163
+ - **Quality guards:** prompts under the threshold skip the LLM; if the compressed
164
+ result is empty, no shorter than the Tier 1 text, or under 30% of its length, it falls back to the lossless
165
+ text. The judge always sees the **original** question.
166
+
167
+ Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
168
+ (empty = reuse the judge model).
169
+
148
170
  ## Architecture
149
171
 
150
172
  ```
@@ -117,6 +117,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
117
117
  > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
118
118
  > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
119
119
 
120
+ ## Prompt compression (save tokens)
121
+
122
+ Reduce token usage while keeping answer quality — useful when you pay per-provider
123
+ directly. Two tiers, opt-in via `--compress`:
124
+
125
+ ```bash
126
+ fusefable ask --compress "<long prompt or pasted code>"
127
+ # [compressed: 5200→1800 chars, ~65% saved via llm]
128
+ ```
129
+
130
+ - **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
131
+ zero-width chars — keeps indentation and inner spacing intact (safe for code).
132
+ - **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
133
+ model compresses semantically — **once**, then the compressed prompt is sent to all
134
+ models, so you save `tokens × number-of-models`.
135
+ - **Quality guards:** prompts under the threshold skip the LLM; if the compressed
136
+ result is empty, no shorter than the Tier 1 text, or under 30% of its length, it falls back to the lossless
137
+ text. The judge always sees the **original** question.
138
+
139
+ Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
140
+ (empty = reuse the judge model).
141
+
120
142
  ## Architecture
121
143
 
122
144
  ```
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0"
@@ -48,6 +48,8 @@ def ask(
48
48
  models: Optional[str] = typer.Option(None, "--models",
49
49
  help="จำกัดเฉพาะโมเดลที่ระบุ คั่นด้วย comma"),
50
50
  cheap: bool = typer.Option(False, "--cheap", help="ใช้ cheap_models ใน config"),
51
+ compress: Optional[bool] = typer.Option(None, "--compress/--no-compress",
52
+ help="บีบ prompt ก่อนส่งเพื่อลด token (default ตาม config)"),
51
53
  json_out: bool = typer.Option(False, "--json", help="output เป็น JSON"),
52
54
  quiet: bool = typer.Option(False, "--quiet", "-q",
53
55
  help="พิมพ์เฉพาะคำตอบ (เหมาะกับ pipe/subagent)"),
@@ -58,20 +60,31 @@ def ask(
58
60
  model_list = [m.strip() for m in models.split(",")] if models else None
59
61
 
60
62
  try:
61
- result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap))
63
+ result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap,
64
+ compress=compress))
62
65
  except RuntimeError as e:
63
66
  typer.echo(f"Error: {e}", err=True)
64
67
  raise typer.Exit(1)
65
68
 
69
+ comp = result.compression
70
+
66
71
  if json_out:
67
- typer.echo(json.dumps({
72
+ out = {
68
73
  "answer": result.text,
69
74
  "chosen_model": result.chosen_model,
70
75
  "reason": result.reason,
71
76
  "cost_usd": result.cost_usd,
72
77
  "candidates": [{"model": c.model, "text": c.text}
73
78
  for c in result.all_completions],
74
- }, ensure_ascii=False, indent=2))
79
+ }
80
+ if comp is not None:
81
+ out["compression"] = {
82
+ "original_chars": comp.original_chars,
83
+ "final_chars": comp.final_chars,
84
+ "saved_pct": round(comp.saved_pct, 1),
85
+ "method": comp.method,
86
+ }
87
+ typer.echo(json.dumps(out, ensure_ascii=False, indent=2))
75
88
  return
76
89
 
77
90
  if quiet:
@@ -84,7 +97,10 @@ def ask(
84
97
  typer.echo(f"\n=== Judge reason ===\n{result.reason}")
85
98
  typer.echo(f"\n=== Best answer (from {result.chosen_model}) ===")
86
99
  typer.echo(result.text)
87
- typer.echo(f"\n[estimated cost: ${result.cost_usd:.4f}]")
100
+ if comp is not None:
101
+ typer.echo(f"\n[compressed: {comp.original_chars}→{comp.final_chars} chars, "
102
+ f"~{comp.saved_pct:.0f}% saved via {comp.method}]")
103
+ typer.echo(f"[estimated cost: ${result.cost_usd:.4f}]")
88
104
 
89
105
 
90
106
  @app.command()
@@ -0,0 +1,74 @@
1
+ """Prompt compressor — ลด token แต่คงความหมาย (2 ชั้น).
2
+
3
+ ชั้น 1 (lossless): normalize whitespace/บรรทัดว่าง/zero-width — ปลอดภัย ไม่เสียความหมาย
4
+ ชั้น 2 (LLM): ให้โมเดลถูกบีบเชิงความหมาย เฉพาะ prompt ยาวเกิน threshold
5
+ มี guard: ถ้าผลบีบ ว่าง/ยาวกว่าเดิม/สั้นเกินไป → fallback ใช้ lossless
6
+ """
7
+ from __future__ import annotations
8
+ import re
9
+ from dataclasses import dataclass
10
+ from fusefable.client import call_model
11
+ from fusefable.providers.base import Provider
12
+
13
+ _BLANKS = re.compile(r"\n{3,}")
14
+ _ZEROWIDTH = re.compile(r"[​‌‍]")
15
+
16
+ COMPRESS_SYSTEM = (
17
+ "You compress prompts to save tokens while preserving meaning EXACTLY. "
18
+ "Keep ALL technical details, code, numbers, names, constraints, and requirements. "
19
+ "Remove only filler words, redundancy, and repetition. "
20
+ "Output ONLY the compressed prompt itself — no preamble, no explanation, no quotes."
21
+ )
22
+
23
+
24
+ @dataclass
25
+ class CompressionResult:
26
+ text: str
27
+ original_chars: int
28
+ final_chars: int
29
+ method: str # "lossless" | "llm"
30
+
31
+ @property
32
+ def saved_pct(self) -> float:
33
+ if self.original_chars == 0:
34
+ return 0.0
35
+ return (1 - self.final_chars / self.original_chars) * 100
36
+
37
+
38
+ def normalize_lossless(text: str) -> str:
39
+ """ชั้น 1: ตัด trailing space + บรรทัดว่างซ้ำ + zero-width.
40
+
41
+ คง indentation และช่องว่างภายในบรรทัดไว้ครบ (ปลอดภัยสำหรับโค้ด).
42
+ """
43
+ text = _ZEROWIDTH.sub("", text)
44
+ lines = [ln.rstrip() for ln in text.split("\n")]
45
+ text = "\n".join(lines)
46
+ text = _BLANKS.sub("\n\n", text)
47
+ return text.strip()
48
+
49
+
50
+ async def compress_prompt(provider: Provider, model: str, text: str, *,
51
+ min_chars: int, timeout_s: float,
52
+ min_ratio: float = 0.3) -> CompressionResult:
53
+ """บีบ prompt 2 ชั้น. คืน CompressionResult (มี text ที่จะใช้จริง)."""
54
+ original = len(text)
55
+ lossless = normalize_lossless(text)
56
+
57
+ # prompt สั้น → ข้ามชั้น 2
58
+ if len(lossless) < min_chars:
59
+ return CompressionResult(lossless, original, len(lossless), "lossless")
60
+
61
+ # ชั้น 2: LLM
62
+ instruction = f"{COMPRESS_SYSTEM}\n\n---\n{lossless}"
63
+ result = await call_model(provider, model, instruction, timeout_s)
64
+ if result.is_error:
65
+ return CompressionResult(lossless, original, len(lossless), "lossless")
66
+
67
+ compressed = result.text.strip()
68
+ # guard กันคุณภาพตก: ว่าง / ยาวกว่าเดิม / สั้นเกินไป → ใช้ lossless
69
+ if (not compressed
70
+ or len(compressed) >= len(lossless)
71
+ or len(compressed) < len(lossless) * min_ratio):
72
+ return CompressionResult(lossless, original, len(lossless), "lossless")
73
+
74
+ return CompressionResult(compressed, original, len(compressed), "llm")
@@ -27,6 +27,9 @@ class Config:
27
27
  min_responses: int = 1
28
28
  budget_cap_usd: float | None = None
29
29
  cheap_models: list[str] = field(default_factory=list)
30
+ compress: bool = False # บีบ prompt ก่อนส่ง (opt-in)
31
+ compress_min_chars: int = 2000 # ต่ำกว่านี้ไม่เรียก LLM บีบ
32
+ compress_model: str = "" # ว่าง = ใช้ judge_model
30
33
 
31
34
  def resolve_api_key(self) -> str:
32
35
  return os.environ.get(self.api_key_env, "")
@@ -4,6 +4,7 @@ import httpx
4
4
  from fusefable.config import Config
5
5
  from fusefable.routing import build_routes, build_judge_provider
6
6
  from fusefable.fusion import run_fusion
7
+ from fusefable.compressor import compress_prompt
7
8
  from fusefable.models import FinalAnswer
8
9
 
9
10
 
@@ -19,13 +20,16 @@ def select_models(cfg: Config, models: Optional[Sequence[str]] = None,
19
20
 
20
21
  async def fuse(cfg: Config, question: str,
21
22
  models: Optional[Sequence[str]] = None,
22
- cheap: bool = False) -> FinalAnswer:
23
+ cheap: bool = False,
24
+ compress: Optional[bool] = None) -> FinalAnswer:
23
25
  """entry point กลาง — ใช้ร่วมกันทั้ง CLI และ MCP server.
24
26
 
25
27
  models: จำกัดเฉพาะโมเดลที่ระบุ (เช่นจาก --models)
26
28
  cheap: ใช้ cfg.cheap_models ถ้ามี
29
+ compress: บีบ prompt ก่อนส่ง (None = ใช้ค่า cfg.compress)
27
30
  """
28
31
  only = select_models(cfg, models, cheap)
32
+ do_compress = cfg.compress if compress is None else compress
29
33
  async with httpx.AsyncClient(timeout=None) as http:
30
34
  routes = build_routes(cfg, http)
31
35
  if only is not None:
@@ -33,5 +37,18 @@ async def fuse(cfg: Config, question: str,
33
37
  if not routes:
34
38
  raise RuntimeError("ไม่มีโมเดลให้ใช้ (ตรวจ --models / config)")
35
39
  judge_prov = build_judge_provider(cfg, http)
36
- return await run_fusion(routes, judge_prov, cfg.judge_model,
37
- question, cfg.timeout_seconds)
40
+
41
+ # บีบ prompt ครั้งเดียว แล้วส่งตัวที่บีบไปทุกโมเดล (judge ใช้คำถามเดิม)
42
+ model_prompt = question
43
+ comp = None
44
+ if do_compress:
45
+ comp = await compress_prompt(
46
+ judge_prov, cfg.compress_model or cfg.judge_model, question,
47
+ min_chars=cfg.compress_min_chars, timeout_s=cfg.timeout_seconds)
48
+ model_prompt = comp.text
49
+
50
+ result = await run_fusion(routes, judge_prov, cfg.judge_model,
51
+ model_prompt, cfg.timeout_seconds,
52
+ judge_question=question)
53
+ result.compression = comp
54
+ return result
@@ -10,12 +10,18 @@ Route = Tuple[Provider, str]
10
10
 
11
11
 
12
12
  async def run_fusion(routes: Sequence[Route], judge_provider: Provider,
13
- judge_model: str, prompt: str, timeout_s: float) -> FinalAnswer:
14
- """fan-out judge FinalAnswer. โยน RuntimeError ถ้าไม่มีตัวไหนสำเร็จ."""
13
+ judge_model: str, prompt: str, timeout_s: float,
14
+ judge_question: str | None = None) -> FinalAnswer:
15
+ """fan-out → judge → FinalAnswer. โยน RuntimeError ถ้าไม่มีตัวไหนสำเร็จ.
16
+
17
+ prompt = ข้อความที่ส่งให้โมเดล (อาจถูกบีบแล้ว)
18
+ judge_question = คำถามที่ใช้ให้ judge ตัดสิน (default = prompt; ส่งคำถามเดิมมาเพื่อคงคุณภาพการตัดสิน)
19
+ """
15
20
  completions = await fan_out(routes, prompt, timeout_s)
16
21
  if not completions:
17
22
  raise RuntimeError("no successful completions from any model")
18
- chosen, reason = await judge(judge_provider, judge_model, prompt,
23
+ q = judge_question if judge_question is not None else prompt
24
+ chosen, reason = await judge(judge_provider, judge_model, q,
19
25
  completions, timeout_s)
20
26
  cost = estimate_cost(completions)
21
27
  return FinalAnswer(text=chosen.text, chosen_model=chosen.model,
@@ -37,3 +37,4 @@ class FinalAnswer:
37
37
  reason: str = ""
38
38
  cost_usd: float = 0.0
39
39
  all_completions: list = field(default_factory=list)
40
+ compression: object = None # CompressionResult | None (กัน import วน)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fusefable
3
- Version: 0.1.9
3
+ Version: 0.2.0
4
4
  Summary: Fuse multiple AI models and judge the best answer for coding
5
5
  Author: proultrax9
6
6
  License: MIT
@@ -145,6 +145,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
145
145
  > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
146
146
  > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
147
147
 
148
+ ## Prompt compression (save tokens)
149
+
150
+ Reduce token usage while keeping answer quality — useful when you pay per-provider
151
+ directly. Two tiers, opt-in via `--compress`:
152
+
153
+ ```bash
154
+ fusefable ask --compress "<long prompt or pasted code>"
155
+ # [compressed: 5200→1800 chars, ~65% saved via llm]
156
+ ```
157
+
158
+ - **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
159
+ zero-width chars — keeps indentation and inner spacing intact (safe for code).
160
+ - **Tier 2 (LLM):** for prompts above `compress_min_chars` (default 2000), a cheap
161
+ model compresses semantically — **once**, then the compressed prompt is sent to all
162
+ models, so you save `tokens × number-of-models`.
163
+ - **Quality guards:** prompts under the threshold skip the LLM; if the compressed
164
+ result is empty, no shorter than the Tier 1 text, or under 30% of its length, it falls back to the lossless
165
+ text. The judge always sees the **original** question.
166
+
167
+ Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
168
+ (empty = reuse the judge model).
169
+
148
170
  ## Architecture
149
171
 
150
172
  ```
@@ -4,6 +4,7 @@ pyproject.toml
4
4
  fusefable/__init__.py
5
5
  fusefable/cli.py
6
6
  fusefable/client.py
7
+ fusefable/compressor.py
7
8
  fusefable/config.py
8
9
  fusefable/core.py
9
10
  fusefable/cost.py
@@ -28,6 +29,7 @@ fusefable/providers/google.py
28
29
  fusefable/providers/openai_compat.py
29
30
  tests/test_cli.py
30
31
  tests/test_client.py
32
+ tests/test_compressor.py
31
33
  tests/test_config.py
32
34
  tests/test_core.py
33
35
  tests/test_cost.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "fusefable"
3
- version = "0.1.9"
3
+ version = "0.2.0"
4
4
  description = "Fuse multiple AI models and judge the best answer for coding"
5
5
  readme = "README.md"
6
6
  license = { text = "MIT" }
@@ -0,0 +1,69 @@
1
+ import pytest
2
+ from fusefable.compressor import normalize_lossless, compress_prompt
3
+ from fusefable.models import Completion
4
+
5
+
6
+ def test_normalize_lossless_trims_safely_keeps_indent():
7
+ raw = "def f():\n\n\n\n return 1 \n"
8
+ out = normalize_lossless(raw)
9
+ assert "\n\n\n" not in out # บรรทัดว่างซ้ำถูกยุบ
10
+ assert out == "def f():\n\n return 1" # คง indent + ช่องว่างภายใน, ตัดแค่ trailing
11
+
12
+
13
+ def test_normalize_strips_zero_width():
14
+ assert normalize_lossless("a​b‌") == "ab"
15
+
16
+
17
+ class FakeProvider:
18
+ def __init__(self, text=None, error=False):
19
+ self.text, self.error = text, error
20
+ self.called = False
21
+
22
+ async def complete(self, model, prompt):
23
+ self.called = True
24
+ if self.error:
25
+ raise RuntimeError("boom")
26
+ return Completion(model=model, text=self.text)
27
+
28
+
29
+ @pytest.mark.asyncio
30
+ async def test_short_prompt_skips_llm():
31
+ prov = FakeProvider(text="should not be used")
32
+ r = await compress_prompt(prov, "m", "short text", min_chars=2000, timeout_s=5)
33
+ assert r.method == "lossless"
34
+ assert prov.called is False # ไม่เรียก LLM
35
+
36
+
37
+ @pytest.mark.asyncio
38
+ async def test_long_prompt_uses_llm_when_shorter():
39
+ big = "word " * 1000 # ~5000 chars (lossless ~4999)
40
+ prov = FakeProvider(text="C" * 2000) # อยู่ในช่วง 30%-100% → ผ่าน guard
41
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
42
+ assert r.method == "llm"
43
+ assert r.final_chars == 2000
44
+ assert r.final_chars < r.original_chars
45
+ assert r.saved_pct > 0
46
+
47
+
48
+ @pytest.mark.asyncio
49
+ async def test_llm_failure_falls_back_to_lossless():
50
+ big = "word " * 1000
51
+ prov = FakeProvider(error=True)
52
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
53
+ assert r.method == "lossless" # LLM พัง → ใช้ lossless
54
+
55
+
56
+ @pytest.mark.asyncio
57
+ async def test_guard_rejects_too_short_compression():
58
+ big = "word " * 1000 # ~5000 chars
59
+ prov = FakeProvider(text="x") # สั้นเกินไป (< 30%)
60
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
61
+ assert r.method == "lossless" # ป้องกันโมเดลตัดเนื้อหาทิ้ง
62
+
63
+
64
+ @pytest.mark.asyncio
65
+ async def test_guard_rejects_longer_result():
66
+ big = "word " * 1000
67
+ prov = FakeProvider(text="y" * 99999) # ยาวกว่าเดิม
68
+ r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
69
+ assert r.method == "lossless"
@@ -26,6 +26,26 @@ async def test_run_fusion_end_to_end():
26
26
  assert len(result.all_completions) == 2
27
27
 
28
28
 
29
+ @pytest.mark.asyncio
30
+ async def test_run_fusion_uses_judge_question_for_judging():
31
+ seen = {}
32
+
33
+ class FakeProvider:
34
+ async def complete(self, model, prompt):
35
+ if model == "judge":
36
+ seen["judge_prompt"] = prompt
37
+ return Completion(model=model, text="I choose A")
38
+ return Completion(model=model, text="ans")
39
+
40
+ prov = FakeProvider()
41
+ routes = [(prov, "m1")]
42
+ await run_fusion(routes, prov, "judge", "COMPRESSED", timeout_s=5,
43
+ judge_question="ORIGINAL QUESTION")
44
+ # judge ต้องเห็นคำถามเดิม ไม่ใช่ตัวที่บีบ
45
+ assert "ORIGINAL QUESTION" in seen["judge_prompt"]
46
+ assert "COMPRESSED" not in seen["judge_prompt"]
47
+
48
+
29
49
  @pytest.mark.asyncio
30
50
  async def test_run_fusion_raises_when_all_fail():
31
51
  class DeadProvider:
@@ -1 +0,0 @@
1
- __version__ = "0.1.9"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes