PyPI - fusefable - Versions diffs - 0.1.9__tar.gz → 0.2.0__tar.gz - Mend

fusefable 0.1.9.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{fusefable-0.1.9 → fusefable-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusefable
-Version: 0.1.9
+Version: 0.2.0
 Summary: Fuse multiple AI models and judge the best answer for coding
 Author: proultrax9
 License: MIT
@@ -145,6 +145,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
 > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
 > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
+## Prompt compression (save tokens)
+Reduce token usage while keeping answer quality — useful when you pay per-provider
+directly. Two tiers, opt-in via `--compress`:
+```bash
+fusefable ask --compress "<long prompt or pasted code>"
+# [compressed: 5200→1800 chars, ~65% saved via llm]
+```
+- **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
+  zero-width chars — keeps indentation and inner spacing intact (safe for code).
+- **Tier 2 (LLM):** for prompts of at least `compress_min_chars` characters (default
+  2000, measured after Tier 1), a cheap model compresses semantically — **once**, then
+  the compressed prompt is sent to all models, so you save `tokens × number-of-models`.
+- **Quality guards:** prompts under the threshold skip the LLM; if the LLM call fails,
+  or its result is empty, not shorter than the Tier 1 text, or under 30% of the Tier 1
+  text's length, it falls back to the lossless text. The judge always sees the
+  **original** question.
+Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
+(empty = reuse the judge model).
 ## Architecture
 ```

{fusefable-0.1.9 → fusefable-0.2.0}/README.md RENAMED Viewed

@@ -117,6 +117,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
 > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
 > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
+## Prompt compression (save tokens)
+Reduce token usage while keeping answer quality — useful when you pay per-provider
+directly. Two tiers, opt-in via `--compress`:
+```bash
+fusefable ask --compress "<long prompt or pasted code>"
+# [compressed: 5200→1800 chars, ~65% saved via llm]
+```
+- **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
+  zero-width chars — keeps indentation and inner spacing intact (safe for code).
+- **Tier 2 (LLM):** for prompts of at least `compress_min_chars` characters (default
+  2000, measured after Tier 1), a cheap model compresses semantically — **once**, then
+  the compressed prompt is sent to all models, so you save `tokens × number-of-models`.
+- **Quality guards:** prompts under the threshold skip the LLM; if the LLM call fails,
+  or its result is empty, not shorter than the Tier 1 text, or under 30% of the Tier 1
+  text's length, it falls back to the lossless text. The judge always sees the
+  **original** question.
+Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
+(empty = reuse the judge model).
 ## Architecture
 ```

fusefable-0.2.0/fusefable/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.2.0"

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable/cli.py RENAMED Viewed

@@ -48,6 +48,8 @@ def ask(
     models: Optional[str] = typer.Option(None, "--models",
         help="จำกัดเฉพาะโมเดลที่ระบุ คั่นด้วย comma"),
     cheap: bool = typer.Option(False, "--cheap", help="ใช้ cheap_models ใน config"),
+    compress: Optional[bool] = typer.Option(None, "--compress/--no-compress",
+        help="บีบ prompt ก่อนส่งเพื่อลด token (default ตาม config)"),
     json_out: bool = typer.Option(False, "--json", help="output เป็น JSON"),
     quiet: bool = typer.Option(False, "--quiet", "-q",
         help="พิมพ์เฉพาะคำตอบ (เหมาะกับ pipe/subagent)"),
@@ -58,20 +60,31 @@ def ask(
     model_list = [m.strip() for m in models.split(",")] if models else None
     try:
-        result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap))
+        result = asyncio.run(fuse(cfg, q, models=model_list, cheap=cheap,
+                                  compress=compress))
     except RuntimeError as e:
         typer.echo(f"Error: {e}", err=True)
         raise typer.Exit(1)
+    comp = result.compression
     if json_out:
-        typer.echo(json.dumps({
+        out = {
             "answer": result.text,
             "chosen_model": result.chosen_model,
             "reason": result.reason,
             "cost_usd": result.cost_usd,
             "candidates": [{"model": c.model, "text": c.text}
                            for c in result.all_completions],
-        }, ensure_ascii=False, indent=2))
+        }
+        if comp is not None:
+            out["compression"] = {
+                "original_chars": comp.original_chars,
+                "final_chars": comp.final_chars,
+                "saved_pct": round(comp.saved_pct, 1),
+                "method": comp.method,
+            }
+        typer.echo(json.dumps(out, ensure_ascii=False, indent=2))
         return
     if quiet:
@@ -84,7 +97,10 @@ def ask(
         typer.echo(f"\n=== Judge reason ===\n{result.reason}")
     typer.echo(f"\n=== Best answer (from {result.chosen_model}) ===")
     typer.echo(result.text)
-    typer.echo(f"\n[estimated cost: ${result.cost_usd:.4f}]")
+    if comp is not None:
+        typer.echo(f"\n[compressed: {comp.original_chars}→{comp.final_chars} chars, "
+                   f"~{comp.saved_pct:.0f}% saved via {comp.method}]")
+    typer.echo(f"[estimated cost: ${result.cost_usd:.4f}]")
 @app.command()

fusefable-0.2.0/fusefable/compressor.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""Prompt compressor — ลด token แต่คงความหมาย (2 ชั้น).
+ชั้น 1 (lossless): normalize whitespace/บรรทัดว่าง/zero-width — ปลอดภัย ไม่เสียความหมาย
+ชั้น 2 (LLM): ให้โมเดลถูกบีบเชิงความหมาย เฉพาะ prompt ยาวเกิน threshold
+มี guard: ถ้าผลบีบ ว่าง/ยาวกว่าเดิม/สั้นเกินไป → fallback ใช้ lossless
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from fusefable.client import call_model
+from fusefable.providers.base import Provider
+_BLANKS = re.compile(r"\n{3,}")
+_ZEROWIDTH = re.compile(r"[‌‍]")
+COMPRESS_SYSTEM = (
+    "You compress prompts to save tokens while preserving meaning EXACTLY. "
+    "Keep ALL technical details, code, numbers, names, constraints, and requirements. "
+    "Remove only filler words, redundancy, and repetition. "
+    "Output ONLY the compressed prompt itself — no preamble, no explanation, no quotes."
+)
+@dataclass
+class CompressionResult:
+    text: str
+    original_chars: int
+    final_chars: int
+    method: str  # "lossless" | "llm"
+    @property
+    def saved_pct(self) -> float:
+        if self.original_chars == 0:
+            return 0.0
+        return (1 - self.final_chars / self.original_chars) * 100
+def normalize_lossless(text: str) -> str:
+    """ชั้น 1: ตัด trailing space + บรรทัดว่างซ้ำ + zero-width.
+    คง indentation และช่องว่างภายในบรรทัดไว้ครบ (ปลอดภัยสำหรับโค้ด).
+    """
+    text = _ZEROWIDTH.sub("", text)
+    lines = [ln.rstrip() for ln in text.split("\n")]
+    text = "\n".join(lines)
+    text = _BLANKS.sub("\n\n", text)
+    return text.strip()
+async def compress_prompt(provider: Provider, model: str, text: str, *,
+                          min_chars: int, timeout_s: float,
+                          min_ratio: float = 0.3) -> CompressionResult:
+    """บีบ prompt 2 ชั้น. คืน CompressionResult (มี text ที่จะใช้จริง)."""
+    original = len(text)
+    lossless = normalize_lossless(text)
+    # prompt สั้น → ข้ามชั้น 2
+    if len(lossless) < min_chars:
+        return CompressionResult(lossless, original, len(lossless), "lossless")
+    # ชั้น 2: LLM
+    instruction = f"{COMPRESS_SYSTEM}\n\n---\n{lossless}"
+    result = await call_model(provider, model, instruction, timeout_s)
+    if result.is_error:
+        return CompressionResult(lossless, original, len(lossless), "lossless")
+    compressed = result.text.strip()
+    # guard กันคุณภาพตก: ว่าง / ยาวกว่าเดิม / สั้นเกินไป → ใช้ lossless
+    if (not compressed
+            or len(compressed) >= len(lossless)
+            or len(compressed) < len(lossless) * min_ratio):
+        return CompressionResult(lossless, original, len(lossless), "lossless")
+    return CompressionResult(compressed, original, len(compressed), "llm")

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable/config.py RENAMED Viewed

@@ -27,6 +27,9 @@ class Config:
     min_responses: int = 1
     budget_cap_usd: float | None = None
     cheap_models: list[str] = field(default_factory=list)
+    compress: bool = False              # บีบ prompt ก่อนส่ง (opt-in)
+    compress_min_chars: int = 2000      # ต่ำกว่านี้ไม่เรียก LLM บีบ
+    compress_model: str = ""            # ว่าง = ใช้ judge_model
     def resolve_api_key(self) -> str:
         return os.environ.get(self.api_key_env, "")

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable/core.py RENAMED Viewed

@@ -4,6 +4,7 @@ import httpx
 from fusefable.config import Config
 from fusefable.routing import build_routes, build_judge_provider
 from fusefable.fusion import run_fusion
+from fusefable.compressor import compress_prompt
 from fusefable.models import FinalAnswer
@@ -19,13 +20,16 @@ def select_models(cfg: Config, models: Optional[Sequence[str]] = None,
 async def fuse(cfg: Config, question: str,
                models: Optional[Sequence[str]] = None,
-               cheap: bool = False) -> FinalAnswer:
+               cheap: bool = False,
+               compress: Optional[bool] = None) -> FinalAnswer:
     """entry point กลาง — ใช้ร่วมกันทั้ง CLI และ MCP server.
     models: จำกัดเฉพาะโมเดลที่ระบุ (เช่นจาก --models)
     cheap: ใช้ cfg.cheap_models ถ้ามี
+    compress: บีบ prompt ก่อนส่ง (None = ใช้ค่า cfg.compress)
     """
     only = select_models(cfg, models, cheap)
+    do_compress = cfg.compress if compress is None else compress
     async with httpx.AsyncClient(timeout=None) as http:
         routes = build_routes(cfg, http)
         if only is not None:
@@ -33,5 +37,18 @@ async def fuse(cfg: Config, question: str,
         if not routes:
             raise RuntimeError("ไม่มีโมเดลให้ใช้ (ตรวจ --models / config)")
         judge_prov = build_judge_provider(cfg, http)
-        return await run_fusion(routes, judge_prov, cfg.judge_model,
-                                question, cfg.timeout_seconds)
+        # บีบ prompt ครั้งเดียว แล้วส่งตัวที่บีบไปทุกโมเดล (judge ใช้คำถามเดิม)
+        model_prompt = question
+        comp = None
+        if do_compress:
+            comp = await compress_prompt(
+                judge_prov, cfg.compress_model or cfg.judge_model, question,
+                min_chars=cfg.compress_min_chars, timeout_s=cfg.timeout_seconds)
+            model_prompt = comp.text
+        result = await run_fusion(routes, judge_prov, cfg.judge_model,
+                                  model_prompt, cfg.timeout_seconds,
+                                  judge_question=question)
+        result.compression = comp
+        return result

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable/fusion.py RENAMED Viewed

@@ -10,12 +10,18 @@ Route = Tuple[Provider, str]
 async def run_fusion(routes: Sequence[Route], judge_provider: Provider,
-                     judge_model: str, prompt: str, timeout_s: float) -> FinalAnswer:
-    """fan-out → judge → FinalAnswer. โยน RuntimeError ถ้าไม่มีตัวไหนสำเร็จ."""
+                     judge_model: str, prompt: str, timeout_s: float,
+                     judge_question: str | None = None) -> FinalAnswer:
+    """fan-out → judge → FinalAnswer. โยน RuntimeError ถ้าไม่มีตัวไหนสำเร็จ.
+    prompt = ข้อความที่ส่งให้โมเดล (อาจถูกบีบแล้ว)
+    judge_question = คำถามที่ใช้ให้ judge ตัดสิน (default = prompt; ส่งคำถามเดิมมาเพื่อคงคุณภาพการตัดสิน)
+    """
     completions = await fan_out(routes, prompt, timeout_s)
     if not completions:
         raise RuntimeError("no successful completions from any model")
-    chosen, reason = await judge(judge_provider, judge_model, prompt,
+    q = judge_question if judge_question is not None else prompt
+    chosen, reason = await judge(judge_provider, judge_model, q,
                                  completions, timeout_s)
     cost = estimate_cost(completions)
     return FinalAnswer(text=chosen.text, chosen_model=chosen.model,

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable/models.py RENAMED Viewed

@@ -37,3 +37,4 @@ class FinalAnswer:
     reason: str = ""
     cost_usd: float = 0.0
     all_completions: list = field(default_factory=list)
+    compression: object = None   # CompressionResult | None (กัน import วน)

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusefable
-Version: 0.1.9
+Version: 0.2.0
 Summary: Fuse multiple AI models and judge the best answer for coding
 Author: proultrax9
 License: MIT
@@ -145,6 +145,28 @@ Exposes a tool `fuse_ask(question, models?, cheap?)` for any MCP client.
 > Requires `pip install "fusefable[mcp]"` and a completed `fusefable config`.
 > If `fusefable` isn't on the app's PATH, use a full path such as `python -m fusefable.cli`.
+## Prompt compression (save tokens)
+Reduce token usage while keeping answer quality — useful when you pay per-provider
+directly. Two tiers, opt-in via `--compress`:
+```bash
+fusefable ask --compress "<long prompt or pasted code>"
+# [compressed: 5200→1800 chars, ~65% saved via llm]
+```
+- **Tier 1 (lossless):** trims trailing whitespace, collapses blank lines, strips
+  zero-width chars — keeps indentation and inner spacing intact (safe for code).
+- **Tier 2 (LLM):** for prompts of at least `compress_min_chars` characters (default
+  2000, measured after Tier 1), a cheap model compresses semantically — **once**, then
+  the compressed prompt is sent to all models, so you save `tokens × number-of-models`.
+- **Quality guards:** prompts under the threshold skip the LLM; if the LLM call fails,
+  or its result is empty, not shorter than the Tier 1 text, or under 30% of the Tier 1
+  text's length, it falls back to the lossless text. The judge always sees the
+  **original** question.
+Config (`~/.fusefable/config.yaml`): `compress`, `compress_min_chars`, `compress_model`
+(empty = reuse the judge model).
 ## Architecture
 ```

{fusefable-0.1.9 → fusefable-0.2.0}/fusefable.egg-info/SOURCES.txt RENAMED Viewed

@@ -4,6 +4,7 @@ pyproject.toml
 fusefable/__init__.py
 fusefable/cli.py
 fusefable/client.py
+fusefable/compressor.py
 fusefable/config.py
 fusefable/core.py
 fusefable/cost.py
@@ -28,6 +29,7 @@ fusefable/providers/google.py
 fusefable/providers/openai_compat.py
 tests/test_cli.py
 tests/test_client.py
+tests/test_compressor.py
 tests/test_config.py
 tests/test_core.py
 tests/test_cost.py

{fusefable-0.1.9 → fusefable-0.2.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "fusefable"
-version = "0.1.9"
+version = "0.2.0"
 description = "Fuse multiple AI models and judge the best answer for coding"
 readme = "README.md"
 license = { text = "MIT" }

fusefable-0.2.0/tests/test_compressor.py ADDED Viewed

@@ -0,0 +1,69 @@
+import pytest
+from fusefable.compressor import normalize_lossless, compress_prompt
+from fusefable.models import Completion
+def test_normalize_lossless_trims_safely_keeps_indent():
+    raw = "def  f():\n\n\n\n    return   1   \n"
+    out = normalize_lossless(raw)
+    assert "\n\n\n" not in out                 # บรรทัดว่างซ้ำถูกยุบ
+    assert out == "def  f():\n\n    return   1"  # คง indent + ช่องว่างภายใน, ตัดแค่ trailing
+def test_normalize_strips_zero_width():
+    assert normalize_lossless("ab‌") == "ab"
+class FakeProvider:
+    def __init__(self, text=None, error=False):
+        self.text, self.error = text, error
+        self.called = False
+    async def complete(self, model, prompt):
+        self.called = True
+        if self.error:
+            raise RuntimeError("boom")
+        return Completion(model=model, text=self.text)
+@pytest.mark.asyncio
+async def test_short_prompt_skips_llm():
+    prov = FakeProvider(text="should not be used")
+    r = await compress_prompt(prov, "m", "short text", min_chars=2000, timeout_s=5)
+    assert r.method == "lossless"
+    assert prov.called is False          # ไม่เรียก LLM
+@pytest.mark.asyncio
+async def test_long_prompt_uses_llm_when_shorter():
+    big = "word " * 1000                 # ~5000 chars (lossless ~4999)
+    prov = FakeProvider(text="C" * 2000) # อยู่ในช่วง 30%-100% → ผ่าน guard
+    r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
+    assert r.method == "llm"
+    assert r.final_chars == 2000
+    assert r.final_chars < r.original_chars
+    assert r.saved_pct > 0
+@pytest.mark.asyncio
+async def test_llm_failure_falls_back_to_lossless():
+    big = "word " * 1000
+    prov = FakeProvider(error=True)
+    r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
+    assert r.method == "lossless"        # LLM พัง → ใช้ lossless
+@pytest.mark.asyncio
+async def test_guard_rejects_too_short_compression():
+    big = "word " * 1000                 # ~5000 chars
+    prov = FakeProvider(text="x")        # สั้นเกินไป (< 30%)
+    r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
+    assert r.method == "lossless"        # ป้องกันโมเดลตัดเนื้อหาทิ้ง
+@pytest.mark.asyncio
+async def test_guard_rejects_longer_result():
+    big = "word " * 1000
+    prov = FakeProvider(text="y" * 99999)  # ยาวกว่าเดิม
+    r = await compress_prompt(prov, "m", big, min_chars=2000, timeout_s=5)
+    assert r.method == "lossless"

{fusefable-0.1.9 → fusefable-0.2.0}/tests/test_fusion.py RENAMED Viewed

@@ -26,6 +26,26 @@ async def test_run_fusion_end_to_end():
     assert len(result.all_completions) == 2
+@pytest.mark.asyncio
+async def test_run_fusion_uses_judge_question_for_judging():
+    seen = {}
+    class FakeProvider:
+        async def complete(self, model, prompt):
+            if model == "judge":
+                seen["judge_prompt"] = prompt
+                return Completion(model=model, text="I choose A")
+            return Completion(model=model, text="ans")
+    prov = FakeProvider()
+    routes = [(prov, "m1")]
+    await run_fusion(routes, prov, "judge", "COMPRESSED", timeout_s=5,
+                     judge_question="ORIGINAL QUESTION")
+    # judge ต้องเห็นคำถามเดิม ไม่ใช่ตัวที่บีบ
+    assert "ORIGINAL QUESTION" in seen["judge_prompt"]
+    assert "COMPRESSED" not in seen["judge_prompt"]
 @pytest.mark.asyncio
 async def test_run_fusion_raises_when_all_fail():
     class DeadProvider: