llmasajudge 0.1.10__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/PKG-INFO +1 -1
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/llmasajudge/__init__.py +153 -44
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/llmasajudge.egg-info/PKG-INFO +1 -1
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/pyproject.toml +1 -1
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/README.md +0 -0
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/llmasajudge.egg-info/SOURCES.txt +0 -0
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/llmasajudge.egg-info/dependency_links.txt +0 -0
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/llmasajudge.egg-info/requires.txt +0 -0
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/llmasajudge.egg-info/top_level.txt +0 -0
- {llmasajudge-0.1.10 → llmasajudge-0.1.11}/setup.cfg +0 -0
llmasajudge/__init__.py

@@ -206,7 +206,10 @@ import time
 import random
 import re
 from typing import Any, Callable, Dict, List, Optional, Tuple
+import litellm
 from litellm import completion
+from litellm.caching.caching import Cache
+
 
 __all__ = ["LLMAsAJudge", "OutputParsers"]
 
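The new imports above hook litellm's optional response cache into the judge. A minimal sketch of how those pieces fit together (illustrative only; the model string, prompt, and cache directory below are assumptions, not taken from the package):

    import litellm
    from litellm import completion
    from litellm.caching.caching import Cache

    # Configure a local disk cache; litellm can then serve identical requests from disk.
    litellm.cache = Cache(type="disk", disk_cache_dir="./.llm_cache")

    resp = completion(
        model="openai/gpt-4o-mini",
        messages=[{"role": "user", "content": "Answer 'yes' or 'no': is 2 + 2 equal to 4?"}],
        caching=True,  # opt this call in to the cache configured above
    )
    print((resp.choices[0].message.content or "").strip())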
@@ -322,10 +325,122 @@ Output only the number. No explanation. No extra text.""",
 
 
 
+    # def __init__(
+    #     self,
+    #     models: Optional[List[str]] = None,
+    #     config: Optional[Dict[str, Dict[str, Any]]] = None,  # one dict for providers and models
+    #     base_headers: Optional[Dict[str, str]] = None,
+    #     wandb_project: Optional[str] = None,
+    #     custom_template: Optional[str] = None,
+    #     use_fully_custom_prompt: bool = False,
+    #     notes: Optional[str] = None,
+    #     output_parser: Optional[str] = 'right/wrong',
+    #     fallback_comparison: bool = True,
+    #     default_temperature: float = 0.0,
+    #     verbose: bool = False,
+    #     num_retries: int = 2,  # per-call retries before giving up on that model
+    #     backoff_base: float = 0.5,  # seconds
+    #     backoff_max: float = 4.0,  # seconds
+    #     custom_generation_fns: Optional[List[Callable[[str], str]]] = None,
+    #     mode: str = "majority",  # "single", "majority", "all"
+    # ):
+    #     """
+    #     config keys can be a provider name ("wandb", "openai", "anthropic")
+    #     or a full model name ("openai/gpt-4o-mini", "wandb/deepseek-ai/DeepSeek-V3.1").
+
+    #     Values can include:
+    #       api_base: Optional[str]
+    #       headers: Dict[str, str]
+    #       temperature: float
+
+    #     Precedence:
+    #       base_headers < provider config < model config
+
+    #     Args:
+    #         models: List of litellm model strings (e.g., ["openai/gpt-4", "anthropic/claude-3"])
+    #         custom_template: Template with placeholders for input/output/ground_truth
+    #         use_fully_custom_prompt: If True, pass complete prompt to judge(prompt=...).
+    #             When True, input/output/ground_truth must NOT be passed to judge()
+    #         output_parser: Parser name ('right/wrong', 'yes/no', 'pass/fail', 'numeric')
+    #             or custom function with signature (str) -> Any
+    #         fallback_comparison: If True and parser returns None, falls back to string comparison
+    #         custom_generation_fns: List of custom inference functions with signature fn(prompt: str) -> str
+    #             These will be used in addition to litellm models for voting.
+    #         mode: Voting mode - "majority" (default), "single" (first judge only), or "all" (unanimous)
+    #     """
+    #     self.models = models or []
+    #     self.custom_generation_fns = custom_generation_fns or []
+
+    #     # Validate that at least one judge is provided
+    #     if not self.models and not self.custom_generation_fns:
+    #         raise ValueError("Must provide at least one of: models (litellm) or custom_generation_fns")
+
+    #     # Validate mode
+    #     if mode not in ("majority", "single", "all"):
+    #         raise ValueError("mode must be 'majority', 'single', or 'all'")
+
+    #     self.config = config or {}
+    #     self.base_headers = dict(base_headers or {})
+    #     self.wandb_project = wandb_project or os.getenv("WANDB_PROJECT")
+    #     self.notes = notes or ""
+    #     self.use_fully_custom_prompt = use_fully_custom_prompt
+    #     self.mode = mode
+
+    #     # Resolve output parser
+    #     parser_name = None
+    #     if isinstance(output_parser, str):
+    #         parser_map = {
+    #             'right/wrong': OutputParsers.right_wrong,
+    #             'pass/fail': OutputParsers.pass_fail,
+    #             'yes/no': OutputParsers.yes_no,
+    #             'numeric': OutputParsers.numeric_score,
+    #         }
+    #         if output_parser not in parser_map:
+    #             raise ValueError(f"Unknown parser '{output_parser}'. Available: {list(parser_map.keys())}")
+    #         self.output_parser = parser_map[output_parser]
+    #         parser_name = output_parser
+    #     else:
+    #         self.output_parser = output_parser
+
+    #     # Set template based on mode
+    #     if use_fully_custom_prompt:
+    #         self.template = None  # No template in fully custom mode
+    #     elif custom_template:
+    #         self.template = custom_template
+    #     elif parser_name and parser_name in self.PARSER_INSTRUCTIONS:
+    #         self.template = self.BASE_TEMPLATE.format(
+    #             instruction=self.PARSER_INSTRUCTIONS[parser_name],
+    #             notes_section="{notes_section}",
+    #             input_block="{input_block}",
+    #             model_output="{model_output}",
+    #             ground_truth="{ground_truth}",
+    #         )
+    #     else:
+    #         # Default to right/wrong for custom parsers
+    #         self.template = self.BASE_TEMPLATE.format(
+    #             instruction=self.PARSER_INSTRUCTIONS['right/wrong'],
+    #             notes_section="{notes_section}",
+    #             input_block="{input_block}",
+    #             model_output="{model_output}",
+    #             ground_truth="{ground_truth}",
+    #         )
+
+    #     self.fallback_comparison = fallback_comparison
+    #     self.default_temperature = float(default_temperature)
+    #     self.verbose = verbose
+    #     self.num_retries = int(num_retries)
+    #     self.backoff_base = float(backoff_base)
+    #     self.backoff_max = float(backoff_max)
+
+
+
+
+
+
     def __init__(
         self,
         models: Optional[List[str]] = None,
-        config: Optional[Dict[str, Dict[str, Any]]] = None,
+        config: Optional[Dict[str, Dict[str, Any]]] = None,
         base_headers: Optional[Dict[str, str]] = None,
         wandb_project: Optional[str] = None,
         custom_template: Optional[str] = None,
@@ -335,44 +450,19 @@ Output only the number. No explanation. No extra text.""",
         fallback_comparison: bool = True,
         default_temperature: float = 0.0,
         verbose: bool = False,
-        num_retries: int = 2,
-        backoff_base: float = 0.5,
-        backoff_max: float = 4.0,
+        num_retries: int = 2,
+        backoff_base: float = 0.5,
+        backoff_max: float = 4.0,
         custom_generation_fns: Optional[List[Callable[[str], str]]] = None,
-        mode: str = "majority",
+        mode: str = "majority",
+        litellm_cache_dir: Optional[str] = None,
     ):
-        """
-        config keys can be a provider name ("wandb", "openai", "anthropic")
-        or a full model name ("openai/gpt-4o-mini", "wandb/deepseek-ai/DeepSeek-V3.1").
-
-        Values can include:
-          api_base: Optional[str]
-          headers: Dict[str, str]
-          temperature: float
-
-        Precedence:
-          base_headers < provider config < model config
-
-        Args:
-            models: List of litellm model strings (e.g., ["openai/gpt-4", "anthropic/claude-3"])
-            custom_template: Template with placeholders for input/output/ground_truth
-            use_fully_custom_prompt: If True, pass complete prompt to judge(prompt=...).
-                When True, input/output/ground_truth must NOT be passed to judge()
-            output_parser: Parser name ('right/wrong', 'yes/no', 'pass/fail', 'numeric')
-                or custom function with signature (str) -> Any
-            fallback_comparison: If True and parser returns None, falls back to string comparison
-            custom_generation_fns: List of custom inference functions with signature fn(prompt: str) -> str
-                These will be used in addition to litellm models for voting.
-            mode: Voting mode - "majority" (default), "single" (first judge only), or "all" (unanimous)
-        """
         self.models = models or []
         self.custom_generation_fns = custom_generation_fns or []
 
-        # Validate that at least one judge is provided
         if not self.models and not self.custom_generation_fns:
             raise ValueError("Must provide at least one of: models (litellm) or custom_generation_fns")
 
-        # Validate mode
         if mode not in ("majority", "single", "all"):
             raise ValueError("mode must be 'majority', 'single', or 'all'")
 
@@ -382,8 +472,13 @@ Output only the number. No explanation. No extra text.""",
         self.notes = notes or ""
         self.use_fully_custom_prompt = use_fully_custom_prompt
         self.mode = mode
+        self.fallback_comparison = fallback_comparison
+        self.default_temperature = float(default_temperature)
+        self.verbose = verbose
+        self.num_retries = int(num_retries)
+        self.backoff_base = float(backoff_base)
+        self.backoff_max = float(backoff_max)
 
-        # Resolve output parser
         parser_name = None
         if isinstance(output_parser, str):
             parser_map = {
@@ -393,15 +488,14 @@ Output only the number. No explanation. No extra text.""",
                 'numeric': OutputParsers.numeric_score,
             }
             if output_parser not in parser_map:
-                raise ValueError(f"Unknown parser '{output_parser}'. Available: {list(parser_map.keys())}")
+                raise ValueError(f"Unknown parser '{output_parser}'")
             self.output_parser = parser_map[output_parser]
             parser_name = output_parser
         else:
             self.output_parser = output_parser
 
-        # Set template based on mode
         if use_fully_custom_prompt:
-            self.template = None
+            self.template = None
         elif custom_template:
             self.template = custom_template
         elif parser_name and parser_name in self.PARSER_INSTRUCTIONS:
@@ -413,7 +507,6 @@ Output only the number. No explanation. No extra text.""",
                 ground_truth="{ground_truth}",
             )
         else:
-            # Default to right/wrong for custom parsers
             self.template = self.BASE_TEMPLATE.format(
                 instruction=self.PARSER_INSTRUCTIONS['right/wrong'],
                 notes_section="{notes_section}",
@@ -422,12 +515,18 @@ Output only the number. No explanation. No extra text.""",
                 ground_truth="{ground_truth}",
             )
 
-
-        self.
-        self.
-
-
-
+        # optional local cache setup
+        self.cache_enabled = litellm_cache_dir is not None
+        if self.cache_enabled:
+            litellm.cache = Cache(type="disk", disk_cache_dir=litellm_cache_dir)
+
+
+
+
+
+
+
+
 
     def _build_prompt(self, input: Any, model_output: Any, ground_truth: Any) -> str:
         notes_section = f"notes:\n{self.notes}\n" if self.notes else ""
@@ -495,14 +594,24 @@ Output only the number. No explanation. No extra text.""",
             last_err = None
             for i in range(attempts):
                 try:
+                    # resp = completion(
+                    #     model=model,
+                    #     api_base=api_base,  # None uses provider default
+                    #     messages=[{"role": "user", "content": prompt}],
+                    #     temperature=temperature,
+                    #     max_tokens=max_tokens,
+                    #     extra_headers=headers,
+                    # )
+
                     resp = completion(
                         model=model,
-                        api_base=api_base,
+                        api_base=api_base,
                         messages=[{"role": "user", "content": prompt}],
                         temperature=temperature,
                         max_tokens=max_tokens,
                         extra_headers=headers,
-                    )
+                        caching=self.cache_enabled
+                    )
                     return (resp.choices[0].message.content or "").strip()
                 except Exception as e:
                     last_err = e
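Taken together, the __init__.py changes add an opt-in disk cache: the new litellm_cache_dir constructor argument configures litellm.cache, and every completion call now passes caching=self.cache_enabled so repeated judgements of identical prompts can be served from disk. A hedged usage sketch (the constructor keywords come from the signature shown in this diff; the model string and cache path are illustrative, and the judging call itself is omitted because its parameters are not part of this diff):

    from llmasajudge import LLMAsAJudge

    judge = LLMAsAJudge(
        models=["openai/gpt-4o-mini"],       # any litellm model string
        output_parser="right/wrong",
        litellm_cache_dir="./.judge_cache",  # new in 0.1.11: enables litellm's disk cache
    )
    # Omitting litellm_cache_dir (default None) keeps the previous, uncached behavior.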
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llmasajudge"
-version = "0.1.10"
+version = "0.1.11"
 description = "LLM Judge: simple right/wrong voting across models"
 authors = [{name="Brett Young", email="byyoung3@gmail.com"}]
 readme = "README.md"