llmasajudge 0.1.14__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmasajudge
3
- Version: 0.1.14
3
+ Version: 0.1.15
4
4
  Summary: LLM Judge: simple right/wrong voting across models
5
5
  Author-email: Brett Young <byyoung3@gmail.com>
6
6
  Project-URL: Homepage, https://example.com
@@ -875,6 +875,7 @@ class ReturnType(Enum):
875
875
  BOOLEAN = "boolean"
876
876
  SCALAR = "scalar"
877
877
  MAP = "map"
878
+ STRING = "string" # For arbitrary string returns (categories, choices, etc.)
878
879
 
879
880
 
880
881
  class AggregationMode(Enum):
@@ -888,6 +889,7 @@ class AggregationMode(Enum):
888
889
  MIN = "min"
889
890
  MAX = "max"
890
891
  MEDIAN = "median"
892
+ # String modes - uses MAJORITY and SINGLE from above
891
893
 
892
894
 
893
895
  # Valid aggregation modes per return type
@@ -895,6 +897,7 @@ VALID_MODES = {
895
897
  ReturnType.BOOLEAN: {AggregationMode.MAJORITY, AggregationMode.SINGLE, AggregationMode.ALL},
896
898
  ReturnType.SCALAR: {AggregationMode.AVERAGE, AggregationMode.MIN, AggregationMode.MAX, AggregationMode.MEDIAN, AggregationMode.SINGLE},
897
899
  ReturnType.MAP: {AggregationMode.AVERAGE, AggregationMode.MIN, AggregationMode.MAX, AggregationMode.MEDIAN, AggregationMode.SINGLE},
900
+ ReturnType.STRING: {AggregationMode.MAJORITY, AggregationMode.SINGLE},
898
901
  }
899
902
 
900
903
  # Default aggregation modes per return type
@@ -902,6 +905,7 @@ DEFAULT_MODES = {
902
905
  ReturnType.BOOLEAN: AggregationMode.MAJORITY,
903
906
  ReturnType.SCALAR: AggregationMode.AVERAGE,
904
907
  ReturnType.MAP: AggregationMode.AVERAGE,
908
+ ReturnType.STRING: AggregationMode.MAJORITY,
905
909
  }
906
910
 
907
911
  # String to enum mapping (for backward compat)
@@ -1115,6 +1119,8 @@ def _infer_return_type(value: Any) -> Optional[ReturnType]:
1115
1119
  return ReturnType.SCALAR
1116
1120
  if isinstance(value, dict) and all(isinstance(v, (int, float)) for v in value.values()):
1117
1121
  return ReturnType.MAP
1122
+ if isinstance(value, str):
1123
+ return ReturnType.STRING
1118
1124
  return None
1119
1125
 
1120
1126
 
@@ -1374,15 +1380,21 @@ Output only valid JSON. No explanation. No extra text.""",
1374
1380
  last_err = None
1375
1381
  for i in range(attempts):
1376
1382
  try:
1377
- resp = completion(
1378
- model=model,
1379
- api_base=api_base,
1380
- messages=[{"role": "user", "content": prompt}],
1381
- temperature=temperature,
1382
- max_tokens=max_tokens,
1383
- extra_headers=headers,
1384
- caching=self.cache_enabled
1385
- )
1383
+ # GPT-5 models don't accept temperature argument
1384
+ completion_kwargs = {
1385
+ "model": model,
1386
+ "api_base": api_base,
1387
+ "messages": [{"role": "user", "content": prompt}],
1388
+ "max_tokens": max_tokens,
1389
+ "extra_headers": headers,
1390
+ "caching": self.cache_enabled
1391
+ }
1392
+
1393
+ # Only add temperature if NOT a gpt-5 model
1394
+ if "gpt-5" not in model.lower():
1395
+ completion_kwargs["temperature"] = temperature
1396
+
1397
+ resp = completion(**completion_kwargs)
1386
1398
  return (resp.choices[0].message.content or "").strip()
1387
1399
  except Exception as e:
1388
1400
  last_err = e
@@ -1453,20 +1465,20 @@ Output only valid JSON. No explanation. No extra text.""",
1453
1465
  valid = [v["result"] for v in votes if v["result"] is not None and isinstance(v["result"], dict)]
1454
1466
  if not valid:
1455
1467
  raise ValueError("No valid map votes to aggregate")
1456
-
1468
+
1457
1469
  keys = set()
1458
1470
  for v in valid:
1459
1471
  keys.update(v.keys())
1460
-
1472
+
1461
1473
  if self._mode == AggregationMode.SINGLE:
1462
1474
  return valid[0]
1463
-
1475
+
1464
1476
  result = {}
1465
1477
  for key in keys:
1466
1478
  values = [v[key] for v in valid if key in v]
1467
1479
  if not values:
1468
1480
  continue
1469
-
1481
+
1470
1482
  if self._mode == AggregationMode.AVERAGE:
1471
1483
  result[key] = sum(values) / len(values)
1472
1484
  elif self._mode == AggregationMode.MIN:
@@ -1478,9 +1490,38 @@ Output only valid JSON. No explanation. No extra text.""",
1478
1490
  n = len(s)
1479
1491
  mid = n // 2
1480
1492
  result[key] = (s[mid - 1] + s[mid]) / 2 if n % 2 == 0 else s[mid]
1481
-
1493
+
1482
1494
  return result
1483
1495
 
1496
+ def _aggregate_string(self, votes: List[Dict[str, Any]]) -> str:
1497
+ """
1498
+ Aggregate string votes with tie detection.
1499
+ Returns the most common string, or "tie" if the top two strings have equal counts.
1500
+ """
1501
+ results = [v["result"] for v in votes if v["result"] is not None and isinstance(v["result"], str)]
1502
+ if not results:
1503
+ raise ValueError("No valid string votes to aggregate")
1504
+
1505
+ if self._mode == AggregationMode.SINGLE:
1506
+ return results[0]
1507
+
1508
+ # Count occurrences
1509
+ from collections import Counter
1510
+ counts = Counter(results)
1511
+
1512
+ # Get the most common
1513
+ most_common = counts.most_common()
1514
+
1515
+ if len(most_common) == 0:
1516
+ raise ValueError("No valid string votes to aggregate")
1517
+
1518
+ # Check for tie: if top two have same count
1519
+ if len(most_common) > 1 and most_common[0][1] == most_common[1][1]:
1520
+ return "tie"
1521
+
1522
+ # Return the majority
1523
+ return most_common[0][0]
1524
+
1484
1525
  def judge(
1485
1526
  self,
1486
1527
  input: Any = None,
@@ -1577,13 +1618,16 @@ Output only valid JSON. No explanation. No extra text.""",
1577
1618
  final = self._aggregate_scalar(votes)
1578
1619
  elif return_type == ReturnType.MAP:
1579
1620
  final = self._aggregate_map(votes)
1621
+ elif return_type == ReturnType.STRING:
1622
+ final = self._aggregate_string(votes)
1580
1623
  else:
1581
1624
  raise ValueError(f"Unknown return type: {return_type}")
1582
1625
 
1583
1626
  # Build backward-compatible response
1584
- # Boolean: correct=bool, scores=None
1585
- # Scalar: correct=score, scores=score (both fields for convenience)
1586
- # Map: correct=None, scores=map
1627
+ # Boolean: correct=bool, scores=None, result=bool
1628
+ # Scalar: correct=score, scores=score, result=score (all three fields for convenience)
1629
+ # Map: correct=None, scores=map, result=map
1630
+ # String: correct=string, scores=None, result=string
1587
1631
  if return_type == ReturnType.BOOLEAN:
1588
1632
  # Also put "correct" in each vote for backward compat
1589
1633
  for v in votes:
@@ -1591,6 +1635,7 @@ Output only valid JSON. No explanation. No extra text.""",
1591
1635
  return {
1592
1636
  "correct": final,
1593
1637
  "scores": None,
1638
+ "result": final,
1594
1639
  "mode": self.mode,
1595
1640
  "votes": votes,
1596
1641
  }
@@ -1599,6 +1644,16 @@ Output only valid JSON. No explanation. No extra text.""",
1599
1644
  return {
1600
1645
  "correct": final,
1601
1646
  "scores": final,
1647
+ "result": final,
1648
+ "mode": self.mode,
1649
+ "votes": votes,
1650
+ }
1651
+ elif return_type == ReturnType.STRING:
1652
+ # For string, put result in correct field
1653
+ return {
1654
+ "correct": final,
1655
+ "scores": None,
1656
+ "result": final,
1602
1657
  "mode": self.mode,
1603
1658
  "votes": votes,
1604
1659
  }
@@ -1606,6 +1661,7 @@ Output only valid JSON. No explanation. No extra text.""",
1606
1661
  return {
1607
1662
  "correct": None,
1608
1663
  "scores": final,
1664
+ "result": final,
1609
1665
  "mode": self.mode,
1610
1666
  "votes": votes,
1611
1667
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmasajudge
3
- Version: 0.1.14
3
+ Version: 0.1.15
4
4
  Summary: LLM Judge: simple right/wrong voting across models
5
5
  Author-email: Brett Young <byyoung3@gmail.com>
6
6
  Project-URL: Homepage, https://example.com
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "llmasajudge"
7
- version = "0.1.14"
7
+ version = "0.1.15"
8
8
  description = "LLM Judge: simple right/wrong voting across models"
9
9
  authors = [{name="Brett Young", email="byyoung3@gmail.com"}]
10
10
  readme = "README.md"
File without changes
File without changes