gemba 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
gemba/gpt_api.py CHANGED
@@ -6,6 +6,8 @@ from termcolor import colored
 import openai
 import tqdm
 from concurrent.futures import ThreadPoolExecutor
+from collections import defaultdict
+
 
 
 # class for calling OpenAI API and handling cache
@@ -57,6 +59,22 @@ class GptApi:
         for full_answer in answers:
             finish_reason = full_answer["finish_reason"]
             full_answer = full_answer["answer"]
+
+            if finish_reason != "stop":
+                print(f"No valid answer, giving score 0")
+                errors = defaultdict(list)
+                errors["critical"].append("Judge errored, giving answer score 0.")
+                parsed_answers.append({
+                    "temperature": temperature,
+                    "answer_id": answer_id,
+                    "answer": 0,
+                    "errors": errors,
+                    "prompt": prompt,
+                    "finish_reason": finish_reason,
+                    "model": model,
+                })
+                continue
+
             answer_id += 1
             answer = parse_response(full_answer)
             if isinstance(answer, tuple):
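
Note: the new branch above means a judge failure no longer vanishes from the results; it is recorded as a score of 0 with a critical error attached. A minimal, hedged sketch of that path (the surrounding variables such as `temperature`, `answer_id`, `prompt`, and `model` are stubbed with illustrative values):

```python
from collections import defaultdict

def score_failed_answer(temperature, answer_id, prompt, model, finish_reason):
    # Mirrors the diff: any finish_reason other than "stop" is treated as
    # a judge failure, scored 0, and flagged with a critical error.
    errors = defaultdict(list)
    errors["critical"].append("Judge errored, giving answer score 0.")
    return {
        "temperature": temperature,
        "answer_id": answer_id,
        "answer": 0,
        "errors": errors,
        "prompt": prompt,
        "finish_reason": finish_reason,
        "model": model,
    }

# Illustrative call: a truncated completion arrives with finish_reason "length".
print(score_failed_answer(0, 1, "Score this translation.", "gpt-4", "length")["answer"])  # 0
```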
@@ -67,33 +85,32 @@ class GptApi:
             print(f"Answer (t={temperature}): " + colored(answer, "yellow") + " (" + colored(full_answer, "blue") + ")", file=sys.stderr)
             if answer is None:
                 continue
-            parsed_answers.append(
-                {
-                    "temperature": temperature,
-                    "answer_id": answer_id,
-                    "answer": answer,
-                    "errors": errors,
-                    "prompt": prompt,
-                    "finish_reason": finish_reason,
-                    "model": model,
-                }
-            )
+            parsed_answers.append({
+                "temperature": temperature,
+                "answer_id": answer_id,
+                "answer": answer,
+                "errors": errors,
+                "prompt": prompt,
+                "finish_reason": finish_reason,
+                "model": model,
+            })
 
         # there was no valid answer, increase temperature and try again
         if len(parsed_answers) == 0:
+            print(f"No valid answer, increasing temperature to {temperature + 1} and trying again")
             return self.request(prompt, model, parse_response, temperature=temperature + 1, answer_id=answer_id, cache=cache)
 
         return parsed_answers
 
     def request_api(self, prompt, model, temperature=0, max_tokens=None):
         if temperature > 10:
-            return []
+            return [{"answer": None, "finish_reason": "error"}]
 
         # Add maximum token limit
         MAX_TOKENS_LIMIT = 4000  # Adjust this based on your model's context window
         if max_tokens and max_tokens > MAX_TOKENS_LIMIT:
             print(f"Reached maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
-            return []
+            return [{"answer": None, "finish_reason": "length"}]
 
         while True:
             try:
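
With the score-0 branch in place, the temperature-escalation retry above now fires only when every completion parses to `None`; judge failures short-circuit earlier with a recorded score of 0. A hedged toy model of that control flow (`parse_stub` is a stand-in, not the package's `parse_response`):

```python
def parse_stub(prompt, temperature):
    # Pretend the model only yields a parseable score at temperature >= 2.
    return [{"answer": 95, "finish_reason": "stop"}] if temperature >= 2 else []

def toy_request(prompt, temperature=0):
    if temperature > 10:
        # request_api now returns a sentinel instead of [], so callers
        # always see a finish_reason even on the give-up path.
        return [{"answer": None, "finish_reason": "error"}]
    parsed_answers = parse_stub(prompt, temperature)
    if len(parsed_answers) == 0:
        print(f"No valid answer, increasing temperature to {temperature + 1} and trying again")
        return toy_request(prompt, temperature + 1)
    return parsed_answers

print(toy_request("Score this translation."))  # retries at t=1, succeeds at t=2
```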
@@ -103,10 +120,10 @@ class GptApi:
                 # response was filtered
                 if hasattr(e, 'code'):
                     if e.code == 'content_filter':
-                        return []
+                        return [{"answer": None, "finish_reason": "filter"}]
                     print(e.code, file=sys.stderr)
                 if hasattr(e, 'error') and e.error['code'] == 'invalid_model_output':
-                    return []
+                    return [{"answer": None, "finish_reason": "invalid"}]
 
                 # frequent error is reaching the API limit
                 print(colored("Error, retrying...", "red"), file=sys.stderr)
@@ -116,7 +133,7 @@ class GptApi:
         answers = []
         for choice in response.choices:
             if choice.message.content is None:
-                return []
+                return [{"answer": None, "finish_reason": "invalid"}]
             if hasattr(choice, "message"):
                 answer = choice.message.content.strip()
             else:
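
Taken together, the hunks above replace every empty-list early return in `request_api` with a one-element sentinel, so `request` can always read a `finish_reason`. Collected for reference (the reason strings are exactly those in the diff; the `failure` helper is illustrative, not package code):

```python
def failure(reason):
    # Shape of every early return in request_api after this change:
    #   "error"   - temperature escalated past 10
    #   "length"  - requested max_tokens already above MAX_TOKENS_LIMIT
    #   "filter"  - the API's content filter blocked the response
    #   "invalid" - the model returned no usable content
    return [{"answer": None, "finish_reason": reason}]

assert failure("filter") == [{"answer": None, "finish_reason": "filter"}]
```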
@@ -126,13 +143,13 @@ class GptApi:
             if choice.finish_reason != "stop":
                 if self.verbose:
                     print(colored(f"Increasing max tokens to fit answers.", "red") + colored(answer, "blue"), file=sys.stderr)
-                print(f"Finish reason: {choice.finish_reason}", file=sys.stderr)
                 if max_tokens is None:
                     max_tokens = 500  # Set initial max_tokens if None
-                new_max_tokens = max_tokens + 200
+                new_max_tokens = max_tokens * 2
+                print(f"Finish reason: {choice.finish_reason}, increasing max tokens to {new_max_tokens}", file=sys.stderr)
                 if new_max_tokens > MAX_TOKENS_LIMIT:
                     print(f"Would exceed maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
-                    return []
+                    return [{"answer": None, "finish_reason": choice.finish_reason}]
                 return self.request_api(prompt, model, temperature=temperature, max_tokens=new_max_tokens)
 
             answers.append({
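
The growth rule for truncated answers also changes from additive (`+ 200`) to multiplicative (`* 2`), so the retry schedule reaches the 4000-token cap in three steps instead of roughly seventeen. A small sketch of the implied schedule (the helper is illustrative, not package code):

```python
MAX_TOKENS_LIMIT = 4000  # value from the diff

def next_max_tokens(max_tokens):
    if max_tokens is None:
        max_tokens = 500  # seed value from the diff
    new_max_tokens = max_tokens * 2
    return new_max_tokens if new_max_tokens <= MAX_TOKENS_LIMIT else None

schedule, mt = [], None
while (mt := next_max_tokens(mt)) is not None:
    schedule.append(mt)
print(schedule)  # [1000, 2000, 4000] -- then the sentinel answer is returned
```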
gemba-0.1.2.dist-info/METADATA → gemba-0.1.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gemba
-Version: 0.1.2
+Version: 0.1.3
 Summary: GEMBA — GPT Estimation Metric Based Assessment
 Project-URL: Homepage, https://github.com/joelniklaus/gemba
 Author-email: Joel Niklaus <joel@niklaus.ai>
gemba-0.1.2.dist-info/RECORD → gemba-0.1.3.dist-info/RECORD RENAMED
@@ -2,13 +2,13 @@ gemba/__init__.py,sha256=0ZuEumkUMWPI5wQMY7OxLolELI9GYYlup-iJw8SwBgc,67
 gemba/gemba_da.py,sha256=YCOKKP7kZBL9e1d44Zr7aTa23BqLFvh4KDOfbNSMgOU,2360
 gemba/gemba_esa.py,sha256=nBCeFjrS24wXLOcAXHRSmZFYJSkUzRS4hfp2LEqYwp8,4461
 gemba/gemba_mqm_utils.py,sha256=qiIdJv7IDx0eeqpsTCHMoUeo8EUOhG6k-YfrzkRfxyw,9612
-gemba/gpt_api.py,sha256=UJGXQBnRLBujLGdQhr6HUvbvWYQIxqmQqa_JG8iS0Uc,7394
+gemba/gpt_api.py,sha256=A1GYi0vxUGmephkadI-h6v6G52uDQ7yWOFvIxSRrN8o,8380
 gemba/mtme_tools.py,sha256=xpLxCzfnLHFIxsq_LOi1Lpb-gkyFGYqFXiq9y6O315Q,4667
 gemba/prompt.py,sha256=AuPBhO2OBL3EB5I37p-GX10sx29gRw35xFAnB3bqtII,7578
 gemba/scores.py,sha256=FmmBJ-ds-abExphcVUw9qaPMnKttPWobuXNwZKLAtEs,4388
 gemba/testset.py,sha256=tDvi6xQIBXrODg02WWINrYg9jNQqruCmhBrxe9AaK48,1926
 gemba/utils.py,sha256=Re5uW5dcFj3ITWIGpxjXdAKNDKQ7i4H-Tr_s74SQgmk,4311
-gemba-0.1.2.dist-info/METADATA,sha256=98Ge9LVScGEzoTyv6gQICfY4KA8V0Gq3927gcEPE5xI,3727
-gemba-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-gemba-0.1.2.dist-info/licenses/LICENSE.md,sha256=XkNv-P-7d9hgciDpvOIMiRXYYAEP7rbB6-9ahWiOmzk,20137
-gemba-0.1.2.dist-info/RECORD,,
+gemba-0.1.3.dist-info/METADATA,sha256=qyrjjVewIjFJeWKTAXw9rCMqLIj9OqCntenyw0F2oyw,3727
+gemba-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+gemba-0.1.3.dist-info/licenses/LICENSE.md,sha256=XkNv-P-7d9hgciDpvOIMiRXYYAEP7rbB6-9ahWiOmzk,20137
+gemba-0.1.3.dist-info/RECORD,,
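
Only `gpt_api.py` changes content (7394 → 8380 bytes); the dist-info entries are renamed for the new version and, apart from METADATA, keep their hashes. As an aside, the `sha256=` values in RECORD follow the wheel spec: URL-safe base64 of the raw digest with the `=` padding stripped. A quick sketch:

```python
import base64
import hashlib

def record_hash(data: bytes) -> str:
    # RECORD-style hash per the wheel spec: urlsafe base64 of the raw
    # sha256 digest, with trailing "=" padding removed.
    digest = hashlib.sha256(data).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

print("sha256=" + record_hash(b"example file contents"))
```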