gemba 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
gemba/gpt_api.py CHANGED
@@ -1,12 +1,11 @@
1
1
  import os
2
2
  import sys
3
3
  import time
4
- import ipdb
5
4
  import logging
6
5
  from termcolor import colored
7
- from datetime import datetime
8
6
  import openai
9
7
  import tqdm
8
+ from concurrent.futures import ThreadPoolExecutor
10
9
 
11
10
 
12
11
  # class for calling OpenAI API and handling cache
@@ -89,6 +88,12 @@ class GptApi:
89
88
  def request_api(self, prompt, model, temperature=0, max_tokens=None):
90
89
  if temperature > 10:
91
90
  return []
91
+
92
+ # Add maximum token limit
93
+ MAX_TOKENS_LIMIT = 4000 # Adjust this based on your model's context window
94
+ if max_tokens and max_tokens > MAX_TOKENS_LIMIT:
95
+ print(f"Reached maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
96
+ return []
92
97
 
93
98
  while True:
94
99
  try:
@@ -123,8 +128,12 @@ class GptApi:
123
128
  print(colored(f"Increasing max tokens to fit answers.", "red") + colored(answer, "blue"), file=sys.stderr)
124
129
  print(f"Finish reason: {choice.finish_reason}", file=sys.stderr)
125
130
  if max_tokens is None:
131
+ max_tokens = 500 # Set initial max_tokens if None
132
+ new_max_tokens = max_tokens + 200
133
+ if new_max_tokens > MAX_TOKENS_LIMIT:
134
+ print(f"Would exceed maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
126
135
  return []
127
- return self.request_api(prompt, model, temperature=temperature, max_tokens=max_tokens + 200)
136
+ return self.request_api(prompt, model, temperature=temperature, max_tokens=new_max_tokens)
128
137
 
129
138
  answers.append({
130
139
  "answer": answer,
@@ -167,8 +176,13 @@ class GptApi:
167
176
 
168
177
  def bulk_request(self, df, model, parse_mqm_answer, cache, max_tokens=None):
169
178
  answers = []
170
- for i, row in tqdm.tqdm(df.iterrows(), total=len(df), file=sys.stderr):
171
- prompt = row["prompt"]
172
- parsed_answers = self.request(prompt, model, parse_mqm_answer, cache=cache, max_tokens=max_tokens)
173
- answers += parsed_answers
179
+ with ThreadPoolExecutor(100) as executor:
180
+ futures = [
181
+ executor.submit(self.request, row["prompt"], model, parse_mqm_answer, cache=cache, max_tokens=max_tokens)
182
+ for _, row in df.iterrows()
183
+ ]
184
+
185
+ for future in tqdm.tqdm(futures, total=len(df), file=sys.stderr):
186
+ answers += future.result()
187
+
174
188
  return answers
@@ -1,10 +1,11 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: gemba
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: GEMBA — GPT Estimation Metric Based Assessment
5
5
  Project-URL: Homepage, https://github.com/joelniklaus/gemba
6
6
  Author-email: Joel Niklaus <joel@niklaus.ai>
7
- License: MIT
7
+ License-Expression: MIT
8
+ License-File: LICENSE.md
8
9
  Classifier: License :: OSI Approved :: MIT License
9
10
  Classifier: Operating System :: OS Independent
10
11
  Classifier: Programming Language :: Python :: 3
@@ -50,7 +51,7 @@ Install the gemba package with `pip install gemba` and use the following code:
50
51
  from gemba import get_gemba_scores
51
52
 
52
53
  source = ["Hello, how are you?", "I am fine, thank you.", "I am not fine, thank you."]
53
- hypothesis = ["Hallo, wie geht es dir?", "Ich bin gut, danke.", "Ich bin Adolf, wer bist du?"]
54
+ hypothesis = ["Hallo, wie geht es dir?", "Ich bin gut, danke.", "Ich bin Joel, wer bist du?"]
54
55
  source_lang = "en"
55
56
  target_lang = "de"
56
57
 
@@ -2,13 +2,13 @@ gemba/__init__.py,sha256=0ZuEumkUMWPI5wQMY7OxLolELI9GYYlup-iJw8SwBgc,67
2
2
  gemba/gemba_da.py,sha256=YCOKKP7kZBL9e1d44Zr7aTa23BqLFvh4KDOfbNSMgOU,2360
3
3
  gemba/gemba_esa.py,sha256=nBCeFjrS24wXLOcAXHRSmZFYJSkUzRS4hfp2LEqYwp8,4461
4
4
  gemba/gemba_mqm_utils.py,sha256=qiIdJv7IDx0eeqpsTCHMoUeo8EUOhG6k-YfrzkRfxyw,9612
5
- gemba/gpt_api.py,sha256=Igp8uQn6chKL1QWFMqKP2VR9Fbzxm8Xk83ELxk5NfM8,6671
5
+ gemba/gpt_api.py,sha256=UJGXQBnRLBujLGdQhr6HUvbvWYQIxqmQqa_JG8iS0Uc,7394
6
6
  gemba/mtme_tools.py,sha256=xpLxCzfnLHFIxsq_LOi1Lpb-gkyFGYqFXiq9y6O315Q,4667
7
7
  gemba/prompt.py,sha256=AuPBhO2OBL3EB5I37p-GX10sx29gRw35xFAnB3bqtII,7578
8
8
  gemba/scores.py,sha256=FmmBJ-ds-abExphcVUw9qaPMnKttPWobuXNwZKLAtEs,4388
9
9
  gemba/testset.py,sha256=tDvi6xQIBXrODg02WWINrYg9jNQqruCmhBrxe9AaK48,1926
10
10
  gemba/utils.py,sha256=Re5uW5dcFj3ITWIGpxjXdAKNDKQ7i4H-Tr_s74SQgmk,4311
11
- gemba-0.1.0.dist-info/METADATA,sha256=9_jYmIPKmAz5cmPn-fTUB7a5xHLbYrlTXpdzhEYaSSw,3692
12
- gemba-0.1.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
13
- gemba-0.1.0.dist-info/licenses/LICENSE.md,sha256=XkNv-P-7d9hgciDpvOIMiRXYYAEP7rbB6-9ahWiOmzk,20137
14
- gemba-0.1.0.dist-info/RECORD,,
11
+ gemba-0.1.2.dist-info/METADATA,sha256=98Ge9LVScGEzoTyv6gQICfY4KA8V0Gq3927gcEPE5xI,3727
12
+ gemba-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
+ gemba-0.1.2.dist-info/licenses/LICENSE.md,sha256=XkNv-P-7d9hgciDpvOIMiRXYYAEP7rbB6-9ahWiOmzk,20137
14
+ gemba-0.1.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.26.3
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any