gemba 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

gemba/gpt_api.py CHANGED
@@ -1,12 +1,11 @@
 import os
 import sys
 import time
-import ipdb
 import logging
 from termcolor import colored
-from datetime import datetime
 import openai
 import tqdm
+from concurrent.futures import ThreadPoolExecutor
 
 
 # class for calling OpenAI API and handling cache
@@ -89,6 +88,12 @@ class GptApi:
     def request_api(self, prompt, model, temperature=0, max_tokens=None):
         if temperature > 10:
             return []
+
+        # Add maximum token limit
+        MAX_TOKENS_LIMIT = 4000  # Adjust this based on your model's context window
+        if max_tokens and max_tokens > MAX_TOKENS_LIMIT:
+            print(f"Reached maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
+            return []
 
         while True:
             try:
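The guard above short-circuits before any API call when the requested budget is already over the cap. A minimal standalone sketch of the same check (the `within_budget` helper is ours for illustration; the 4000 cap mirrors the diff):

```python
import sys

MAX_TOKENS_LIMIT = 4000  # mirrors the cap introduced in the diff

def within_budget(max_tokens):
    """Return True if a request may proceed, False if it should be dropped."""
    # `max_tokens and ...` lets None (an unbounded request) through, as in the diff
    if max_tokens and max_tokens > MAX_TOKENS_LIMIT:
        print(f"Reached maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
        return False
    return True

assert within_budget(None)      # no explicit budget passes the check
assert within_budget(4000)      # exactly at the cap is still allowed
assert not within_budget(4200)  # strictly over the cap is rejected
```

Note the `>` comparison: a request of exactly 4000 tokens is allowed; only budgets strictly above the cap are dropped.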
@@ -123,8 +128,12 @@ class GptApi:
                 print(colored(f"Increasing max tokens to fit answers.", "red") + colored(answer, "blue"), file=sys.stderr)
                 print(f"Finish reason: {choice.finish_reason}", file=sys.stderr)
                 if max_tokens is None:
+                    max_tokens = 500  # Set initial max_tokens if None
+                new_max_tokens = max_tokens + 200
+                if new_max_tokens > MAX_TOKENS_LIMIT:
+                    print(f"Would exceed maximum token limit of {MAX_TOKENS_LIMIT}", file=sys.stderr)
                     return []
-                return self.request_api(prompt, model, temperature=temperature, max_tokens=max_tokens + 200)
+                return self.request_api(prompt, model, temperature=temperature, max_tokens=new_max_tokens)
 
             answers.append({
                 "answer": answer,
@@ -167,8 +176,13 @@ class GptApi:
 
     def bulk_request(self, df, model, parse_mqm_answer, cache, max_tokens=None):
         answers = []
-        for i, row in tqdm.tqdm(df.iterrows(), total=len(df), file=sys.stderr):
-            prompt = row["prompt"]
-            parsed_answers = self.request(prompt, model, parse_mqm_answer, cache=cache, max_tokens=max_tokens)
-            answers += parsed_answers
+        with ThreadPoolExecutor(100) as executor:
+            futures = [
+                executor.submit(self.request, row["prompt"], model, parse_mqm_answer, cache=cache, max_tokens=max_tokens)
+                for _, row in df.iterrows()
+            ]
+
+            for future in tqdm.tqdm(futures, total=len(df), file=sys.stderr):
+                answers += future.result()
+
         return answers
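The rewritten `bulk_request` fans the per-row requests out over a thread pool and then collects results in submission order, so the flattened answers stay aligned with the DataFrame rows; `future.result()` also re-raises any worker exception on the main thread. A self-contained sketch of the same fan-out/fan-in pattern (the `fetch` stand-in is hypothetical):

```python
from concurrent.futures import ThreadPoolExecutor

import tqdm

def fetch(prompt):
    # hypothetical stand-in for GptApi.request, which returns a list of answers
    return [f"answer to {prompt!r}"]

prompts = [f"prompt {i}" for i in range(10)]

answers = []
with ThreadPoolExecutor(max_workers=10) as executor:
    # submit everything up front, then iterate futures in submission order
    futures = [executor.submit(fetch, p) for p in prompts]
    for future in tqdm.tqdm(futures, total=len(prompts)):
        answers += future.result()  # blocks until that future completes

print(len(answers))  # 10
```

One caveat: with 100 workers the progress bar advances in submission order, so it can stall on a slow early request while later ones are already finished.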
{gemba-0.1.0.dist-info → gemba-0.1.2.dist-info}/METADATA RENAMED
@@ -1,10 +1,11 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: gemba
-Version: 0.1.0
+Version: 0.1.2
 Summary: GEMBA — GPT Estimation Metric Based Assessment
 Project-URL: Homepage, https://github.com/joelniklaus/gemba
 Author-email: Joel Niklaus <joel@niklaus.ai>
-License: MIT
+License-Expression: MIT
+License-File: LICENSE.md
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
@@ -50,7 +51,7 @@ Install the gemba package with `pip install gemba` and use the following code:
 from gemba import get_gemba_scores
 
 source = ["Hello, how are you?", "I am fine, thank you.", "I am not fine, thank you."]
-hypothesis = ["Hallo, wie geht es dir?", "Ich bin gut, danke.", "Ich bin Adolf, wer bist du?"]
+hypothesis = ["Hallo, wie geht es dir?", "Ich bin gut, danke.", "Ich bin Joel, wer bist du?"]
 source_lang = "en"
 target_lang = "de"
 
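For context, the README example these hunks edit continues past the diff window by calling `get_gemba_scores` on the four variables above. A hedged completion, assuming the positional call shape `(source, hypothesis, source_lang, target_lang)`; consult the package README for the exact signature and return value:

```python
from gemba import get_gemba_scores

source = ["Hello, how are you?", "I am fine, thank you.", "I am not fine, thank you."]
hypothesis = ["Hallo, wie geht es dir?", "Ich bin gut, danke.", "Ich bin Joel, wer bist du?"]
source_lang = "en"
target_lang = "de"

# assumed call shape; the return value's structure is not shown in the diff
answers = get_gemba_scores(source, hypothesis, source_lang, target_lang)
print(answers)
```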
{gemba-0.1.0.dist-info → gemba-0.1.2.dist-info}/RECORD RENAMED
@@ -2,13 +2,13 @@ gemba/__init__.py,sha256=0ZuEumkUMWPI5wQMY7OxLolELI9GYYlup-iJw8SwBgc,67
 gemba/gemba_da.py,sha256=YCOKKP7kZBL9e1d44Zr7aTa23BqLFvh4KDOfbNSMgOU,2360
 gemba/gemba_esa.py,sha256=nBCeFjrS24wXLOcAXHRSmZFYJSkUzRS4hfp2LEqYwp8,4461
 gemba/gemba_mqm_utils.py,sha256=qiIdJv7IDx0eeqpsTCHMoUeo8EUOhG6k-YfrzkRfxyw,9612
-gemba/gpt_api.py,sha256=Igp8uQn6chKL1QWFMqKP2VR9Fbzxm8Xk83ELxk5NfM8,6671
+gemba/gpt_api.py,sha256=UJGXQBnRLBujLGdQhr6HUvbvWYQIxqmQqa_JG8iS0Uc,7394
 gemba/mtme_tools.py,sha256=xpLxCzfnLHFIxsq_LOi1Lpb-gkyFGYqFXiq9y6O315Q,4667
 gemba/prompt.py,sha256=AuPBhO2OBL3EB5I37p-GX10sx29gRw35xFAnB3bqtII,7578
 gemba/scores.py,sha256=FmmBJ-ds-abExphcVUw9qaPMnKttPWobuXNwZKLAtEs,4388
 gemba/testset.py,sha256=tDvi6xQIBXrODg02WWINrYg9jNQqruCmhBrxe9AaK48,1926
 gemba/utils.py,sha256=Re5uW5dcFj3ITWIGpxjXdAKNDKQ7i4H-Tr_s74SQgmk,4311
-gemba-0.1.0.dist-info/METADATA,sha256=9_jYmIPKmAz5cmPn-fTUB7a5xHLbYrlTXpdzhEYaSSw,3692
-gemba-0.1.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-gemba-0.1.0.dist-info/licenses/LICENSE.md,sha256=XkNv-P-7d9hgciDpvOIMiRXYYAEP7rbB6-9ahWiOmzk,20137
-gemba-0.1.0.dist-info/RECORD,,
+gemba-0.1.2.dist-info/METADATA,sha256=98Ge9LVScGEzoTyv6gQICfY4KA8V0Gq3927gcEPE5xI,3727
+gemba-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+gemba-0.1.2.dist-info/licenses/LICENSE.md,sha256=XkNv-P-7d9hgciDpvOIMiRXYYAEP7rbB6-9ahWiOmzk,20137
+gemba-0.1.2.dist-info/RECORD,,
{gemba-0.1.0.dist-info → gemba-0.1.2.dist-info}/WHEEL RENAMED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.26.3
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any