langfun 0.0.2.dev20240201__py3-none-any.whl → 0.0.2.dev20240202__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langfun/core/eval/base.py CHANGED
@@ -57,6 +57,10 @@ class Evaluable(lf.Component):
57
57
  ),
58
58
  ] = lf.contextual(default=None)
59
59
 
60
+ report_precision: Annotated[
61
+ int, 'Number of decimals when reporting precision.'
62
+ ] = lf.contextual(default=1)
63
+
60
64
  @property
61
65
  def dir(self) -> str | None:
62
66
  """Returns the directory for saving results and details."""
@@ -1045,12 +1049,12 @@ class Evaluation(Evaluable):
1045
1049
  def _status(self, progress: lf.concurrent.Progress) -> dict[str, Any]:
1046
1050
  return {
1047
1051
  'Model': self.lm.model_id,
1048
- 'Succeeded': '%.2f%% (%d/%d)' % (
1052
+ 'Succeeded': f'%.{self.report_precision}f%% (%d/%d)' % (
1049
1053
  progress.success_rate * 100,
1050
1054
  progress.succeeded,
1051
1055
  progress.completed,
1052
1056
  ),
1053
- 'Failed': '%.2f%% (%d/%d)' % (
1057
+ 'Failed': f'%.{self.report_precision}f%% (%d/%d)' % (
1054
1058
  progress.failure_rate * 100,
1055
1059
  progress.failed,
1056
1060
  progress.completed,
@@ -1060,14 +1064,18 @@ class Evaluation(Evaluable):
1060
1064
  def _completion_status(self, run_status: str) -> str:
1061
1065
  assert self.result is not None
1062
1066
  m = self.result.metrics
1063
- return 'COMPLETED(%s): Successes=%.2f%% (%d/%d) Failures=%.2f%% (%d/%d)' % (
1064
- run_status,
1065
- (1 - m.failure_rate) * 100,
1066
- m.total - m.failures,
1067
- m.total,
1068
- m.failure_rate * 100,
1069
- m.failures,
1070
- m.total,
1067
+ return (
1068
+ f'COMPLETED(%s): Successes=%.{self.report_precision}f%% (%d/%d)'
1069
+ f' Failures=%.{self.report_precision}f%% (%d/%d)'
1070
+ % (
1071
+ run_status,
1072
+ (1 - m.failure_rate) * 100,
1073
+ m.total - m.failures,
1074
+ m.total,
1075
+ m.failure_rate * 100,
1076
+ m.failures,
1077
+ m.total,
1078
+ )
1071
1079
  )
1072
1080
 
1073
1081
  def summarize(self) -> pg.Dict:
@@ -1130,7 +1138,7 @@ class Evaluation(Evaluable):
1130
1138
  m.failures,
1131
1139
  m.total,
1132
1140
  self.failures_link,
1133
- '%.2f%% ' % (m.failure_rate * 100),
1141
+ f'%.{self.report_precision}f%% ' % (m.failure_rate * 100),
1134
1142
  )
1135
1143
  )
1136
1144
 
@@ -1218,7 +1226,7 @@ class Evaluation(Evaluable):
1218
1226
  s.write(
1219
1227
  '<td><span style="color:orange">%s</span>%s</td>'
1220
1228
  % (
1221
- '%.2f%%' % (self.failure_rate * 100),
1229
+ f'%.{self.report_precision}f%%' % (self.failure_rate * 100),
1222
1230
  '<a href="%s">(%d/%d)</a>'
1223
1231
  % (self.failures_link, self.num_failures, self.num_completed),
1224
1232
  )
@@ -102,17 +102,17 @@ class Matching(base.Evaluation):
102
102
  del progress
103
103
  return {
104
104
  'Model': self.lm.model_id,
105
- 'Matches': '%.2f%% (%d/%d)' % (
105
+ 'Matches': f'%.{self.report_precision}f%% (%d/%d)' % (
106
106
  self.match_rate * 100,
107
107
  self.num_matches,
108
108
  self.num_completed,
109
109
  ),
110
- 'Mismatches': '%.2f%% (%d/%d)' % (
110
+ 'Mismatches': f'%.{self.report_precision}f%% (%d/%d)' % (
111
111
  self.mismatch_rate * 100,
112
112
  self.num_mismatches,
113
113
  self.num_completed,
114
114
  ),
115
- 'Failed': '%.2f%% (%d/%d)' % (
115
+ 'Failed': f'%.{self.report_precision}f%% (%d/%d)' % (
116
116
  self.failure_rate * 100,
117
117
  self.num_failures,
118
118
  self.num_completed,
@@ -123,8 +123,9 @@ class Matching(base.Evaluation):
123
123
  assert self.result is not None
124
124
  m = self.result.metrics
125
125
  return (
126
- 'COMPLETED(%s): Matches=%.2f%% (%d/%d) Mismatches=%.2f%% (%d/%d) '
127
- 'Failures=%.2f%% (%d/%d)'
126
+ f'COMPLETED(%s): Matches=%.{self.report_precision}f%% (%d/%d)'
127
+ f' Mismatches=%.{self.report_precision}f%% (%d/%d)'
128
+ f' Failures=%.{self.report_precision}f%% (%d/%d)'
128
129
  ) % (
129
130
  run_status,
130
131
  m.match_rate * 100,
@@ -202,7 +203,7 @@ class Matching(base.Evaluation):
202
203
  s.write(
203
204
  '<td><span style="color:red">%s</span>%s</td>'
204
205
  % (
205
- '%.2f%% ' % (self.mismatch_rate * 100),
206
+ f'%.{self.report_precision}f%% ' % (self.mismatch_rate * 100),
206
207
  '<a href="%s">(%d/%d)</a>'
207
208
  % (self.mismatches_link, self.num_mismatches, self.num_completed),
208
209
  )
@@ -210,7 +211,7 @@ class Matching(base.Evaluation):
210
211
  s.write(
211
212
  '<td><span style="color:green">%s</span>%s</td>'
212
213
  % (
213
- '%.2f%% ' % (self.match_rate * 100),
214
+ f'%.{self.report_precision}f%% ' % (self.match_rate * 100),
214
215
  '<a href="%s">(%d/%d)</a>'
215
216
  % (self.matches_link, self.num_matches, self.num_completed),
216
217
  )
@@ -226,7 +227,7 @@ class Matching(base.Evaluation):
226
227
  m.num_matches,
227
228
  m.total,
228
229
  self.matches_link,
229
- '%.2f%% ' % (m.match_rate * 100),
230
+ f'%.{self.report_precision}f%% ' % (m.match_rate * 100),
230
231
  )
231
232
  )
232
233
  s.write(' | ')
@@ -236,7 +237,7 @@ class Matching(base.Evaluation):
236
237
  m.num_mismatches,
237
238
  m.total,
238
239
  self.mismatches_link,
239
- '%.2f%% ' % (m.mismatch_rate * 100),
240
+ f'%.{self.report_precision}f%% ' % (m.mismatch_rate * 100),
240
241
  )
241
242
  )
242
243
  s.write(' | ')
@@ -44,29 +44,32 @@ SUPPORTED_MODELS_AND_SETTINGS = [
44
44
  # Model name, max concurrent requests.
45
45
  # The concurrent requests is estimated by TPM/RPM from
46
46
  # https://platform.openai.com/account/limits
47
- # GPT4 Turbo models.
48
- ('gpt-4-1106-preview', 1), # Gpt4 Turbo.
49
- ('gpt-4-vision-preview', 1), # Gpt4 Turbo with Vision.
50
- # GPT4 models.
47
+ # GPT-4 Turbo models.
48
+ ('gpt-4-turbo-preview', 1), # GPT-4 Turbo.
49
+ ('gpt-4-0125-preview', 1), # GPT-4 Turbo
50
+ ('gpt-4-1106-preview', 1), # GPT-4 Turbo
51
+ ('gpt-4-vision-preview', 1), # GPT-4 Turbo with Vision.
52
+ # GPT-4 models.
51
53
  ('gpt-4', 4),
52
54
  ('gpt-4-0613', 4),
53
55
  ('gpt-4-0314', 4),
54
56
  ('gpt-4-32k', 4),
55
57
  ('gpt-4-32k-0613', 4),
56
58
  ('gpt-4-32k-0314', 4),
57
- # GPT3.5 Turbo models.
59
+ # GPT-3.5 Turbo models.
58
60
  ('gpt-3.5-turbo', 16),
61
+ ('gpt-3.5-turbo-0125', 16),
59
62
  ('gpt-3.5-turbo-1106', 16),
60
63
  ('gpt-3.5-turbo-0613', 16),
61
64
  ('gpt-3.5-turbo-0301', 16),
62
65
  ('gpt-3.5-turbo-16k', 16),
63
66
  ('gpt-3.5-turbo-16k-0613', 16),
64
67
  ('gpt-3.5-turbo-16k-0301', 16),
65
- # GPT3.5 models.
66
- ('text-davinci-003', 8), # Gpt3.5, trained with RHLF.
68
+ # GPT-3.5 models.
69
+ ('text-davinci-003', 8), # GPT-3.5, trained with RLHF.
67
70
  ('text-davinci-002', 4), # Trained with SFT but no RLHF.
68
71
  ('code-davinci-002', 4),
69
- # GPT3 instruction-tuned models.
72
+ # GPT-3 instruction-tuned models.
70
73
  ('text-curie-001', 4),
71
74
  ('text-babbage-001', 4),
72
75
  ('text-ada-001', 4),
@@ -74,7 +77,7 @@ SUPPORTED_MODELS_AND_SETTINGS = [
74
77
  ('curie', 4),
75
78
  ('babbage', 4),
76
79
  ('ada', 4),
77
- # GPT3 base models without instruction tuning.
80
+ # GPT-3 base models without instruction tuning.
78
81
  ('babbage-002', 4),
79
82
  ('davinci-002', 4),
80
83
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langfun
3
- Version: 0.0.2.dev20240201
3
+ Version: 0.0.2.dev20240202
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -40,9 +40,9 @@ langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-d
40
40
  langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
41
41
  langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
42
42
  langfun/core/eval/__init__.py,sha256=iDA2OcJ3kR6ixZizXIY3N9LsjkaVrfTbSClTiSP8ekY,1291
43
- langfun/core/eval/base.py,sha256=Wa8kqGQJK2R4pvdCFKB4Mmlcfk4c4qGZn_fl_wy9H-Q,52276
43
+ langfun/core/eval/base.py,sha256=wWFDDrf0jBzs9H_5XfdZSeOBGXyUtXAJJouk7cLckSM,52602
44
44
  langfun/core/eval/base_test.py,sha256=bGs3VLchkAJFWYJ8FdR7mC6qoDestAvCHOQpClG6Mzw,21248
45
- langfun/core/eval/matching.py,sha256=INjG-EU-hQa6zDk2_bAbWWAQnxN4lor-Dnas8i2CTAo,9326
45
+ langfun/core/eval/matching.py,sha256=g2yuBb4FeOlAlB10hqdWvaIg4QVQlJbiViRDcD2Y8go,9567
46
46
  langfun/core/eval/matching_test.py,sha256=IfuMF_dEmy4VzK6tIldRzD2Nqlml7SSh4u-baFNcZrw,4912
47
47
  langfun/core/eval/scoring.py,sha256=mshqbV_WM0zcp15TSR32ACMBDymlsbf6YH06PPx1Tw0,6139
48
48
  langfun/core/eval/scoring_test.py,sha256=_L_B40VZkyI2_PJce-jVKYC4llrO4jGUR5j86Gu6AT0,4046
@@ -53,7 +53,7 @@ langfun/core/llms/gemini.py,sha256=p3d4Cl2uET-os1n_V3YNE6-6cYrZjndj7lxZIk2E8_4,5
53
53
  langfun/core/llms/gemini_test.py,sha256=ybNNCn3JW3hYpMe0wT5ILGDrMPaYYU8PN2kSookM0jk,5433
54
54
  langfun/core/llms/llama_cpp.py,sha256=EIjJa1-Tg4_VaIxVR88oDWSWc_axc1r2KwSPpl4PSp0,2549
55
55
  langfun/core/llms/llama_cpp_test.py,sha256=ZxC6defGd_HX9SFRU9U4cJiQnBKundbOrchbXuC1Z2M,1683
56
- langfun/core/llms/openai.py,sha256=1wJohy6UZVBiZkbnKsD2Q_qt6-x_9KUnlRALd1TFV9E,10985
56
+ langfun/core/llms/openai.py,sha256=ao2sDDoh5ma1GWpLpNPZARIeLZK55gL1Ldc94h1EGtE,11119
57
57
  langfun/core/llms/openai_test.py,sha256=JWcMveifVVVEFWdtmNq1irc9wSFQRxXs-SnOF3Urg9Y,7433
58
58
  langfun/core/llms/cache/__init__.py,sha256=QAo3InUMDM_YpteNnVCSejI4zOsnjSMWKJKzkb3VY64,993
59
59
  langfun/core/llms/cache/base.py,sha256=cFfYvOIUae842pncqCAsRvqXCk2AnAsRYVx0mcIoAeY,3338
@@ -87,8 +87,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
87
87
  langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
88
88
  langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
89
89
  langfun/core/templates/selfplay_test.py,sha256=ZkDfwiW9OtO_MOIdVTRPn6P6vOExQIszqlVQHg5iD3U,2066
90
- langfun-0.0.2.dev20240201.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
91
- langfun-0.0.2.dev20240201.dist-info/METADATA,sha256=BRcyCLLZnV3G5PLpO7ik2hVlnF0xkm7HQX5ckgyiXN8,3368
92
- langfun-0.0.2.dev20240201.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
93
- langfun-0.0.2.dev20240201.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
94
- langfun-0.0.2.dev20240201.dist-info/RECORD,,
90
+ langfun-0.0.2.dev20240202.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
91
+ langfun-0.0.2.dev20240202.dist-info/METADATA,sha256=YNH_28StpsuvajP3UyX4a4-RjS3ErqeooJE_l55L1cQ,3368
92
+ langfun-0.0.2.dev20240202.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
93
+ langfun-0.0.2.dev20240202.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
94
+ langfun-0.0.2.dev20240202.dist-info/RECORD,,