langfun 0.0.2.dev20240201__py3-none-any.whl → 0.0.2.dev20240202__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/eval/base.py +20 -12
- langfun/core/eval/matching.py +10 -9
- langfun/core/llms/openai.py +12 -9
- {langfun-0.0.2.dev20240201.dist-info → langfun-0.0.2.dev20240202.dist-info}/METADATA +1 -1
- {langfun-0.0.2.dev20240201.dist-info → langfun-0.0.2.dev20240202.dist-info}/RECORD +8 -8
- {langfun-0.0.2.dev20240201.dist-info → langfun-0.0.2.dev20240202.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240201.dist-info → langfun-0.0.2.dev20240202.dist-info}/WHEEL +0 -0
- {langfun-0.0.2.dev20240201.dist-info → langfun-0.0.2.dev20240202.dist-info}/top_level.txt +0 -0
langfun/core/eval/base.py
CHANGED
@@ -57,6 +57,10 @@ class Evaluable(lf.Component):
|
|
57
57
|
),
|
58
58
|
] = lf.contextual(default=None)
|
59
59
|
|
60
|
+
report_precision: Annotated[
|
61
|
+
int, 'Number of decimals when reporting precision.'
|
62
|
+
] = lf.contextual(default=1)
|
63
|
+
|
60
64
|
@property
|
61
65
|
def dir(self) -> str | None:
|
62
66
|
"""Returns the directory for saving results and details."""
|
@@ -1045,12 +1049,12 @@ class Evaluation(Evaluable):
|
|
1045
1049
|
def _status(self, progress: lf.concurrent.Progress) -> dict[str, Any]:
|
1046
1050
|
return {
|
1047
1051
|
'Model': self.lm.model_id,
|
1048
|
-
'Succeeded': '%.
|
1052
|
+
'Succeeded': f'%.{self.report_precision}f%% (%d/%d)' % (
|
1049
1053
|
progress.success_rate * 100,
|
1050
1054
|
progress.succeeded,
|
1051
1055
|
progress.completed,
|
1052
1056
|
),
|
1053
|
-
'Failed': '%.
|
1057
|
+
'Failed': f'%.{self.report_precision}f%% (%d/%d)' % (
|
1054
1058
|
progress.failure_rate * 100,
|
1055
1059
|
progress.failed,
|
1056
1060
|
progress.completed,
|
@@ -1060,14 +1064,18 @@ class Evaluation(Evaluable):
|
|
1060
1064
|
def _completion_status(self, run_status: str) -> str:
|
1061
1065
|
assert self.result is not None
|
1062
1066
|
m = self.result.metrics
|
1063
|
-
return
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1067
|
+
return (
|
1068
|
+
f'COMPLETED(%s): Successes=%.{self.report_precision}f%% (%d/%d)'
|
1069
|
+
f' Failures=%.{self.report_precision}f%% (%d/%d)'
|
1070
|
+
% (
|
1071
|
+
run_status,
|
1072
|
+
(1 - m.failure_rate) * 100,
|
1073
|
+
m.total - m.failures,
|
1074
|
+
m.total,
|
1075
|
+
m.failure_rate * 100,
|
1076
|
+
m.failures,
|
1077
|
+
m.total,
|
1078
|
+
)
|
1071
1079
|
)
|
1072
1080
|
|
1073
1081
|
def summarize(self) -> pg.Dict:
|
@@ -1130,7 +1138,7 @@ class Evaluation(Evaluable):
|
|
1130
1138
|
m.failures,
|
1131
1139
|
m.total,
|
1132
1140
|
self.failures_link,
|
1133
|
-
'%.
|
1141
|
+
f'%.{self.report_precision}f%% ' % (m.failure_rate * 100),
|
1134
1142
|
)
|
1135
1143
|
)
|
1136
1144
|
|
@@ -1218,7 +1226,7 @@ class Evaluation(Evaluable):
|
|
1218
1226
|
s.write(
|
1219
1227
|
'<td><span style="color:orange">%s</span>%s</td>'
|
1220
1228
|
% (
|
1221
|
-
'%.
|
1229
|
+
f'%.{self.report_precision}f%%' % (self.failure_rate * 100),
|
1222
1230
|
'<a href="%s">(%d/%d)</a>'
|
1223
1231
|
% (self.failures_link, self.num_failures, self.num_completed),
|
1224
1232
|
)
|
langfun/core/eval/matching.py
CHANGED
@@ -102,17 +102,17 @@ class Matching(base.Evaluation):
|
|
102
102
|
del progress
|
103
103
|
return {
|
104
104
|
'Model': self.lm.model_id,
|
105
|
-
'Matches': '%.
|
105
|
+
'Matches': f'%.{self.report_precision}f%% (%d/%d)' % (
|
106
106
|
self.match_rate * 100,
|
107
107
|
self.num_matches,
|
108
108
|
self.num_completed,
|
109
109
|
),
|
110
|
-
'Mismatches': '%.
|
110
|
+
'Mismatches': f'%.{self.report_precision}f%% (%d/%d)' % (
|
111
111
|
self.mismatch_rate * 100,
|
112
112
|
self.num_mismatches,
|
113
113
|
self.num_completed,
|
114
114
|
),
|
115
|
-
'Failed': '%.
|
115
|
+
'Failed': f'%.{self.report_precision}f%% (%d/%d)' % (
|
116
116
|
self.failure_rate * 100,
|
117
117
|
self.num_failures,
|
118
118
|
self.num_completed,
|
@@ -123,8 +123,9 @@ class Matching(base.Evaluation):
|
|
123
123
|
assert self.result is not None
|
124
124
|
m = self.result.metrics
|
125
125
|
return (
|
126
|
-
'COMPLETED(%s): Matches=%.
|
127
|
-
'
|
126
|
+
f'COMPLETED(%s): Matches=%.{self.report_precision}f%% (%d/%d)'
|
127
|
+
f' Mismatches=%.{self.report_precision}f%% (%d/%d)'
|
128
|
+
f' Failures=%.{self.report_precision}f%% (%d/%d)'
|
128
129
|
) % (
|
129
130
|
run_status,
|
130
131
|
m.match_rate * 100,
|
@@ -202,7 +203,7 @@ class Matching(base.Evaluation):
|
|
202
203
|
s.write(
|
203
204
|
'<td><span style="color:red">%s</span>%s</td>'
|
204
205
|
% (
|
205
|
-
'%.
|
206
|
+
f'%.{self.report_precision}f%% ' % (self.mismatch_rate * 100),
|
206
207
|
'<a href="%s">(%d/%d)</a>'
|
207
208
|
% (self.mismatches_link, self.num_mismatches, self.num_completed),
|
208
209
|
)
|
@@ -210,7 +211,7 @@ class Matching(base.Evaluation):
|
|
210
211
|
s.write(
|
211
212
|
'<td><span style="color:green">%s</span>%s</td>'
|
212
213
|
% (
|
213
|
-
'%.
|
214
|
+
f'%.{self.report_precision}f%% ' % (self.match_rate * 100),
|
214
215
|
'<a href="%s">(%d/%d)</a>'
|
215
216
|
% (self.matches_link, self.num_matches, self.num_completed),
|
216
217
|
)
|
@@ -226,7 +227,7 @@ class Matching(base.Evaluation):
|
|
226
227
|
m.num_matches,
|
227
228
|
m.total,
|
228
229
|
self.matches_link,
|
229
|
-
'%.
|
230
|
+
f'%.{self.report_precision}f%% ' % (m.match_rate * 100),
|
230
231
|
)
|
231
232
|
)
|
232
233
|
s.write(' | ')
|
@@ -236,7 +237,7 @@ class Matching(base.Evaluation):
|
|
236
237
|
m.num_mismatches,
|
237
238
|
m.total,
|
238
239
|
self.mismatches_link,
|
239
|
-
'%.
|
240
|
+
f'%.{self.report_precision}f%% ' % (m.mismatch_rate * 100),
|
240
241
|
)
|
241
242
|
)
|
242
243
|
s.write(' | ')
|
langfun/core/llms/openai.py
CHANGED
@@ -44,29 +44,32 @@ SUPPORTED_MODELS_AND_SETTINGS = [
|
|
44
44
|
# Model name, max concurrent requests.
|
45
45
|
# The concurrent requests is estimated by TPM/RPM from
|
46
46
|
# https://platform.openai.com/account/limits
|
47
|
-
#
|
48
|
-
('gpt-4-
|
49
|
-
('gpt-4-
|
50
|
-
#
|
47
|
+
# GPT-4 Turbo models.
|
48
|
+
('gpt-4-turbo-preview', 1), # GPT-4 Turbo.
|
49
|
+
('gpt-4-0125-preview', 1), # GPT-4 Turbo
|
50
|
+
('gpt-4-1106-preview', 1), # GPT-4 Turbo
|
51
|
+
('gpt-4-vision-preview', 1), # GPT-4 Turbo with Vision.
|
52
|
+
# GPT-4 models.
|
51
53
|
('gpt-4', 4),
|
52
54
|
('gpt-4-0613', 4),
|
53
55
|
('gpt-4-0314', 4),
|
54
56
|
('gpt-4-32k', 4),
|
55
57
|
('gpt-4-32k-0613', 4),
|
56
58
|
('gpt-4-32k-0314', 4),
|
57
|
-
#
|
59
|
+
# GPT-3.5 Turbo models.
|
58
60
|
('gpt-3.5-turbo', 16),
|
61
|
+
('gpt-3.5-turbo-0125', 16),
|
59
62
|
('gpt-3.5-turbo-1106', 16),
|
60
63
|
('gpt-3.5-turbo-0613', 16),
|
61
64
|
('gpt-3.5-turbo-0301', 16),
|
62
65
|
('gpt-3.5-turbo-16k', 16),
|
63
66
|
('gpt-3.5-turbo-16k-0613', 16),
|
64
67
|
('gpt-3.5-turbo-16k-0301', 16),
|
65
|
-
#
|
66
|
-
('text-davinci-003', 8), #
|
68
|
+
# GPT-3.5 models.
|
69
|
+
('text-davinci-003', 8), # GPT-3.5, trained with RLHF.
|
67
70
|
('text-davinci-002', 4), # Trained with SFT but no RLHF.
|
68
71
|
('code-davinci-002', 4),
|
69
|
-
#
|
72
|
+
# GPT-3 instruction-tuned models.
|
70
73
|
('text-curie-001', 4),
|
71
74
|
('text-babbage-001', 4),
|
72
75
|
('text-ada-001', 4),
|
@@ -74,7 +77,7 @@ SUPPORTED_MODELS_AND_SETTINGS = [
|
|
74
77
|
('curie', 4),
|
75
78
|
('babbage', 4),
|
76
79
|
('ada', 4),
|
77
|
-
#
|
80
|
+
# GPT-3 base models without instruction tuning.
|
78
81
|
('babbage-002', 4),
|
79
82
|
('davinci-002', 4),
|
80
83
|
]
|
@@ -40,9 +40,9 @@ langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-d
|
|
40
40
|
langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
|
41
41
|
langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
|
42
42
|
langfun/core/eval/__init__.py,sha256=iDA2OcJ3kR6ixZizXIY3N9LsjkaVrfTbSClTiSP8ekY,1291
|
43
|
-
langfun/core/eval/base.py,sha256=
|
43
|
+
langfun/core/eval/base.py,sha256=wWFDDrf0jBzs9H_5XfdZSeOBGXyUtXAJJouk7cLckSM,52602
|
44
44
|
langfun/core/eval/base_test.py,sha256=bGs3VLchkAJFWYJ8FdR7mC6qoDestAvCHOQpClG6Mzw,21248
|
45
|
-
langfun/core/eval/matching.py,sha256=
|
45
|
+
langfun/core/eval/matching.py,sha256=g2yuBb4FeOlAlB10hqdWvaIg4QVQlJbiViRDcD2Y8go,9567
|
46
46
|
langfun/core/eval/matching_test.py,sha256=IfuMF_dEmy4VzK6tIldRzD2Nqlml7SSh4u-baFNcZrw,4912
|
47
47
|
langfun/core/eval/scoring.py,sha256=mshqbV_WM0zcp15TSR32ACMBDymlsbf6YH06PPx1Tw0,6139
|
48
48
|
langfun/core/eval/scoring_test.py,sha256=_L_B40VZkyI2_PJce-jVKYC4llrO4jGUR5j86Gu6AT0,4046
|
@@ -53,7 +53,7 @@ langfun/core/llms/gemini.py,sha256=p3d4Cl2uET-os1n_V3YNE6-6cYrZjndj7lxZIk2E8_4,5
|
|
53
53
|
langfun/core/llms/gemini_test.py,sha256=ybNNCn3JW3hYpMe0wT5ILGDrMPaYYU8PN2kSookM0jk,5433
|
54
54
|
langfun/core/llms/llama_cpp.py,sha256=EIjJa1-Tg4_VaIxVR88oDWSWc_axc1r2KwSPpl4PSp0,2549
|
55
55
|
langfun/core/llms/llama_cpp_test.py,sha256=ZxC6defGd_HX9SFRU9U4cJiQnBKundbOrchbXuC1Z2M,1683
|
56
|
-
langfun/core/llms/openai.py,sha256=
|
56
|
+
langfun/core/llms/openai.py,sha256=ao2sDDoh5ma1GWpLpNPZARIeLZK55gL1Ldc94h1EGtE,11119
|
57
57
|
langfun/core/llms/openai_test.py,sha256=JWcMveifVVVEFWdtmNq1irc9wSFQRxXs-SnOF3Urg9Y,7433
|
58
58
|
langfun/core/llms/cache/__init__.py,sha256=QAo3InUMDM_YpteNnVCSejI4zOsnjSMWKJKzkb3VY64,993
|
59
59
|
langfun/core/llms/cache/base.py,sha256=cFfYvOIUae842pncqCAsRvqXCk2AnAsRYVx0mcIoAeY,3338
|
@@ -87,8 +87,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
|
|
87
87
|
langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
|
88
88
|
langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
|
89
89
|
langfun/core/templates/selfplay_test.py,sha256=ZkDfwiW9OtO_MOIdVTRPn6P6vOExQIszqlVQHg5iD3U,2066
|
90
|
-
langfun-0.0.2.
|
91
|
-
langfun-0.0.2.
|
92
|
-
langfun-0.0.2.
|
93
|
-
langfun-0.0.2.
|
94
|
-
langfun-0.0.2.
|
90
|
+
langfun-0.0.2.dev20240202.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
91
|
+
langfun-0.0.2.dev20240202.dist-info/METADATA,sha256=YNH_28StpsuvajP3UyX4a4-RjS3ErqeooJE_l55L1cQ,3368
|
92
|
+
langfun-0.0.2.dev20240202.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
93
|
+
langfun-0.0.2.dev20240202.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
|
94
|
+
langfun-0.0.2.dev20240202.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|