langfun 0.1.1.dev20240729__py3-none-any.whl → 0.1.1.dev20240730__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/eval/base.py +36 -24
- langfun/core/eval/matching.py +12 -16
- langfun/core/eval/scoring.py +6 -8
- {langfun-0.1.1.dev20240729.dist-info → langfun-0.1.1.dev20240730.dist-info}/METADATA +1 -1
- {langfun-0.1.1.dev20240729.dist-info → langfun-0.1.1.dev20240730.dist-info}/RECORD +8 -8
- {langfun-0.1.1.dev20240729.dist-info → langfun-0.1.1.dev20240730.dist-info}/WHEEL +1 -1
- {langfun-0.1.1.dev20240729.dist-info → langfun-0.1.1.dev20240730.dist-info}/LICENSE +0 -0
- {langfun-0.1.1.dev20240729.dist-info → langfun-0.1.1.dev20240730.dist-info}/top_level.txt +0 -0
langfun/core/eval/base.py
CHANGED
@@ -1304,20 +1304,22 @@ class Evaluation(Evaluable):
|
|
1304
1304
|
s = io.StringIO()
|
1305
1305
|
definition = _html_repr(self, compact=False, escape=True)
|
1306
1306
|
s.write('<div><table><tr><td>')
|
1307
|
+
self._render_link(
|
1308
|
+
s,
|
1309
|
+
definition,
|
1310
|
+
self.hash,
|
1311
|
+
'',
|
1312
|
+
lambda: self.link(self.dir),
|
1313
|
+
)
|
1307
1314
|
if self.result is None:
|
1308
1315
|
s.write(
|
1309
|
-
f'<a target="_blank" title="{definition}" '
|
1310
|
-
f'href="{self.link(self.dir)}">{self.hash}</a>'
|
1311
1316
|
'</td></tr><tr><td>'
|
1312
1317
|
'<span style="color: gray">(IN-PROGRESS...)</span>'
|
1313
1318
|
)
|
1314
1319
|
else:
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
f' [<a href="{self.link(self.dir)}">dir</a>]'
|
1319
|
-
'</td></tr><tr><td>'
|
1320
|
-
)
|
1320
|
+
if self.dir:
|
1321
|
+
s.write(f' [<a href="{self.link(self.dir)}">dir</a>]')
|
1322
|
+
s.write('</td></tr><tr><td>')
|
1321
1323
|
self._render_summary_metrics(s)
|
1322
1324
|
|
1323
1325
|
# Summarize average usage.
|
@@ -1341,6 +1343,20 @@ class Evaluation(Evaluable):
|
|
1341
1343
|
f'" style="color:gray">({total} tokens)</a>'
|
1342
1344
|
)
|
1343
1345
|
|
1346
|
+
def _render_link(self,
|
1347
|
+
s: io.StringIO,
|
1348
|
+
title: str,
|
1349
|
+
text: str,
|
1350
|
+
style: str,
|
1351
|
+
url_fn: Callable[[], str]) -> None:
|
1352
|
+
"""Renders a link in HTML."""
|
1353
|
+
s.write(
|
1354
|
+
f'<a target="_blank" title="{title}" style="{style}"'
|
1355
|
+
)
|
1356
|
+
if self.dir:
|
1357
|
+
s.write(f' href="{url_fn()}"')
|
1358
|
+
s.write(f'>{text}</a>')
|
1359
|
+
|
1344
1360
|
def _render_summary_metrics(self, s: io.StringIO) -> None:
|
1345
1361
|
"""Renders metrics in HTML."""
|
1346
1362
|
assert self.result is not None
|
@@ -1362,14 +1378,12 @@ class Evaluation(Evaluable):
|
|
1362
1378
|
extra_style = ''
|
1363
1379
|
if m.oop_failure_rate > 0.1 and m.oop_failures > 3:
|
1364
1380
|
extra_style = ';font-weight:bold'
|
1365
|
-
|
1366
|
-
|
1367
|
-
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
self._format_rate(m.oop_failure_rate),
|
1372
|
-
)
|
1381
|
+
self._render_link(
|
1382
|
+
s,
|
1383
|
+
oop_failure_title,
|
1384
|
+
self._format_rate(m.oop_failure_rate),
|
1385
|
+
f'color:magenta{extra_style}',
|
1386
|
+
lambda: self.oop_failures_link,
|
1373
1387
|
)
|
1374
1388
|
s.write(' | ')
|
1375
1389
|
|
@@ -1387,14 +1401,12 @@ class Evaluation(Evaluable):
|
|
1387
1401
|
)
|
1388
1402
|
|
1389
1403
|
extra_style = ';font-weight:bold' if m.non_oop_failures > 0 else ''
|
1390
|
-
|
1391
|
-
|
1392
|
-
|
1393
|
-
|
1394
|
-
|
1395
|
-
|
1396
|
-
self._format_rate(m.non_oop_failure_rate),
|
1397
|
-
)
|
1404
|
+
self._render_link(
|
1405
|
+
s,
|
1406
|
+
non_oop_failure_title,
|
1407
|
+
self._format_rate(m.non_oop_failure_rate),
|
1408
|
+
f'color:red{extra_style}',
|
1409
|
+
lambda: self.non_oop_failures_link,
|
1398
1410
|
)
|
1399
1411
|
|
1400
1412
|
def _format_rate(self, rate: float) -> str:
|
langfun/core/eval/matching.py
CHANGED
@@ -239,24 +239,20 @@ class Matching(base.Evaluation):
|
|
239
239
|
"""Renders metrics in HTML."""
|
240
240
|
assert self.result is not None
|
241
241
|
m = self.result.metrics
|
242
|
-
|
243
|
-
|
244
|
-
% (
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
self._format_rate(m.match_rate),
|
249
|
-
)
|
242
|
+
self._render_link(
|
243
|
+
s,
|
244
|
+
'Matches (%d/%d)' % (m.num_matches, m.total),
|
245
|
+
self._format_rate(m.match_rate),
|
246
|
+
'color:green',
|
247
|
+
lambda: self.matches_link,
|
250
248
|
)
|
251
249
|
s.write(' | ')
|
252
|
-
|
253
|
-
|
254
|
-
% (
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
self._format_rate(m.mismatch_rate),
|
259
|
-
)
|
250
|
+
self._render_link(
|
251
|
+
s,
|
252
|
+
'Mismatches (%d/%d)' % (m.num_mismatches, m.total),
|
253
|
+
self._format_rate(m.mismatch_rate),
|
254
|
+
'color:orange',
|
255
|
+
lambda: self.mismatches_link,
|
260
256
|
)
|
261
257
|
s.write(' | ')
|
262
258
|
super()._render_summary_metrics(s)
|
langfun/core/eval/scoring.py
CHANGED
@@ -172,14 +172,12 @@ class Scoring(base.Evaluation):
|
|
172
172
|
"""Renders metrics in HTML."""
|
173
173
|
assert self.result is not None
|
174
174
|
m = self.result.metrics
|
175
|
-
|
176
|
-
|
177
|
-
% (
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
'%.2f%%' % (m.score_rate * 100),
|
182
|
-
)
|
175
|
+
self._render_link(
|
176
|
+
s,
|
177
|
+
'Average score (%d/%d)' % (m.num_scored, m.total),
|
178
|
+
'%.2f (%.2f%%)' % (m.avg_score, m.score_rate * 100),
|
179
|
+
'color:green',
|
180
|
+
lambda: self.scored_link,
|
183
181
|
)
|
184
182
|
s.write(' | ')
|
185
183
|
super()._render_summary_metrics(s)
|
@@ -44,13 +44,13 @@ langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-d
|
|
44
44
|
langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
|
45
45
|
langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
|
46
46
|
langfun/core/eval/__init__.py,sha256=Evt-E4FEhZF2tXL6-byh_AyA7Cc_ZoGmvnN7vkAZedk,1898
|
47
|
-
langfun/core/eval/base.py,sha256=
|
47
|
+
langfun/core/eval/base.py,sha256=qZNnI89xbpP_jKthGe6LmS08VWFb1MfF2J6MtoHiTJw,74429
|
48
48
|
langfun/core/eval/base_test.py,sha256=cHOTIWVW4Dp8gKKIKcZrAcJ-w84j2GIozTzJoiAX7p4,26743
|
49
|
-
langfun/core/eval/matching.py,sha256=
|
49
|
+
langfun/core/eval/matching.py,sha256=jJy5YRARPwg4xchWQWMyBIQ-GoCZzfoFpMX8NjOFTnE,9690
|
50
50
|
langfun/core/eval/matching_test.py,sha256=f7iVyXH5KGJBWt4Wp14Bt9J3X59A6Ayfog9MbuFvPew,5532
|
51
51
|
langfun/core/eval/patching.py,sha256=R0s2eAd1m97exQt06dmUL0V_MBG0W2Hxg7fhNB7cXW0,3866
|
52
52
|
langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrCG1L6w,4775
|
53
|
-
langfun/core/eval/scoring.py,sha256=
|
53
|
+
langfun/core/eval/scoring.py,sha256=sDb0MuwEtkbAZOGzZ-gxtLdFtzWBHh-Kl8qGmKlWdNo,6350
|
54
54
|
langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se2SXM,4546
|
55
55
|
langfun/core/llms/__init__.py,sha256=sRD_PjfD5wKuzANCOCjChjuZHUn2Q1WruZeVflqej5M,4609
|
56
56
|
langfun/core/llms/anthropic.py,sha256=Gon3fOi31RhZFgNd0ijyTnKnUdp9hrWrCoSXyO4UaLw,7316
|
@@ -117,8 +117,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
|
|
117
117
|
langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
|
118
118
|
langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
|
119
119
|
langfun/core/templates/selfplay_test.py,sha256=rBW2Qr8yi-aWYwoTwRR-n1peKyMX9QXPZXURjLgoiRs,2264
|
120
|
-
langfun-0.1.1.
|
121
|
-
langfun-0.1.1.
|
122
|
-
langfun-0.1.1.
|
123
|
-
langfun-0.1.1.
|
124
|
-
langfun-0.1.1.
|
120
|
+
langfun-0.1.1.dev20240730.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
121
|
+
langfun-0.1.1.dev20240730.dist-info/METADATA,sha256=g9OiRFzhLhJmZNWlBX0oRQOTvMXVUHXW0yCkhXpARDE,5247
|
122
|
+
langfun-0.1.1.dev20240730.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
123
|
+
langfun-0.1.1.dev20240730.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
|
124
|
+
langfun-0.1.1.dev20240730.dist-info/RECORD,,
|
File without changes
|
File without changes
|