langfun 0.1.2.dev202410180804__py3-none-any.whl → 0.1.2.dev202410190803__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/concurrent.py +6 -0
- langfun/core/eval/base.py +11 -7
- langfun/core/eval/matching.py +9 -8
- langfun/core/eval/scoring.py +3 -1
- {langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/RECORD +9 -9
- {langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/LICENSE +0 -0
- {langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/top_level.txt +0 -0
langfun/core/concurrent.py
CHANGED
@@ -921,5 +921,11 @@ def _progress_control(
|
|
921
921
|
raise ValueError(f'Unsupported progress bar type: {progress_bar}')
|
922
922
|
|
923
923
|
|
924
|
+
def get_executor(
|
925
|
+
resource_id: str,
|
926
|
+
max_workers: int | None = None) -> concurrent.futures.ThreadPoolExecutor:
|
927
|
+
"""Gets a thread pool executor associated with a resource id."""
|
928
|
+
return _executor_pool.get(resource_id, max_workers)
|
929
|
+
|
924
930
|
# The global executor pool based on resource IDs.
|
925
931
|
_executor_pool = ExecutorPool()
|
langfun/core/eval/base.py
CHANGED
@@ -1087,7 +1087,7 @@ class Evaluation(Evaluable):
|
|
1087
1087
|
)
|
1088
1088
|
error = e
|
1089
1089
|
|
1090
|
-
copy.audit(example, output_message, error, dryrun=True)
|
1090
|
+
copy.audit(1, example, output_message, error, dryrun=True)
|
1091
1091
|
result = copy.finalize()
|
1092
1092
|
|
1093
1093
|
if verbose:
|
@@ -1124,19 +1124,20 @@ class Evaluation(Evaluable):
|
|
1124
1124
|
with lf.use_settings(debug=debug, cache=self.cache):
|
1125
1125
|
self._reset()
|
1126
1126
|
|
1127
|
-
def _process(
|
1127
|
+
def _process(idx_and_example: Any):
|
1128
1128
|
# NOTE(daiyip): set the `input` symbol of the globals to None, so LLM
|
1129
1129
|
# generated code with calls to `input` will raise an error, thus not
|
1130
1130
|
# blocking the evaluation.
|
1131
|
+
_, example = idx_and_example
|
1131
1132
|
with lf_coding.context(input=None):
|
1132
1133
|
output_message = self.process(example, **(self.additional_args or {}))
|
1133
1134
|
self.process_output(example, output_message)
|
1134
1135
|
return output_message
|
1135
1136
|
|
1136
1137
|
try:
|
1137
|
-
for example, message, error in lf.concurrent_map(
|
1138
|
+
for (idx, example), message, error in lf.concurrent_map(
|
1138
1139
|
_process,
|
1139
|
-
examples,
|
1140
|
+
enumerate(examples),
|
1140
1141
|
max_workers=self.max_workers,
|
1141
1142
|
show_progress=progress_bar or False,
|
1142
1143
|
status_fn=self._status,
|
@@ -1148,7 +1149,7 @@ class Evaluation(Evaluable):
|
|
1148
1149
|
if isinstance(error, lf_structured.MappingError)
|
1149
1150
|
else None
|
1150
1151
|
)
|
1151
|
-
self.audit(example, message, error)
|
1152
|
+
self.audit(idx + 1, example, message, error)
|
1152
1153
|
finally:
|
1153
1154
|
# Save cache upon completion or interruption.
|
1154
1155
|
if self.dir and self.cache:
|
@@ -1437,6 +1438,7 @@ class Evaluation(Evaluable):
|
|
1437
1438
|
|
1438
1439
|
def audit(
|
1439
1440
|
self,
|
1441
|
+
example_idx: int,
|
1440
1442
|
example: Any,
|
1441
1443
|
message: lf.Message | None,
|
1442
1444
|
error: Exception | None = None,
|
@@ -1445,6 +1447,7 @@ class Evaluation(Evaluable):
|
|
1445
1447
|
"""Audits the example against the output. Subclasses should override.
|
1446
1448
|
|
1447
1449
|
Args:
|
1450
|
+
example_idx: 1-based index of the example in its dataset.
|
1448
1451
|
example: The input object.
|
1449
1452
|
message: The entire message returned by the LM, which could be used to
|
1450
1453
|
trace the LM input, response and parsed structure. If error is raised
|
@@ -1465,7 +1468,7 @@ class Evaluation(Evaluable):
|
|
1465
1468
|
else:
|
1466
1469
|
assert message is not None
|
1467
1470
|
output = message.text if self.schema is None else message.result
|
1468
|
-
self.audit_processed(example, output, message, dryrun=dryrun)
|
1471
|
+
self.audit_processed(example_idx, example, output, message, dryrun=dryrun)
|
1469
1472
|
|
1470
1473
|
# Audit usage.
|
1471
1474
|
if message is not None:
|
@@ -1482,7 +1485,8 @@ class Evaluation(Evaluable):
|
|
1482
1485
|
self._num_usages += 1
|
1483
1486
|
|
1484
1487
|
def audit_processed(
|
1485
|
-
self, example: Any, output: Any, message: lf.Message,
|
1488
|
+
self, example_idx: int, example: Any, output: Any, message: lf.Message,
|
1489
|
+
dryrun: bool = False
|
1486
1490
|
) -> None:
|
1487
1491
|
"""Audits a successfully processed example. Subclass should override."""
|
1488
1492
|
|
langfun/core/eval/matching.py
CHANGED
@@ -41,8 +41,8 @@ class Matching(base.Evaluation):
|
|
41
41
|
"""Returns the answer from the structure output."""
|
42
42
|
|
43
43
|
@property
|
44
|
-
def matches(self) -> list[tuple[Any, Any, lf.Message]]:
|
45
|
-
"""Returns the matches examples, outputs and the output messages."""
|
44
|
+
def matches(self) -> list[tuple[int, Any, Any, lf.Message]]:
|
45
|
+
"""Returns the matches IDs, examples, outputs and the output messages."""
|
46
46
|
return self._matches
|
47
47
|
|
48
48
|
@property
|
@@ -57,7 +57,7 @@ class Matching(base.Evaluation):
|
|
57
57
|
return self.num_matches / self.num_completed
|
58
58
|
|
59
59
|
@property
|
60
|
-
def mismatches(self) -> list[tuple[Any, Any, lf.Message]]:
|
60
|
+
def mismatches(self) -> list[tuple[int, Any, Any, lf.Message]]:
|
61
61
|
"""Returns the mismatches examples, outputs and output messages."""
|
62
62
|
return self._mismatches
|
63
63
|
|
@@ -87,7 +87,8 @@ class Matching(base.Evaluation):
|
|
87
87
|
self._mismatches = []
|
88
88
|
|
89
89
|
def audit_processed(
|
90
|
-
self, example: Any, output: Any, message: lf.Message,
|
90
|
+
self, example_idx: int, example: Any, output: Any, message: lf.Message,
|
91
|
+
dryrun: bool = False
|
91
92
|
) -> None:
|
92
93
|
groundtruth = self.groundtruth(example)
|
93
94
|
answer = self.answer(output, example)
|
@@ -107,9 +108,9 @@ class Matching(base.Evaluation):
|
|
107
108
|
)
|
108
109
|
|
109
110
|
if self.match(answer, groundtruth):
|
110
|
-
self._matches.append((example, output, message))
|
111
|
+
self._matches.append((example_idx, example, output, message))
|
111
112
|
else:
|
112
|
-
self._mismatches.append((example, output, message))
|
113
|
+
self._mismatches.append((example_idx, example, output, message))
|
113
114
|
|
114
115
|
def match(self, answer: Any, groundtruth: Any) -> bool:
|
115
116
|
"""Matches answer against the groundtruth. Subclasses can override."""
|
@@ -247,7 +248,7 @@ class Matching(base.Evaluation):
|
|
247
248
|
# Fall back to the default format.
|
248
249
|
return None
|
249
250
|
|
250
|
-
for i, (example, output, message) in enumerate(self.matches):
|
251
|
+
for i, (_, example, output, message) in enumerate(self.matches):
|
251
252
|
bgcolor = 'white' if i % 2 == 0 else '#DDDDDD'
|
252
253
|
s.write(f'<tr style="background-color: {bgcolor}"><td>{i + 1}</td>')
|
253
254
|
input_str = lf.repr_utils.escape_quoted(
|
@@ -282,7 +283,7 @@ class Matching(base.Evaluation):
|
|
282
283
|
'</tr>'
|
283
284
|
)
|
284
285
|
|
285
|
-
for i, (example, output, message) in enumerate(self.mismatches):
|
286
|
+
for i, (_, example, output, message) in enumerate(self.mismatches):
|
286
287
|
bgcolor = 'white' if i % 2 == 0 else '#DDDDDD'
|
287
288
|
s.write(f'<tr style="background-color: {bgcolor}"><td>{i + 1}</td>')
|
288
289
|
input_str = pg.format(example, verbose=False, max_bytes_len=32)
|
langfun/core/eval/scoring.py
CHANGED
@@ -62,8 +62,10 @@ class Scoring(base.Evaluation):
|
|
62
62
|
self._scored = []
|
63
63
|
|
64
64
|
def audit_processed(
|
65
|
-
self, example: Any, output: Any, message: lf.Message,
|
65
|
+
self, example_idx: int, example: Any, output: Any, message: lf.Message,
|
66
|
+
dryrun: bool = False
|
66
67
|
) -> None:
|
68
|
+
del example_idx
|
67
69
|
score = self.score(example, output)
|
68
70
|
|
69
71
|
if dryrun:
|
{langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/RECORD
@@ -2,7 +2,7 @@ langfun/__init__.py,sha256=mCES7t3R7Z-ZQYvG38-yrVqZubrXNfGCa8tI5HGB7mE,2274
|
|
2
2
|
langfun/core/__init__.py,sha256=xlvFTXc7IKUTs8aCFRFhzOLTmmeuhXgk9yx2InBLNiA,4937
|
3
3
|
langfun/core/component.py,sha256=kOWdhEYlGw62CO_7aB_oAdivVhnDfyoymRXHr10VtLo,11502
|
4
4
|
langfun/core/component_test.py,sha256=sG-T2wpvBfHqWGZE7sc4NayJj2aj5QFBzSwFiwrGEIc,10376
|
5
|
-
langfun/core/concurrent.py,sha256=
|
5
|
+
langfun/core/concurrent.py,sha256=ivUwaswRrwldLRZ_p4R3TUy6XqyWY9UdJ8oo2_1Ic0s,29838
|
6
6
|
langfun/core/concurrent_test.py,sha256=F9kQKK0D6CHOejckFcVjCB-ThkBN8Oa4P8WV7FOhxIM,17042
|
7
7
|
langfun/core/console.py,sha256=bk5rNPNm9rMGW5YT2HixxU04p2umnoabn5SDz6Dqe88,2317
|
8
8
|
langfun/core/console_test.py,sha256=5SYJdxpJGLgdSSQqqMPoA1X6jpsLD8rgcyk-EgI65oE,1077
|
@@ -44,13 +44,13 @@ langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-d
|
|
44
44
|
langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
|
45
45
|
langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
|
46
46
|
langfun/core/eval/__init__.py,sha256=Ogdr9OtTywhhLPHi3AZzOD2mXX2oyaHWflrSTMm96uA,1899
|
47
|
-
langfun/core/eval/base.py,sha256=
|
47
|
+
langfun/core/eval/base.py,sha256=mF3JUUvuDafNDnt0G_0yv022yGZgTy1NAxDMZonvoV8,75430
|
48
48
|
langfun/core/eval/base_test.py,sha256=gAxamZKvHzT3wVr3xVtXLiwBBi5VFX1I29_QuKSR4Wg,26958
|
49
|
-
langfun/core/eval/matching.py,sha256=
|
49
|
+
langfun/core/eval/matching.py,sha256=UnjdM_ebPqXKJamY4lvL3AYxrMIz3LqkjRTnHJ5xsYc,9349
|
50
50
|
langfun/core/eval/matching_test.py,sha256=QCoYEuf4b_1bkHqUCuRzKMbXHrV3AB2FCOBivo1stC4,5249
|
51
51
|
langfun/core/eval/patching.py,sha256=R0s2eAd1m97exQt06dmUL0V_MBG0W2Hxg7fhNB7cXW0,3866
|
52
52
|
langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrCG1L6w,4775
|
53
|
-
langfun/core/eval/scoring.py,sha256=
|
53
|
+
langfun/core/eval/scoring.py,sha256=SUdMzOkP0n2qGaSuUA4VwFiTw36jgMvgCJHPJS4yYDw,6254
|
54
54
|
langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se2SXM,4546
|
55
55
|
langfun/core/llms/__init__.py,sha256=baM-YFySnOyijgmMjv1mKPYDvdlny2Md2aZ5bWyxPfc,5859
|
56
56
|
langfun/core/llms/anthropic.py,sha256=zPisDmmt6zq-U6hEnoSgReXujmmAj7VGs4KHgzcw1EU,8976
|
@@ -119,8 +119,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
|
|
119
119
|
langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
|
120
120
|
langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
|
121
121
|
langfun/core/templates/selfplay_test.py,sha256=Ot__1P1M8oJfoTp-M9-PQ6HUXqZKyMwvZ5f7yQ3yfyM,2326
|
122
|
-
langfun-0.1.2.
|
123
|
-
langfun-0.1.2.
|
124
|
-
langfun-0.1.2.
|
125
|
-
langfun-0.1.2.
|
126
|
-
langfun-0.1.2.
|
122
|
+
langfun-0.1.2.dev202410190803.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
123
|
+
langfun-0.1.2.dev202410190803.dist-info/METADATA,sha256=P6h0k_SY9HTbCZd6foSjU6qj0-YrljfTw2Q97T5WWg0,8890
|
124
|
+
langfun-0.1.2.dev202410190803.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
125
|
+
langfun-0.1.2.dev202410190803.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
|
126
|
+
langfun-0.1.2.dev202410190803.dist-info/RECORD,,
|
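{langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/LICENSE
RENAMED
File without changes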
|
{langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/WHEEL
RENAMED
File without changes
|
{langfun-0.1.2.dev202410180804.dist-info → langfun-0.1.2.dev202410190803.dist-info}/top_level.txt
RENAMED
File without changes
|