langfun 0.1.2.dev202412180804__py3-none-any.whl → 0.1.2.dev202412200804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/eval/v2/checkpointing.py +63 -8
- langfun/core/eval/v2/evaluation.py +80 -6
- langfun/core/eval/v2/evaluation_test.py +6 -0
- langfun/core/eval/v2/experiment.py +8 -0
- langfun/core/eval/v2/reporting.py +58 -25
- langfun/core/eval/v2/runners.py +71 -36
- langfun/core/logging.py +19 -0
- langfun/core/logging_test.py +19 -0
- langfun/core/structured/parsing.py +24 -17
- langfun/core/structured/parsing_test.py +25 -0
- langfun/core/structured/querying.py +10 -1
- langfun/core/structured/querying_test.py +10 -0
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412200804.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412200804.dist-info}/RECORD +17 -17
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412200804.dist-info}/LICENSE +0 -0
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412200804.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412200804.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,7 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
"""Checkpointing evaluation runs."""
|
15
15
|
import threading
|
16
|
+
import traceback
|
16
17
|
|
17
18
|
import langfun.core as lf
|
18
19
|
from langfun.core.eval.v2 import example as example_lib
|
@@ -27,6 +28,21 @@ Runner = experiment_lib.Runner
|
|
27
28
|
class Checkpointer(experiment_lib.Plugin):
|
28
29
|
"""Base class for checkpointing evaluation examples."""
|
29
30
|
|
31
|
+
def on_experiment_start(self, experiment: Experiment):
|
32
|
+
if experiment.state.evaluated_examples:
|
33
|
+
experiment.info(
|
34
|
+
'Loaded %d examples from checkpoint files. Example IDs: %s' %
|
35
|
+
(
|
36
|
+
len(experiment.state.evaluated_examples),
|
37
|
+
list(sorted(experiment.state.evaluated_examples.keys()))
|
38
|
+
),
|
39
|
+
)
|
40
|
+
else:
|
41
|
+
experiment.info(
|
42
|
+
'No previous evaluated examples are loaded. '
|
43
|
+
f'Experiment {experiment.id} starts from scratch.'
|
44
|
+
)
|
45
|
+
|
30
46
|
|
31
47
|
class PerExampleCheckpointer(Checkpointer):
|
32
48
|
"""Checkpointer that saves each example to a separate file."""
|
@@ -68,10 +84,11 @@ class PerExampleCheckpointer(Checkpointer):
|
|
68
84
|
_load_state, ckpt_files, max_workers=64,
|
69
85
|
):
|
70
86
|
if error is not None:
|
71
|
-
|
87
|
+
experiment.warning(
|
72
88
|
'Failed to load checkpoint file %s: %s. Skipping the file.',
|
73
89
|
ckpt_file, error
|
74
90
|
)
|
91
|
+
super().on_experiment_start(experiment)
|
75
92
|
|
76
93
|
def on_example_complete(
|
77
94
|
self,
|
@@ -80,7 +97,11 @@ class PerExampleCheckpointer(Checkpointer):
|
|
80
97
|
example: Example,
|
81
98
|
) -> None:
|
82
99
|
"""Saves the example to the checkpoint file."""
|
83
|
-
if
|
100
|
+
if example.has_error:
|
101
|
+
experiment.warning(
|
102
|
+
f'Example {example.id} has error. Skipping checkpointing.'
|
103
|
+
)
|
104
|
+
else:
|
84
105
|
def save_state(example: Example):
|
85
106
|
writer = SequenceWriter(
|
86
107
|
runner.current_run.output_path_for(
|
@@ -91,8 +112,18 @@ class PerExampleCheckpointer(Checkpointer):
|
|
91
112
|
)
|
92
113
|
)
|
93
114
|
)
|
94
|
-
|
95
|
-
|
115
|
+
try:
|
116
|
+
writer.add(example)
|
117
|
+
writer.close()
|
118
|
+
experiment.info(
|
119
|
+
f'Example {example.id} is saved to {writer.path}.',
|
120
|
+
)
|
121
|
+
except BaseException as e: # pylint: disable=broad-except
|
122
|
+
experiment.error(
|
123
|
+
f'Failed to save example {example.id} to {writer.path}. '
|
124
|
+
f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
|
125
|
+
)
|
126
|
+
raise e
|
96
127
|
runner.background_run(save_state, example)
|
97
128
|
|
98
129
|
def _file_prefix_and_ext(self, filename: str) -> tuple[str, str]:
|
@@ -164,6 +195,7 @@ class BulkCheckpointer(Checkpointer):
|
|
164
195
|
with self._lock:
|
165
196
|
if self._sequence_writer is not None:
|
166
197
|
self._sequence_writer[experiment.id] = sequence_writer
|
198
|
+
super().on_experiment_start(experiment)
|
167
199
|
|
168
200
|
def on_experiment_complete(
|
169
201
|
self,
|
@@ -178,8 +210,12 @@ class BulkCheckpointer(Checkpointer):
|
|
178
210
|
if self._sequence_writer is not None:
|
179
211
|
# Make sure the writer is closed without delay so the file will be
|
180
212
|
# available immediately.
|
181
|
-
self._sequence_writer
|
182
|
-
|
213
|
+
writer = self._sequence_writer.pop(experiment.id)
|
214
|
+
writer.close()
|
215
|
+
experiment.info(
|
216
|
+
f'{len(experiment.state.evaluated_examples)} examples are '
|
217
|
+
f'checkpointed to {writer.path}.'
|
218
|
+
)
|
183
219
|
|
184
220
|
def on_example_complete(
|
185
221
|
self,
|
@@ -189,8 +225,22 @@ class BulkCheckpointer(Checkpointer):
|
|
189
225
|
) -> None:
|
190
226
|
"""Saves the example to the checkpoint file."""
|
191
227
|
assert experiment.id in self._sequence_writer
|
192
|
-
if
|
193
|
-
|
228
|
+
if example.has_error:
|
229
|
+
experiment.warning(
|
230
|
+
f'Example {example.id} has error. Skipping checkpointing.'
|
231
|
+
)
|
232
|
+
else:
|
233
|
+
def _save_example(example: Example):
|
234
|
+
writer = self._sequence_writer[experiment.id]
|
235
|
+
try:
|
236
|
+
writer.add(example)
|
237
|
+
except BaseException as e: # pylint: disable=broad-except
|
238
|
+
experiment.error(
|
239
|
+
f'Failed to save example {example.id} to {writer.path}. '
|
240
|
+
f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
|
241
|
+
)
|
242
|
+
raise e
|
243
|
+
runner.background_run(_save_example, example)
|
194
244
|
|
195
245
|
|
196
246
|
class SequenceWriter:
|
@@ -198,8 +248,13 @@ class SequenceWriter:
|
|
198
248
|
|
199
249
|
def __init__(self, path: str):
|
200
250
|
self._lock = threading.Lock()
|
251
|
+
self._path = path
|
201
252
|
self._sequence_writer = pg.io.open_sequence(path, 'w')
|
202
253
|
|
254
|
+
@property
|
255
|
+
def path(self) -> str:
|
256
|
+
return self._path
|
257
|
+
|
203
258
|
def add(self, example: Example):
|
204
259
|
example_blob = pg.to_json_str(
|
205
260
|
example,
|
@@ -14,7 +14,9 @@
|
|
14
14
|
"""Base class for Langfun evaluation tasks."""
|
15
15
|
|
16
16
|
import abc
|
17
|
+
import datetime
|
17
18
|
import functools
|
19
|
+
import threading
|
18
20
|
import time
|
19
21
|
|
20
22
|
from typing import Annotated, Any, Callable, Iterable
|
@@ -63,6 +65,8 @@ class Evaluation(experiment_lib.Experiment):
|
|
63
65
|
self.__dict__.pop('is_leaf', None)
|
64
66
|
self.__dict__.pop('children', None)
|
65
67
|
super()._on_bound()
|
68
|
+
self._log_entries = []
|
69
|
+
self._log_lock = threading.Lock()
|
66
70
|
|
67
71
|
#
|
68
72
|
# Handling evaluation hierarchy (materialized vs. hyper evaluations).
|
@@ -277,6 +281,48 @@ class Evaluation(experiment_lib.Experiment):
|
|
277
281
|
for metric in self.metrics:
|
278
282
|
metric.reset()
|
279
283
|
|
284
|
+
#
|
285
|
+
# Evaluation-level logging.
|
286
|
+
#
|
287
|
+
|
288
|
+
def _log(self, log_func, level: lf.logging.LogLevel, message: str, **kwargs):
|
289
|
+
# Write to external logging system.
|
290
|
+
log_message = f'{self.id}: {message}'
|
291
|
+
if kwargs:
|
292
|
+
log_message = f'{log_message} (metadata: {kwargs!r})'
|
293
|
+
log_func(log_message)
|
294
|
+
|
295
|
+
# Add to experiment log history.
|
296
|
+
log_entry = lf.logging.LogEntry(
|
297
|
+
level=level,
|
298
|
+
time=datetime.datetime.now(),
|
299
|
+
message=message,
|
300
|
+
metadata=kwargs,
|
301
|
+
)
|
302
|
+
with self._log_lock:
|
303
|
+
self._log_entries.append(log_entry)
|
304
|
+
|
305
|
+
def debug(self, message: str, **kwargs):
|
306
|
+
"""Logs a debug message to the session."""
|
307
|
+
self._log(pg.logging.debug, 'debug', message, **kwargs)
|
308
|
+
|
309
|
+
def info(self, message: str, **kwargs):
|
310
|
+
"""Logs an info message to the session."""
|
311
|
+
self._log(pg.logging.info, 'info', message, **kwargs)
|
312
|
+
|
313
|
+
def warning(self, message: str, **kwargs):
|
314
|
+
"""Logs a warning message to the session."""
|
315
|
+
self._log(pg.logging.warning, 'warning', message, **kwargs)
|
316
|
+
|
317
|
+
def error(self, message: str, **kwargs):
|
318
|
+
"""Logs an error message to the session."""
|
319
|
+
self._log(pg.logging.error, 'error', message, **kwargs)
|
320
|
+
|
321
|
+
def fatal(self, message: str, **kwargs):
|
322
|
+
"""Logs a fatal message to the session."""
|
323
|
+
# We use error level for fatal message, which does not trigger assertion.
|
324
|
+
self._log(pg.logging.error, 'fatal', message, **kwargs)
|
325
|
+
|
280
326
|
#
|
281
327
|
# HTML views.
|
282
328
|
#
|
@@ -465,6 +511,25 @@ class Evaluation(experiment_lib.Experiment):
|
|
465
511
|
)
|
466
512
|
)
|
467
513
|
|
514
|
+
def _logs_tab() -> pg.views.html.controls.Tab:
|
515
|
+
"""Renders a tab for the logs of the evaluation."""
|
516
|
+
with self._log_lock:
|
517
|
+
log_history = '\n'.join(str(l) for l in self._log_entries)
|
518
|
+
return pg.views.html.controls.Tab(
|
519
|
+
label='Logs',
|
520
|
+
content=pg.Html.element(
|
521
|
+
'div',
|
522
|
+
[
|
523
|
+
pg.Html.element(
|
524
|
+
'textarea',
|
525
|
+
[pg.Html.escape(log_history)],
|
526
|
+
readonly=True,
|
527
|
+
css_classes=['logs-textarea'],
|
528
|
+
)
|
529
|
+
]
|
530
|
+
)
|
531
|
+
)
|
532
|
+
|
468
533
|
def _main_tabs() -> pg.Html:
|
469
534
|
return pg.Html.element(
|
470
535
|
'div',
|
@@ -474,6 +539,8 @@ class Evaluation(experiment_lib.Experiment):
|
|
474
539
|
_definition_tab(),
|
475
540
|
] + [
|
476
541
|
_metric_tab(m) for m in self.metrics
|
542
|
+
] + [
|
543
|
+
_logs_tab()
|
477
544
|
],
|
478
545
|
selected=1,
|
479
546
|
)
|
@@ -593,6 +660,14 @@ class Evaluation(experiment_lib.Experiment):
|
|
593
660
|
width:100%;
|
594
661
|
height:100%;
|
595
662
|
}
|
663
|
+
.logs-textarea {
|
664
|
+
width: 100%;
|
665
|
+
height: 500px;
|
666
|
+
padding: 5px;
|
667
|
+
border: 1px solid #DDD;
|
668
|
+
background-color: #EEE;
|
669
|
+
resize: vertical;
|
670
|
+
}
|
596
671
|
"""
|
597
672
|
]
|
598
673
|
|
@@ -615,6 +690,11 @@ class EvaluationState:
|
|
615
690
|
assert isinstance(example, example_lib.Example), example
|
616
691
|
self._evaluated_examples[example.id] = example
|
617
692
|
|
693
|
+
@property
|
694
|
+
def evaluated_examples(self) -> dict[int, example_lib.Example]:
|
695
|
+
"""Returns the examples in the state."""
|
696
|
+
return self._evaluated_examples
|
697
|
+
|
618
698
|
def get(self, example_id: int) -> example_lib.Example | None:
|
619
699
|
"""Returns the example with the given ID."""
|
620
700
|
return self._evaluated_examples.get(example_id)
|
@@ -622,9 +702,3 @@ class EvaluationState:
|
|
622
702
|
def update(self, example: example_lib.Example) -> None:
|
623
703
|
"""Updates the state with the given example."""
|
624
704
|
self._evaluated_examples[example.id] = example
|
625
|
-
|
626
|
-
@property
|
627
|
-
def evaluated_examples(self) -> dict[int, example_lib.Example]:
|
628
|
-
"""Returns the examples in the state."""
|
629
|
-
return self._evaluated_examples
|
630
|
-
|
@@ -133,6 +133,12 @@ class EvaluationTest(unittest.TestCase):
|
|
133
133
|
|
134
134
|
def test_html_view(self):
|
135
135
|
exp = test_helper.TestEvaluation()
|
136
|
+
exp.debug('debug message')
|
137
|
+
exp.info('info message')
|
138
|
+
exp.warning('warning message', x=1)
|
139
|
+
exp.error('error message', x=1)
|
140
|
+
exp.fatal('fatal message')
|
141
|
+
|
136
142
|
self.assertIn(
|
137
143
|
exp.id,
|
138
144
|
exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content
|
@@ -959,6 +959,14 @@ class Plugin(lf.Component):
|
|
959
959
|
) -> None:
|
960
960
|
"""Called when an experiment (both leaf and non-leaf) is complete."""
|
961
961
|
|
962
|
+
def on_experiment_abort(
|
963
|
+
self,
|
964
|
+
runner: Runner,
|
965
|
+
experiment: Experiment,
|
966
|
+
error: BaseException,
|
967
|
+
) -> None:
|
968
|
+
"""Called when an experiment (both leaf and non-leaf) is aborted."""
|
969
|
+
|
962
970
|
def on_example_start(
|
963
971
|
self,
|
964
972
|
runner: Runner,
|
@@ -14,6 +14,7 @@
|
|
14
14
|
"""Reporting evaluation results."""
|
15
15
|
|
16
16
|
import time
|
17
|
+
import traceback
|
17
18
|
from typing import Annotated
|
18
19
|
|
19
20
|
from langfun.core.eval.v2 import example as example_lib
|
@@ -61,6 +62,14 @@ class HtmlReporter(experiment_lib.Plugin):
|
|
61
62
|
) -> None:
|
62
63
|
self._maybe_update_summary(runner, force=True)
|
63
64
|
|
65
|
+
def on_run_abort(
|
66
|
+
self,
|
67
|
+
runner: Runner,
|
68
|
+
root: Experiment,
|
69
|
+
error: BaseException
|
70
|
+
) -> None:
|
71
|
+
self._maybe_update_summary(runner, force=True)
|
72
|
+
|
64
73
|
def on_experiment_start(
|
65
74
|
self,
|
66
75
|
runner: Runner,
|
@@ -75,6 +84,16 @@ class HtmlReporter(experiment_lib.Plugin):
|
|
75
84
|
if experiment.is_leaf:
|
76
85
|
self._maybe_update_experiment_html(runner, experiment, force=True)
|
77
86
|
|
87
|
+
def on_experiment_abort(
|
88
|
+
self,
|
89
|
+
runner: Runner,
|
90
|
+
experiment: Experiment,
|
91
|
+
error: BaseException
|
92
|
+
) -> None:
|
93
|
+
del error
|
94
|
+
assert experiment.is_leaf
|
95
|
+
self._maybe_update_experiment_html(runner, experiment, force=True)
|
96
|
+
|
78
97
|
def on_example_complete(
|
79
98
|
self, runner: Runner, experiment: Experiment, example: Example
|
80
99
|
):
|
@@ -103,19 +122,26 @@ class HtmlReporter(experiment_lib.Plugin):
|
|
103
122
|
self, runner: Runner, experiment: Experiment, force: bool = False
|
104
123
|
) -> None:
|
105
124
|
def _save():
|
106
|
-
|
107
|
-
|
108
|
-
extra_flags=dict(
|
109
|
-
current_run=runner.current_run,
|
110
|
-
interactive=False,
|
111
|
-
card_view=False,
|
112
|
-
),
|
113
|
-
)
|
114
|
-
html.save(
|
115
|
-
runner.current_run.output_path_for(
|
116
|
-
experiment, _EVALULATION_DETAIL_FILE
|
117
|
-
)
|
125
|
+
index_html_path = runner.current_run.output_path_for(
|
126
|
+
experiment, _EVALULATION_DETAIL_FILE
|
118
127
|
)
|
128
|
+
try:
|
129
|
+
html = experiment.to_html(
|
130
|
+
collapse_level=None,
|
131
|
+
extra_flags=dict(
|
132
|
+
current_run=runner.current_run,
|
133
|
+
interactive=False,
|
134
|
+
card_view=False,
|
135
|
+
),
|
136
|
+
)
|
137
|
+
html.save(index_html_path)
|
138
|
+
except BaseException as e: # pylint: disable=broad-except
|
139
|
+
experiment.error(
|
140
|
+
f'Failed to save HTML {index_html_path!r}. '
|
141
|
+
f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
|
142
|
+
)
|
143
|
+
raise e
|
144
|
+
|
119
145
|
if force or (
|
120
146
|
time.time() - self._last_experiment_report_time[experiment.id]
|
121
147
|
> self.experiment_report_interval
|
@@ -128,17 +154,24 @@ class HtmlReporter(experiment_lib.Plugin):
|
|
128
154
|
) -> None:
|
129
155
|
"""Saves the example."""
|
130
156
|
def _save():
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
157
|
+
try:
|
158
|
+
html = example.to_html(
|
159
|
+
collapse_level=None,
|
160
|
+
enable_summary_tooltip=False,
|
161
|
+
extra_flags=dict(
|
162
|
+
# For properly rendering the next link.
|
163
|
+
num_examples=getattr(experiment, 'num_examples', None)
|
164
|
+
),
|
165
|
+
)
|
166
|
+
html.save(
|
167
|
+
runner.current_run.output_path_for(
|
168
|
+
experiment, f'{example.id}.html'
|
169
|
+
)
|
170
|
+
)
|
171
|
+
except BaseException as e: # pylint: disable=broad-except
|
172
|
+
experiment.error(
|
173
|
+
f'Failed to save HTML {example.id}.html. '
|
174
|
+
f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
|
175
|
+
)
|
176
|
+
raise e
|
144
177
|
runner.background_run(_save)
|
langfun/core/eval/v2/runners.py
CHANGED
@@ -18,6 +18,7 @@ import concurrent.futures
|
|
18
18
|
import random
|
19
19
|
import threading
|
20
20
|
import time
|
21
|
+
import traceback
|
21
22
|
from typing import Any, Annotated, Callable, Iterator
|
22
23
|
|
23
24
|
from langfun import core as lf
|
@@ -120,9 +121,14 @@ class RunnerBase(Runner):
|
|
120
121
|
# Start the progress of the evaluation.
|
121
122
|
if experiment.is_leaf:
|
122
123
|
assert isinstance(experiment, Evaluation)
|
123
|
-
|
124
|
-
|
125
|
-
|
124
|
+
num_examples_to_evaluate = (
|
125
|
+
len(self.current_run.example_ids)
|
126
|
+
if self.current_run.example_ids else experiment.num_examples
|
127
|
+
)
|
128
|
+
experiment.progress.start(total=num_examples_to_evaluate)
|
129
|
+
experiment.info(
|
130
|
+
'Starting evaluation %s with %d examples to evaluate.'
|
131
|
+
% (experiment.id, num_examples_to_evaluate)
|
126
132
|
)
|
127
133
|
else:
|
128
134
|
experiment.progress.start(total=len(experiment.leaf_nodes))
|
@@ -144,8 +150,7 @@ class RunnerBase(Runner):
|
|
144
150
|
|
145
151
|
# Only leaf evaluations will trigger the complete notification of the
|
146
152
|
# ancestors.
|
147
|
-
|
148
|
-
self._update_ancestor_progresses(experiment)
|
153
|
+
self._update_ancestor_progresses(experiment)
|
149
154
|
|
150
155
|
def on_experiment_complete(self, experiment: Experiment) -> None:
|
151
156
|
"""Called when an evaluation is complete."""
|
@@ -160,6 +165,35 @@ class RunnerBase(Runner):
|
|
160
165
|
# ancestors.
|
161
166
|
if experiment.is_leaf:
|
162
167
|
self._update_ancestor_progresses(experiment)
|
168
|
+
self._log_experiment_completion(experiment)
|
169
|
+
|
170
|
+
def _log_experiment_completion(self, experiment: Experiment):
|
171
|
+
example_ids = (
|
172
|
+
self.current_run.example_ids if self.current_run.example_ids else
|
173
|
+
list(range(1, experiment.num_examples + 1))
|
174
|
+
)
|
175
|
+
num_from_checkpoint, num_processed = 0, 0
|
176
|
+
for example_id in example_ids:
|
177
|
+
example = experiment.state.get(example_id)
|
178
|
+
if example.newly_processed:
|
179
|
+
num_processed += 1
|
180
|
+
else:
|
181
|
+
num_from_checkpoint += 1
|
182
|
+
experiment.info(
|
183
|
+
f'{experiment.id} completed with {num_from_checkpoint + num_processed} '
|
184
|
+
f'examples evaluated ({num_from_checkpoint} from checkpoint, '
|
185
|
+
f'{num_processed} newly processed).'
|
186
|
+
)
|
187
|
+
|
188
|
+
def on_experiment_abort(
|
189
|
+
self, experiment: Experiment, error: BaseException) -> None:
|
190
|
+
"""Called when an evaluation is complete."""
|
191
|
+
assert experiment.is_leaf
|
192
|
+
experiment.fatal(f'{error}\n\n{traceback.format_exc()}')
|
193
|
+
|
194
|
+
# Notify the plugins of the experiment abort.
|
195
|
+
for plugin in self._all_plugins(experiment):
|
196
|
+
plugin.on_experiment_abort(self, experiment, error)
|
163
197
|
|
164
198
|
def _update_ancestor_progresses(self, experiment: Experiment):
|
165
199
|
"""Updates the progresses of the parent nodes of the experiment."""
|
@@ -270,31 +304,36 @@ class RunnerBase(Runner):
|
|
270
304
|
|
271
305
|
def run_evaluation(self, evaluation: Evaluation) -> None:
|
272
306
|
"""Runs the evaluation."""
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
307
|
+
try:
|
308
|
+
self.on_experiment_start(evaluation)
|
309
|
+
|
310
|
+
per_evaluation_settings = {}
|
311
|
+
cache = None
|
312
|
+
if self.current_run.use_cache == 'per_dataset':
|
313
|
+
cache = self._load_or_create_cache(evaluation)
|
314
|
+
per_evaluation_settings['cache'] = cache
|
315
|
+
|
316
|
+
with lf.use_settings(**per_evaluation_settings):
|
317
|
+
if self.current_run.example_ids is None:
|
318
|
+
items = (
|
319
|
+
Example(id=i + 1, input=ex) for i, ex in enumerate(
|
320
|
+
evaluation.example_inputs)
|
321
|
+
)
|
322
|
+
else:
|
323
|
+
items = (
|
324
|
+
Example(
|
325
|
+
id=example_id,
|
326
|
+
input=evaluation.example_input_by_id(example_id)
|
327
|
+
) for example_id in self.current_run.example_ids
|
328
|
+
)
|
329
|
+
self._evaluate_items(evaluation, items)
|
330
|
+
|
331
|
+
if cache:
|
332
|
+
self.background_run(cache.save)
|
333
|
+
self.on_experiment_complete(evaluation)
|
334
|
+
except BaseException as e: # pylint: disable=broad-except
|
335
|
+
self.on_experiment_abort(evaluation, e)
|
336
|
+
raise e
|
298
337
|
|
299
338
|
@abc.abstractmethod
|
300
339
|
def _evaluate_items(
|
@@ -410,9 +449,7 @@ class ParallelRunner(RunnerBase):
|
|
410
449
|
groups.values(),
|
411
450
|
max_workers=max(64, len(groups)),
|
412
451
|
timeout=self.timeout,
|
413
|
-
silence_on_errors=
|
414
|
-
None if self.current_run.raise_if_has_error else BaseException
|
415
|
-
)
|
452
|
+
silence_on_errors=None,
|
416
453
|
):
|
417
454
|
pass
|
418
455
|
|
@@ -437,8 +474,6 @@ class ParallelRunner(RunnerBase):
|
|
437
474
|
items,
|
438
475
|
max_workers=evaluation.max_workers,
|
439
476
|
timeout=self.timeout,
|
440
|
-
silence_on_errors=
|
441
|
-
None if self.current_run.raise_if_has_error else BaseException
|
442
|
-
)
|
477
|
+
silence_on_errors=None,
|
443
478
|
):
|
444
479
|
pass
|
langfun/core/logging.py
CHANGED
@@ -54,6 +54,25 @@ class LogEntry(pg.Object, pg.views.HtmlTreeView.Extension):
|
|
54
54
|
def should_output(self, min_log_level: LogLevel) -> bool:
|
55
55
|
return _LOG_LEVELS.index(self.level) >= _LOG_LEVELS.index(min_log_level)
|
56
56
|
|
57
|
+
def format(self,
|
58
|
+
compact: bool = False,
|
59
|
+
verbose: bool = True,
|
60
|
+
root_indent: int = 0,
|
61
|
+
*,
|
62
|
+
text_format: bool = True,
|
63
|
+
**kwargs):
|
64
|
+
if text_format:
|
65
|
+
s = f"""{self.time.strftime('%H:%M:%S')} {self.level.upper()} - {self.message}"""
|
66
|
+
if self.metadata:
|
67
|
+
s += f' (metadata: {self.metadata!r})'
|
68
|
+
return s
|
69
|
+
return super().format(
|
70
|
+
compact=compact,
|
71
|
+
verbose=verbose,
|
72
|
+
root_indent=root_indent,
|
73
|
+
**kwargs
|
74
|
+
)
|
75
|
+
|
57
76
|
def _html_tree_view_summary(
|
58
77
|
self,
|
59
78
|
view: pg.views.HtmlTreeView,
|
langfun/core/logging_test.py
CHANGED
@@ -61,6 +61,25 @@ class LoggingTest(unittest.TestCase):
|
|
61
61
|
print(actual)
|
62
62
|
self.assertEqual(actual, expected)
|
63
63
|
|
64
|
+
def test_format(self):
|
65
|
+
time = datetime.datetime(2024, 10, 10, 12, 30, 45)
|
66
|
+
self.assertEqual(
|
67
|
+
str(
|
68
|
+
logging.LogEntry(
|
69
|
+
level='info', message='hello\nworld',
|
70
|
+
time=time, metadata=dict(x=1),
|
71
|
+
)
|
72
|
+
),
|
73
|
+
'12:30:45 INFO - hello\nworld (metadata: {x=1})',
|
74
|
+
)
|
75
|
+
self.assertIn(
|
76
|
+
'LogEntry(',
|
77
|
+
logging.LogEntry(
|
78
|
+
level='info', message='hello\nworld',
|
79
|
+
time=time, metadata=dict(x=1),
|
80
|
+
).format(text_format=False),
|
81
|
+
)
|
82
|
+
|
64
83
|
def test_html(self):
|
65
84
|
time = datetime.datetime(2024, 10, 10, 12, 30, 45)
|
66
85
|
self.assert_html_content(
|
@@ -270,24 +270,31 @@ def call(
|
|
270
270
|
if schema in (str, None):
|
271
271
|
return lm_output if returns_message else lm_output.text
|
272
272
|
|
273
|
+
def _chain_nl_output_message(parsing_message: lf.Message):
|
274
|
+
"""Chain the source of the parsed output to the LM output."""
|
275
|
+
parsing_message.root.source = lm_output
|
276
|
+
parsing_message.tag('parsing-lm-output')
|
277
|
+
parsing_message.lm_input.tag('parsing-lm-input')
|
278
|
+
|
273
279
|
# Call `parsing_lm` for structured parsing.
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
280
|
+
try:
|
281
|
+
parsing_message = querying.query(
|
282
|
+
lm_output.text,
|
283
|
+
schema,
|
284
|
+
examples=parsing_examples,
|
285
|
+
lm=parsing_lm or lm,
|
286
|
+
include_context=parsing_include_context,
|
287
|
+
cache_seed=cache_seed,
|
288
|
+
autofix=autofix,
|
289
|
+
autofix_lm=autofix_lm or lm,
|
290
|
+
protocol=protocol,
|
291
|
+
returns_message=True,
|
292
|
+
**kwargs,
|
293
|
+
)
|
294
|
+
_chain_nl_output_message(parsing_message)
|
295
|
+
except mapping.MappingError as e:
|
296
|
+
_chain_nl_output_message(e.lm_response)
|
297
|
+
raise e
|
291
298
|
return parsing_message if returns_message else parsing_message.result
|
292
299
|
|
293
300
|
|
@@ -686,6 +686,31 @@ class CallTest(unittest.TestCase):
|
|
686
686
|
],
|
687
687
|
returns_message=True,
|
688
688
|
)
|
689
|
+
self.assertIn('parsing-lm-output', output.tags)
|
690
|
+
self.assertIn('parsing-lm-input', output.source.tags)
|
691
|
+
self.assertEqual(output.root.text, 'Compute 1 + 2')
|
692
|
+
|
693
|
+
def test_call_with_parsing_message_chaining_on_parsing_error(self):
|
694
|
+
try:
|
695
|
+
output = parsing.call(
|
696
|
+
'Compute 1 + 2',
|
697
|
+
int,
|
698
|
+
lm=fake.StaticSequence(['three']),
|
699
|
+
parsing_lm=fake.StaticSequence(['abc']),
|
700
|
+
parsing_examples=[
|
701
|
+
mapping.MappingExample(
|
702
|
+
context='Multiple four and five',
|
703
|
+
input='twenty',
|
704
|
+
schema=int,
|
705
|
+
output=20,
|
706
|
+
)
|
707
|
+
],
|
708
|
+
returns_message=True,
|
709
|
+
)
|
710
|
+
except mapping.MappingError as e:
|
711
|
+
output = e.lm_response
|
712
|
+
self.assertIn('parsing-lm-output', output.tags)
|
713
|
+
self.assertIn('parsing-lm-input', output.source.tags)
|
689
714
|
self.assertEqual(output.root.text, 'Compute 1 + 2')
|
690
715
|
|
691
716
|
def test_call_with_autofix(self):
|
@@ -583,7 +583,16 @@ class QueryInvocation(pg.Object, pg.views.HtmlTreeView.Extension):
|
|
583
583
|
|
584
584
|
@functools.cached_property
|
585
585
|
def output(self) -> Any:
|
586
|
-
|
586
|
+
"""The output of `lf.query`. If it failed, returns the `MappingError`."""
|
587
|
+
try:
|
588
|
+
return query_output(self.lm_response, self.schema)
|
589
|
+
except mapping.MappingError as e:
|
590
|
+
return e
|
591
|
+
|
592
|
+
@property
|
593
|
+
def has_error(self) -> bool:
|
594
|
+
"""Returns True if the query failed to generate a valid output."""
|
595
|
+
return isinstance(self.output, BaseException)
|
587
596
|
|
588
597
|
@property
|
589
598
|
def elapse(self) -> float:
|
@@ -1051,6 +1051,16 @@ class QueryStructureJsonTest(unittest.TestCase):
|
|
1051
1051
|
|
1052
1052
|
class QueryInvocationTest(unittest.TestCase):
|
1053
1053
|
|
1054
|
+
def test_basics(self):
|
1055
|
+
lm = fake.StaticSequence([
|
1056
|
+
'Activity(description="hi"',
|
1057
|
+
])
|
1058
|
+
with querying.track_queries() as queries:
|
1059
|
+
querying.query('foo', Activity, default=None, lm=lm)
|
1060
|
+
|
1061
|
+
self.assertTrue(queries[0].has_error)
|
1062
|
+
self.assertIsInstance(queries[0].output, mapping.MappingError)
|
1063
|
+
|
1054
1064
|
def test_to_html(self):
|
1055
1065
|
lm = fake.StaticSequence([
|
1056
1066
|
'Activity(description="hi")',
|
@@ -10,8 +10,8 @@ langfun/core/langfunc.py,sha256=G50YgoVZ0y1GFw2ev41MlOqr6qa8YakbvNC0h_E0PiA,1114
|
|
10
10
|
langfun/core/langfunc_test.py,sha256=fKIAqcSNI_7M6nwoZW77HEam8Oa6vcWhsCNgVJanzb4,8822
|
11
11
|
langfun/core/language_model.py,sha256=b15MZ_qbydnz5vQ09t7sf9tc3C7qWvMSxUrGfT0p99I,33827
|
12
12
|
langfun/core/language_model_test.py,sha256=hnYhtw7GM_TbhgsJzHNYTaoDewUlPHpOVlI7xEkCFuI,31783
|
13
|
-
langfun/core/logging.py,sha256=
|
14
|
-
langfun/core/logging_test.py,sha256=
|
13
|
+
langfun/core/logging.py,sha256=W3mLEMXdo210Q5OX3a1ZTc4nU-xMy73-IfNKnsA-RFo,8051
|
14
|
+
langfun/core/logging_test.py,sha256=N7-YvSXC8zvnr2SNwWHOykn1CFmqvIuTLDgn41Ku9JU,6642
|
15
15
|
langfun/core/memory.py,sha256=f-asN1F7Vehgdn_fK84v73GrEUOxRtaW934keutTKjk,2416
|
16
16
|
langfun/core/message.py,sha256=16oiMpg9O9VKrgpfrvJrfvga3n3FzUuD_zdWb9nvSWA,25686
|
17
17
|
langfun/core/message_test.py,sha256=jtZoNBNbA99i2fjoKg5vTRgoUe84J4MH8ZMGakGmTHs,32577
|
@@ -58,13 +58,13 @@ langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrC
|
|
58
58
|
langfun/core/eval/scoring.py,sha256=B69IsIxiPs1xZcOBFIhZF70YmDue2Siik-CPL2bh33s,6254
|
59
59
|
langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se2SXM,4546
|
60
60
|
langfun/core/eval/v2/__init__.py,sha256=qoa6zKdFXOFyCX6vay6OdgPf1eUhYGoHYAxe35qECGk,1628
|
61
|
-
langfun/core/eval/v2/checkpointing.py,sha256=
|
61
|
+
langfun/core/eval/v2/checkpointing.py,sha256=ZYKHN91pALZjnoMdikc-JdJ-HiBRIqitOUWbFablFI8,8367
|
62
62
|
langfun/core/eval/v2/checkpointing_test.py,sha256=dAERKQTW_PM1B0oUauB0YVQkMEI-cgJq0q-wAVlGYpU,4383
|
63
|
-
langfun/core/eval/v2/evaluation.py,sha256=
|
64
|
-
langfun/core/eval/v2/evaluation_test.py,sha256=
|
63
|
+
langfun/core/eval/v2/evaluation.py,sha256=NFBGAWw2BtW7H0zcoZhfWtz59Psra84eshJm73uAFwg,21807
|
64
|
+
langfun/core/eval/v2/evaluation_test.py,sha256=ld8oBOjsfN-LNLL2eViSTu17wAq90GcsfURXX6oVlFo,6014
|
65
65
|
langfun/core/eval/v2/example.py,sha256=fURrvdNmMsVMqoEErcsmLmC6Xq3ny16dYsnLH8HVlcY,9626
|
66
66
|
langfun/core/eval/v2/example_test.py,sha256=WcJmU7IQQXvjFia63mokySC4CqxzVL9Wso1sC5F0YK8,3032
|
67
|
-
langfun/core/eval/v2/experiment.py,sha256=
|
67
|
+
langfun/core/eval/v2/experiment.py,sha256=xfk4aNZ3dH46y0lWSS_fC7JpfJCG77Z5qsakV4gHcOs,29762
|
68
68
|
langfun/core/eval/v2/experiment_test.py,sha256=zSMHYqC9cA0k61U71pCSYTAJ6yK2_b6Dml5btc-bKzQ,9133
|
69
69
|
langfun/core/eval/v2/metric_values.py,sha256=_B905bC-jxrYPLSEcP2M8MaHZOVMz_bVrUw8YC4arCE,4660
|
70
70
|
langfun/core/eval/v2/metric_values_test.py,sha256=ab2oF_HsIwrSy459108ggyjgefHSPn8UVILR4dRwx14,2634
|
@@ -74,9 +74,9 @@ langfun/core/eval/v2/progress.py,sha256=azZgssQgNdv3IgjKEaQBuGI5ucFDNbdi02P4z_nQ
|
|
74
74
|
langfun/core/eval/v2/progress_test.py,sha256=YU7VHzmy5knPZwj9vpBN3rQQH2tukj9eKHkuBCI62h8,2540
|
75
75
|
langfun/core/eval/v2/progress_tracking.py,sha256=l9fEkz4oP5McpZzf72Ua7PYm3lAWtRru7gRWNf8H0ms,6083
|
76
76
|
langfun/core/eval/v2/progress_tracking_test.py,sha256=iO-DslCJWncU7-27XaMKxDeKrsGbwdk_tKfoRk3KboE,2271
|
77
|
-
langfun/core/eval/v2/reporting.py,sha256=
|
77
|
+
langfun/core/eval/v2/reporting.py,sha256=vsh45GLVnA7GMU-8cvNYOt4Nb7mEwvcguhO-BSXSzTE,5358
|
78
78
|
langfun/core/eval/v2/reporting_test.py,sha256=JxffbUPWInUyLjo-AQVFrllga884Mdfm05R86FtxSss,1482
|
79
|
-
langfun/core/eval/v2/runners.py,sha256=
|
79
|
+
langfun/core/eval/v2/runners.py,sha256=iTARDpPHPJKZL5Hu3k-O2LhK1ICOo1Ywbm5e7O2rNBA,15680
|
80
80
|
langfun/core/eval/v2/runners_test.py,sha256=UeiUNygux_U6iGVG18rhp68ZE4hoWeoT6XsXvSjxNQg,11620
|
81
81
|
langfun/core/eval/v2/test_helper.py,sha256=pDpZTBnWRR5xjJv3Uy3NWEzArqlL8FTMOgeR4C53F5M,2348
|
82
82
|
langfun/core/llms/__init__.py,sha256=lWXKjGHv66ShG7AE_Bc4QM7SDTxJdfoQMn3PF0lr0sU,6461
|
@@ -127,10 +127,10 @@ langfun/core/structured/function_generation.py,sha256=g7AOR_e8HxFU6n6Df750aGkgMg
|
|
127
127
|
langfun/core/structured/function_generation_test.py,sha256=LaXYDXf9GlqUrR6v_gtmK_H4kxzonmU7SYbn7XXMgjU,12128
|
128
128
|
langfun/core/structured/mapping.py,sha256=vLKH79UT-j0qkQdvqlQBO7SkXXuM-yr2Idm8_HH8qwM,13649
|
129
129
|
langfun/core/structured/mapping_test.py,sha256=bHm2ZCXBITq_G8Lvw_olFHeUUc4s_lGXZm9v9JhoPB4,9630
|
130
|
-
langfun/core/structured/parsing.py,sha256=
|
131
|
-
langfun/core/structured/parsing_test.py,sha256
|
132
|
-
langfun/core/structured/querying.py,sha256=
|
133
|
-
langfun/core/structured/querying_test.py,sha256=
|
130
|
+
langfun/core/structured/parsing.py,sha256=MGvI7ypXlwfzr5XB8_TFU9Ei0_5reYqkWkv64eAy0EA,12015
|
131
|
+
langfun/core/structured/parsing_test.py,sha256=kNPrhpdPY3iWhUld0TFYU-Zgn44wC0d6YuQ9XdVbQ8o,22346
|
132
|
+
langfun/core/structured/querying.py,sha256=nqvsfMS_KLv5EvO0_VAGEHwY4pHy4S0CvJmeV0HBXlM,23066
|
133
|
+
langfun/core/structured/querying_test.py,sha256=YlC4s9LVChfhGZzaXGW1UYlcBnAjNOunu4SLl5_p7PQ,32054
|
134
134
|
langfun/core/structured/schema.py,sha256=0VUPSfX1JEQ0xu8WvEymCKK_WSGwBNA-rQD2hATErmU,27912
|
135
135
|
langfun/core/structured/schema_generation.py,sha256=U3nRQsqmMZg_qIVDh2fiY3K4JLfsAL1LcKzIFP1iXFg,5316
|
136
136
|
langfun/core/structured/schema_generation_test.py,sha256=RM9s71kMNg2jTePwInkiW9fK1ACN37eyPeF8OII-0zw,2950
|
@@ -148,8 +148,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
|
|
148
148
|
langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
|
149
149
|
langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
|
150
150
|
langfun/core/templates/selfplay_test.py,sha256=Ot__1P1M8oJfoTp-M9-PQ6HUXqZKyMwvZ5f7yQ3yfyM,2326
|
151
|
-
langfun-0.1.2.
|
152
|
-
langfun-0.1.2.
|
153
|
-
langfun-0.1.2.
|
154
|
-
langfun-0.1.2.
|
155
|
-
langfun-0.1.2.
|
151
|
+
langfun-0.1.2.dev202412200804.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
152
|
+
langfun-0.1.2.dev202412200804.dist-info/METADATA,sha256=7IgyNjTrvkrYSyuwvSrBzrY8tzacbgL1qLaoonRRgxc,8281
|
153
|
+
langfun-0.1.2.dev202412200804.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
154
|
+
langfun-0.1.2.dev202412200804.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
|
155
|
+
langfun-0.1.2.dev202412200804.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412200804.dist-info}/top_level.txt
RENAMED
File without changes
|