langfun 0.1.2.dev202412180804__py3-none-any.whl → 0.1.2.dev202412200804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
  """Checkpointing evaluation runs."""
15
15
  import threading
16
+ import traceback
16
17
 
17
18
  import langfun.core as lf
18
19
  from langfun.core.eval.v2 import example as example_lib
@@ -27,6 +28,21 @@ Runner = experiment_lib.Runner
27
28
  class Checkpointer(experiment_lib.Plugin):
28
29
  """Base class for checkpointing evaluation examples."""
29
30
 
31
+ def on_experiment_start(self, experiment: Experiment):
32
+ if experiment.state.evaluated_examples:
33
+ experiment.info(
34
+ 'Loaded %d examples from checkpoint files. Example IDs: %s' %
35
+ (
36
+ len(experiment.state.evaluated_examples),
37
+ list(sorted(experiment.state.evaluated_examples.keys()))
38
+ ),
39
+ )
40
+ else:
41
+ experiment.info(
42
+ 'No previous evaluated examples are loaded. '
43
+ f'Experiment {experiment.id} starts from scratch.'
44
+ )
45
+
30
46
 
31
47
  class PerExampleCheckpointer(Checkpointer):
32
48
  """Checkpointer that saves each example to a separate file."""
@@ -68,10 +84,11 @@ class PerExampleCheckpointer(Checkpointer):
68
84
  _load_state, ckpt_files, max_workers=64,
69
85
  ):
70
86
  if error is not None:
71
- pg.logging.warning(
87
+ experiment.warning(
72
88
  'Failed to load checkpoint file %s: %s. Skipping the file.',
73
89
  ckpt_file, error
74
90
  )
91
+ super().on_experiment_start(experiment)
75
92
 
76
93
  def on_example_complete(
77
94
  self,
@@ -80,7 +97,11 @@ class PerExampleCheckpointer(Checkpointer):
80
97
  example: Example,
81
98
  ) -> None:
82
99
  """Saves the example to the checkpoint file."""
83
- if not example.has_error:
100
+ if example.has_error:
101
+ experiment.warning(
102
+ f'Example {example.id} has error. Skipping checkpointing.'
103
+ )
104
+ else:
84
105
  def save_state(example: Example):
85
106
  writer = SequenceWriter(
86
107
  runner.current_run.output_path_for(
@@ -91,8 +112,18 @@ class PerExampleCheckpointer(Checkpointer):
91
112
  )
92
113
  )
93
114
  )
94
- writer.add(example)
95
- writer.close()
115
+ try:
116
+ writer.add(example)
117
+ writer.close()
118
+ experiment.info(
119
+ f'Example {example.id} is saved to {writer.path}.',
120
+ )
121
+ except BaseException as e: # pylint: disable=broad-except
122
+ experiment.error(
123
+ f'Failed to save example {example.id} to {writer.path}. '
124
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
125
+ )
126
+ raise e
96
127
  runner.background_run(save_state, example)
97
128
 
98
129
  def _file_prefix_and_ext(self, filename: str) -> tuple[str, str]:
@@ -164,6 +195,7 @@ class BulkCheckpointer(Checkpointer):
164
195
  with self._lock:
165
196
  if self._sequence_writer is not None:
166
197
  self._sequence_writer[experiment.id] = sequence_writer
198
+ super().on_experiment_start(experiment)
167
199
 
168
200
  def on_experiment_complete(
169
201
  self,
@@ -178,8 +210,12 @@ class BulkCheckpointer(Checkpointer):
178
210
  if self._sequence_writer is not None:
179
211
  # Make sure the writer is closed without delay so the file will be
180
212
  # available immediately.
181
- self._sequence_writer[experiment.id].close()
182
- del self._sequence_writer[experiment.id]
213
+ writer = self._sequence_writer.pop(experiment.id)
214
+ writer.close()
215
+ experiment.info(
216
+ f'{len(experiment.state.evaluated_examples)} examples are '
217
+ f'checkpointed to {writer.path}.'
218
+ )
183
219
 
184
220
  def on_example_complete(
185
221
  self,
@@ -189,8 +225,22 @@ class BulkCheckpointer(Checkpointer):
189
225
  ) -> None:
190
226
  """Saves the example to the checkpoint file."""
191
227
  assert experiment.id in self._sequence_writer
192
- if not example.has_error:
193
- runner.background_run(self._sequence_writer[experiment.id].add, example)
228
+ if example.has_error:
229
+ experiment.warning(
230
+ f'Example {example.id} has error. Skipping checkpointing.'
231
+ )
232
+ else:
233
+ def _save_example(example: Example):
234
+ writer = self._sequence_writer[experiment.id]
235
+ try:
236
+ writer.add(example)
237
+ except BaseException as e: # pylint: disable=broad-except
238
+ experiment.error(
239
+ f'Failed to save example {example.id} to {writer.path}. '
240
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
241
+ )
242
+ raise e
243
+ runner.background_run(_save_example, example)
194
244
 
195
245
 
196
246
  class SequenceWriter:
@@ -198,8 +248,13 @@ class SequenceWriter:
198
248
 
199
249
  def __init__(self, path: str):
200
250
  self._lock = threading.Lock()
251
+ self._path = path
201
252
  self._sequence_writer = pg.io.open_sequence(path, 'w')
202
253
 
254
+ @property
255
+ def path(self) -> str:
256
+ return self._path
257
+
203
258
  def add(self, example: Example):
204
259
  example_blob = pg.to_json_str(
205
260
  example,
@@ -14,7 +14,9 @@
14
14
  """Base class for Langfun evaluation tasks."""
15
15
 
16
16
  import abc
17
+ import datetime
17
18
  import functools
19
+ import threading
18
20
  import time
19
21
 
20
22
  from typing import Annotated, Any, Callable, Iterable
@@ -63,6 +65,8 @@ class Evaluation(experiment_lib.Experiment):
63
65
  self.__dict__.pop('is_leaf', None)
64
66
  self.__dict__.pop('children', None)
65
67
  super()._on_bound()
68
+ self._log_entries = []
69
+ self._log_lock = threading.Lock()
66
70
 
67
71
  #
68
72
  # Handling evaluation hierarchy (materialized vs. hyper evaluations).
@@ -277,6 +281,48 @@ class Evaluation(experiment_lib.Experiment):
277
281
  for metric in self.metrics:
278
282
  metric.reset()
279
283
 
284
+ #
285
+ # Evaluation-level logging.
286
+ #
287
+
288
+ def _log(self, log_func, level: lf.logging.LogLevel, message: str, **kwargs):
289
+ # Write to external logging system.
290
+ log_message = f'{self.id}: {message}'
291
+ if kwargs:
292
+ log_message = f'{log_message} (metadata: {kwargs!r})'
293
+ log_func(log_message)
294
+
295
+ # Add to experiment log history.
296
+ log_entry = lf.logging.LogEntry(
297
+ level=level,
298
+ time=datetime.datetime.now(),
299
+ message=message,
300
+ metadata=kwargs,
301
+ )
302
+ with self._log_lock:
303
+ self._log_entries.append(log_entry)
304
+
305
+ def debug(self, message: str, **kwargs):
306
+ """Logs a debug message to the session."""
307
+ self._log(pg.logging.debug, 'debug', message, **kwargs)
308
+
309
+ def info(self, message: str, **kwargs):
310
+ """Logs an info message to the session."""
311
+ self._log(pg.logging.info, 'info', message, **kwargs)
312
+
313
+ def warning(self, message: str, **kwargs):
314
+ """Logs a warning message to the session."""
315
+ self._log(pg.logging.warning, 'warning', message, **kwargs)
316
+
317
+ def error(self, message: str, **kwargs):
318
+ """Logs an error message to the session."""
319
+ self._log(pg.logging.error, 'error', message, **kwargs)
320
+
321
+ def fatal(self, message: str, **kwargs):
322
+ """Logs a fatal message to the session."""
323
+ # We use error level for fatal message, which does not trigger assertion.
324
+ self._log(pg.logging.error, 'fatal', message, **kwargs)
325
+
280
326
  #
281
327
  # HTML views.
282
328
  #
@@ -465,6 +511,25 @@ class Evaluation(experiment_lib.Experiment):
465
511
  )
466
512
  )
467
513
 
514
+ def _logs_tab() -> pg.views.html.controls.Tab:
515
+ """Renders a tab for the logs of the evaluation."""
516
+ with self._log_lock:
517
+ log_history = '\n'.join(str(l) for l in self._log_entries)
518
+ return pg.views.html.controls.Tab(
519
+ label='Logs',
520
+ content=pg.Html.element(
521
+ 'div',
522
+ [
523
+ pg.Html.element(
524
+ 'textarea',
525
+ [pg.Html.escape(log_history)],
526
+ readonly=True,
527
+ css_classes=['logs-textarea'],
528
+ )
529
+ ]
530
+ )
531
+ )
532
+
468
533
  def _main_tabs() -> pg.Html:
469
534
  return pg.Html.element(
470
535
  'div',
@@ -474,6 +539,8 @@ class Evaluation(experiment_lib.Experiment):
474
539
  _definition_tab(),
475
540
  ] + [
476
541
  _metric_tab(m) for m in self.metrics
542
+ ] + [
543
+ _logs_tab()
477
544
  ],
478
545
  selected=1,
479
546
  )
@@ -593,6 +660,14 @@ class Evaluation(experiment_lib.Experiment):
593
660
  width:100%;
594
661
  height:100%;
595
662
  }
663
+ .logs-textarea {
664
+ width: 100%;
665
+ height: 500px;
666
+ padding: 5px;
667
+ border: 1px solid #DDD;
668
+ background-color: #EEE;
669
+ resize: vertical;
670
+ }
596
671
  """
597
672
  ]
598
673
 
@@ -615,6 +690,11 @@ class EvaluationState:
615
690
  assert isinstance(example, example_lib.Example), example
616
691
  self._evaluated_examples[example.id] = example
617
692
 
693
+ @property
694
+ def evaluated_examples(self) -> dict[int, example_lib.Example]:
695
+ """Returns the examples in the state."""
696
+ return self._evaluated_examples
697
+
618
698
  def get(self, example_id: int) -> example_lib.Example | None:
619
699
  """Returns the example with the given ID."""
620
700
  return self._evaluated_examples.get(example_id)
@@ -622,9 +702,3 @@ class EvaluationState:
622
702
  def update(self, example: example_lib.Example) -> None:
623
703
  """Updates the state with the given example."""
624
704
  self._evaluated_examples[example.id] = example
625
-
626
- @property
627
- def evaluated_examples(self) -> dict[int, example_lib.Example]:
628
- """Returns the examples in the state."""
629
- return self._evaluated_examples
630
-
@@ -133,6 +133,12 @@ class EvaluationTest(unittest.TestCase):
133
133
 
134
134
  def test_html_view(self):
135
135
  exp = test_helper.TestEvaluation()
136
+ exp.debug('debug message')
137
+ exp.info('info message')
138
+ exp.warning('warning message', x=1)
139
+ exp.error('error message', x=1)
140
+ exp.fatal('fatal message')
141
+
136
142
  self.assertIn(
137
143
  exp.id,
138
144
  exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content
@@ -959,6 +959,14 @@ class Plugin(lf.Component):
959
959
  ) -> None:
960
960
  """Called when an experiment (both leaf and non-leaf) is complete."""
961
961
 
962
+ def on_experiment_abort(
963
+ self,
964
+ runner: Runner,
965
+ experiment: Experiment,
966
+ error: BaseException,
967
+ ) -> None:
968
+ """Called when an experiment (both leaf and non-leaf) is aborted."""
969
+
962
970
  def on_example_start(
963
971
  self,
964
972
  runner: Runner,
@@ -14,6 +14,7 @@
14
14
  """Reporting evaluation results."""
15
15
 
16
16
  import time
17
+ import traceback
17
18
  from typing import Annotated
18
19
 
19
20
  from langfun.core.eval.v2 import example as example_lib
@@ -61,6 +62,14 @@ class HtmlReporter(experiment_lib.Plugin):
61
62
  ) -> None:
62
63
  self._maybe_update_summary(runner, force=True)
63
64
 
65
+ def on_run_abort(
66
+ self,
67
+ runner: Runner,
68
+ root: Experiment,
69
+ error: BaseException
70
+ ) -> None:
71
+ self._maybe_update_summary(runner, force=True)
72
+
64
73
  def on_experiment_start(
65
74
  self,
66
75
  runner: Runner,
@@ -75,6 +84,16 @@ class HtmlReporter(experiment_lib.Plugin):
75
84
  if experiment.is_leaf:
76
85
  self._maybe_update_experiment_html(runner, experiment, force=True)
77
86
 
87
+ def on_experiment_abort(
88
+ self,
89
+ runner: Runner,
90
+ experiment: Experiment,
91
+ error: BaseException
92
+ ) -> None:
93
+ del error
94
+ assert experiment.is_leaf
95
+ self._maybe_update_experiment_html(runner, experiment, force=True)
96
+
78
97
  def on_example_complete(
79
98
  self, runner: Runner, experiment: Experiment, example: Example
80
99
  ):
@@ -103,19 +122,26 @@ class HtmlReporter(experiment_lib.Plugin):
103
122
  self, runner: Runner, experiment: Experiment, force: bool = False
104
123
  ) -> None:
105
124
  def _save():
106
- html = experiment.to_html(
107
- collapse_level=None,
108
- extra_flags=dict(
109
- current_run=runner.current_run,
110
- interactive=False,
111
- card_view=False,
112
- ),
113
- )
114
- html.save(
115
- runner.current_run.output_path_for(
116
- experiment, _EVALULATION_DETAIL_FILE
117
- )
125
+ index_html_path = runner.current_run.output_path_for(
126
+ experiment, _EVALULATION_DETAIL_FILE
118
127
  )
128
+ try:
129
+ html = experiment.to_html(
130
+ collapse_level=None,
131
+ extra_flags=dict(
132
+ current_run=runner.current_run,
133
+ interactive=False,
134
+ card_view=False,
135
+ ),
136
+ )
137
+ html.save(index_html_path)
138
+ except BaseException as e: # pylint: disable=broad-except
139
+ experiment.error(
140
+ f'Failed to save HTML {index_html_path!r}. '
141
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
142
+ )
143
+ raise e
144
+
119
145
  if force or (
120
146
  time.time() - self._last_experiment_report_time[experiment.id]
121
147
  > self.experiment_report_interval
@@ -128,17 +154,24 @@ class HtmlReporter(experiment_lib.Plugin):
128
154
  ) -> None:
129
155
  """Saves the example."""
130
156
  def _save():
131
- html = example.to_html(
132
- collapse_level=None,
133
- enable_summary_tooltip=False,
134
- extra_flags=dict(
135
- # For properly rendering the next link.
136
- num_examples=getattr(experiment, 'num_examples', None)
137
- ),
138
- )
139
- html.save(
140
- runner.current_run.output_path_for(
141
- experiment, f'{example.id}.html'
142
- )
143
- )
157
+ try:
158
+ html = example.to_html(
159
+ collapse_level=None,
160
+ enable_summary_tooltip=False,
161
+ extra_flags=dict(
162
+ # For properly rendering the next link.
163
+ num_examples=getattr(experiment, 'num_examples', None)
164
+ ),
165
+ )
166
+ html.save(
167
+ runner.current_run.output_path_for(
168
+ experiment, f'{example.id}.html'
169
+ )
170
+ )
171
+ except BaseException as e: # pylint: disable=broad-except
172
+ experiment.error(
173
+ f'Failed to save HTML {example.id}.html. '
174
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
175
+ )
176
+ raise e
144
177
  runner.background_run(_save)
@@ -18,6 +18,7 @@ import concurrent.futures
18
18
  import random
19
19
  import threading
20
20
  import time
21
+ import traceback
21
22
  from typing import Any, Annotated, Callable, Iterator
22
23
 
23
24
  from langfun import core as lf
@@ -120,9 +121,14 @@ class RunnerBase(Runner):
120
121
  # Start the progress of the evaluation.
121
122
  if experiment.is_leaf:
122
123
  assert isinstance(experiment, Evaluation)
123
- experiment.progress.start(
124
- total=(len(self.current_run.example_ids)
125
- if self.current_run.example_ids else experiment.num_examples)
124
+ num_examples_to_evaluate = (
125
+ len(self.current_run.example_ids)
126
+ if self.current_run.example_ids else experiment.num_examples
127
+ )
128
+ experiment.progress.start(total=num_examples_to_evaluate)
129
+ experiment.info(
130
+ 'Starting evaluation %s with %d examples to evaluate.'
131
+ % (experiment.id, num_examples_to_evaluate)
126
132
  )
127
133
  else:
128
134
  experiment.progress.start(total=len(experiment.leaf_nodes))
@@ -144,8 +150,7 @@ class RunnerBase(Runner):
144
150
 
145
151
  # Only leaf evaluations will trigger the complete notification of the
146
152
  # ancestors.
147
- if experiment.is_leaf:
148
- self._update_ancestor_progresses(experiment)
153
+ self._update_ancestor_progresses(experiment)
149
154
 
150
155
  def on_experiment_complete(self, experiment: Experiment) -> None:
151
156
  """Called when an evaluation is complete."""
@@ -160,6 +165,35 @@ class RunnerBase(Runner):
160
165
  # ancestors.
161
166
  if experiment.is_leaf:
162
167
  self._update_ancestor_progresses(experiment)
168
+ self._log_experiment_completion(experiment)
169
+
170
+ def _log_experiment_completion(self, experiment: Experiment):
171
+ example_ids = (
172
+ self.current_run.example_ids if self.current_run.example_ids else
173
+ list(range(1, experiment.num_examples + 1))
174
+ )
175
+ num_from_checkpoint, num_processed = 0, 0
176
+ for example_id in example_ids:
177
+ example = experiment.state.get(example_id)
178
+ if example.newly_processed:
179
+ num_processed += 1
180
+ else:
181
+ num_from_checkpoint += 1
182
+ experiment.info(
183
+ f'{experiment.id} completed with {num_from_checkpoint + num_processed} '
184
+ f'examples evaluated ({num_from_checkpoint} from checkpoint, '
185
+ f'{num_processed} newly processed).'
186
+ )
187
+
188
+ def on_experiment_abort(
189
+ self, experiment: Experiment, error: BaseException) -> None:
190
+ """Called when an evaluation is complete."""
191
+ assert experiment.is_leaf
192
+ experiment.fatal(f'{error}\n\n{traceback.format_exc()}')
193
+
194
+ # Notify the plugins of the experiment abort.
195
+ for plugin in self._all_plugins(experiment):
196
+ plugin.on_experiment_abort(self, experiment, error)
163
197
 
164
198
  def _update_ancestor_progresses(self, experiment: Experiment):
165
199
  """Updates the progresses of the parent nodes of the experiment."""
@@ -270,31 +304,36 @@ class RunnerBase(Runner):
270
304
 
271
305
  def run_evaluation(self, evaluation: Evaluation) -> None:
272
306
  """Runs the evaluation."""
273
- self.on_experiment_start(evaluation)
274
-
275
- per_evaluation_settings = {}
276
- cache = None
277
- if self.current_run.use_cache == 'per_dataset':
278
- cache = self._load_or_create_cache(evaluation)
279
- per_evaluation_settings['cache'] = cache
280
-
281
- with lf.use_settings(**per_evaluation_settings):
282
- if self.current_run.example_ids is None:
283
- items = (
284
- Example(id=i + 1, input=ex) for i, ex in enumerate(
285
- evaluation.example_inputs)
286
- )
287
- else:
288
- items = (
289
- Example(
290
- id=example_id, input=evaluation.example_input_by_id(example_id)
291
- ) for example_id in self.current_run.example_ids
292
- )
293
- self._evaluate_items(evaluation, items)
294
-
295
- if cache:
296
- self.background_run(cache.save)
297
- self.on_experiment_complete(evaluation)
307
+ try:
308
+ self.on_experiment_start(evaluation)
309
+
310
+ per_evaluation_settings = {}
311
+ cache = None
312
+ if self.current_run.use_cache == 'per_dataset':
313
+ cache = self._load_or_create_cache(evaluation)
314
+ per_evaluation_settings['cache'] = cache
315
+
316
+ with lf.use_settings(**per_evaluation_settings):
317
+ if self.current_run.example_ids is None:
318
+ items = (
319
+ Example(id=i + 1, input=ex) for i, ex in enumerate(
320
+ evaluation.example_inputs)
321
+ )
322
+ else:
323
+ items = (
324
+ Example(
325
+ id=example_id,
326
+ input=evaluation.example_input_by_id(example_id)
327
+ ) for example_id in self.current_run.example_ids
328
+ )
329
+ self._evaluate_items(evaluation, items)
330
+
331
+ if cache:
332
+ self.background_run(cache.save)
333
+ self.on_experiment_complete(evaluation)
334
+ except BaseException as e: # pylint: disable=broad-except
335
+ self.on_experiment_abort(evaluation, e)
336
+ raise e
298
337
 
299
338
  @abc.abstractmethod
300
339
  def _evaluate_items(
@@ -410,9 +449,7 @@ class ParallelRunner(RunnerBase):
410
449
  groups.values(),
411
450
  max_workers=max(64, len(groups)),
412
451
  timeout=self.timeout,
413
- silence_on_errors=(
414
- None if self.current_run.raise_if_has_error else BaseException
415
- )
452
+ silence_on_errors=None,
416
453
  ):
417
454
  pass
418
455
 
@@ -437,8 +474,6 @@ class ParallelRunner(RunnerBase):
437
474
  items,
438
475
  max_workers=evaluation.max_workers,
439
476
  timeout=self.timeout,
440
- silence_on_errors=(
441
- None if self.current_run.raise_if_has_error else BaseException
442
- )
477
+ silence_on_errors=None,
443
478
  ):
444
479
  pass
langfun/core/logging.py CHANGED
@@ -54,6 +54,25 @@ class LogEntry(pg.Object, pg.views.HtmlTreeView.Extension):
54
54
  def should_output(self, min_log_level: LogLevel) -> bool:
55
55
  return _LOG_LEVELS.index(self.level) >= _LOG_LEVELS.index(min_log_level)
56
56
 
57
+ def format(self,
58
+ compact: bool = False,
59
+ verbose: bool = True,
60
+ root_indent: int = 0,
61
+ *,
62
+ text_format: bool = True,
63
+ **kwargs):
64
+ if text_format:
65
+ s = f"""{self.time.strftime('%H:%M:%S')} {self.level.upper()} - {self.message}"""
66
+ if self.metadata:
67
+ s += f' (metadata: {self.metadata!r})'
68
+ return s
69
+ return super().format(
70
+ compact=compact,
71
+ verbose=verbose,
72
+ root_indent=root_indent,
73
+ **kwargs
74
+ )
75
+
57
76
  def _html_tree_view_summary(
58
77
  self,
59
78
  view: pg.views.HtmlTreeView,
@@ -61,6 +61,25 @@ class LoggingTest(unittest.TestCase):
61
61
  print(actual)
62
62
  self.assertEqual(actual, expected)
63
63
 
64
+ def test_format(self):
65
+ time = datetime.datetime(2024, 10, 10, 12, 30, 45)
66
+ self.assertEqual(
67
+ str(
68
+ logging.LogEntry(
69
+ level='info', message='hello\nworld',
70
+ time=time, metadata=dict(x=1),
71
+ )
72
+ ),
73
+ '12:30:45 INFO - hello\nworld (metadata: {x=1})',
74
+ )
75
+ self.assertIn(
76
+ 'LogEntry(',
77
+ logging.LogEntry(
78
+ level='info', message='hello\nworld',
79
+ time=time, metadata=dict(x=1),
80
+ ).format(text_format=False),
81
+ )
82
+
64
83
  def test_html(self):
65
84
  time = datetime.datetime(2024, 10, 10, 12, 30, 45)
66
85
  self.assert_html_content(
@@ -270,24 +270,31 @@ def call(
270
270
  if schema in (str, None):
271
271
  return lm_output if returns_message else lm_output.text
272
272
 
273
+ def _chain_nl_output_message(parsing_message: lf.Message):
274
+ """Chain the source of the parsed output to the LM output."""
275
+ parsing_message.root.source = lm_output
276
+ parsing_message.tag('parsing-lm-output')
277
+ parsing_message.lm_input.tag('parsing-lm-input')
278
+
273
279
  # Call `parsing_lm` for structured parsing.
274
- parsing_message = querying.query(
275
- lm_output.text,
276
- schema,
277
- examples=parsing_examples,
278
- lm=parsing_lm or lm,
279
- include_context=parsing_include_context,
280
- cache_seed=cache_seed,
281
- autofix=autofix,
282
- autofix_lm=autofix_lm or lm,
283
- protocol=protocol,
284
- returns_message=True,
285
- **kwargs,
286
- )
287
- # Chain the source of the parsed output to the LM output.
288
- parsing_message.root.source = lm_output
289
- parsing_message.tag('parsing-lm-output')
290
- parsing_message.lm_input.tag('parsing-lm-input')
280
+ try:
281
+ parsing_message = querying.query(
282
+ lm_output.text,
283
+ schema,
284
+ examples=parsing_examples,
285
+ lm=parsing_lm or lm,
286
+ include_context=parsing_include_context,
287
+ cache_seed=cache_seed,
288
+ autofix=autofix,
289
+ autofix_lm=autofix_lm or lm,
290
+ protocol=protocol,
291
+ returns_message=True,
292
+ **kwargs,
293
+ )
294
+ _chain_nl_output_message(parsing_message)
295
+ except mapping.MappingError as e:
296
+ _chain_nl_output_message(e.lm_response)
297
+ raise e
291
298
  return parsing_message if returns_message else parsing_message.result
292
299
 
293
300
 
@@ -686,6 +686,31 @@ class CallTest(unittest.TestCase):
686
686
  ],
687
687
  returns_message=True,
688
688
  )
689
+ self.assertIn('parsing-lm-output', output.tags)
690
+ self.assertIn('parsing-lm-input', output.source.tags)
691
+ self.assertEqual(output.root.text, 'Compute 1 + 2')
692
+
693
+ def test_call_with_parsing_message_chaining_on_parsing_error(self):
694
+ try:
695
+ output = parsing.call(
696
+ 'Compute 1 + 2',
697
+ int,
698
+ lm=fake.StaticSequence(['three']),
699
+ parsing_lm=fake.StaticSequence(['abc']),
700
+ parsing_examples=[
701
+ mapping.MappingExample(
702
+ context='Multiple four and five',
703
+ input='twenty',
704
+ schema=int,
705
+ output=20,
706
+ )
707
+ ],
708
+ returns_message=True,
709
+ )
710
+ except mapping.MappingError as e:
711
+ output = e.lm_response
712
+ self.assertIn('parsing-lm-output', output.tags)
713
+ self.assertIn('parsing-lm-input', output.source.tags)
689
714
  self.assertEqual(output.root.text, 'Compute 1 + 2')
690
715
 
691
716
  def test_call_with_autofix(self):
@@ -583,7 +583,16 @@ class QueryInvocation(pg.Object, pg.views.HtmlTreeView.Extension):
583
583
 
584
584
  @functools.cached_property
585
585
  def output(self) -> Any:
586
- return query_output(self.lm_response, self.schema)
586
+ """The output of `lf.query`. If it failed, returns the `MappingError`."""
587
+ try:
588
+ return query_output(self.lm_response, self.schema)
589
+ except mapping.MappingError as e:
590
+ return e
591
+
592
+ @property
593
+ def has_error(self) -> bool:
594
+ """Returns True if the query failed to generate a valid output."""
595
+ return isinstance(self.output, BaseException)
587
596
 
588
597
  @property
589
598
  def elapse(self) -> float:
@@ -1051,6 +1051,16 @@ class QueryStructureJsonTest(unittest.TestCase):
1051
1051
 
1052
1052
  class QueryInvocationTest(unittest.TestCase):
1053
1053
 
1054
+ def test_basics(self):
1055
+ lm = fake.StaticSequence([
1056
+ 'Activity(description="hi"',
1057
+ ])
1058
+ with querying.track_queries() as queries:
1059
+ querying.query('foo', Activity, default=None, lm=lm)
1060
+
1061
+ self.assertTrue(queries[0].has_error)
1062
+ self.assertIsInstance(queries[0].output, mapping.MappingError)
1063
+
1054
1064
  def test_to_html(self):
1055
1065
  lm = fake.StaticSequence([
1056
1066
  'Activity(description="hi")',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langfun
3
- Version: 0.1.2.dev202412180804
3
+ Version: 0.1.2.dev202412200804
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -10,8 +10,8 @@ langfun/core/langfunc.py,sha256=G50YgoVZ0y1GFw2ev41MlOqr6qa8YakbvNC0h_E0PiA,1114
10
10
  langfun/core/langfunc_test.py,sha256=fKIAqcSNI_7M6nwoZW77HEam8Oa6vcWhsCNgVJanzb4,8822
11
11
  langfun/core/language_model.py,sha256=b15MZ_qbydnz5vQ09t7sf9tc3C7qWvMSxUrGfT0p99I,33827
12
12
  langfun/core/language_model_test.py,sha256=hnYhtw7GM_TbhgsJzHNYTaoDewUlPHpOVlI7xEkCFuI,31783
13
- langfun/core/logging.py,sha256=uslllP0RTGN223oro1m4nZZ0bFppcL07OwbFKm2iG6k,7519
14
- langfun/core/logging_test.py,sha256=b5bPTSUoYeICATaO6I8dOVumodwRbxSp1Oz96Sf3KcE,6104
13
+ langfun/core/logging.py,sha256=W3mLEMXdo210Q5OX3a1ZTc4nU-xMy73-IfNKnsA-RFo,8051
14
+ langfun/core/logging_test.py,sha256=N7-YvSXC8zvnr2SNwWHOykn1CFmqvIuTLDgn41Ku9JU,6642
15
15
  langfun/core/memory.py,sha256=f-asN1F7Vehgdn_fK84v73GrEUOxRtaW934keutTKjk,2416
16
16
  langfun/core/message.py,sha256=16oiMpg9O9VKrgpfrvJrfvga3n3FzUuD_zdWb9nvSWA,25686
17
17
  langfun/core/message_test.py,sha256=jtZoNBNbA99i2fjoKg5vTRgoUe84J4MH8ZMGakGmTHs,32577
@@ -58,13 +58,13 @@ langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrC
58
58
  langfun/core/eval/scoring.py,sha256=B69IsIxiPs1xZcOBFIhZF70YmDue2Siik-CPL2bh33s,6254
59
59
  langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se2SXM,4546
60
60
  langfun/core/eval/v2/__init__.py,sha256=qoa6zKdFXOFyCX6vay6OdgPf1eUhYGoHYAxe35qECGk,1628
61
- langfun/core/eval/v2/checkpointing.py,sha256=8vxH3AfIBS8dxA0IiOZBUxAHXIx5m2tSWSSumDLpzp8,6546
61
+ langfun/core/eval/v2/checkpointing.py,sha256=ZYKHN91pALZjnoMdikc-JdJ-HiBRIqitOUWbFablFI8,8367
62
62
  langfun/core/eval/v2/checkpointing_test.py,sha256=dAERKQTW_PM1B0oUauB0YVQkMEI-cgJq0q-wAVlGYpU,4383
63
- langfun/core/eval/v2/evaluation.py,sha256=h_AWRUSKhEs-bHLBgqo-GeBYXluD5bPbAqypRW0ajfA,19441
64
- langfun/core/eval/v2/evaluation_test.py,sha256=hh6L2HhQPQ6NBv1pXKcNkYraNcV9MLuJ--69t9jbmaI,5846
63
+ langfun/core/eval/v2/evaluation.py,sha256=NFBGAWw2BtW7H0zcoZhfWtz59Psra84eshJm73uAFwg,21807
64
+ langfun/core/eval/v2/evaluation_test.py,sha256=ld8oBOjsfN-LNLL2eViSTu17wAq90GcsfURXX6oVlFo,6014
65
65
  langfun/core/eval/v2/example.py,sha256=fURrvdNmMsVMqoEErcsmLmC6Xq3ny16dYsnLH8HVlcY,9626
66
66
  langfun/core/eval/v2/example_test.py,sha256=WcJmU7IQQXvjFia63mokySC4CqxzVL9Wso1sC5F0YK8,3032
67
- langfun/core/eval/v2/experiment.py,sha256=0JBGckJ93aqSdffpJPDVPy_I5T2BXscghTxiglHzJWo,29556
67
+ langfun/core/eval/v2/experiment.py,sha256=xfk4aNZ3dH46y0lWSS_fC7JpfJCG77Z5qsakV4gHcOs,29762
68
68
  langfun/core/eval/v2/experiment_test.py,sha256=zSMHYqC9cA0k61U71pCSYTAJ6yK2_b6Dml5btc-bKzQ,9133
69
69
  langfun/core/eval/v2/metric_values.py,sha256=_B905bC-jxrYPLSEcP2M8MaHZOVMz_bVrUw8YC4arCE,4660
70
70
  langfun/core/eval/v2/metric_values_test.py,sha256=ab2oF_HsIwrSy459108ggyjgefHSPn8UVILR4dRwx14,2634
@@ -74,9 +74,9 @@ langfun/core/eval/v2/progress.py,sha256=azZgssQgNdv3IgjKEaQBuGI5ucFDNbdi02P4z_nQ
74
74
  langfun/core/eval/v2/progress_test.py,sha256=YU7VHzmy5knPZwj9vpBN3rQQH2tukj9eKHkuBCI62h8,2540
75
75
  langfun/core/eval/v2/progress_tracking.py,sha256=l9fEkz4oP5McpZzf72Ua7PYm3lAWtRru7gRWNf8H0ms,6083
76
76
  langfun/core/eval/v2/progress_tracking_test.py,sha256=iO-DslCJWncU7-27XaMKxDeKrsGbwdk_tKfoRk3KboE,2271
77
- langfun/core/eval/v2/reporting.py,sha256=TGkli1IDwqfqsCJ_WslOMGk_24JDg7oRRTGXlAJlWpc,4361
77
+ langfun/core/eval/v2/reporting.py,sha256=vsh45GLVnA7GMU-8cvNYOt4Nb7mEwvcguhO-BSXSzTE,5358
78
78
  langfun/core/eval/v2/reporting_test.py,sha256=JxffbUPWInUyLjo-AQVFrllga884Mdfm05R86FtxSss,1482
79
- langfun/core/eval/v2/runners.py,sha256=nh5qIAkdlY1MohDfiPkFcCY_By1SN0A1SOqmaShGziM,14339
79
+ langfun/core/eval/v2/runners.py,sha256=iTARDpPHPJKZL5Hu3k-O2LhK1ICOo1Ywbm5e7O2rNBA,15680
80
80
  langfun/core/eval/v2/runners_test.py,sha256=UeiUNygux_U6iGVG18rhp68ZE4hoWeoT6XsXvSjxNQg,11620
81
81
  langfun/core/eval/v2/test_helper.py,sha256=pDpZTBnWRR5xjJv3Uy3NWEzArqlL8FTMOgeR4C53F5M,2348
82
82
  langfun/core/llms/__init__.py,sha256=lWXKjGHv66ShG7AE_Bc4QM7SDTxJdfoQMn3PF0lr0sU,6461
@@ -127,10 +127,10 @@ langfun/core/structured/function_generation.py,sha256=g7AOR_e8HxFU6n6Df750aGkgMg
127
127
  langfun/core/structured/function_generation_test.py,sha256=LaXYDXf9GlqUrR6v_gtmK_H4kxzonmU7SYbn7XXMgjU,12128
128
128
  langfun/core/structured/mapping.py,sha256=vLKH79UT-j0qkQdvqlQBO7SkXXuM-yr2Idm8_HH8qwM,13649
129
129
  langfun/core/structured/mapping_test.py,sha256=bHm2ZCXBITq_G8Lvw_olFHeUUc4s_lGXZm9v9JhoPB4,9630
130
- langfun/core/structured/parsing.py,sha256=lhEkdnvxKzkYwHsTvBdE2j6uLWl-J8uQu6c-3xcsBXM,11770
131
- langfun/core/structured/parsing_test.py,sha256=-uPiLi0cRBkf0ZycZsgLPIfRLLdwYhRbm2LHHp_pVGE,21475
132
- langfun/core/structured/querying.py,sha256=sXGhYtiEBac8iOkYOErGXyX8SAHSB1gg69WePhOyGxE,22759
133
- langfun/core/structured/querying_test.py,sha256=M9Apg83KjQUjT42K9LheBEr74DX3Inwd0YmCanA71kc,31738
130
+ langfun/core/structured/parsing.py,sha256=MGvI7ypXlwfzr5XB8_TFU9Ei0_5reYqkWkv64eAy0EA,12015
131
+ langfun/core/structured/parsing_test.py,sha256=kNPrhpdPY3iWhUld0TFYU-Zgn44wC0d6YuQ9XdVbQ8o,22346
132
+ langfun/core/structured/querying.py,sha256=nqvsfMS_KLv5EvO0_VAGEHwY4pHy4S0CvJmeV0HBXlM,23066
133
+ langfun/core/structured/querying_test.py,sha256=YlC4s9LVChfhGZzaXGW1UYlcBnAjNOunu4SLl5_p7PQ,32054
134
134
  langfun/core/structured/schema.py,sha256=0VUPSfX1JEQ0xu8WvEymCKK_WSGwBNA-rQD2hATErmU,27912
135
135
  langfun/core/structured/schema_generation.py,sha256=U3nRQsqmMZg_qIVDh2fiY3K4JLfsAL1LcKzIFP1iXFg,5316
136
136
  langfun/core/structured/schema_generation_test.py,sha256=RM9s71kMNg2jTePwInkiW9fK1ACN37eyPeF8OII-0zw,2950
@@ -148,8 +148,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
148
148
  langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
149
149
  langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
150
150
  langfun/core/templates/selfplay_test.py,sha256=Ot__1P1M8oJfoTp-M9-PQ6HUXqZKyMwvZ5f7yQ3yfyM,2326
151
- langfun-0.1.2.dev202412180804.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
152
- langfun-0.1.2.dev202412180804.dist-info/METADATA,sha256=WA_ko5VxtjWbxeWozO3MMzeAVsCi1CX0UVMbznPggvk,8281
153
- langfun-0.1.2.dev202412180804.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
154
- langfun-0.1.2.dev202412180804.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
155
- langfun-0.1.2.dev202412180804.dist-info/RECORD,,
151
+ langfun-0.1.2.dev202412200804.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
152
+ langfun-0.1.2.dev202412200804.dist-info/METADATA,sha256=7IgyNjTrvkrYSyuwvSrBzrY8tzacbgL1qLaoonRRgxc,8281
153
+ langfun-0.1.2.dev202412200804.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
154
+ langfun-0.1.2.dev202412200804.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
155
+ langfun-0.1.2.dev202412200804.dist-info/RECORD,,