langfun 0.1.2.dev202412180804__py3-none-any.whl → 0.1.2.dev202412190804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/eval/v2/evaluation.py +73 -6
- langfun/core/eval/v2/evaluation_test.py +6 -0
- langfun/core/logging.py +19 -0
- langfun/core/logging_test.py +19 -0
- langfun/core/structured/parsing.py +24 -17
- langfun/core/structured/parsing_test.py +25 -0
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/RECORD +11 -11
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/LICENSE +0 -0
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/evaluation.py
CHANGED
@@ -14,7 +14,9 @@
|
|
14
14
|
"""Base class for Langfun evaluation tasks."""
|
15
15
|
|
16
16
|
import abc
|
17
|
+
import datetime
|
17
18
|
import functools
|
19
|
+
import threading
|
18
20
|
import time
|
19
21
|
|
20
22
|
from typing import Annotated, Any, Callable, Iterable
|
@@ -63,6 +65,8 @@ class Evaluation(experiment_lib.Experiment):
|
|
63
65
|
self.__dict__.pop('is_leaf', None)
|
64
66
|
self.__dict__.pop('children', None)
|
65
67
|
super()._on_bound()
|
68
|
+
self._log_entries = []
|
69
|
+
self._log_lock = threading.Lock()
|
66
70
|
|
67
71
|
#
|
68
72
|
# Handling evaluation hierarchy (materialized vs. hyper evaluations).
|
@@ -277,6 +281,41 @@ class Evaluation(experiment_lib.Experiment):
|
|
277
281
|
for metric in self.metrics:
|
278
282
|
metric.reset()
|
279
283
|
|
284
|
+
#
|
285
|
+
# Evaluation-level logging.
|
286
|
+
#
|
287
|
+
|
288
|
+
def _log(self, level: lf.logging.LogLevel, message: str, **kwargs):
|
289
|
+
with self._log_lock:
|
290
|
+
self._log_entries.append(
|
291
|
+
lf.logging.LogEntry(
|
292
|
+
level=level,
|
293
|
+
time=datetime.datetime.now(),
|
294
|
+
message=message,
|
295
|
+
metadata=kwargs,
|
296
|
+
)
|
297
|
+
)
|
298
|
+
|
299
|
+
def debug(self, message: str, **kwargs):
|
300
|
+
"""Logs a debug message to the session."""
|
301
|
+
self._log('debug', message, **kwargs)
|
302
|
+
|
303
|
+
def info(self, message: str, **kwargs):
|
304
|
+
"""Logs an info message to the session."""
|
305
|
+
self._log('info', message, **kwargs)
|
306
|
+
|
307
|
+
def warning(self, message: str, **kwargs):
|
308
|
+
"""Logs a warning message to the session."""
|
309
|
+
self._log('warning', message, **kwargs)
|
310
|
+
|
311
|
+
def error(self, message: str, **kwargs):
|
312
|
+
"""Logs an error message to the session."""
|
313
|
+
self._log('error', message, **kwargs)
|
314
|
+
|
315
|
+
def fatal(self, message: str, **kwargs):
|
316
|
+
"""Logs a fatal message to the session."""
|
317
|
+
self._log('fatal', message, **kwargs)
|
318
|
+
|
280
319
|
#
|
281
320
|
# HTML views.
|
282
321
|
#
|
@@ -465,6 +504,25 @@ class Evaluation(experiment_lib.Experiment):
|
|
465
504
|
)
|
466
505
|
)
|
467
506
|
|
507
|
+
def _logs_tab() -> pg.views.html.controls.Tab:
|
508
|
+
"""Renders a tab for the logs of the evaluation."""
|
509
|
+
with self._log_lock:
|
510
|
+
log_history = '\n'.join(str(l) for l in self._log_entries)
|
511
|
+
return pg.views.html.controls.Tab(
|
512
|
+
label='Logs',
|
513
|
+
content=pg.Html.element(
|
514
|
+
'div',
|
515
|
+
[
|
516
|
+
pg.Html.element(
|
517
|
+
'textarea',
|
518
|
+
[pg.Html.escape(log_history)],
|
519
|
+
readonly=True,
|
520
|
+
css_classes=['logs-textarea'],
|
521
|
+
)
|
522
|
+
]
|
523
|
+
)
|
524
|
+
)
|
525
|
+
|
468
526
|
def _main_tabs() -> pg.Html:
|
469
527
|
return pg.Html.element(
|
470
528
|
'div',
|
@@ -474,6 +532,8 @@ class Evaluation(experiment_lib.Experiment):
|
|
474
532
|
_definition_tab(),
|
475
533
|
] + [
|
476
534
|
_metric_tab(m) for m in self.metrics
|
535
|
+
] + [
|
536
|
+
_logs_tab()
|
477
537
|
],
|
478
538
|
selected=1,
|
479
539
|
)
|
@@ -593,6 +653,14 @@ class Evaluation(experiment_lib.Experiment):
|
|
593
653
|
width:100%;
|
594
654
|
height:100%;
|
595
655
|
}
|
656
|
+
.logs-textarea {
|
657
|
+
width: 100%;
|
658
|
+
height: 500px;
|
659
|
+
padding: 5px;
|
660
|
+
border: 1px solid #DDD;
|
661
|
+
background-color: #EEE;
|
662
|
+
resize: vertical;
|
663
|
+
}
|
596
664
|
"""
|
597
665
|
]
|
598
666
|
|
@@ -615,6 +683,11 @@ class EvaluationState:
|
|
615
683
|
assert isinstance(example, example_lib.Example), example
|
616
684
|
self._evaluated_examples[example.id] = example
|
617
685
|
|
686
|
+
@property
|
687
|
+
def evaluated_examples(self) -> dict[int, example_lib.Example]:
|
688
|
+
"""Returns the examples in the state."""
|
689
|
+
return self._evaluated_examples
|
690
|
+
|
618
691
|
def get(self, example_id: int) -> example_lib.Example | None:
|
619
692
|
"""Returns the example with the given ID."""
|
620
693
|
return self._evaluated_examples.get(example_id)
|
@@ -622,9 +695,3 @@ class EvaluationState:
|
|
622
695
|
def update(self, example: example_lib.Example) -> None:
|
623
696
|
"""Updates the state with the given example."""
|
624
697
|
self._evaluated_examples[example.id] = example
|
625
|
-
|
626
|
-
@property
|
627
|
-
def evaluated_examples(self) -> dict[int, example_lib.Example]:
|
628
|
-
"""Returns the examples in the state."""
|
629
|
-
return self._evaluated_examples
|
630
|
-
|
langfun/core/eval/v2/evaluation_test.py
CHANGED
@@ -133,6 +133,12 @@ class EvaluationTest(unittest.TestCase):
|
|
133
133
|
|
134
134
|
def test_html_view(self):
|
135
135
|
exp = test_helper.TestEvaluation()
|
136
|
+
exp.debug('debug message')
|
137
|
+
exp.info('info message')
|
138
|
+
exp.warning('warning message', x=1)
|
139
|
+
exp.error('error message', x=1)
|
140
|
+
exp.fatal('fatal message')
|
141
|
+
|
136
142
|
self.assertIn(
|
137
143
|
exp.id,
|
138
144
|
exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content
|
langfun/core/logging.py
CHANGED
@@ -54,6 +54,25 @@ class LogEntry(pg.Object, pg.views.HtmlTreeView.Extension):
|
|
54
54
|
def should_output(self, min_log_level: LogLevel) -> bool:
|
55
55
|
return _LOG_LEVELS.index(self.level) >= _LOG_LEVELS.index(min_log_level)
|
56
56
|
|
57
|
+
def format(self,
|
58
|
+
compact: bool = False,
|
59
|
+
verbose: bool = True,
|
60
|
+
root_indent: int = 0,
|
61
|
+
*,
|
62
|
+
text_format: bool = True,
|
63
|
+
**kwargs):
|
64
|
+
if text_format:
|
65
|
+
s = f"""{self.time.strftime('%H:%M:%S')} {self.level.upper()} - {self.message}"""
|
66
|
+
if self.metadata:
|
67
|
+
s += f' (metadata: {self.metadata!r})'
|
68
|
+
return s
|
69
|
+
return super().format(
|
70
|
+
compact=compact,
|
71
|
+
verbose=verbose,
|
72
|
+
root_indent=root_indent,
|
73
|
+
**kwargs
|
74
|
+
)
|
75
|
+
|
57
76
|
def _html_tree_view_summary(
|
58
77
|
self,
|
59
78
|
view: pg.views.HtmlTreeView,
|
langfun/core/logging_test.py
CHANGED
@@ -61,6 +61,25 @@ class LoggingTest(unittest.TestCase):
|
|
61
61
|
print(actual)
|
62
62
|
self.assertEqual(actual, expected)
|
63
63
|
|
64
|
+
def test_format(self):
|
65
|
+
time = datetime.datetime(2024, 10, 10, 12, 30, 45)
|
66
|
+
self.assertEqual(
|
67
|
+
str(
|
68
|
+
logging.LogEntry(
|
69
|
+
level='info', message='hello\nworld',
|
70
|
+
time=time, metadata=dict(x=1),
|
71
|
+
)
|
72
|
+
),
|
73
|
+
'12:30:45 INFO - hello\nworld (metadata: {x=1})',
|
74
|
+
)
|
75
|
+
self.assertIn(
|
76
|
+
'LogEntry(',
|
77
|
+
logging.LogEntry(
|
78
|
+
level='info', message='hello\nworld',
|
79
|
+
time=time, metadata=dict(x=1),
|
80
|
+
).format(text_format=False),
|
81
|
+
)
|
82
|
+
|
64
83
|
def test_html(self):
|
65
84
|
time = datetime.datetime(2024, 10, 10, 12, 30, 45)
|
66
85
|
self.assert_html_content(
|
langfun/core/structured/parsing.py
CHANGED
@@ -270,24 +270,31 @@ def call(
|
|
270
270
|
if schema in (str, None):
|
271
271
|
return lm_output if returns_message else lm_output.text
|
272
272
|
|
273
|
+
def _chain_nl_output_message(parsing_message: lf.Message):
|
274
|
+
"""Chain the source of the parsed output to the LM output."""
|
275
|
+
parsing_message.root.source = lm_output
|
276
|
+
parsing_message.tag('parsing-lm-output')
|
277
|
+
parsing_message.lm_input.tag('parsing-lm-input')
|
278
|
+
|
273
279
|
# Call `parsing_lm` for structured parsing.
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
280
|
+
try:
|
281
|
+
parsing_message = querying.query(
|
282
|
+
lm_output.text,
|
283
|
+
schema,
|
284
|
+
examples=parsing_examples,
|
285
|
+
lm=parsing_lm or lm,
|
286
|
+
include_context=parsing_include_context,
|
287
|
+
cache_seed=cache_seed,
|
288
|
+
autofix=autofix,
|
289
|
+
autofix_lm=autofix_lm or lm,
|
290
|
+
protocol=protocol,
|
291
|
+
returns_message=True,
|
292
|
+
**kwargs,
|
293
|
+
)
|
294
|
+
_chain_nl_output_message(parsing_message)
|
295
|
+
except mapping.MappingError as e:
|
296
|
+
_chain_nl_output_message(e.lm_response)
|
297
|
+
raise e
|
291
298
|
return parsing_message if returns_message else parsing_message.result
|
292
299
|
|
293
300
|
|
langfun/core/structured/parsing_test.py
CHANGED
@@ -686,6 +686,31 @@ class CallTest(unittest.TestCase):
|
|
686
686
|
],
|
687
687
|
returns_message=True,
|
688
688
|
)
|
689
|
+
self.assertIn('parsing-lm-output', output.tags)
|
690
|
+
self.assertIn('parsing-lm-input', output.source.tags)
|
691
|
+
self.assertEqual(output.root.text, 'Compute 1 + 2')
|
692
|
+
|
693
|
+
def test_call_with_parsing_message_chaining_on_parsing_error(self):
|
694
|
+
try:
|
695
|
+
output = parsing.call(
|
696
|
+
'Compute 1 + 2',
|
697
|
+
int,
|
698
|
+
lm=fake.StaticSequence(['three']),
|
699
|
+
parsing_lm=fake.StaticSequence(['abc']),
|
700
|
+
parsing_examples=[
|
701
|
+
mapping.MappingExample(
|
702
|
+
context='Multiple four and five',
|
703
|
+
input='twenty',
|
704
|
+
schema=int,
|
705
|
+
output=20,
|
706
|
+
)
|
707
|
+
],
|
708
|
+
returns_message=True,
|
709
|
+
)
|
710
|
+
except mapping.MappingError as e:
|
711
|
+
output = e.lm_response
|
712
|
+
self.assertIn('parsing-lm-output', output.tags)
|
713
|
+
self.assertIn('parsing-lm-input', output.source.tags)
|
689
714
|
self.assertEqual(output.root.text, 'Compute 1 + 2')
|
690
715
|
|
691
716
|
def test_call_with_autofix(self):
|
{langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/RECORD
RENAMED
@@ -10,8 +10,8 @@ langfun/core/langfunc.py,sha256=G50YgoVZ0y1GFw2ev41MlOqr6qa8YakbvNC0h_E0PiA,1114
|
|
10
10
|
langfun/core/langfunc_test.py,sha256=fKIAqcSNI_7M6nwoZW77HEam8Oa6vcWhsCNgVJanzb4,8822
|
11
11
|
langfun/core/language_model.py,sha256=b15MZ_qbydnz5vQ09t7sf9tc3C7qWvMSxUrGfT0p99I,33827
|
12
12
|
langfun/core/language_model_test.py,sha256=hnYhtw7GM_TbhgsJzHNYTaoDewUlPHpOVlI7xEkCFuI,31783
|
13
|
-
langfun/core/logging.py,sha256=
|
14
|
-
langfun/core/logging_test.py,sha256=
|
13
|
+
langfun/core/logging.py,sha256=W3mLEMXdo210Q5OX3a1ZTc4nU-xMy73-IfNKnsA-RFo,8051
|
14
|
+
langfun/core/logging_test.py,sha256=N7-YvSXC8zvnr2SNwWHOykn1CFmqvIuTLDgn41Ku9JU,6642
|
15
15
|
langfun/core/memory.py,sha256=f-asN1F7Vehgdn_fK84v73GrEUOxRtaW934keutTKjk,2416
|
16
16
|
langfun/core/message.py,sha256=16oiMpg9O9VKrgpfrvJrfvga3n3FzUuD_zdWb9nvSWA,25686
|
17
17
|
langfun/core/message_test.py,sha256=jtZoNBNbA99i2fjoKg5vTRgoUe84J4MH8ZMGakGmTHs,32577
|
@@ -60,8 +60,8 @@ langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se
|
|
60
60
|
langfun/core/eval/v2/__init__.py,sha256=qoa6zKdFXOFyCX6vay6OdgPf1eUhYGoHYAxe35qECGk,1628
|
61
61
|
langfun/core/eval/v2/checkpointing.py,sha256=8vxH3AfIBS8dxA0IiOZBUxAHXIx5m2tSWSSumDLpzp8,6546
|
62
62
|
langfun/core/eval/v2/checkpointing_test.py,sha256=dAERKQTW_PM1B0oUauB0YVQkMEI-cgJq0q-wAVlGYpU,4383
|
63
|
-
langfun/core/eval/v2/evaluation.py,sha256=
|
64
|
-
langfun/core/eval/v2/evaluation_test.py,sha256=
|
63
|
+
langfun/core/eval/v2/evaluation.py,sha256=7PC-npbEQjwwv0pWbv8vGi_OkzZ7QpJrEpYoixFBlno,21429
|
64
|
+
langfun/core/eval/v2/evaluation_test.py,sha256=ld8oBOjsfN-LNLL2eViSTu17wAq90GcsfURXX6oVlFo,6014
|
65
65
|
langfun/core/eval/v2/example.py,sha256=fURrvdNmMsVMqoEErcsmLmC6Xq3ny16dYsnLH8HVlcY,9626
|
66
66
|
langfun/core/eval/v2/example_test.py,sha256=WcJmU7IQQXvjFia63mokySC4CqxzVL9Wso1sC5F0YK8,3032
|
67
67
|
langfun/core/eval/v2/experiment.py,sha256=0JBGckJ93aqSdffpJPDVPy_I5T2BXscghTxiglHzJWo,29556
|
@@ -127,8 +127,8 @@ langfun/core/structured/function_generation.py,sha256=g7AOR_e8HxFU6n6Df750aGkgMg
|
|
127
127
|
langfun/core/structured/function_generation_test.py,sha256=LaXYDXf9GlqUrR6v_gtmK_H4kxzonmU7SYbn7XXMgjU,12128
|
128
128
|
langfun/core/structured/mapping.py,sha256=vLKH79UT-j0qkQdvqlQBO7SkXXuM-yr2Idm8_HH8qwM,13649
|
129
129
|
langfun/core/structured/mapping_test.py,sha256=bHm2ZCXBITq_G8Lvw_olFHeUUc4s_lGXZm9v9JhoPB4,9630
|
130
|
-
langfun/core/structured/parsing.py,sha256=
|
131
|
-
langfun/core/structured/parsing_test.py,sha256
|
130
|
+
langfun/core/structured/parsing.py,sha256=MGvI7ypXlwfzr5XB8_TFU9Ei0_5reYqkWkv64eAy0EA,12015
|
131
|
+
langfun/core/structured/parsing_test.py,sha256=kNPrhpdPY3iWhUld0TFYU-Zgn44wC0d6YuQ9XdVbQ8o,22346
|
132
132
|
langfun/core/structured/querying.py,sha256=sXGhYtiEBac8iOkYOErGXyX8SAHSB1gg69WePhOyGxE,22759
|
133
133
|
langfun/core/structured/querying_test.py,sha256=M9Apg83KjQUjT42K9LheBEr74DX3Inwd0YmCanA71kc,31738
|
134
134
|
langfun/core/structured/schema.py,sha256=0VUPSfX1JEQ0xu8WvEymCKK_WSGwBNA-rQD2hATErmU,27912
|
@@ -148,8 +148,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
|
|
148
148
|
langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
|
149
149
|
langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
|
150
150
|
langfun/core/templates/selfplay_test.py,sha256=Ot__1P1M8oJfoTp-M9-PQ6HUXqZKyMwvZ5f7yQ3yfyM,2326
|
151
|
-
langfun-0.1.2.
|
152
|
-
langfun-0.1.2.
|
153
|
-
langfun-0.1.2.
|
154
|
-
langfun-0.1.2.
|
155
|
-
langfun-0.1.2.
|
151
|
+
langfun-0.1.2.dev202412190804.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
152
|
+
langfun-0.1.2.dev202412190804.dist-info/METADATA,sha256=Zr8TfOnhdo83h3aGRNRWXTrJ54h7Sh7E-7Lj95iJVDw,8281
|
153
|
+
langfun-0.1.2.dev202412190804.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
154
|
+
langfun-0.1.2.dev202412190804.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
|
155
|
+
langfun-0.1.2.dev202412190804.dist-info/RECORD,,
|
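{langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/LICENSE
RENAMED
File without changes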
|
{langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/WHEEL
RENAMED
File without changes
|
{langfun-0.1.2.dev202412180804.dist-info → langfun-0.1.2.dev202412190804.dist-info}/top_level.txt
RENAMED
File without changes
|