langfun 0.1.2.dev202509020804__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langfun might be problematic.

Files changed (133)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +6 -1
  3. langfun/core/agentic/__init__.py +4 -0
  4. langfun/core/agentic/action.py +412 -103
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +68 -6
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +9 -2
  20. langfun/core/data/conversion/gemini_test.py +12 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +47 -43
  24. langfun/core/eval/base_test.py +4 -4
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +1 -0
  29. langfun/core/eval/v2/checkpointing.py +30 -4
  30. langfun/core/eval/v2/eval_test_helper.py +1 -1
  31. langfun/core/eval/v2/evaluation.py +60 -14
  32. langfun/core/eval/v2/example.py +22 -11
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +39 -4
  37. langfun/core/eval/v2/metrics_test.py +14 -0
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +6 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners.py +27 -7
  44. langfun/core/eval/v2/runners_test.py +3 -0
  45. langfun/core/langfunc.py +45 -130
  46. langfun/core/langfunc_test.py +6 -4
  47. langfun/core/language_model.py +151 -31
  48. langfun/core/language_model_test.py +9 -3
  49. langfun/core/llms/__init__.py +12 -1
  50. langfun/core/llms/anthropic.py +157 -2
  51. langfun/core/llms/azure_openai.py +29 -17
  52. langfun/core/llms/cache/base.py +25 -3
  53. langfun/core/llms/cache/in_memory.py +48 -7
  54. langfun/core/llms/cache/in_memory_test.py +14 -4
  55. langfun/core/llms/compositional.py +25 -1
  56. langfun/core/llms/deepseek.py +30 -2
  57. langfun/core/llms/fake.py +39 -1
  58. langfun/core/llms/fake_test.py +9 -0
  59. langfun/core/llms/gemini.py +43 -7
  60. langfun/core/llms/google_genai.py +34 -1
  61. langfun/core/llms/groq.py +28 -3
  62. langfun/core/llms/llama_cpp.py +23 -4
  63. langfun/core/llms/openai.py +93 -3
  64. langfun/core/llms/openai_compatible.py +148 -27
  65. langfun/core/llms/openai_compatible_test.py +207 -20
  66. langfun/core/llms/openai_test.py +0 -2
  67. langfun/core/llms/rest.py +16 -1
  68. langfun/core/llms/vertexai.py +59 -8
  69. langfun/core/logging.py +1 -1
  70. langfun/core/mcp/__init__.py +10 -0
  71. langfun/core/mcp/client.py +177 -0
  72. langfun/core/mcp/client_test.py +71 -0
  73. langfun/core/mcp/session.py +241 -0
  74. langfun/core/mcp/session_test.py +54 -0
  75. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  76. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  77. langfun/core/mcp/tool.py +256 -0
  78. langfun/core/mcp/tool_test.py +197 -0
  79. langfun/core/memory.py +1 -0
  80. langfun/core/message.py +160 -55
  81. langfun/core/message_test.py +65 -81
  82. langfun/core/modalities/__init__.py +8 -0
  83. langfun/core/modalities/audio.py +21 -1
  84. langfun/core/modalities/image.py +19 -1
  85. langfun/core/modalities/mime.py +62 -3
  86. langfun/core/modalities/pdf.py +19 -1
  87. langfun/core/modalities/video.py +21 -1
  88. langfun/core/modality.py +167 -29
  89. langfun/core/modality_test.py +42 -12
  90. langfun/core/natural_language.py +1 -1
  91. langfun/core/sampling.py +4 -4
  92. langfun/core/sampling_test.py +20 -4
  93. langfun/core/structured/completion.py +34 -44
  94. langfun/core/structured/completion_test.py +23 -43
  95. langfun/core/structured/description.py +54 -50
  96. langfun/core/structured/function_generation.py +29 -12
  97. langfun/core/structured/mapping.py +74 -28
  98. langfun/core/structured/parsing.py +90 -74
  99. langfun/core/structured/parsing_test.py +0 -3
  100. langfun/core/structured/querying.py +242 -156
  101. langfun/core/structured/querying_test.py +95 -64
  102. langfun/core/structured/schema.py +70 -10
  103. langfun/core/structured/schema_generation.py +33 -14
  104. langfun/core/structured/scoring.py +45 -34
  105. langfun/core/structured/tokenization.py +24 -9
  106. langfun/core/subscription.py +2 -2
  107. langfun/core/template.py +175 -50
  108. langfun/core/template_test.py +123 -17
  109. langfun/env/__init__.py +43 -0
  110. langfun/env/base_environment.py +827 -0
  111. langfun/env/base_environment_test.py +473 -0
  112. langfun/env/base_feature.py +304 -0
  113. langfun/env/base_feature_test.py +228 -0
  114. langfun/env/base_sandbox.py +842 -0
  115. langfun/env/base_sandbox_test.py +1235 -0
  116. langfun/env/event_handlers/__init__.py +14 -0
  117. langfun/env/event_handlers/chain.py +233 -0
  118. langfun/env/event_handlers/chain_test.py +253 -0
  119. langfun/env/event_handlers/event_logger.py +472 -0
  120. langfun/env/event_handlers/event_logger_test.py +304 -0
  121. langfun/env/event_handlers/metric_writer.py +726 -0
  122. langfun/env/event_handlers/metric_writer_test.py +214 -0
  123. langfun/env/interface.py +1640 -0
  124. langfun/env/interface_test.py +151 -0
  125. langfun/env/load_balancers.py +59 -0
  126. langfun/env/load_balancers_test.py +139 -0
  127. langfun/env/test_utils.py +497 -0
  128. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
  129. langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
  130. langfun-0.1.2.dev202509020804.dist-info/RECORD +0 -172
  131. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
  132. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
  133. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
@@ -249,35 +249,60 @@ class QueryTest(unittest.TestCase):
 
  def test_root_modality_to_structure_render(self):
  lm = fake.StaticResponse('1')
+ image = modalities.Image.from_bytes(b'mock_image')
  self.assert_render(
- modalities.Image.from_bytes(b'mock_image'),
+ image,
  int,
  lm=lm,
- expected_snippet='\n\nREQUEST:\n <<[[input]]>>\n\n',
+ expected_snippet=f'\n\nREQUEST:\n <<[[{image.id}]]>>\n\n',
  expected_modalities=1,
  )
 
  def test_root_modality_to_str_render(self):
  lm = fake.StaticResponse('1')
+ modality = modalities.Image.from_bytes(b'mock_image')
  self.assert_render(
- modalities.Image.from_bytes(b'mock_image'),
+ modality,
  None,
  lm=lm,
- expected_snippet='<<[[input]]>>',
+ expected_snippet=f'<<[[{modality.id}]]>>',
  exact_match=True,
  expected_modalities=1,
  )
 
  def test_str_with_modality_to_str_render(self):
  lm = fake.StaticResponse('A cat and a mouse.')
+ cat_image = modalities.Image.from_bytes(b'cat_image')
+ mouse_image = modalities.Image.from_bytes(b'mouse_image')
  self.assert_render(
  'What are these? {{this_image}} and {{that_image}}',
  None,
- this_image=modalities.Image.from_bytes(b'cat_image'),
- that_image=modalities.Image.from_bytes(b'mouse_image'),
+ this_image=cat_image,
+ that_image=mouse_image,
  lm=lm,
  expected_snippet=(
- 'What are these? <<[[this_image]]>> and <<[[that_image]]>>'
+ f'What are these? <<[[{cat_image.id}]]>> and '
+ f'<<[[{mouse_image.id}]]>>'
+ ),
+ exact_match=True,
+ expected_modalities=2,
+ )
+
+ def test_message_with_modality_to_str_render(self):
+ lm = fake.StaticResponse('A cat and a mouse.')
+ cat_image = modalities.Image.from_bytes(b'cat_image')
+ mouse_image = modalities.Image.from_bytes(b'mouse_image')
+ self.assert_render(
+ lf.Template(
+ 'What are these? {{this_image}} and {{that_image}}',
+ this_image=cat_image,
+ that_image=mouse_image,
+ ).render(),
+ None,
+ lm=lm,
+ expected_snippet=(
+ f'What are these? <<[[{cat_image.id}]]>> and '
+ f'<<[[{mouse_image.id}]]>>'
  ),
  exact_match=True,
  expected_modalities=2,
@@ -285,33 +310,33 @@ class QueryTest(unittest.TestCase):
 
  def test_structure_with_modality_to_str_render(self):
  lm = fake.StaticResponse('A cat and a mouse.')
+ cat_image = modalities.Image.from_bytes(b'cat_image')
+ mouse_image = modalities.Image.from_bytes(b'mouse_image')
  self.assert_render(
- [
- modalities.Image.from_bytes(b'cat_image'),
- modalities.Image.from_bytes(b'mouse_image'),
- ],
+ [cat_image, mouse_image],
  None,
  lm=lm,
- expected_snippet='`[<<[[input[0]]]>>, <<[[input[1]]]>>]`',
+ expected_snippet=(
+ f'`[<<[[{cat_image.id}]]>>, <<[[{mouse_image.id}]]>>]`'
+ ),
  exact_match=True,
  expected_modalities=2,
  )
 
  def test_structure_with_modality_to_structure_render(self):
  lm = fake.StaticResponse('["cat", "mouse"]')
+ cat_image = modalities.Image.from_bytes(b'cat_image')
+ mouse_image = modalities.Image.from_bytes(b'mouse_image')
  self.assert_render(
- [
- modalities.Image.from_bytes(b'cat_image'),
- modalities.Image.from_bytes(b'mouse_image'),
- ],
+ [cat_image, mouse_image],
  list[str],
  lm=lm,
- expected_snippet=inspect.cleandoc("""
+ expected_snippet=inspect.cleandoc(f"""
  REQUEST:
  ```python
  [
- <<[[input[0]]]>>,
- <<[[input[1]]]>>
+ <<[[{cat_image.id}]]>>,
+ <<[[{mouse_image.id}]]>>
  ]
  ```
  """),
@@ -320,25 +345,25 @@ class QueryTest(unittest.TestCase):
 
  def test_structure_with_modality_and_examples_to_structure_render(self):
  lm = fake.StaticResponse('["cat", "mouse"]')
+ cat_image = modalities.Image.from_bytes(b'cat_image')
+ mouse_image = modalities.Image.from_bytes(b'mouse_image')
+ dog_image = modalities.Image.from_bytes(b'dog_image')
  self.assert_render(
- [
- modalities.Image.from_bytes(b'cat_image'),
- modalities.Image.from_bytes(b'mouse_image'),
- ],
+ [cat_image, mouse_image],
  list[str],
  examples=[
  mapping.MappingExample(
- input=[modalities.Image.from_bytes(b'dog_image')],
+ input=[dog_image],
  schema=list[str],
  output=['dog'],
  ),
  ],
  lm=lm,
- expected_snippet=inspect.cleandoc("""
+ expected_snippet=inspect.cleandoc(f"""
  REQUEST:
  ```python
  [
- <<[[examples[0].input[0]]]>>
+ <<[[{dog_image.id}]]>>
  ]
  ```
 
@@ -356,8 +381,8 @@ class QueryTest(unittest.TestCase):
  REQUEST:
  ```python
  [
- <<[[input[0]]]>>,
- <<[[input[1]]]>>
+ <<[[{cat_image.id}]]>>,
+ <<[[{mouse_image.id}]]>>
  ]
  ```
 
@@ -369,6 +394,17 @@ class QueryTest(unittest.TestCase):
  expected_modalities=3,
  )
 
+ def test_query_with_modality_output(self):
+ cat_image = modalities.Image.from_bytes(b'cat_image')
+ lm = fake.StaticResponse(
+ lf.Template('Here you go: {{image}}', image=cat_image).render(
+ message_cls=lf.AIMessage
+ )
+ )
+ response = querying.query('Generate a cat image', lm=lm)
+ self.assertIsInstance(response, lf.AIMessage)
+ self.assertEqual(response.modalities(), [cat_image])
+
  def test_multiple_queries(self):
  self.assertEqual(
  querying.query(
@@ -545,7 +581,7 @@ class QueryTest(unittest.TestCase):
  )
  ).input,
  )
- self.assertIsNotNone(output.get_modality('image'))
+ self.assertEqual(len(output.referred_modalities), 1)
 
  def test_query_and_reduce(self):
  self.assertEqual(
@@ -991,15 +1027,11 @@ class LfQueryPythonV2Test(unittest.TestCase):
  )
 
  def test_bad_response(self):
- with lf.context(
- lm=fake.StaticSequence(['a2']),
- override_attrs=True,
+ with self.assertRaisesRegex(
+ mapping.MappingError,
+ 'name .* is not defined',
  ):
- with self.assertRaisesRegex(
- mapping.MappingError,
- 'name .* is not defined',
- ):
- querying.query('Compute 1 + 2', int)
+ querying.query('Compute 1 + 2', int, lm=fake.StaticSequence(['a2']))
 
  def test_not_allowed_code(self):
  lm = fake.StaticResponse(
@@ -1026,21 +1058,20 @@ class LfQueryPythonV2Test(unittest.TestCase):
  self.assertEqual(querying.query('what is 1 + 0', int, lm=lm, autofix=3), 1)
 
  def test_response_postprocess(self):
- with lf.context(
- lm=fake.StaticResponse('<!-- some comment-->\n3'),
- override_attrs=True,
- ):
- self.assertEqual(
- querying.query(
- 'Compute 1 + 2', response_postprocess=lambda x: x.split('\n')[1]),
- '3'
- )
- self.assertEqual(
- querying.query(
- 'Compute 1 + 2', int,
- response_postprocess=lambda x: x.split('\n')[1]),
- 3
- )
+ self.assertEqual(
+ querying.query(
+ 'Compute 1 + 2',
+ lm=fake.StaticResponse('<!-- some comment-->\n3'),
+ response_postprocess=lambda x: x.split('\n')[1]),
+ '3'
+ )
+ self.assertEqual(
+ querying.query(
+ 'Compute 1 + 2', int,
+ lm=fake.StaticResponse('<!-- some comment-->\n3'),
+ response_postprocess=lambda x: x.split('\n')[1]),
+ 3
+ )
 
  def test_render(self):
  l = querying.LfQuery.from_protocol('python:2.0')(
@@ -1312,15 +1343,15 @@ class LfQueryJsonV1Test(unittest.TestCase):
 
  def test_bad_transform(self):
  with in_memory.lm_cache() as cache:
- with lf.context(
- lm=fake.StaticSequence(['3']),
- override_attrs=True,
+ with self.assertRaisesRegex(
+ mapping.MappingError,
+ 'No JSON dict in the output',
  ):
- with self.assertRaisesRegex(
- mapping.MappingError,
- 'No JSON dict in the output',
- ):
- querying.query('Compute 1 + 2', int, protocol='json', cache_seed=1)
+ querying.query(
+ 'Compute 1 + 2', int,
+ lm=fake.StaticSequence(['3']),
+ protocol='json', cache_seed=1
+ )
  # Make sure bad mapping does not impact cache.
  self.assertEqual(len(cache), 0)
 
@@ -1595,7 +1626,7 @@ class TrackQueriesTest(unittest.TestCase):
  'bar',
  ])
  state = {}
- def start_callabck(query):
+ def start_callback(query):
  self.assertFalse(query.is_completed)
  self.assertIsNone(query.end_time)
  elapse1 = query.elapse
@@ -1620,7 +1651,7 @@
  state['end'] = query
 
  with querying.track_queries(
- start_callabck=start_callabck, end_callabck=end_callback
+ start_callback=start_callback, end_callback=end_callback
  ) as queries:
  querying.query('foo', lm=lm)
  self.assertIs(state['start'], queries[0])
@@ -1631,7 +1662,7 @@
  'bar',
  ])
  state = {}
- def start_callabck(query):
+ def start_callback(query):
  self.assertFalse(query.is_completed)
  self.assertIsNone(query.end_time)
  self.assertIsNotNone(query.usage_summary)
@@ -1653,7 +1684,7 @@
 
  with self.assertRaises(mapping.MappingError):
  with querying.track_queries(
- start_callabck=start_callabck, end_callabck=end_callback
+ start_callback=start_callback, end_callback=end_callback
  ) as queries:
  querying.query('foo', int, lm=lm)
  self.assertIs(state['start'], queries[0])
@@ -33,12 +33,12 @@ def include_method_in_prompt(method):
 
 
  def should_include_method_in_prompt(method):
- """Returns true if the method should be shown in the prompt."""
+ """Returns True if the method should be shown in the prompt."""
  return getattr(method, '__show_in_prompt__', False)
 
 
  def parse_value_spec(value) -> pg.typing.ValueSpec:
- """Parses a PyGlove ValueSpec equivalence into a ValueSpec."""
+ """Parses a PyGlove ValueSpec equivalent into a ValueSpec."""
  if isinstance(value, pg.typing.ValueSpec):
  return value
 
@@ -121,7 +121,67 @@ class Schema(
  pg.Object,
  pg.views.HtmlTreeView.Extension
  ):
- """Base class for structured data schema."""
+ """Schema for structured inputs and outputs.
+
+ `lf.Schema` provides a unified representation for defining the output schema
+ used in Langfun's structured operations like `lf.query`, `lf.parse`,
+ `lf.complete`, and `lf.describe`. It acts as an abstraction layer,
+ allowing schemas to be defined using Python type annotations, `pg.Object`
+ classes, or dictionaries, and then converting them into a format that
+ language models can understand.
+
+ `lf.Schema` can be created from various types using `lf.Schema.from_value`:
+ * Built-in types: `int`, `str`, `bool`, `float`
+ * Typing constructs: `list`, `dict`, `typing.Union`, `typing.Literal`,
+ `typing.Optional`
+ * PyGlove classes: `pg.Object` subclasses
+
+ **1. Creating a Schema:**
+
+ ```python
+ import langfun as lf
+ import pyglove as pg
+ from typing import Literal, Union
+
+ # From a basic type
+ int_schema = lf.Schema.from_value(int)
+
+ # From a list type
+ list_schema = lf.Schema.from_value(list[int])
+
+ # From a dictionary
+ dict_schema = lf.Schema.from_value(dict(a=int, b=str))
+
+ # From pg.Object
+ class Point(pg.Object):
+ x: int
+ y: int
+ point_schema = lf.Schema.from_value(Point)
+
+ # From Union or Literal
+ union_schema = lf.Schema.from_value(Union[int, str])
+ literal_schema = lf.Schema.from_value(Literal['A', 'B'])
+ ```
+
+ **2. Schema Representation:**
+ Once created, a schema object can represent itself in different formats,
+ such as Python-like syntax or JSON, which is used in prompts to LLMs.
+
+ ```python
+ print(point_schema.repr('python'))
+ # Output:
+ # class Point:
+ # x: int
+ # y: int
+
+ print(dict_schema.repr('json'))
+ # Output:
+ # {
+ # "a": "int",
+ # "b": "str"
+ # }
+ ```
+ """
 
  spec: pg.typing.Annotated[
  pg.typing.Object(pg.typing.ValueSpec, transform=parse_value_spec),
@@ -144,7 +204,7 @@ class Schema(
  def parse(
  self, text: str, protocol: SchemaProtocol = 'json', **kwargs
  ) -> Any:
- """Parse a LM generated text into a structured value."""
+ """Parses a LM generated text into a structured value."""
  value = value_repr(protocol).parse(text, self, **kwargs)
 
  # TODO(daiyip): support autofix for schema error.
@@ -157,7 +217,7 @@ class Schema(
  return self.schema_str()
 
  def schema_dict(self) -> dict[str, Any]:
- """Returns the dict representation of the schema."""
+ """Returns the dictionary representation of the schema."""
 
  def _node(vs: pg.typing.ValueSpec) -> Any:
  if isinstance(vs, pg.typing.PrimitiveType):
@@ -406,7 +466,7 @@ def class_definitions(
  strict: bool = False,
  markdown: bool = False,
  ) -> str | None:
- """Returns a str for class definitions."""
+ """Returns a string for class definitions."""
  if not classes:
  return None
  def_str = io.StringIO()
@@ -683,7 +743,7 @@ class ValueRepr(metaclass=abc.ABCMeta):
 
  @abc.abstractmethod
  def parse(self, text: str, schema: Schema | None = None, **kwargs) -> Any:
- """Parse a LM generated text into a structured value."""
+ """Parses a LM generated text into a structured value."""
 
 
  class ValuePythonRepr(ValueRepr):
@@ -739,7 +799,7 @@ class ValuePythonRepr(ValueRepr):
  autofix_lm: lf.LanguageModel = lf.contextual(),
  **kwargs,
  ) -> Any:
- """Parse a Python string into a structured object."""
+ """Parses a Python string into a structured object."""
  del kwargs
  global_vars = additional_context or {}
  if schema is not None:
@@ -820,7 +880,7 @@ class ValueJsonRepr(ValueRepr):
  return pg.to_json_str(dict(result=value))
 
  def parse(self, text: str, schema: Schema | None = None, **kwargs) -> Any:
- """Parse a JSON string into a structured object."""
+ """Parses a JSON string into a structured object."""
  del schema
  try:
  text = cleanup_json(text)
@@ -837,7 +897,7 @@
 
 
  def cleanup_json(json_str: str) -> str:
- """Clean up the LM responded JSON string."""
+ """Cleans up the LM responded JSON string."""
  # Treatments:
  # 1. Extract the JSON string with a top-level dict from the response.
  # This prevents the leading and trailing texts in the response to
@@ -90,16 +90,35 @@ def generate_class(
  skip_lm: bool = False,
  **kwargs,
  ) -> Type[Any] | lf.Message:
- """Generate a class with specified name based on the prompt.
-
- Example:
- ```
- trip_cls = lf.classgen(
- 'Trip',
- 'A trip plan to visit {{ city }}, city='San Francisco',
- lm=lf.llms.GeminiPro()
- )
- ```
+ """Generates a Python class dynamically from a prompt using an LLM.
+
+ `lf.structured.generate_class` takes a class name and a natural language
+ description (prompt) and uses a language model to generate a Python class
+ (inheriting from `pg.Object`) that matches the description.
+ This is useful for creating structured data types on-the-fly based on
+ dynamic requirements.
+
+ **Example:**
+
+ ```python
+ import langfun as lf
+ import pyglove as pg
+
+ trip_plan_cls = lf.structured.generate_class(
+ 'TripPlan',
+ 'A trip plan to visit San Francisco, including a list of destinations,'
+ 'start date, end date, and total budget.',
+ lm=lf.llms.Gemini25Flash())
+
+ # This might generate a class like:
+ # class TripPlan(pg.Object):
+ # destinations: list[str]
+ # start_date: str
+ # end_date: str
+ # total_budget: float
+
+ print(lf.Schema.from_value(trip_plan_cls).schema_str('python'))
+ ```
 
  Args:
  name: Class name to be generated.
@@ -108,17 +127,17 @@ def generate_class(
  lm: The language model to use. If not specified, the language model from
  `lf.context` context manager will be used.
  examples: An optional list of fewshot examples for helping class generation.
- If None, a default single shot example will be used. Use
- `lf.structured.classgen_example` to generate example.
+ If None, a default single-shot example will be used. Use
+ `lf.structured.classgen_example` to generate examples.
  returns_message: If True, returns `lf.Message` as the output, instead of
  returning the structured `message.result`.
  skip_lm: If True, returns the rendered prompt as a UserMessage object.
- otherwise return the LLM response based on the rendered prompt.
+ otherwise returns the LLM response based on the rendered prompt.
  **kwargs: Template variables passed to `prompt` and keyword arguments passed
  to `lf.structured.GenerateClass`.
 
  Returns:
- Generated class.
+ The generated Python class, or `lf.Message` if `returns_message` is True.
 
  Raises:
  CodeError: if generation failed.
@@ -35,38 +35,50 @@ def score(
  return_scoring_results: bool = False,
  **kwargs,
  ) -> list[float] | list[lf.LMScoringResult]:
- """Scores the outputs based on the prompt.
-
- Examples:
- ```
- # Example 1: Scoring text output based on the user prompt.
- scores = lf.score('{{x}} + {{y}} =', ['1', '2', '3'], lm=lm, x=1, y=2)
- assert len(scores) == 3
-
- # Example 2: Scoring int output based on the formulated OOP prompt.
- scores = lf.score('1 + 1 =', [1, 2, 3], lm=lm)
- assert len(scores) == 3
-
- class Answer(pg.Object):
- result: int
-
- # Example 3: Scoring object output based on the formulated OOP prompt.
- scores = lf.score('1 + 1 =', [Answer(1), Answer(2), Answer(3)], lm=lm)
- assert len(scores) == 3
-
- # Example 4: Scoring object field value based on the formulated OOP prompt
- # and the generated tokens before the first `pg.oneof`.
- scores = lf.score('1 + 1 =', [Answer(pg.oneof([1, 2, 3]))], lm=lm)
- assert len(scores) == 3
-
- # Example 5: Scoring multiple prompt/completion pairs.
- scores = lf.score(
- ['1 + 1=', '2 + 3='],
- ['2', '4'],
- lm=lm
- )
- assert len(scores) == 2
- ```
+ """Scores completions based on a prompt using a language model.
+
+ `lf.score` computes the likelihood of each completion being generated given
+ a prompt, according to the specified language model. It can score text
+ completions or structured objects. If `schema` is provided, Langfun
+ formats the prompt and completions appropriately before scoring.
+
+ **Example 1: Score text completions**
+ ```python
+ import langfun as lf
+ scores = lf.score(
+ '1 + 1 =',
+ ['2', '3', '4'],
+ lm=lf.llms.Gemini25Flash())
+ print([f'{s:.3f}' for s in scores])
+ # Output: ['-0.001', '-2.345', '-3.456']
+ ```
+
+ **Example 2: Score structured completions**
+ ```python
+ import langfun as lf
+ import pyglove as pg
+
+ class Answer(pg.Object):
+ result: int
+
+ scores = lf.score(
+ '1 + 1 =',
+ [Answer(result=2), Answer(result=3), Answer(result=4)],
+ lm=lf.llms.Gemini25Flash())
+ print([f'{s:.3f}' for s in scores])
+ # Output: ['-0.001', '-2.345', '-3.456']
+ ```
+
+ **Example 3: Score multiple prompt/completion pairs**
+ ```python
+ import langfun as lf
+ scores = lf.score(
+ ['1 + 1 =', '2 + 2 ='],
+ ['2', '4'],
+ lm=lf.llms.Gemini25Flash())
+ print([f'{s:.3f}' for s in scores])
+ # Output: ['-0.001', '-0.002']
+ ```
 
  Args:
  prompt: The prompt(s) based on which each completion will be scored.
@@ -74,8 +86,7 @@ def score(
  schema: The schema as the output type. If None, it will be inferred from
  the completions.
  lm: The language model used for scoring.
- examples: Fewshot exemplars used together with the prompt in getting the
- completions.
+ examples: Few-shot examples used to construct the prompt for scoring.
  protocol: The protocol for formulating the prompt based on objects.
  return_scoring_results: If True, returns a list of `lf.LMScoringResult`,
  otherwise returns a list of floats as the scores of each completion.
23
23
 
24
24
 
25
25
  def tokenize(
26
- prompt: Union[str, pg.Symbolic] | list[str | pg.Symbolic],
26
+ prompt: Union[str, pg.Symbolic, list[str | pg.Symbolic]],
27
27
  schema: Union[
28
28
  schema_lib.Schema, Type[Any], list[Type[Any]], dict[str, Any], None
29
29
  ] = None,
@@ -33,20 +33,35 @@ def tokenize(
  protocol: schema_lib.SchemaProtocol = 'python',
  **kwargs,
  ) -> list[tuple[str | bytes, int]]:
- """Tokenize the prompt for `lf.query`.
+ """Renders a prompt and tokenizes it using a language model.
+
+ `lf.tokenize` first renders a prompt based on the provided `prompt`,
+ `schema`, and `examples`, similar to `lf.query`, and then uses the
+ specified language model (`lm`) to tokenize the resulting message.
+ This is useful for understanding how a prompt is seen by the model or
+ for estimating token counts before sending requests.
+
+ **Example:**
+
+ ```python
+ import langfun as lf
+ tokens = lf.tokenize('Hello world!', lm=lf.llms.Gpt4())
+ print(tokens)
+ # Output might look like: [('Hello', 15339), (' world', 1917), ('!', 0)]
+ ```
 
  Args:
- prompt: The prompt(s) based on which each completion will be scored.
- schema: The schema as the output type. If None, it will be inferred from
- the completions.
- lm: The language model used for scoring.
- examples: Fewshot exemplars used together with the prompt in getting the
- completions.
+ prompt: The prompt to render and tokenize. Can be a string, `pg.Symbolic`,
+ or `lf.Template`.
+ schema: The schema for formatting the prompt, if `prompt` is structured or
+ if schema-based formatting is needed.
+ lm: The language model to use for tokenization.
+ examples: Few-shot examples to include in the rendered prompt.
  protocol: The protocol for formulating the prompt based on objects.
  **kwargs: Keyword arguments that are referred by the prompt.
 
  Returns:
- A list of (text, token_id) tuples.
+ A list of (token_str, token_id) tuples representing the tokenized prompt.
  """
  input_message = querying.query_prompt(
  prompt,
@@ -35,7 +35,7 @@ EventType = TypeVar('EventType')
 
 
  class EventHandler(Generic[EventType], metaclass=abc.ABCMeta):
- """Interface for event subscriber."""
+ """Interface for event handler."""
 
  @classmethod
  @functools.cache
@@ -51,7 +51,7 @@ class EventHandler(Generic[EventType], metaclass=abc.ABCMeta):
 
  @classmethod
  def accepts(cls, event: Event[Any]) -> bool:
- """Returns True if current event handler class can accepts an event."""
+ """Returns True if current event handler class can accept an event."""
  return isinstance(event, cls.event_type())
 
  @abc.abstractmethod