langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (56)
  1. langfun/core/__init__.py +1 -6
  2. langfun/core/coding/python/__init__.py +5 -11
  3. langfun/core/coding/python/correction.py +4 -7
  4. langfun/core/coding/python/correction_test.py +2 -3
  5. langfun/core/coding/python/execution.py +22 -211
  6. langfun/core/coding/python/execution_test.py +11 -90
  7. langfun/core/coding/python/generation.py +3 -2
  8. langfun/core/coding/python/generation_test.py +2 -2
  9. langfun/core/coding/python/parsing.py +108 -194
  10. langfun/core/coding/python/parsing_test.py +2 -105
  11. langfun/core/component.py +11 -273
  12. langfun/core/component_test.py +2 -29
  13. langfun/core/concurrent.py +187 -82
  14. langfun/core/concurrent_test.py +28 -19
  15. langfun/core/console.py +7 -3
  16. langfun/core/eval/base.py +2 -3
  17. langfun/core/eval/v2/evaluation.py +3 -1
  18. langfun/core/eval/v2/reporting.py +8 -4
  19. langfun/core/language_model.py +84 -8
  20. langfun/core/language_model_test.py +84 -29
  21. langfun/core/llms/__init__.py +46 -11
  22. langfun/core/llms/anthropic.py +1 -123
  23. langfun/core/llms/anthropic_test.py +0 -48
  24. langfun/core/llms/deepseek.py +117 -0
  25. langfun/core/llms/deepseek_test.py +61 -0
  26. langfun/core/llms/gemini.py +1 -1
  27. langfun/core/llms/groq.py +12 -99
  28. langfun/core/llms/groq_test.py +31 -137
  29. langfun/core/llms/llama_cpp.py +17 -54
  30. langfun/core/llms/llama_cpp_test.py +2 -34
  31. langfun/core/llms/openai.py +9 -147
  32. langfun/core/llms/openai_compatible.py +179 -0
  33. langfun/core/llms/openai_compatible_test.py +495 -0
  34. langfun/core/llms/openai_test.py +13 -423
  35. langfun/core/llms/rest_test.py +1 -1
  36. langfun/core/llms/vertexai.py +387 -18
  37. langfun/core/llms/vertexai_test.py +52 -0
  38. langfun/core/message_test.py +3 -3
  39. langfun/core/modalities/mime.py +8 -0
  40. langfun/core/modalities/mime_test.py +19 -4
  41. langfun/core/modality_test.py +0 -1
  42. langfun/core/structured/mapping.py +13 -13
  43. langfun/core/structured/mapping_test.py +2 -2
  44. langfun/core/structured/schema.py +16 -8
  45. langfun/core/structured/schema_generation.py +1 -1
  46. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/METADATA +13 -2
  47. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/RECORD +50 -52
  48. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/WHEEL +1 -1
  49. langfun/core/coding/python/errors.py +0 -108
  50. langfun/core/coding/python/errors_test.py +0 -99
  51. langfun/core/coding/python/permissions.py +0 -90
  52. langfun/core/coding/python/permissions_test.py +0 -86
  53. langfun/core/text_formatting.py +0 -168
  54. langfun/core/text_formatting_test.py +0 -65
  55. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/LICENSE +0 -0
  56. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/top_level.txt +0 -0
langfun/core/coding/python/parsing.py +108 -194
@@ -13,206 +13,120 @@
13
13
  # limitations under the License.
14
14
  """Python code parsing."""
15
15
 
16
- import ast
17
16
  import inspect
18
17
  import io
19
18
  import re
20
19
 
21
- import langfun.core as lf
22
- from langfun.core.coding.python import errors
23
- from langfun.core.coding.python import permissions
24
-
25
-
26
- class PythonCodeParser(lf.Component):
27
- """Python code parser with permission control."""
28
-
29
- _ID_REGEX = re.compile('^[a-zA-Z_\\-]*$')
30
-
31
- class _CodeValidator(ast.NodeVisitor):
32
- """Python AST node visitor for ensuring code are permitted."""
33
-
34
- def __init__(self, code: str, permission: permissions.CodePermission):
35
- super().__init__()
36
- self.code = code
37
- self.permission = permission
38
-
39
- def verify(
40
- self,
41
- node,
42
- flag: permissions.CodePermission,
43
- node_type,
44
- error_message: str,
45
- ) -> None:
46
- if isinstance(node, node_type) and not (self.permission & flag):
47
- raise SyntaxError(
48
- error_message, (
49
- '<generated-code>',
50
- node.lineno,
51
- node.col_offset,
52
- self._code_line(node.lineno),
53
- node.end_lineno,
54
- node.end_col_offset,
55
- ))
56
-
57
- def _code_line(self, lineno):
58
- return self.code.split('\n')[lineno - 1]
59
-
60
- def generic_visit(self, node):
61
- self.verify(
62
- node,
63
- permissions.CodePermission.CONDITION,
64
- (ast.If, ast.Match),
65
- 'Condition is not allowed.',
66
- )
67
-
68
- self.verify(
69
- node,
70
- permissions.CodePermission.LOOP,
71
- (ast.For, ast.While, ast.AsyncFor, ast.AsyncWith),
72
- 'Loop is not allowed.',
73
- )
74
-
75
- self.verify(
76
- node,
77
- permissions.CodePermission.EXCEPTION,
78
- (ast.Try, ast.Raise, ast.Assert),
79
- 'Exception is not allowed.',
80
- )
81
-
82
- self.verify(
83
- node,
84
- permissions.CodePermission.CLASS_DEFINITION,
85
- ast.ClassDef,
86
- 'Class definition is not allowed.',
87
- )
88
-
89
- self.verify(
90
- node,
91
- permissions.CodePermission.FUNCTION_DEFINITION,
92
- (
93
- ast.FunctionDef,
94
- ast.AsyncFunctionDef,
95
- ast.Return,
96
- ast.Yield,
97
- ast.YieldFrom,
98
- ),
99
- 'Function definition is not allowed.',
100
- )
101
-
102
- self.verify(
103
- node,
104
- permissions.CodePermission.IMPORT,
105
- (ast.Import, ast.ImportFrom),
106
- '`import` is not allowed.',
107
- )
108
-
109
- super().generic_visit(node)
110
-
111
- def parse(
112
- self, code: str, permission: permissions.CodePermission
113
- ) -> tuple[str, ast.AST]:
114
- code = self.clean(code)
115
- try:
116
- parsed_code = ast.parse(code, mode='exec')
117
- PythonCodeParser._CodeValidator(code, permission).visit(parsed_code)
118
- except SyntaxError as e:
119
- raise errors.CodeError(code, e) from e
120
- return code, parsed_code
121
-
122
- def clean(self, code_text: str) -> str:
123
- # TODO(daiyip): Deal with markdown in docstrings.
124
- code = io.StringIO()
125
- quote_char = None
126
- in_code = False
127
- i = 0
128
- in_comment = False
129
- while i < len(code_text):
130
- c = code_text[i]
131
- # Detect code block separator (```).
132
- if (not in_comment
133
- and quote_char is None
134
- and c == '`'
135
- and code_text[i:i + 3] == '```'):
136
- in_code = not in_code
137
- if in_code:
138
- i += 3
139
- continue
140
- else:
141
- break
142
-
143
- # Detect string literal boundary.
144
- if (in_code
145
- and not in_comment
146
- and c in ('\'', '"')
147
- and i > 0
148
- and code_text[i - 1] != '\\'):
149
- # Handle ''' and """.
150
- if code_text[i: i + 3] == c * 3:
151
- c = c * 3
152
- i += 2
153
-
154
- if quote_char is None:
155
- quote_char = c
156
- elif quote_char == c:
157
- # NOTE(daiyip): at times, LM forgets to escape quotes inside a string.
158
- # Thus we do some smart checking here to automatically correct such
159
- # case. This logic here is pretty involved in handling special cases.
160
- # We might want to revisit them later.
161
-
162
- # Peek forward to see if it could be a valid string.
163
- nt, nnt_start = _next_token(code_text, i + 1)
164
- if (len(c) == 3
165
- or nt in (',', '[', ']', '}', ')', '+', '*', '%', '\n', ':')):
166
- end_quote = True
167
- elif nt == ' ':
168
- # Detect if . could be a method invocation.
169
- # NOTE(daiyip): 'in' and 'not in' might have false positives. But
170
- # given the chance is low, we do not complicate the reasoning logic
171
- # for now.
172
- nnt, _ = _next_token(code_text, nnt_start, skip_whitespace=True)
173
- end_quote = nnt in ('+', '*', '%', '#', '[', 'in', 'not', ':')
174
- elif nt == '.':
175
- # Detect if . could be method invocation on string.
176
- nnt, nnnt_start = _next_token(code_text, nnt_start)
177
- nnnt, _ = _next_token(code_text, nnnt_start)
178
- end_quote = nnt.isidentifier() and nnnt == '('
179
- else:
180
- end_quote = False
181
-
182
- if end_quote:
183
- quote_char = None
184
- else:
185
- c = f'\\{c}'
186
- # Detect comment.
187
- elif c == '#' and quote_char is None:
188
- in_comment = True
189
- # Detect end-of-comment.
190
- elif c == '\n':
191
- # NOTE(daiyip): deal with cases that LM forgot to escape linebreaks
192
- # within strings.
193
- if quote_char is not None:
194
- # Only add \\ for ' and " (other than ''' and """).
195
- if len(quote_char) == 1:
196
- c = '\\n'
197
- else:
198
- in_comment = False
199
20
 
21
+ _ID_REGEX = re.compile('^[a-zA-Z_\\-]*$')
22
+
23
+
24
+ def clean(code_text: str) -> str:
25
+ """Cleans up Python code.
26
+
27
+ LLM may generate code with markdown annotations, as well as minor syntax
28
+ errors. This function removes such annotations and fixes minor syntax errors
29
+ without extra LLM calls.
30
+
31
+ Args:
32
+ code_text: The code text to clean up.
33
+
34
+ Returns:
35
+ The cleaned up code text.
36
+ """
37
+ # TODO(daiyip): Deal with markdown in docstrings.
38
+ code = io.StringIO()
39
+ quote_char = None
40
+ in_code = False
41
+ i = 0
42
+ in_comment = False
43
+ while i < len(code_text):
44
+ c = code_text[i]
45
+ # Detect code block separator (```).
46
+ if (not in_comment
47
+ and quote_char is None
48
+ and c == '`'
49
+ and code_text[i:i + 3] == '```'):
50
+ in_code = not in_code
200
51
  if in_code:
201
- code.write(c)
202
-
203
- i += 1
204
-
205
- code = code.getvalue()
206
- if code:
207
- pos = code.find('\n')
208
- # Strip markdown code type. E.g. ```python
209
- if pos > 0 and self._ID_REGEX.match(code[:pos]):
210
- code = code[pos:]
211
- else:
212
- # Maybe-code that resides not within a code markdown block.
213
- # Adding '\n' makes inspect.cleandoc to make right adjustment.
214
- code = '\n' + code_text
215
- return inspect.cleandoc(code).strip()
52
+ i += 3
53
+ continue
54
+ else:
55
+ break
56
+
57
+ # Detect string literal boundary.
58
+ if (in_code
59
+ and not in_comment
60
+ and c in ('\'', '"')
61
+ and i > 0
62
+ and code_text[i - 1] != '\\'):
63
+ # Handle ''' and """.
64
+ if code_text[i: i + 3] == c * 3:
65
+ c = c * 3
66
+ i += 2
67
+
68
+ if quote_char is None:
69
+ quote_char = c
70
+ elif quote_char == c:
71
+ # NOTE(daiyip): at times, LM forgets to escape quotes inside a string.
72
+ # Thus we do some smart checking here to automatically correct such
73
+ # case. This logic here is pretty involved in handling special cases.
74
+ # We might want to revisit them later.
75
+
76
+ # Peek forward to see if it could be a valid string.
77
+ nt, nnt_start = _next_token(code_text, i + 1)
78
+ if (len(c) == 3
79
+ or nt in (',', '[', ']', '}', ')', '+', '*', '%', '\n', ':')):
80
+ end_quote = True
81
+ elif nt == ' ':
82
+ # Detect if . could be a method invocation.
83
+ # NOTE(daiyip): 'in' and 'not in' might have false positives. But
84
+ # given the chance is low, we do not complicate the reasoning logic
85
+ # for now.
86
+ nnt, _ = _next_token(code_text, nnt_start, skip_whitespace=True)
87
+ end_quote = nnt in ('+', '*', '%', '#', '[', 'in', 'not', ':')
88
+ elif nt == '.':
89
+ # Detect if . could be method invocation on string.
90
+ nnt, nnnt_start = _next_token(code_text, nnt_start)
91
+ nnnt, _ = _next_token(code_text, nnnt_start)
92
+ end_quote = nnt.isidentifier() and nnnt == '('
93
+ else:
94
+ end_quote = False
95
+
96
+ if end_quote:
97
+ quote_char = None
98
+ else:
99
+ c = f'\\{c}'
100
+ # Detect comment.
101
+ elif c == '#' and quote_char is None:
102
+ in_comment = True
103
+ # Detect end-of-comment.
104
+ elif c == '\n':
105
+ # NOTE(daiyip): deal with cases that LM forgot to escape linebreaks
106
+ # within strings.
107
+ if quote_char is not None:
108
+ # Only add \\ for ' and " (other than ''' and """).
109
+ if len(quote_char) == 1:
110
+ c = '\\n'
111
+ else:
112
+ in_comment = False
113
+
114
+ if in_code:
115
+ code.write(c)
116
+
117
+ i += 1
118
+
119
+ code = code.getvalue()
120
+ if code:
121
+ pos = code.find('\n')
122
+ # Strip markdown code type. E.g. ```python
123
+ if pos > 0 and _ID_REGEX.match(code[:pos]):
124
+ code = code[pos:]
125
+ else:
126
+ # Maybe-code that resides not within a code markdown block.
127
+ # Adding '\n' makes inspect.cleandoc to make right adjustment.
128
+ code = '\n' + code_text
129
+ return inspect.cleandoc(code).strip()
216
130
 
217
131
 
218
132
  def _next_token(
langfun/core/coding/python/parsing_test.py +2 -105
@@ -15,18 +15,16 @@
15
15
 
16
16
  import inspect
17
17
  import unittest
18
- from langfun.core.coding.python import errors
19
18
  from langfun.core.coding.python import parsing
20
- from langfun.core.coding.python import permissions
21
19
 
22
20
 
23
- class PythonCodeParserTest(unittest.TestCase):
21
+ class CleanTest(unittest.TestCase):
24
22
 
25
23
  def assert_clean(self, code: str, cleaned_code: str, clean: bool = True):
26
24
  if clean:
27
25
  cleaned_code = inspect.cleandoc(cleaned_code)
28
26
  self.assertEqual(
29
- parsing.PythonCodeParser().clean(code), cleaned_code
27
+ parsing.clean(code), cleaned_code
30
28
  )
31
29
 
32
30
  def test_clean(self):
@@ -272,107 +270,6 @@ class PythonCodeParserTest(unittest.TestCase):
272
270
  """
273
271
  )
274
272
 
275
- def assert_allowed(self, code: str, permission: permissions.CodePermission):
276
- _, ast = parsing.PythonCodeParser().parse(code, permission)
277
- self.assertIsNotNone(ast)
278
-
279
- def assert_not_allowed(
280
- self, code: str, permission: permissions.CodePermission
281
- ):
282
- with self.assertRaisesRegex(errors.CodeError, '.* is not allowed'):
283
- parsing.PythonCodeParser().parse(code, permission)
284
-
285
- def test_parse_with_allowed_code(self):
286
- self.assert_allowed(
287
- """
288
- x = y + 1
289
- z = x + y
290
- """,
291
- permissions.CodePermission.BASIC,
292
- )
293
- self.assert_allowed(
294
- """
295
- if x > 0:
296
- print(x)
297
- """,
298
- permissions.CodePermission.CONDITION,
299
- )
300
- self.assert_allowed(
301
- """
302
- for i in range(5):
303
- print(i)
304
- """,
305
- permissions.CodePermission.LOOP,
306
- )
307
- self.assert_allowed(
308
- """
309
- assert x > 1
310
- """,
311
- permissions.CodePermission.EXCEPTION,
312
- )
313
- self.assert_allowed(
314
- """
315
- class A:
316
- pass
317
- """,
318
- permissions.CodePermission.CLASS_DEFINITION,
319
- )
320
- self.assert_allowed(
321
- """
322
- def foo(x, y):
323
- return x + y
324
- """,
325
- permissions.CodePermission.FUNCTION_DEFINITION,
326
- )
327
- self.assert_allowed(
328
- """
329
- import re
330
- """,
331
- permissions.CodePermission.IMPORT,
332
- )
333
-
334
- def test_parse_with_not_allowed_code(self):
335
- self.assert_not_allowed(
336
- """
337
- if x > 0:
338
- print(x)
339
- """,
340
- permissions.CodePermission.BASIC,
341
- )
342
- self.assert_not_allowed(
343
- """
344
- for i in range(5):
345
- print(i)
346
- """,
347
- permissions.CodePermission.BASIC,
348
- )
349
- self.assert_not_allowed(
350
- """
351
- assert x > 1
352
- """,
353
- permissions.CodePermission.BASIC,
354
- )
355
- self.assert_not_allowed(
356
- """
357
- class A:
358
- pass
359
- """,
360
- permissions.CodePermission.BASIC,
361
- )
362
- self.assert_not_allowed(
363
- """
364
- def foo(x, y):
365
- return x + y
366
- """,
367
- permissions.CodePermission.BASIC,
368
- )
369
- self.assert_not_allowed(
370
- """
371
- import re
372
- """,
373
- permissions.CodePermission.BASIC,
374
- )
375
-
376
273
 
377
274
  if __name__ == '__main__':
378
275
  unittest.main()