langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/__init__.py +1 -6
- langfun/core/coding/python/__init__.py +5 -11
- langfun/core/coding/python/correction.py +4 -7
- langfun/core/coding/python/correction_test.py +2 -3
- langfun/core/coding/python/execution.py +22 -211
- langfun/core/coding/python/execution_test.py +11 -90
- langfun/core/coding/python/generation.py +3 -2
- langfun/core/coding/python/generation_test.py +2 -2
- langfun/core/coding/python/parsing.py +108 -194
- langfun/core/coding/python/parsing_test.py +2 -105
- langfun/core/component.py +11 -273
- langfun/core/component_test.py +2 -29
- langfun/core/concurrent.py +187 -82
- langfun/core/concurrent_test.py +28 -19
- langfun/core/console.py +7 -3
- langfun/core/eval/base.py +2 -3
- langfun/core/eval/v2/evaluation.py +3 -1
- langfun/core/eval/v2/reporting.py +8 -4
- langfun/core/language_model.py +84 -8
- langfun/core/language_model_test.py +84 -29
- langfun/core/llms/__init__.py +46 -11
- langfun/core/llms/anthropic.py +1 -123
- langfun/core/llms/anthropic_test.py +0 -48
- langfun/core/llms/deepseek.py +117 -0
- langfun/core/llms/deepseek_test.py +61 -0
- langfun/core/llms/gemini.py +1 -1
- langfun/core/llms/groq.py +12 -99
- langfun/core/llms/groq_test.py +31 -137
- langfun/core/llms/llama_cpp.py +17 -54
- langfun/core/llms/llama_cpp_test.py +2 -34
- langfun/core/llms/openai.py +9 -147
- langfun/core/llms/openai_compatible.py +179 -0
- langfun/core/llms/openai_compatible_test.py +495 -0
- langfun/core/llms/openai_test.py +13 -423
- langfun/core/llms/rest_test.py +1 -1
- langfun/core/llms/vertexai.py +387 -18
- langfun/core/llms/vertexai_test.py +52 -0
- langfun/core/message_test.py +3 -3
- langfun/core/modalities/mime.py +8 -0
- langfun/core/modalities/mime_test.py +19 -4
- langfun/core/modality_test.py +0 -1
- langfun/core/structured/mapping.py +13 -13
- langfun/core/structured/mapping_test.py +2 -2
- langfun/core/structured/schema.py +16 -8
- langfun/core/structured/schema_generation.py +1 -1
- {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/METADATA +13 -2
- {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/RECORD +50 -52
- {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/WHEEL +1 -1
- langfun/core/coding/python/errors.py +0 -108
- langfun/core/coding/python/errors_test.py +0 -99
- langfun/core/coding/python/permissions.py +0 -90
- langfun/core/coding/python/permissions_test.py +0 -86
- langfun/core/text_formatting.py +0 -168
- langfun/core/text_formatting_test.py +0 -65
- {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/LICENSE +0 -0
- {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/top_level.txt +0 -0
@@ -13,206 +13,120 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
"""Python code parsing."""
|
15
15
|
|
16
|
-
import ast
|
17
16
|
import inspect
|
18
17
|
import io
|
19
18
|
import re
|
20
19
|
|
21
|
-
import langfun.core as lf
|
22
|
-
from langfun.core.coding.python import errors
|
23
|
-
from langfun.core.coding.python import permissions
|
24
|
-
|
25
|
-
|
26
|
-
class PythonCodeParser(lf.Component):
|
27
|
-
"""Python code parser with permission control."""
|
28
|
-
|
29
|
-
_ID_REGEX = re.compile('^[a-zA-Z_\\-]*$')
|
30
|
-
|
31
|
-
class _CodeValidator(ast.NodeVisitor):
|
32
|
-
"""Python AST node visitor for ensuring code are permitted."""
|
33
|
-
|
34
|
-
def __init__(self, code: str, permission: permissions.CodePermission):
|
35
|
-
super().__init__()
|
36
|
-
self.code = code
|
37
|
-
self.permission = permission
|
38
|
-
|
39
|
-
def verify(
|
40
|
-
self,
|
41
|
-
node,
|
42
|
-
flag: permissions.CodePermission,
|
43
|
-
node_type,
|
44
|
-
error_message: str,
|
45
|
-
) -> None:
|
46
|
-
if isinstance(node, node_type) and not (self.permission & flag):
|
47
|
-
raise SyntaxError(
|
48
|
-
error_message, (
|
49
|
-
'<generated-code>',
|
50
|
-
node.lineno,
|
51
|
-
node.col_offset,
|
52
|
-
self._code_line(node.lineno),
|
53
|
-
node.end_lineno,
|
54
|
-
node.end_col_offset,
|
55
|
-
))
|
56
|
-
|
57
|
-
def _code_line(self, lineno):
|
58
|
-
return self.code.split('\n')[lineno - 1]
|
59
|
-
|
60
|
-
def generic_visit(self, node):
|
61
|
-
self.verify(
|
62
|
-
node,
|
63
|
-
permissions.CodePermission.CONDITION,
|
64
|
-
(ast.If, ast.Match),
|
65
|
-
'Condition is not allowed.',
|
66
|
-
)
|
67
|
-
|
68
|
-
self.verify(
|
69
|
-
node,
|
70
|
-
permissions.CodePermission.LOOP,
|
71
|
-
(ast.For, ast.While, ast.AsyncFor, ast.AsyncWith),
|
72
|
-
'Loop is not allowed.',
|
73
|
-
)
|
74
|
-
|
75
|
-
self.verify(
|
76
|
-
node,
|
77
|
-
permissions.CodePermission.EXCEPTION,
|
78
|
-
(ast.Try, ast.Raise, ast.Assert),
|
79
|
-
'Exception is not allowed.',
|
80
|
-
)
|
81
|
-
|
82
|
-
self.verify(
|
83
|
-
node,
|
84
|
-
permissions.CodePermission.CLASS_DEFINITION,
|
85
|
-
ast.ClassDef,
|
86
|
-
'Class definition is not allowed.',
|
87
|
-
)
|
88
|
-
|
89
|
-
self.verify(
|
90
|
-
node,
|
91
|
-
permissions.CodePermission.FUNCTION_DEFINITION,
|
92
|
-
(
|
93
|
-
ast.FunctionDef,
|
94
|
-
ast.AsyncFunctionDef,
|
95
|
-
ast.Return,
|
96
|
-
ast.Yield,
|
97
|
-
ast.YieldFrom,
|
98
|
-
),
|
99
|
-
'Function definition is not allowed.',
|
100
|
-
)
|
101
|
-
|
102
|
-
self.verify(
|
103
|
-
node,
|
104
|
-
permissions.CodePermission.IMPORT,
|
105
|
-
(ast.Import, ast.ImportFrom),
|
106
|
-
'`import` is not allowed.',
|
107
|
-
)
|
108
|
-
|
109
|
-
super().generic_visit(node)
|
110
|
-
|
111
|
-
def parse(
|
112
|
-
self, code: str, permission: permissions.CodePermission
|
113
|
-
) -> tuple[str, ast.AST]:
|
114
|
-
code = self.clean(code)
|
115
|
-
try:
|
116
|
-
parsed_code = ast.parse(code, mode='exec')
|
117
|
-
PythonCodeParser._CodeValidator(code, permission).visit(parsed_code)
|
118
|
-
except SyntaxError as e:
|
119
|
-
raise errors.CodeError(code, e) from e
|
120
|
-
return code, parsed_code
|
121
|
-
|
122
|
-
def clean(self, code_text: str) -> str:
|
123
|
-
# TODO(daiyip): Deal with markdown in docstrings.
|
124
|
-
code = io.StringIO()
|
125
|
-
quote_char = None
|
126
|
-
in_code = False
|
127
|
-
i = 0
|
128
|
-
in_comment = False
|
129
|
-
while i < len(code_text):
|
130
|
-
c = code_text[i]
|
131
|
-
# Detect code block separator (```).
|
132
|
-
if (not in_comment
|
133
|
-
and quote_char is None
|
134
|
-
and c == '`'
|
135
|
-
and code_text[i:i + 3] == '```'):
|
136
|
-
in_code = not in_code
|
137
|
-
if in_code:
|
138
|
-
i += 3
|
139
|
-
continue
|
140
|
-
else:
|
141
|
-
break
|
142
|
-
|
143
|
-
# Detect string literal boundary.
|
144
|
-
if (in_code
|
145
|
-
and not in_comment
|
146
|
-
and c in ('\'', '"')
|
147
|
-
and i > 0
|
148
|
-
and code_text[i - 1] != '\\'):
|
149
|
-
# Handle ''' and """.
|
150
|
-
if code_text[i: i + 3] == c * 3:
|
151
|
-
c = c * 3
|
152
|
-
i += 2
|
153
|
-
|
154
|
-
if quote_char is None:
|
155
|
-
quote_char = c
|
156
|
-
elif quote_char == c:
|
157
|
-
# NOTE(daiyip): at times, LM forgets to escape quotes inside a string.
|
158
|
-
# Thus we do some smart checking here to automatically correct such
|
159
|
-
# case. This logic here is pretty involved in handling special cases.
|
160
|
-
# We might want to revisit them later.
|
161
|
-
|
162
|
-
# Peek forward to see if it could be a valid string.
|
163
|
-
nt, nnt_start = _next_token(code_text, i + 1)
|
164
|
-
if (len(c) == 3
|
165
|
-
or nt in (',', '[', ']', '}', ')', '+', '*', '%', '\n', ':')):
|
166
|
-
end_quote = True
|
167
|
-
elif nt == ' ':
|
168
|
-
# Detect if . could be a method invocation.
|
169
|
-
# NOTE(daiyip): 'in' and 'not in' might have false positives. But
|
170
|
-
# given the chance is low, we do not complicate the reasoning logic
|
171
|
-
# for now.
|
172
|
-
nnt, _ = _next_token(code_text, nnt_start, skip_whitespace=True)
|
173
|
-
end_quote = nnt in ('+', '*', '%', '#', '[', 'in', 'not', ':')
|
174
|
-
elif nt == '.':
|
175
|
-
# Detect if . could be method invocation on string.
|
176
|
-
nnt, nnnt_start = _next_token(code_text, nnt_start)
|
177
|
-
nnnt, _ = _next_token(code_text, nnnt_start)
|
178
|
-
end_quote = nnt.isidentifier() and nnnt == '('
|
179
|
-
else:
|
180
|
-
end_quote = False
|
181
|
-
|
182
|
-
if end_quote:
|
183
|
-
quote_char = None
|
184
|
-
else:
|
185
|
-
c = f'\\{c}'
|
186
|
-
# Detect comment.
|
187
|
-
elif c == '#' and quote_char is None:
|
188
|
-
in_comment = True
|
189
|
-
# Detect end-of-comment.
|
190
|
-
elif c == '\n':
|
191
|
-
# NOTE(daiyip): deal with cases that LM forgot to escape linebreaks
|
192
|
-
# within strings.
|
193
|
-
if quote_char is not None:
|
194
|
-
# Only add \\ for ' and " (other than ''' and """).
|
195
|
-
if len(quote_char) == 1:
|
196
|
-
c = '\\n'
|
197
|
-
else:
|
198
|
-
in_comment = False
|
199
20
|
|
21
|
+
_ID_REGEX = re.compile('^[a-zA-Z_\\-]*$')
|
22
|
+
|
23
|
+
|
24
|
+
def clean(code_text: str) -> str:
|
25
|
+
"""Cleans up Python code.
|
26
|
+
|
27
|
+
LLM may generate code with markdown annotations, as well as minor syntax
|
28
|
+
errors. This function removes such annotations and fixes minor syntax errors
|
29
|
+
without extra LLM calls.
|
30
|
+
|
31
|
+
Args:
|
32
|
+
code_text: The code text to clean up.
|
33
|
+
|
34
|
+
Returns:
|
35
|
+
The cleaned up code text.
|
36
|
+
"""
|
37
|
+
# TODO(daiyip): Deal with markdown in docstrings.
|
38
|
+
code = io.StringIO()
|
39
|
+
quote_char = None
|
40
|
+
in_code = False
|
41
|
+
i = 0
|
42
|
+
in_comment = False
|
43
|
+
while i < len(code_text):
|
44
|
+
c = code_text[i]
|
45
|
+
# Detect code block separator (```).
|
46
|
+
if (not in_comment
|
47
|
+
and quote_char is None
|
48
|
+
and c == '`'
|
49
|
+
and code_text[i:i + 3] == '```'):
|
50
|
+
in_code = not in_code
|
200
51
|
if in_code:
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
#
|
213
|
-
|
214
|
-
|
215
|
-
|
52
|
+
i += 3
|
53
|
+
continue
|
54
|
+
else:
|
55
|
+
break
|
56
|
+
|
57
|
+
# Detect string literal boundary.
|
58
|
+
if (in_code
|
59
|
+
and not in_comment
|
60
|
+
and c in ('\'', '"')
|
61
|
+
and i > 0
|
62
|
+
and code_text[i - 1] != '\\'):
|
63
|
+
# Handle ''' and """.
|
64
|
+
if code_text[i: i + 3] == c * 3:
|
65
|
+
c = c * 3
|
66
|
+
i += 2
|
67
|
+
|
68
|
+
if quote_char is None:
|
69
|
+
quote_char = c
|
70
|
+
elif quote_char == c:
|
71
|
+
# NOTE(daiyip): at times, LM forgets to escape quotes inside a string.
|
72
|
+
# Thus we do some smart checking here to automatically correct such
|
73
|
+
# case. This logic here is pretty involved in handling special cases.
|
74
|
+
# We might want to revisit them later.
|
75
|
+
|
76
|
+
# Peek forward to see if it could be a valid string.
|
77
|
+
nt, nnt_start = _next_token(code_text, i + 1)
|
78
|
+
if (len(c) == 3
|
79
|
+
or nt in (',', '[', ']', '}', ')', '+', '*', '%', '\n', ':')):
|
80
|
+
end_quote = True
|
81
|
+
elif nt == ' ':
|
82
|
+
# Detect if . could be a method invocation.
|
83
|
+
# NOTE(daiyip): 'in' and 'not in' might have false positives. But
|
84
|
+
# given the chance is low, we do not complicate the reasoning logic
|
85
|
+
# for now.
|
86
|
+
nnt, _ = _next_token(code_text, nnt_start, skip_whitespace=True)
|
87
|
+
end_quote = nnt in ('+', '*', '%', '#', '[', 'in', 'not', ':')
|
88
|
+
elif nt == '.':
|
89
|
+
# Detect if . could be method invocation on string.
|
90
|
+
nnt, nnnt_start = _next_token(code_text, nnt_start)
|
91
|
+
nnnt, _ = _next_token(code_text, nnnt_start)
|
92
|
+
end_quote = nnt.isidentifier() and nnnt == '('
|
93
|
+
else:
|
94
|
+
end_quote = False
|
95
|
+
|
96
|
+
if end_quote:
|
97
|
+
quote_char = None
|
98
|
+
else:
|
99
|
+
c = f'\\{c}'
|
100
|
+
# Detect comment.
|
101
|
+
elif c == '#' and quote_char is None:
|
102
|
+
in_comment = True
|
103
|
+
# Detect end-of-comment.
|
104
|
+
elif c == '\n':
|
105
|
+
# NOTE(daiyip): deal with cases that LM forgot to escape linebreaks
|
106
|
+
# within strings.
|
107
|
+
if quote_char is not None:
|
108
|
+
# Only add \\ for ' and " (other than ''' and """).
|
109
|
+
if len(quote_char) == 1:
|
110
|
+
c = '\\n'
|
111
|
+
else:
|
112
|
+
in_comment = False
|
113
|
+
|
114
|
+
if in_code:
|
115
|
+
code.write(c)
|
116
|
+
|
117
|
+
i += 1
|
118
|
+
|
119
|
+
code = code.getvalue()
|
120
|
+
if code:
|
121
|
+
pos = code.find('\n')
|
122
|
+
# Strip markdown code type. E.g. ```python
|
123
|
+
if pos > 0 and _ID_REGEX.match(code[:pos]):
|
124
|
+
code = code[pos:]
|
125
|
+
else:
|
126
|
+
# Maybe-code that resides not within a code markdown block.
|
127
|
+
# Adding '\n' makes inspect.cleandoc to make right adjustment.
|
128
|
+
code = '\n' + code_text
|
129
|
+
return inspect.cleandoc(code).strip()
|
216
130
|
|
217
131
|
|
218
132
|
def _next_token(
|
@@ -15,18 +15,16 @@
|
|
15
15
|
|
16
16
|
import inspect
|
17
17
|
import unittest
|
18
|
-
from langfun.core.coding.python import errors
|
19
18
|
from langfun.core.coding.python import parsing
|
20
|
-
from langfun.core.coding.python import permissions
|
21
19
|
|
22
20
|
|
23
|
-
class PythonCodeParserTest(unittest.TestCase):
|
21
|
+
class CleanTest(unittest.TestCase):
|
24
22
|
|
25
23
|
def assert_clean(self, code: str, cleaned_code: str, clean: bool = True):
|
26
24
|
if clean:
|
27
25
|
cleaned_code = inspect.cleandoc(cleaned_code)
|
28
26
|
self.assertEqual(
|
29
|
-
parsing.PythonCodeParser().clean(code), cleaned_code
|
27
|
+
parsing.clean(code), cleaned_code
|
30
28
|
)
|
31
29
|
|
32
30
|
def test_clean(self):
|
@@ -272,107 +270,6 @@ class PythonCodeParserTest(unittest.TestCase):
|
|
272
270
|
"""
|
273
271
|
)
|
274
272
|
|
275
|
-
def assert_allowed(self, code: str, permission: permissions.CodePermission):
|
276
|
-
_, ast = parsing.PythonCodeParser().parse(code, permission)
|
277
|
-
self.assertIsNotNone(ast)
|
278
|
-
|
279
|
-
def assert_not_allowed(
|
280
|
-
self, code: str, permission: permissions.CodePermission
|
281
|
-
):
|
282
|
-
with self.assertRaisesRegex(errors.CodeError, '.* is not allowed'):
|
283
|
-
parsing.PythonCodeParser().parse(code, permission)
|
284
|
-
|
285
|
-
def test_parse_with_allowed_code(self):
|
286
|
-
self.assert_allowed(
|
287
|
-
"""
|
288
|
-
x = y + 1
|
289
|
-
z = x + y
|
290
|
-
""",
|
291
|
-
permissions.CodePermission.BASIC,
|
292
|
-
)
|
293
|
-
self.assert_allowed(
|
294
|
-
"""
|
295
|
-
if x > 0:
|
296
|
-
print(x)
|
297
|
-
""",
|
298
|
-
permissions.CodePermission.CONDITION,
|
299
|
-
)
|
300
|
-
self.assert_allowed(
|
301
|
-
"""
|
302
|
-
for i in range(5):
|
303
|
-
print(i)
|
304
|
-
""",
|
305
|
-
permissions.CodePermission.LOOP,
|
306
|
-
)
|
307
|
-
self.assert_allowed(
|
308
|
-
"""
|
309
|
-
assert x > 1
|
310
|
-
""",
|
311
|
-
permissions.CodePermission.EXCEPTION,
|
312
|
-
)
|
313
|
-
self.assert_allowed(
|
314
|
-
"""
|
315
|
-
class A:
|
316
|
-
pass
|
317
|
-
""",
|
318
|
-
permissions.CodePermission.CLASS_DEFINITION,
|
319
|
-
)
|
320
|
-
self.assert_allowed(
|
321
|
-
"""
|
322
|
-
def foo(x, y):
|
323
|
-
return x + y
|
324
|
-
""",
|
325
|
-
permissions.CodePermission.FUNCTION_DEFINITION,
|
326
|
-
)
|
327
|
-
self.assert_allowed(
|
328
|
-
"""
|
329
|
-
import re
|
330
|
-
""",
|
331
|
-
permissions.CodePermission.IMPORT,
|
332
|
-
)
|
333
|
-
|
334
|
-
def test_parse_with_not_allowed_code(self):
|
335
|
-
self.assert_not_allowed(
|
336
|
-
"""
|
337
|
-
if x > 0:
|
338
|
-
print(x)
|
339
|
-
""",
|
340
|
-
permissions.CodePermission.BASIC,
|
341
|
-
)
|
342
|
-
self.assert_not_allowed(
|
343
|
-
"""
|
344
|
-
for i in range(5):
|
345
|
-
print(i)
|
346
|
-
""",
|
347
|
-
permissions.CodePermission.BASIC,
|
348
|
-
)
|
349
|
-
self.assert_not_allowed(
|
350
|
-
"""
|
351
|
-
assert x > 1
|
352
|
-
""",
|
353
|
-
permissions.CodePermission.BASIC,
|
354
|
-
)
|
355
|
-
self.assert_not_allowed(
|
356
|
-
"""
|
357
|
-
class A:
|
358
|
-
pass
|
359
|
-
""",
|
360
|
-
permissions.CodePermission.BASIC,
|
361
|
-
)
|
362
|
-
self.assert_not_allowed(
|
363
|
-
"""
|
364
|
-
def foo(x, y):
|
365
|
-
return x + y
|
366
|
-
""",
|
367
|
-
permissions.CodePermission.BASIC,
|
368
|
-
)
|
369
|
-
self.assert_not_allowed(
|
370
|
-
"""
|
371
|
-
import re
|
372
|
-
""",
|
373
|
-
permissions.CodePermission.BASIC,
|
374
|
-
)
|
375
|
-
|
376
273
|
|
377
274
|
if __name__ == '__main__':
|
378
275
|
unittest.main()
|