PyPI - langfun - Versions diffs - 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl - Mend

langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

langfun/core/__init__.py +1 -6
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +4 -7
langfun/core/coding/python/correction_test.py +2 -3
langfun/core/coding/python/execution.py +22 -211
langfun/core/coding/python/execution_test.py +11 -90
langfun/core/coding/python/generation.py +3 -2
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -194
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +11 -273
langfun/core/component_test.py +2 -29
langfun/core/concurrent.py +187 -82
langfun/core/concurrent_test.py +28 -19
langfun/core/console.py +7 -3
langfun/core/eval/base.py +2 -3
langfun/core/eval/v2/evaluation.py +3 -1
langfun/core/eval/v2/reporting.py +8 -4
langfun/core/language_model.py +84 -8
langfun/core/language_model_test.py +84 -29
langfun/core/llms/__init__.py +46 -11
langfun/core/llms/anthropic.py +1 -123
langfun/core/llms/anthropic_test.py +0 -48
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/gemini.py +1 -1
langfun/core/llms/groq.py +12 -99
langfun/core/llms/groq_test.py +31 -137
langfun/core/llms/llama_cpp.py +17 -54
langfun/core/llms/llama_cpp_test.py +2 -34
langfun/core/llms/openai.py +9 -147
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +13 -423
langfun/core/llms/rest_test.py +1 -1
langfun/core/llms/vertexai.py +387 -18
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/message_test.py +3 -3
langfun/core/modalities/mime.py +8 -0
langfun/core/modalities/mime_test.py +19 -4
langfun/core/modality_test.py +0 -1
langfun/core/structured/mapping.py +13 -13
langfun/core/structured/mapping_test.py +2 -2
langfun/core/structured/schema.py +16 -8
langfun/core/structured/schema_generation.py +1 -1
{langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/METADATA +13 -2
{langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/RECORD +50 -52
{langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/text_formatting.py +0 -168
langfun/core/text_formatting_test.py +0 -65
{langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/LICENSE +0 -0
{langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/top_level.txt +0 -0

langfun/core/coding/python/parsing.py CHANGED Viewed

@@ -13,206 +13,120 @@
 # limitations under the License.
 """Python code parsing."""
-import ast
 import inspect
 import io
 import re
-import langfun.core as lf
-from langfun.core.coding.python import errors
-from langfun.core.coding.python import permissions
-class PythonCodeParser(lf.Component):
-  """Python code parser with permission control."""
-  _ID_REGEX = re.compile('^[a-zA-Z_\\-]*$')
-  class _CodeValidator(ast.NodeVisitor):
-    """Python AST node visitor for ensuring code are permitted."""
-    def __init__(self, code: str, permission: permissions.CodePermission):
-      super().__init__()
-      self.code = code
-      self.permission = permission
-    def verify(
-        self,
-        node,
-        flag: permissions.CodePermission,
-        node_type,
-        error_message: str,
-    ) -> None:
-      if isinstance(node, node_type) and not (self.permission & flag):
-        raise SyntaxError(
-            error_message, (
-                '<generated-code>',
-                node.lineno,
-                node.col_offset,
-                self._code_line(node.lineno),
-                node.end_lineno,
-                node.end_col_offset,
-            ))
-    def _code_line(self, lineno):
-      return self.code.split('\n')[lineno - 1]
-    def generic_visit(self, node):
-      self.verify(
-          node,
-          permissions.CodePermission.CONDITION,
-          (ast.If, ast.Match),
-          'Condition is not allowed.',
-      )
-      self.verify(
-          node,
-          permissions.CodePermission.LOOP,
-          (ast.For, ast.While, ast.AsyncFor, ast.AsyncWith),
-          'Loop is not allowed.',
-      )
-      self.verify(
-          node,
-          permissions.CodePermission.EXCEPTION,
-          (ast.Try, ast.Raise, ast.Assert),
-          'Exception is not allowed.',
-      )
-      self.verify(
-          node,
-          permissions.CodePermission.CLASS_DEFINITION,
-          ast.ClassDef,
-          'Class definition is not allowed.',
-      )
-      self.verify(
-          node,
-          permissions.CodePermission.FUNCTION_DEFINITION,
-          (
-              ast.FunctionDef,
-              ast.AsyncFunctionDef,
-              ast.Return,
-              ast.Yield,
-              ast.YieldFrom,
-          ),
-          'Function definition is not allowed.',
-      )
-      self.verify(
-          node,
-          permissions.CodePermission.IMPORT,
-          (ast.Import, ast.ImportFrom),
-          '`import` is not allowed.',
-      )
-      super().generic_visit(node)
-  def parse(
-      self, code: str, permission: permissions.CodePermission
-  ) -> tuple[str, ast.AST]:
-    code = self.clean(code)
-    try:
-      parsed_code = ast.parse(code, mode='exec')
-      PythonCodeParser._CodeValidator(code, permission).visit(parsed_code)
-    except SyntaxError as e:
-      raise errors.CodeError(code, e) from e
-    return code, parsed_code
-  def clean(self, code_text: str) -> str:
-    # TODO(daiyip): Deal with markdown in docstrings.
-    code = io.StringIO()
-    quote_char = None
-    in_code = False
-    i = 0
-    in_comment = False
-    while i < len(code_text):
-      c = code_text[i]
-      # Detect code block separator (```).
-      if (not in_comment
-          and quote_char is None
-          and c == '`'
-          and code_text[i:i + 3] == '```'):
-        in_code = not in_code
-        if in_code:
-          i += 3
-          continue
-        else:
-          break
-      # Detect string literal boundary.
-      if (in_code
-          and not in_comment
-          and c in ('\'', '"')
-          and i > 0
-          and code_text[i - 1] != '\\'):
-        # Handle ''' and """.
-        if code_text[i: i + 3] == c * 3:
-          c = c * 3
-          i += 2
-        if quote_char is None:
-          quote_char = c
-        elif quote_char == c:
-          # NOTE(daiyip): at times, LM forgets to escape quotes inside a string.
-          # Thus we do some smart checking here to automatically correct such
-          # case. This logic here is pretty involved in handling special cases.
-          # We might want to revisit them later.
-          # Peek forward to see if it could be a valid string.
-          nt, nnt_start = _next_token(code_text, i + 1)
-          if (len(c) == 3
-              or nt in (',', '[', ']', '}', ')', '+', '*', '%', '\n', ':')):
-            end_quote = True
-          elif nt == ' ':
-            # Detect if . could be a method invocation.
-            # NOTE(daiyip): 'in' and 'not in' might have false positives. But
-            # given the chance is low, we do not complicate the reasoning logic
-            # for now.
-            nnt, _ = _next_token(code_text, nnt_start, skip_whitespace=True)
-            end_quote = nnt in ('+', '*', '%', '#', '[', 'in', 'not', ':')
-          elif nt == '.':
-            # Detect if . could be method invocation on string.
-            nnt, nnnt_start = _next_token(code_text, nnt_start)
-            nnnt, _ = _next_token(code_text, nnnt_start)
-            end_quote = nnt.isidentifier() and nnnt == '('
-          else:
-            end_quote = False
-          if end_quote:
-            quote_char = None
-          else:
-            c = f'\\{c}'
-      # Detect comment.
-      elif c == '#' and quote_char is None:
-        in_comment = True
-      # Detect end-of-comment.
-      elif c == '\n':
-        # NOTE(daiyip): deal with cases that LM forgot to escape linebreaks
-        # within strings.
-        if quote_char is not None:
-          # Only add \\ for ' and " (other than ''' and """).
-          if len(quote_char) == 1:
-            c = '\\n'
-        else:
-          in_comment = False
+_ID_REGEX = re.compile('^[a-zA-Z_\\-]*$')
+def clean(code_text: str) -> str:
+  """Cleans up Python code.
+  LLM may generate code with markdown annotations, as well as minor syntax
+  errors. This function removes such annotations and fixes minor syntax errors
+  without extra LLM calls.
+  Args:
+    code_text: The code text to clean up.
+  Returns:
+    The cleaned up code text.
+  """
+  # TODO(daiyip): Deal with markdown in docstrings.
+  code = io.StringIO()
+  quote_char = None
+  in_code = False
+  i = 0
+  in_comment = False
+  while i < len(code_text):
+    c = code_text[i]
+    # Detect code block separator (```).
+    if (not in_comment
+        and quote_char is None
+        and c == '`'
+        and code_text[i:i + 3] == '```'):
+      in_code = not in_code
       if in_code:
-        code.write(c)
-      i += 1
-    code = code.getvalue()
-    if code:
-      pos = code.find('\n')
-      # Strip markdown code type. E.g. ```python
-      if pos > 0 and self._ID_REGEX.match(code[:pos]):
-        code = code[pos:]
-    else:
-      # Maybe-code that resides not within a code markdown block.
-      # Adding '\n' makes inspect.cleandoc to make right adjustment.
-      code = '\n' + code_text
-    return inspect.cleandoc(code).strip()
+        i += 3
+        continue
+      else:
+        break
+    # Detect string literal boundary.
+    if (in_code
+        and not in_comment
+        and c in ('\'', '"')
+        and i > 0
+        and code_text[i - 1] != '\\'):
+      # Handle ''' and """.
+      if code_text[i: i + 3] == c * 3:
+        c = c * 3
+        i += 2
+      if quote_char is None:
+        quote_char = c
+      elif quote_char == c:
+        # NOTE(daiyip): at times, LM forgets to escape quotes inside a string.
+        # Thus we do some smart checking here to automatically correct such
+        # case. This logic here is pretty involved in handling special cases.
+        # We might want to revisit them later.
+        # Peek forward to see if it could be a valid string.
+        nt, nnt_start = _next_token(code_text, i + 1)
+        if (len(c) == 3
+            or nt in (',', '[', ']', '}', ')', '+', '*', '%', '\n', ':')):
+          end_quote = True
+        elif nt == ' ':
+          # Detect if . could be a method invocation.
+          # NOTE(daiyip): 'in' and 'not in' might have false positives. But
+          # given the chance is low, we do not complicate the reasoning logic
+          # for now.
+          nnt, _ = _next_token(code_text, nnt_start, skip_whitespace=True)
+          end_quote = nnt in ('+', '*', '%', '#', '[', 'in', 'not', ':')
+        elif nt == '.':
+          # Detect if . could be method invocation on string.
+          nnt, nnnt_start = _next_token(code_text, nnt_start)
+          nnnt, _ = _next_token(code_text, nnnt_start)
+          end_quote = nnt.isidentifier() and nnnt == '('
+        else:
+          end_quote = False
+        if end_quote:
+          quote_char = None
+        else:
+          c = f'\\{c}'
+    # Detect comment.
+    elif c == '#' and quote_char is None:
+      in_comment = True
+    # Detect end-of-comment.
+    elif c == '\n':
+      # NOTE(daiyip): deal with cases that LM forgot to escape linebreaks
+      # within strings.
+      if quote_char is not None:
+        # Only add \\ for ' and " (other than ''' and """).
+        if len(quote_char) == 1:
+          c = '\\n'
+      else:
+        in_comment = False
+    if in_code:
+      code.write(c)
+    i += 1
+  code = code.getvalue()
+  if code:
+    pos = code.find('\n')
+    # Strip markdown code type. E.g. ```python
+    if pos > 0 and _ID_REGEX.match(code[:pos]):
+      code = code[pos:]
+  else:
+    # Maybe-code that resides not within a code markdown block.
+    # Adding '\n' makes inspect.cleandoc to make right adjustment.
+    code = '\n' + code_text
+  return inspect.cleandoc(code).strip()
 def _next_token(

langfun/core/coding/python/parsing_test.py CHANGED Viewed

@@ -15,18 +15,16 @@
 import inspect
 import unittest
-from langfun.core.coding.python import errors
 from langfun.core.coding.python import parsing
-from langfun.core.coding.python import permissions
-class PythonCodeParserTest(unittest.TestCase):
+class CleanTest(unittest.TestCase):
   def assert_clean(self, code: str, cleaned_code: str, clean: bool = True):
     if clean:
       cleaned_code = inspect.cleandoc(cleaned_code)
     self.assertEqual(
-        parsing.PythonCodeParser().clean(code), cleaned_code
+        parsing.clean(code), cleaned_code
     )
   def test_clean(self):
@@ -272,107 +270,6 @@ class PythonCodeParserTest(unittest.TestCase):
         """
     )
-  def assert_allowed(self, code: str, permission: permissions.CodePermission):
-    _, ast = parsing.PythonCodeParser().parse(code, permission)
-    self.assertIsNotNone(ast)
-  def assert_not_allowed(
-      self, code: str, permission: permissions.CodePermission
-  ):
-    with self.assertRaisesRegex(errors.CodeError, '.* is not allowed'):
-      parsing.PythonCodeParser().parse(code, permission)
-  def test_parse_with_allowed_code(self):
-    self.assert_allowed(
-        """
-        x = y + 1
-        z = x + y
-        """,
-        permissions.CodePermission.BASIC,
-    )
-    self.assert_allowed(
-        """
-        if x > 0:
-          print(x)
-        """,
-        permissions.CodePermission.CONDITION,
-    )
-    self.assert_allowed(
-        """
-        for i in range(5):
-          print(i)
-        """,
-        permissions.CodePermission.LOOP,
-    )
-    self.assert_allowed(
-        """
-        assert x > 1
-        """,
-        permissions.CodePermission.EXCEPTION,
-    )
-    self.assert_allowed(
-        """
-        class A:
-          pass
-        """,
-        permissions.CodePermission.CLASS_DEFINITION,
-    )
-    self.assert_allowed(
-        """
-        def foo(x, y):
-          return x + y
-        """,
-        permissions.CodePermission.FUNCTION_DEFINITION,
-    )
-    self.assert_allowed(
-        """
-        import re
-        """,
-        permissions.CodePermission.IMPORT,
-    )
-  def test_parse_with_not_allowed_code(self):
-    self.assert_not_allowed(
-        """
-        if x > 0:
-          print(x)
-        """,
-        permissions.CodePermission.BASIC,
-    )
-    self.assert_not_allowed(
-        """
-        for i in range(5):
-          print(i)
-        """,
-        permissions.CodePermission.BASIC,
-    )
-    self.assert_not_allowed(
-        """
-        assert x > 1
-        """,
-        permissions.CodePermission.BASIC,
-    )
-    self.assert_not_allowed(
-        """
-        class A:
-          pass
-        """,
-        permissions.CodePermission.BASIC,
-    )
-    self.assert_not_allowed(
-        """
-        def foo(x, y):
-          return x + y
-        """,
-        permissions.CodePermission.BASIC,
-    )
-    self.assert_not_allowed(
-        """
-        import re
-        """,
-        permissions.CodePermission.BASIC,
-    )
 if __name__ == '__main__':
   unittest.main()

langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl

langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl