PyPI - langfun - Versions diffs - 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl - Mend

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145) hide show

langfun/__init__.py +22 -2
langfun/core/__init__.py +17 -5
langfun/core/agentic/__init__.py +30 -0
langfun/core/agentic/action.py +854 -0
langfun/core/agentic/action_eval.py +150 -0
langfun/core/agentic/action_eval_test.py +109 -0
langfun/core/agentic/action_test.py +136 -0
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +37 -28
langfun/core/coding/python/correction_test.py +29 -3
langfun/core/coding/python/execution.py +40 -216
langfun/core/coding/python/execution_test.py +29 -89
langfun/core/coding/python/generation.py +21 -11
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -193
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +69 -2
langfun/core/component_test.py +54 -0
langfun/core/concurrent.py +414 -117
langfun/core/concurrent_test.py +111 -24
langfun/core/console.py +18 -5
langfun/core/console_test.py +17 -0
langfun/core/eval/__init__.py +17 -0
langfun/core/eval/base.py +767 -140
langfun/core/eval/base_test.py +238 -53
langfun/core/eval/matching.py +80 -76
langfun/core/eval/matching_test.py +19 -9
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +37 -28
langfun/core/eval/scoring_test.py +21 -3
langfun/core/eval/v2/__init__.py +42 -0
langfun/core/eval/v2/checkpointing.py +380 -0
langfun/core/eval/v2/checkpointing_test.py +228 -0
langfun/core/eval/v2/eval_test_helper.py +136 -0
langfun/core/eval/v2/evaluation.py +725 -0
langfun/core/eval/v2/evaluation_test.py +180 -0
langfun/core/eval/v2/example.py +305 -0
langfun/core/eval/v2/example_test.py +128 -0
langfun/core/eval/v2/experiment.py +1048 -0
langfun/core/eval/v2/experiment_test.py +433 -0
langfun/core/eval/v2/metric_values.py +156 -0
langfun/core/eval/v2/metric_values_test.py +80 -0
langfun/core/eval/v2/metrics.py +357 -0
langfun/core/eval/v2/metrics_test.py +203 -0
langfun/core/eval/v2/progress.py +348 -0
langfun/core/eval/v2/progress_test.py +82 -0
langfun/core/eval/v2/progress_tracking.py +210 -0
langfun/core/eval/v2/progress_tracking_test.py +66 -0
langfun/core/eval/v2/reporting.py +270 -0
langfun/core/eval/v2/reporting_test.py +158 -0
langfun/core/eval/v2/runners.py +488 -0
langfun/core/eval/v2/runners_test.py +334 -0
langfun/core/langfunc.py +3 -21
langfun/core/langfunc_test.py +26 -8
langfun/core/language_model.py +686 -48
langfun/core/language_model_test.py +681 -44
langfun/core/llms/__init__.py +100 -12
langfun/core/llms/anthropic.py +488 -0
langfun/core/llms/anthropic_test.py +235 -0
langfun/core/llms/cache/base.py +21 -2
langfun/core/llms/cache/in_memory.py +13 -0
langfun/core/llms/cache/in_memory_test.py +88 -28
langfun/core/llms/compositional.py +101 -0
langfun/core/llms/compositional_test.py +73 -0
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/fake.py +39 -26
langfun/core/llms/fake_test.py +136 -11
langfun/core/llms/gemini.py +507 -0
langfun/core/llms/gemini_test.py +195 -0
langfun/core/llms/google_genai.py +62 -218
langfun/core/llms/google_genai_test.py +9 -197
langfun/core/llms/groq.py +276 -0
langfun/core/llms/groq_test.py +64 -0
langfun/core/llms/llama_cpp.py +15 -40
langfun/core/llms/llama_cpp_test.py +4 -30
langfun/core/llms/openai.py +436 -226
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +35 -174
langfun/core/llms/rest.py +113 -0
langfun/core/llms/rest_test.py +111 -0
langfun/core/llms/vertexai.py +192 -0
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/logging.py +284 -0
langfun/core/logging_test.py +125 -0
langfun/core/message.py +319 -9
langfun/core/message_test.py +190 -13
langfun/core/modalities/__init__.py +6 -2
langfun/core/modalities/audio.py +30 -0
langfun/core/modalities/audio_test.py +63 -0
langfun/core/modalities/image.py +39 -20
langfun/core/modalities/image_test.py +52 -9
langfun/core/modalities/mime.py +206 -29
langfun/core/modalities/mime_test.py +90 -9
langfun/core/modalities/ms_office.py +117 -0
langfun/core/modalities/ms_office_test.py +389 -0
langfun/core/modalities/pdf.py +22 -0
langfun/core/modalities/pdf_test.py +57 -0
langfun/core/modalities/video.py +9 -23
langfun/core/modalities/video_test.py +3 -3
langfun/core/modality.py +26 -3
langfun/core/modality_test.py +2 -2
langfun/core/sampling.py +11 -11
langfun/core/structured/__init__.py +15 -16
langfun/core/structured/completion.py +32 -5
langfun/core/structured/completion_test.py +9 -8
langfun/core/structured/description.py +2 -2
langfun/core/structured/description_test.py +3 -3
langfun/core/structured/function_generation.py +278 -0
langfun/core/structured/function_generation_test.py +399 -0
langfun/core/structured/mapping.py +150 -46
langfun/core/structured/mapping_test.py +105 -0
langfun/core/structured/parsing.py +33 -21
langfun/core/structured/parsing_test.py +71 -22
langfun/core/structured/querying.py +746 -0
langfun/core/structured/{prompting_test.py → querying_test.py} +545 -60
langfun/core/structured/schema.py +208 -99
langfun/core/structured/schema_generation.py +1 -1
langfun/core/structured/schema_generation_test.py +2 -2
langfun/core/structured/schema_test.py +133 -34
langfun/core/structured/scoring.py +125 -19
langfun/core/structured/scoring_test.py +30 -0
langfun/core/structured/tokenization.py +64 -0
langfun/core/structured/tokenization_test.py +48 -0
langfun/core/template.py +240 -11
langfun/core/template_test.py +146 -1
langfun/core/templates/conversation.py +9 -0
langfun/core/templates/conversation_test.py +4 -3
langfun/core/templates/selfplay_test.py +14 -2
langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/structured/prompting.py +0 -217
langfun/core/text_formatting.py +0 -162
langfun/core/text_formatting_test.py +0 -47
langfun-0.0.2.dev20240330.dist-info/METADATA +0 -99
langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0

langfun/core/structured/schema.py CHANGED Viewed

@@ -16,6 +16,7 @@
 import abc
 import inspect
 import io
+import re
 import textwrap
 import typing
 from typing import Any, Literal, Sequence, Type, Union
@@ -24,6 +25,17 @@ from langfun.core.coding.python import correction
 import pyglove as pg
+def include_method_in_prompt(method):
+  """Decorator to include a method in the class definition of the prompt."""
+  setattr(method, '__show_in_prompt__', True)
+  return method
+def should_include_method_in_prompt(method):
+  """Returns true if the method should be shown in the prompt."""
+  return getattr(method, '__show_in_prompt__', False)
 def parse_value_spec(value) -> pg.typing.ValueSpec:
   """Parses a PyGlove ValueSpec equivalence into a ValueSpec."""
   if isinstance(value, pg.typing.ValueSpec):
@@ -79,26 +91,35 @@ class SchemaError(Exception):   # pylint: disable=g-bad-exception-name
   def __str__(self):
     r = io.StringIO()
     r.write(
-        lf.colored(f'{self.cause.__class__.__name__}: {self.cause}', 'magenta'))
+        pg.colored(
+            f'{self.cause.__class__.__name__}: {self.cause}', 'magenta'
+        )
+    )
     r.write('\n')
-    r.write(lf.colored('Schema:', 'red'))
+    r.write(pg.colored('Schema:', 'red'))
     r.write('\n\n')
     r.write(textwrap.indent(
-        lf.colored(schema_repr(self.protocol).repr(self.schema), 'magenta'),
+        pg.colored(
+            schema_repr(self.protocol).repr(self.schema), 'magenta'
+        ),
         ' ' * 2
     ))
     r.write('\n\n')
-    r.write(lf.colored('Generated value:', 'red'))
+    r.write(pg.colored('Generated value:', 'red'))
     r.write('\n\n')
     r.write(textwrap.indent(
-        lf.colored(value_repr(self.protocol).repr(self.value), 'magenta'),
+        pg.colored(value_repr(self.protocol).repr(self.value), 'magenta'),
         ' ' * 2
     ))
     return r.getvalue()
-class Schema(lf.NaturalLanguageFormattable, pg.Object):
+class Schema(
+    lf.NaturalLanguageFormattable,
+    pg.Object,
+    pg.views.HtmlTreeView.Extension
+):
   """Base class for structured data schema."""
   spec: pg.typing.Annotated[
@@ -163,9 +184,12 @@ class Schema(lf.NaturalLanguageFormattable, pg.Object):
   def class_dependencies(
       self,
+      include_base_classes: bool = True,
       include_subclasses: bool = True) -> list[Type[Any]]:
     """Returns a list of class dependencies for current schema."""
-    return class_dependencies(self.spec, include_subclasses)
+    return class_dependencies(
+        self.spec, include_base_classes, include_subclasses
+    )
   @classmethod
   def from_value(cls, value) -> 'Schema':
@@ -174,6 +198,29 @@ class Schema(lf.NaturalLanguageFormattable, pg.Object):
       return value
     return cls(parse_value_spec(value))
+  def _html_tree_view_content(
+      self,
+      *,
+      view: pg.views.HtmlTreeView,
+      **kwargs,
+  ):
+    return pg.Html.element(
+        'div',
+        [pg.Html.escape(self.schema_str(protocol='python'))],
+        css_classes=['lf-schema-definition']
+    ).add_style(
+        """
+        .lf-schema-definition {
+            color: blue;
+            margin: 5px;
+            white-space: pre-wrap;
+        }
+        """
+    )
+SchemaType = Union[Schema, Type[Any], list[Type[Any]], dict[str, Any]]
 def _top_level_object_specs_from_value(value: pg.Symbolic) -> list[Type[Any]]:
   """Returns a list of top level value specs from a symbolic value."""
@@ -198,11 +245,12 @@ def class_dependencies(
         Type[pg.Object],
         tuple[Union[pg.typing.ValueSpec, Type[pg.Object]], ...],
     ],
+    include_base_classes: bool = True,
     include_subclasses: bool = True,
 ) -> list[Type[Any]]:
   """Returns a list of class dependencies from a value or specs."""
   if isinstance(value_or_spec, Schema):
-    return value_or_spec.class_dependencies(include_subclasses)
+    value_or_spec = value_or_spec.spec
   if inspect.isclass(value_or_spec) or isinstance(
       value_or_spec, pg.typing.ValueSpec
@@ -236,16 +284,17 @@ def class_dependencies(
       if vs.cls not in seen:
         seen.add(vs.cls)
-        # Add base classes as dependencies.
-        for base_cls in vs.cls.__bases__:
-          # We only keep track of user-defined symbolic classes.
-          if base_cls is not object and base_cls is not pg.Object:
-            _fill_dependencies(
-                pg.typing.Object(base_cls), include_subclasses=False
-            )
+        if include_base_classes:
+          # Add base classes as dependencies.
+          for base_cls in vs.cls.__bases__:
+            # We only keep track of user-defined symbolic classes.
+            if base_cls is not object and base_cls is not pg.Object:
+              _fill_dependencies(
+                  pg.typing.Object(base_cls), include_subclasses=False
+              )
         # Add members as dependencies.
-        for field in _pg_schema(vs.cls).values():
+        for field in pg.schema(vs.cls).values():
           _fill_dependencies(field.value, include_subclasses)
       _add_dependency(vs.cls)
@@ -262,7 +311,7 @@ def class_dependencies(
         _fill_dependencies(elem.value, include_subclasses)
     elif isinstance(vs, pg.typing.Dict) and vs.schema:
       for v in vs.schema.values():
-        _fill_dependencies(v, include_subclasses)
+        _fill_dependencies(v.value, include_subclasses)
     elif isinstance(vs, pg.typing.Union):
       for v in vs.candidates:
         _fill_dependencies(v, include_subclasses)
@@ -314,23 +363,35 @@ class SchemaPythonRepr(SchemaRepr):
       ret += f'\n\n{class_definition_str}'
     return ret.strip()
-  def class_definitions(self, schema: Schema, **kwargs) -> str | None:
-    deps = schema.class_dependencies(include_subclasses=True)
-    return class_definitions(deps, **kwargs)
+  def class_definitions(
+      self,
+      schema: Schema,
+      additional_dependencies: list[Type[Any]] | None = None,
+      **kwargs
+  ) -> str | None:
+    """Returns a string containing of class definitions from a schema."""
+    deps = schema.class_dependencies(
+        include_base_classes=False, include_subclasses=True
+    )
+    allowed_dependencies = set(deps)
+    if additional_dependencies:
+      allowed_dependencies.update(additional_dependencies)
+    return class_definitions(
+        deps, allowed_dependencies=allowed_dependencies, **kwargs)
   def result_definition(self, schema: Schema) -> str:
     return annotation(schema.spec)
-def source_form(value, markdown: bool = False) -> str:
+def source_form(value, compact: bool = True, markdown: bool = False) -> str:
   """Returns the source code form of an object."""
-  return ValuePythonRepr().repr(value, markdown=markdown)
+  return ValuePythonRepr().repr(value, compact=compact, markdown=markdown)
 def class_definitions(
     classes: Sequence[Type[Any]],
     *,
-    include_pg_object_as_base: bool = False,
+    allowed_dependencies: set[Type[Any]] | None = None,
     strict: bool = False,
     markdown: bool = False,
 ) -> str | None:
@@ -345,7 +406,7 @@ def class_definitions(
         class_definition(
             cls,
             strict=strict,
-            include_pg_object_as_base=include_pg_object_as_base,
+            allowed_dependencies=allowed_dependencies,
         )
     )
   ret = def_str.getvalue()
@@ -355,15 +416,17 @@ def class_definitions(
 def class_definition(
-    cls, strict: bool = False, include_pg_object_as_base: bool = False
+    cls,
+    strict: bool = False,
+    allowed_dependencies: set[Type[Any]] | None = None,
 ) -> str:
   """Returns the Python class definition."""
   out = io.StringIO()
-  schema = _pg_schema(cls)
+  schema = pg.schema(cls)
   eligible_bases = []
   for base_cls in cls.__bases__:
     if base_cls is not object:
-      if include_pg_object_as_base or base_cls is not pg.Object:
+      if allowed_dependencies is None or base_cls in allowed_dependencies:
         eligible_bases.append(base_cls.__name__)
   if eligible_bases:
@@ -383,13 +446,16 @@ def class_definition(
         out.write('\n')
       out.write('  """\n')
+  empty_class = True
   if schema.fields:
     for key, field in schema.items():
       if not isinstance(key, pg.typing.ConstStrKey):
-        raise TypeError(
+        pg.logging.warning(
             'Variable-length keyword arguments is not supported in '
-            f'structured parsing or query. Encountered: {field}'
+            f'structured parsing or query. Encountered: {cls}, Schema: {schema}'
         )
+        continue
       # Write field doc string as comments before the field definition.
       if field.description:
         for line in field.description.split('\n'):
@@ -397,19 +463,54 @@ def class_definition(
             out.write('  # ')
             out.write(line)
             out.write('\n')
-      out.write(f'  {field.key}: {annotation(field.value, strict=strict)}')
+      annotation_str = annotation(
+          field.value, strict=strict, allowed_dependencies=allowed_dependencies
+      )
+      out.write(f'  {field.key}: {annotation_str}')
       out.write('\n')
-  else:
+      empty_class = False
+  for method in _iter_newly_defined_methods(cls, allowed_dependencies):
+    source = inspect.getsource(method)
+    # Remove decorators from the method definition.
+    source = re.sub(r'\s*@.*\.include_method_in_prompt.*\n', '', source)
+    out.write('\n')
+    out.write(
+        textwrap.indent(
+            inspect.cleandoc('\n' + source), ' ' * 2)
+    )
+    out.write('\n')
+    empty_class = False
+  if empty_class:
     out.write('  pass\n')
   return out.getvalue()
+def _iter_newly_defined_methods(
+    cls, allowed_dependencies: set[Type[Any]] | None):
+  names = {attr_name: True for attr_name in dir(cls)}
+  for base in cls.__bases__:
+    if allowed_dependencies is None or base in allowed_dependencies:
+      for name in dir(base):
+        names.pop(name, None)
+  for name in names.keys():
+    attr = getattr(cls, name)
+    if callable(attr) and should_include_method_in_prompt(attr):
+      yield attr
 def annotation(
     vs: pg.typing.ValueSpec,
     annotate_optional: bool = True,
     strict: bool = False,
+    allowed_dependencies: set[Type[Any]] | None = None,
 ) -> str:
   """Returns the annotation string for a value spec."""
+  child_annotation_kwargs = dict(
+      strict=strict, allowed_dependencies=allowed_dependencies
+  )
   if isinstance(vs, pg.typing.Any):
     return 'Any'
   elif isinstance(vs, pg.typing.Enum):
@@ -418,7 +519,7 @@ def annotation(
   elif isinstance(vs, pg.typing.Union):
     candidate_str = ', '.join(
         [
-            annotation(c, annotate_optional=False, strict=strict)
+            annotation(c, annotate_optional=False, **child_annotation_kwargs)
             for c in vs.candidates
         ]
     )
@@ -454,20 +555,23 @@ def annotation(
         )
       x += '(' + ', '.join(constraints) + ')'
   elif isinstance(vs, pg.typing.Object):
-    x = vs.cls.__name__
+    if allowed_dependencies is None or vs.cls in allowed_dependencies:
+      x = vs.cls.__name__
+    else:
+      x = 'Any'
   elif isinstance(vs, pg.typing.List):
-    item_str = annotation(vs.element.value, strict=strict)
+    item_str = annotation(vs.element.value, **child_annotation_kwargs)
     x = f'list[{item_str}]'
   elif isinstance(vs, pg.typing.Tuple):
     elem_str = ', '.join(
-        [annotation(el.value, strict=strict) for el in vs.elements]
+        [annotation(el.value, **child_annotation_kwargs) for el in vs.elements]
     )
     x = f'tuple[{elem_str}]'
   elif isinstance(vs, pg.typing.Dict):
     kv_pairs = None
     if vs.schema is not None:
       kv_pairs = [
-          (k, annotation(f.value, strict=strict))
+          (k, annotation(f.value, **child_annotation_kwargs))
           for k, f in vs.schema.items()
           if isinstance(k, pg.typing.ConstStrKey)
       ]
@@ -477,6 +581,9 @@ def annotation(
       x = '{' + kv_str + '}'
       if strict:
         x = f'pg.typing.Dict({x})'
+    elif vs.schema and vs.schema.dynamic_field:
+      v = annotation(vs.schema.dynamic_field.value, **child_annotation_kwargs)
+      x = f'dict[str, {v}]'
     else:
       x = 'dict[str, Any]'
@@ -491,7 +598,8 @@ def annotation(
 class SchemaJsonRepr(SchemaRepr):
   """JSON-representation for a schema."""
-  def repr(self, schema: Schema) -> str:
+  def repr(self, schema: Schema, **kwargs) -> str:
+    del kwargs
     out = io.StringIO()
     def _visit(node: Any) -> None:
       if isinstance(node, str):
@@ -569,12 +677,19 @@ class ValuePythonRepr(ValueRepr):
       cls_schema = Schema.from_value(value)
       if isinstance(cls_schema.spec, pg.typing.Object):
         object_code = SchemaPythonRepr().class_definitions(
-            cls_schema, markdown=markdown, include_pg_object_as_base=True
+            cls_schema,
+            markdown=markdown,
+            # We add `pg.Object` as additional dependencies to the class
+            # definition so exemplars for class generation could show
+            # pg.Object as their bases.
+            additional_dependencies=[pg.Object]
         )
         assert object_code is not None
         return object_code
       else:
         object_code = SchemaPythonRepr().result_definition(cls_schema)
+    elif isinstance(value, lf.Template):
+      return str(value)
     else:
       object_code = pg.format(
           value, compact=compact, verbose=verbose, python_format=True
@@ -649,12 +764,15 @@ class JsonError(Exception):
   def __str__(self) -> str:
     r = io.StringIO()
     r.write(
-        lf.colored(f'{self.cause.__class__.__name__}: {self.cause}', 'magenta'))
+        pg.colored(
+            f'{self.cause.__class__.__name__}: {self.cause}', 'magenta'
+        )
+    )
     r.write('\n\n')
-    r.write(lf.colored('JSON text:', 'red'))
+    r.write(pg.colored('JSON text:', 'red'))
     r.write('\n\n')
-    r.write(textwrap.indent(lf.colored(self.json, 'magenta'), ' ' * 2))
+    r.write(textwrap.indent(pg.colored(self.json, 'magenta'), ' ' * 2))
     return r.getvalue()
@@ -669,7 +787,7 @@ class ValueJsonRepr(ValueRepr):
     """Parse a JSON string into a structured object."""
     del schema
     try:
-      text = self.cleanup_json(text)
+      text = cleanup_json(text)
       v = pg.from_json_str(text, **kwargs)
     except Exception as e:
       raise JsonError(text, e)  # pylint: disable=raise-missing-from
@@ -681,55 +799,56 @@ class ValueJsonRepr(ValueRepr):
       ))
     return v['result']
-  def cleanup_json(self, json_str: str) -> str:
-    """Clean up the LM responded JSON string."""
-    # Treatments:
-    # 1. Extract the JSON string with a top-level dict from the response.
-    #    This prevents the leading and trailing texts in the response to
-    #    be counted as part of the JSON.
-    # 2. Escape new lines in JSON values.
-    curly_brackets = 0
-    under_json = False
-    under_str = False
-    str_begin = -1
-    cleaned = io.StringIO()
-    for i, c in enumerate(json_str):
-      if c == '{' and not under_str:
-        cleaned.write(c)
-        curly_brackets += 1
-        under_json = True
-        continue
-      elif not under_json:
-        continue
-      if c == '}' and not under_str:
-        cleaned.write(c)
-        curly_brackets -= 1
-        if curly_brackets == 0:
-          break
-      elif c == '"' and json_str[i - 1] != '\\':
-        under_str = not under_str
-        if under_str:
-          str_begin = i
-        else:
-          assert str_begin > 0
-          str_value = json_str[str_begin : i + 1].replace('\n', '\\n')
-          cleaned.write(str_value)
-          str_begin = -1
-      elif not under_str:
-        cleaned.write(c)
-    if not under_json:
-      raise ValueError(f'No JSON dict in the output: {json_str}')
-    if curly_brackets > 0:
-      raise ValueError(
-          f'Malformated JSON: missing {curly_brackets} closing curly braces.'
-      )
+def cleanup_json(json_str: str) -> str:
+  """Clean up the LM responded JSON string."""
+  # Treatments:
+  # 1. Extract the JSON string with a top-level dict from the response.
+  #    This prevents the leading and trailing texts in the response to
+  #    be counted as part of the JSON.
+  # 2. Escape new lines in JSON values.
+  curly_brackets = 0
+  under_json = False
+  under_str = False
+  str_begin = -1
+  cleaned = io.StringIO()
+  for i, c in enumerate(json_str):
+    if c == '{' and not under_str:
+      cleaned.write(c)
+      curly_brackets += 1
+      under_json = True
+      continue
+    elif not under_json:
+      continue
+    if c == '}' and not under_str:
+      cleaned.write(c)
+      curly_brackets -= 1
+      if curly_brackets == 0:
+        break
+    elif c == '"' and json_str[i - 1] != '\\':
+      under_str = not under_str
+      if under_str:
+        str_begin = i
+      else:
+        assert str_begin > 0
+        str_value = json_str[str_begin : i + 1].replace('\n', '\\n')
+        cleaned.write(str_value)
+        str_begin = -1
+    elif not under_str:
+      cleaned.write(c)
+  if not under_json:
+    raise ValueError(f'No JSON dict in the output: {json_str}')
+  if curly_brackets > 0:
+    raise ValueError(
+        f'Malformated JSON: missing {curly_brackets} closing curly braces.'
+    )
-    return cleaned.getvalue()
+  return cleaned.getvalue()
 def schema_repr(protocol: SchemaProtocol) -> SchemaRepr:
@@ -830,13 +949,3 @@ class Unknown(pg.Object, pg.typing.CustomTyping):
 UNKNOWN = Unknown()
-def _pg_schema(cls: Type[Any]) -> pg.Schema:
-  """Returns PyGlove schema for the constructor of a class."""
-  schema = getattr(cls, '__schema__', None)
-  if schema is None:
-    schema = pg.symbolic.callable_schema(
-        cls.__init__, auto_typing=True, auto_doc=True, remove_self=True
-    )
-  return schema

langfun/core/structured/schema_generation.py CHANGED Viewed

@@ -58,7 +58,7 @@ class GenerateClass(mapping.Mapping):
     class_name = self.context
     cls = output_vars.get(class_name, None)
     if cls is None:
-      raise correction.errors.CodeError(
+      raise pg.coding.CodeError(
           final_code,
           TypeError(f'Class {class_name} is absent from LLM output.'),
       )

langfun/core/structured/schema_generation_test.py CHANGED Viewed

@@ -14,8 +14,8 @@
 import inspect
 import unittest
-import langfun.core.coding as lf_coding
 from langfun.core.llms import fake
+from langfun.core.structured import mapping
 from langfun.core.structured import schema_generation
@@ -92,7 +92,7 @@ class GenerateClassTest(unittest.TestCase):
     )
     self.assertIs(cls.__name__, 'B')
-    with self.assertRaises(lf_coding.CodeError):
+    with self.assertRaises(mapping.MappingError):
       schema_generation.generate_class(
           'Foo',
           'Generate a Foo class with a field pointing to another class A',

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl