PyPI - langfun - Versions diffs - 0.0.2.dev20240314__tar.gz → 0.0.2.dev20240316__tar.gz - Mend

langfun 0.0.2.dev20240314__tar.gz → 0.0.2.dev20240316__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107)

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langfun
-Version: 0.0.2.dev20240314
+Version: 0.0.2.dev20240316
 Summary: Langfun: Language as Functions.
 Home-page: https://github.com/google/langfun
 Author: Langfun Authors
@@ -24,7 +24,7 @@ License-File: LICENSE
 Requires-Dist: google-generativeai>=0.3.2
 Requires-Dist: jinja2>=3.1.2
 Requires-Dist: openai==0.27.2
-Requires-Dist: pyglove>=0.4.5.dev20240201
+Requires-Dist: pyglove>=0.4.5.dev20240314
 Requires-Dist: python-magic>=0.4.27
 Requires-Dist: requests>=2.31.0
 Requires-Dist: termcolor==1.1.0

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/__init__.py RENAMED Viewed

@@ -31,6 +31,9 @@ query = structured.query
 describe = structured.describe
 complete = structured.complete
 score = structured.score
+generate_class = structured.generate_class
+source_form = structured.source_form
 from langfun.core import eval  # pylint: disable=redefined-builtin
 from langfun.core import templates

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/langfunc_test.py RENAMED Viewed

@@ -95,8 +95,8 @@ class LangFuncCallTest(unittest.TestCase):
         ' lm=ExcitedEchoer(sampling_options=LMSamplingOptions(temperature=0.0,'
         ' max_tokens=1024, n=1, top_k=40, top_p=None, stop=None,'
         ' random_seed=None, logprobs=False, top_logprobs=None), cache=None,'
-        ' timeout=120.0, max_attempts=5, retry_interval=(5, 60),'
-        ' exponential_backoff=True, debug=False))',
+        ' max_concurrency=None, timeout=120.0, max_attempts=5,'
+        ' retry_interval=(5, 60), exponential_backoff=True, debug=False))',
     )
     l = LangFunc('Hello')

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/language_model.py RENAMED Viewed

@@ -17,8 +17,9 @@ import abc
 import dataclasses
 import enum
 import time
-from typing import Annotated, Any
+from typing import Annotated, Any, Callable, Sequence, Tuple, Type, Union
 from langfun.core import component
+from langfun.core import concurrent
 from langfun.core import console
 from langfun.core import message as message_lib
 import pyglove as pg
@@ -209,6 +210,22 @@ class LanguageModel(component.Component):
       )
   ] = component.contextual(default=None)
+  max_concurrency: Annotated[
+      int | None,
+      (
+          'Max concurrent requests being sent to the server. '
+          'If None, there is no limit. '
+          'Please note that the concurrency control is based on the '
+          '`resource_id` property, meaning that model instances shared '
+          'the same resource ID will be accounted under the same concurrency '
+          'control key. This allows a process-level concurrency control '
+          'for specific models regardless the number of LM (client) instances '
+          'created by the program. Subclasses could override this number or '
+          'replace it with a `max_concurrency` property to allow dynamic '
+          'concurrency control.'
+      ),
+  ] = None
   timeout: Annotated[
       float | None, 'Timeout in seconds. If None, there is no timeout.'
   ] = 120.0
@@ -284,11 +301,6 @@ class LanguageModel(component.Component):
     """Resource ID for performing request parallism control."""
     return self.model_id
-  @property
-  def max_concurrency(self) -> int:
-    """Max concurrent requests."""
-    return 32
   def sample(
       self,
       prompts: list[str | message_lib.Message],
@@ -355,6 +367,28 @@ class LanguageModel(component.Component):
   ) -> list[LMSamplingResult]:
     """Subclass should override."""
+  def _parallel_execute_with_currency_control(
+      self,
+      action: Callable[..., Any],
+      inputs: Sequence[Any],
+      retry_on_errors: Union[
+          None,
+          Union[Type[Exception], Tuple[Type[Exception], str]],
+          Sequence[Union[Type[Exception], Tuple[Type[Exception], str]]],
+      ] = None,
+  ) -> Any:
+    """Helper method for subclasses for implementing _sample."""
+    return concurrent.concurrent_execute(
+        action,
+        inputs,
+        executor=self.resource_id if self.max_concurrency else None,
+        max_workers=self.max_concurrency or len(inputs),
+        retry_on_errors=retry_on_errors,
+        max_attempts=self.max_attempts,
+        retry_interval=self.retry_interval,
+        exponential_backoff=self.exponential_backoff,
+    )
   def __call__(
       self, prompt: message_lib.Message, *, cache_seed: int = 0, **kwargs
   ) -> message_lib.Message:

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/language_model_test.py RENAMED Viewed

@@ -89,7 +89,7 @@ class LanguageModelTest(unittest.TestCase):
     lm = MockModel(1, temperature=0.5, top_k=2, max_attempts=2)
     self.assertEqual(lm.model_id, 'MockModel')
     self.assertEqual(lm.resource_id, 'MockModel')
-    self.assertEqual(lm.max_concurrency, 32)
+    self.assertIsNone(lm.max_concurrency)
     self.assertEqual(lm.failures_before_attempt, 1)
     self.assertEqual(lm.sampling_options.temperature, 0.5)
     self.assertEqual(lm.sampling_options.top_k, 2)

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/llms/gemini.py RENAMED Viewed

@@ -133,14 +133,9 @@ class Gemini(lf.LanguageModel):
   def _sample(self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
     assert self._api_initialized, 'Vertex AI API is not initialized.'
-    return lf.concurrent_execute(
+    return self._parallel_execute_with_currency_control(
         self._sample_single,
         prompts,
-        executor=self.resource_id,
-        max_workers=self.max_concurrency,
-        # NOTE(daiyip): Vertex has its own policy on handling
-        # with rate limit, so we do not retry on errors.
-        retry_on_errors=None,
     )
   def _sample_single(self, prompt: lf.Message) -> lf.LMSamplingResult:

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/llms/llama_cpp.py RENAMED Viewed

@@ -67,13 +67,6 @@ class LlamaCppRemote(lf.LanguageModel):
         results.append(result)
       return results
-    return lf.concurrent_execute(
-        _complete_fn,
-        [prompts],
-        executor=self.resource_id,
-        max_workers=self.max_concurrency,
-        retry_on_errors=(),
-        max_attempts=self.max_attempts,
-        retry_interval=self.retry_interval,
-        exponential_backoff=self.exponential_backoff,
+    return self._parallel_execute_with_currency_control(
+        _complete_fn, [prompts]
     )[0]

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/llms/openai.py RENAMED Viewed

@@ -214,18 +214,13 @@ class OpenAI(lf.LanguageModel):
           for index in sorted(samples_by_index.keys())
       ]
-    return lf.concurrent_execute(
+    return self._parallel_execute_with_currency_control(
         _open_ai_completion,
         [prompts],
-        executor=self.resource_id,
-        max_workers=self.max_concurrency,
         retry_on_errors=(
             openai_error.ServiceUnavailableError,
             openai_error.RateLimitError,
         ),
-        max_attempts=self.max_attempts,
-        retry_interval=self.retry_interval,
-        exponential_backoff=self.exponential_backoff,
     )[0]
   def _chat_complete_batch(
@@ -280,18 +275,13 @@ class OpenAI(lf.LanguageModel):
           ),
       )
-    return lf.concurrent_execute(
+    return self._parallel_execute_with_currency_control(
         _open_ai_chat_completion,
         prompts,
-        executor=self.resource_id,
-        max_workers=self.max_concurrency,
         retry_on_errors=(
             openai_error.ServiceUnavailableError,
             openai_error.RateLimitError,
         ),
-        max_attempts=self.max_attempts,
-        retry_interval=self.retry_interval,
-        exponential_backoff=self.exponential_backoff,
     )

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/structured/__init__.py RENAMED Viewed

@@ -41,8 +41,12 @@ from langfun.core.structured.schema import ValueRepr
 from langfun.core.structured.schema import ValueJsonRepr
 from langfun.core.structured.schema import ValuePythonRepr
 from langfun.core.structured.schema import schema_repr
+from langfun.core.structured.schema import source_form
 from langfun.core.structured.schema import value_repr
+from langfun.core.structured.schema_generation import generate_class
+from langfun.core.structured.schema_generation import classgen_example
+from langfun.core.structured.schema_generation import default_classgen_examples
 from langfun.core.structured.mapping import Mapping
 from langfun.core.structured.mapping import MappingExample
@@ -68,8 +72,8 @@ from langfun.core.structured.scoring import score
 # Expose default examples for structured operations so users could refer to
 # them.
-from langfun.core.structured.parsing import DEFAULT_PARSE_EXAMPLES
-from langfun.core.structured.description import DEFAULT_DESCRIBE_EXAMPLES
+from langfun.core.structured.parsing import default_parse_examples
+from langfun.core.structured.description import default_describe_examples
 # Default examples.

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/structured/description.py RENAMED Viewed

@@ -106,58 +106,61 @@ def describe(
   Returns:
     The parsed result based on the schema.
   """
-  if examples is None:
-    examples = DEFAULT_DESCRIBE_EXAMPLES
   return DescribeStructure(
-      input=value, context=context, examples=examples, **kwargs
+      input=value,
+      context=context,
+      examples=examples or default_describe_examples(),
+      **kwargs,
   )(lm=lm, cache_seed=cache_seed).text
-class _Country(pg.Object):
-  """A example dataclass for structured mapping."""
-  name: str
-  continents: list[
-      Literal[
-          'Africa',
-          'Asia',
-          'Europe',
-          'Oceania',
-          'North America',
-          'South America',
-      ]
+def default_describe_examples() -> list[mapping.MappingExample]:
+  """Default describe examples."""
+  class Country(pg.Object):
+    """A example dataclass for structured mapping."""
+    name: str
+    continents: list[
+        Literal[
+            'Africa',
+            'Asia',
+            'Europe',
+            'Oceania',
+            'North America',
+            'South America',
+        ]
+    ]
+    num_states: int
+    neighbor_countries: list[str]
+    population: int
+    capital: str | None
+    president: str | None
+  return [
+      mapping.MappingExample(
+          context='Brief intro to United States',
+          input=Country(
+              name='The United States of America',
+              continents=['North America'],
+              num_states=50,
+              neighbor_countries=[
+                  'Canada',
+                  'Mexico',
+                  'Bahamas',
+                  'Cuba',
+                  'Russia',
+              ],
+              population=333000000,
+              capital='Washington, D.C',
+              president=None,
+          ),
+          output=inspect.cleandoc("""
+              The United States of America is a country primarily located in North America
+              consisting of fifty states. It shares land borders with Canada to its north
+              and with Mexico to its south and has maritime borders with the Bahamas, Cuba,
+              Russia, and other nations. With a population of over 333 million. The national
+              capital of the United States is Washington, D.C.
+              """),
+      ),
   ]
-  num_states: int
-  neighbor_countries: list[str]
-  population: int
-  capital: str | None
-  president: str | None
-DEFAULT_DESCRIBE_EXAMPLES: list[mapping.MappingExample] = [
-    mapping.MappingExample(
-        context='Brief intro to United States',
-        input=_Country(
-            name='The United States of America',
-            continents=['North America'],
-            num_states=50,
-            neighbor_countries=[
-                'Canada',
-                'Mexico',
-                'Bahamas',
-                'Cuba',
-                'Russia',
-            ],
-            population=333000000,
-            capital='Washington, D.C',
-            president=None,
-        ),
-        output=inspect.cleandoc("""
-            The United States of America is a country primarily located in North America
-            consisting of fifty states. It shares land borders with Canada to its north
-            and with Mexico to its south and has maritime borders with the Bahamas, Cuba,
-            Russia, and other nations. With a population of over 333 million. The national
-            capital of the United States is Washington, D.C.
-            """),
-    ),
-]

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/structured/mapping.py RENAMED Viewed

@@ -293,25 +293,27 @@ class Mapping(lf.LangFunc):
   def transform_output(self, lm_output: lf.Message) -> lf.Message:
     """Transforms LM response into structure if schema is present."""
-    schema = self.mapping_request.schema
-    if schema is None:
-      return lm_output
     try:
-      result = schema.parse(
-          lm_output.text,
-          protocol=self.protocol,
-          additional_context=self.globals(),
-          autofix=self.autofix,
-          autofix_lm=self.autofix_lm or self.lm,
-      )
-      lm_output.result = self.postprocess_result(result)
+      lm_output.result = self.postprocess_result(self.parse_result(lm_output))
     except Exception as e:  # pylint: disable=broad-exception-caught
       if self.default == lf.RAISE_IF_HAS_ERROR:
         raise e
       lm_output.result = self.default
     return lm_output
+  def parse_result(self, lm_output: lf.Message) -> Any:
+    """Parse result from LLM response."""
+    schema = self.mapping_request.schema
+    if schema is None:
+      return None
+    return schema.parse(
+        lm_output.text,
+        protocol=self.protocol,
+        additional_context=self.globals(),
+        autofix=self.autofix,
+        autofix_lm=self.autofix_lm or self.lm,
+    )
   def postprocess_result(self, result: Any) -> Any:
     """Post process structured output."""
     return result

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/structured/parsing.py RENAMED Viewed

@@ -162,11 +162,11 @@ def parse(
     message.source = lf.UserMessage(user_prompt, tags=['lm-input'])
   context = getattr(message.lm_input, 'text', None) if include_context else None
-  if examples is None:
-    examples = DEFAULT_PARSE_EXAMPLES
   t = _parse_structure_cls(protocol)(
-      schema=schema, context=context, default=default, examples=examples
+      schema=schema,
+      context=context,
+      default=default,
+      examples=examples or default_parse_examples(),
   )
   # Setting up context.
@@ -296,17 +296,19 @@ def _parse_structure_cls(
     raise ValueError(f'Unknown protocol: {protocol!r}.')
-class _AdditionResults(pg.Object):
-  one_plus_one_equals: int | None
-  two_plus_two_equals: int | None
+def default_parse_examples() -> list[mapping.MappingExample]:
+  """Default parsing examples."""
+  class AdditionResults(pg.Object):
+    one_plus_one_equals: int | None
+    two_plus_two_equals: int | None
-DEFAULT_PARSE_EXAMPLES: list[mapping.MappingExample] = [
-    mapping.MappingExample(
-        input='Two plus two equals four. Three plus three equals six.',
-        schema=_AdditionResults,
-        output=_AdditionResults(
-            one_plus_one_equals=None, two_plus_two_equals=4
-        ),
-    ),
-]
+  return [
+      mapping.MappingExample(
+          input='Two plus two equals four. Three plus three equals six.',
+          schema=AdditionResults,
+          output=AdditionResults(
+              one_plus_one_equals=None, two_plus_two_equals=4
+          ),
+      ),
+  ]

{langfun-0.0.2.dev20240314 → langfun-0.0.2.dev20240316}/langfun/core/structured/schema.py RENAMED Viewed

@@ -301,23 +301,43 @@ class SchemaRepr(metaclass=abc.ABCMeta):
 class SchemaPythonRepr(SchemaRepr):
   """Python-representation for a schema."""
-  def repr(self, schema: Schema) -> str:
-    ret = self.result_definition(schema)
-    class_definition_str = self.class_definitions(schema)
+  def repr(
+      self,
+      schema: Schema,
+      *,
+      include_result_definition: bool = True,
+      markdown: bool = True,
+      **kwargs,
+  ) -> str:
+    ret = ''
+    if include_result_definition:
+      ret += self.result_definition(schema)
+    class_definition_str = self.class_definitions(
+        schema, markdown=markdown, **kwargs
+    )
     if class_definition_str:
-      ret += f'\n\n```python\n{class_definition_str}```'
-    return ret
+      ret += f'\n\n{class_definition_str}'
+    return ret.strip()
-  def class_definitions(self, schema: Schema) -> str | None:
+  def class_definitions(self, schema: Schema, **kwargs) -> str | None:
     deps = schema.class_dependencies(include_subclasses=True)
-    return class_definitions(deps)
+    return class_definitions(deps, **kwargs)
   def result_definition(self, schema: Schema) -> str:
     return annotation(schema.spec)
+def source_form(value, markdown: bool = False) -> str:
+  """Returns the source code form of an object."""
+  return ValuePythonRepr().repr(value, markdown=markdown)
 def class_definitions(
-    classes: Sequence[Type[Any]], strict: bool = False, markdown: bool = False
+    classes: Sequence[Type[Any]],
+    *,
+    include_pg_object_as_base: bool = False,
+    strict: bool = False,
+    markdown: bool = False,
 ) -> str | None:
   """Returns a str for class definitions."""
   if not classes:
@@ -326,14 +346,22 @@ def class_definitions(
   for i, cls in enumerate(classes):
     if i > 0:
       def_str.write('\n')
-    def_str.write(class_definition(cls, strict))
+    def_str.write(
+        class_definition(
+            cls,
+            strict=strict,
+            include_pg_object_as_base=include_pg_object_as_base,
+        )
+    )
   ret = def_str.getvalue()
   if markdown and ret:
     ret = f'```python\n{ret}```'
   return ret
-def class_definition(cls, strict: bool = False) -> str:
+def class_definition(
+    cls, strict: bool = False, include_pg_object_as_base: bool = False
+) -> str:
   """Returns the Python class definition."""
   out = io.StringIO()
   if not issubclass(cls, pg.Object):
@@ -344,10 +372,9 @@ def class_definition(cls, strict: bool = False) -> str:
   schema = cls.__schema__
   eligible_bases = []
   for base_cls in cls.__bases__:
-    if issubclass(base_cls, pg.Symbolic) and not base_cls.__module__.startswith(
-        'pyglove'
-    ):
-      eligible_bases.append(base_cls.__name__)
+    if issubclass(base_cls, pg.Object):
+      if include_pg_object_as_base or base_cls is not pg.Object:
+        eligible_bases.append(base_cls.__name__)
   if eligible_bases:
     base_cls_str = ', '.join(eligible_bases)
     out.write(f'class {cls.__name__}({base_cls_str}):\n')
@@ -547,8 +574,20 @@ class ValuePythonRepr(ValueRepr):
            markdown: bool = True,
            **kwargs) -> str:
     del schema
-    object_code = pg.format(
-        value, compact=compact, verbose=verbose, python_format=True)
+    if inspect.isclass(value):
+      cls_schema = Schema.from_value(value)
+      if isinstance(cls_schema.spec, pg.typing.Object):
+        object_code = SchemaPythonRepr().class_definitions(
+            cls_schema, markdown=markdown, include_pg_object_as_base=True
+        )
+        assert object_code is not None
+        return object_code
+      else:
+        object_code = SchemaPythonRepr().result_definition(cls_schema)
+    else:
+      object_code = pg.format(
+          value, compact=compact, verbose=verbose, python_format=True
+      )
     if markdown:
       return f'```python\n{ object_code }\n```'
     return object_code
@@ -588,6 +627,7 @@ def structure_from_python(
   global_vars = global_vars or {}
   global_vars.update({
       'pg': pg,
+      'Object': pg.Object,
       'Any': typing.Any,
       'List': typing.List,
       'Tuple': typing.Tuple,

langfun 0.0.2.dev20240314__tar.gz → 0.0.2.dev20240316__tar.gz

langfun 0.0.2.dev20240314__tar.gz → 0.0.2.dev20240316__tar.gz