PyPI - langfun - Versions diffs - 0.0.2.dev20240327__tar.gz → 0.0.2.dev20240330__tar.gz - Mend

langfun 0.0.2.dev20240327.tar.gz → 0.0.2.dev20240330.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

{langfun-0.0.2.dev20240327 → langfun-0.0.2.dev20240330}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langfun
-Version: 0.0.2.dev20240327
+Version: 0.0.2.dev20240330
 Summary: Langfun: Language as Functions.
 Home-page: https://github.com/google/langfun
 Author: Langfun Authors
@@ -24,7 +24,7 @@ License-File: LICENSE
 Requires-Dist: google-generativeai>=0.3.2
 Requires-Dist: jinja2>=3.1.2
 Requires-Dist: openai==0.27.2
-Requires-Dist: pyglove>=0.4.5.dev20240314
+Requires-Dist: pyglove>=0.4.5.dev20240323
 Requires-Dist: python-magic>=0.4.27
 Requires-Dist: requests>=2.31.0
 Requires-Dist: termcolor==1.1.0

{langfun-0.0.2.dev20240327 → langfun-0.0.2.dev20240330}/langfun/core/eval/base.py RENAMED Viewed

@@ -27,6 +27,7 @@ import time
 from typing import Annotated, Any, Callable, Iterator, Literal, Optional, Sequence, Type, Union
 import langfun.core as lf
+import langfun.core.coding as lf_coding
 from langfun.core.llms.cache import in_memory
 import langfun.core.structured as lf_structured
 import pyglove as pg
@@ -41,14 +42,6 @@ class Evaluable(lf.Component):
   INDEX_HTML = 'index.html'
   SUMMARY_HTML = 'summary.html'
-  id: Annotated[
-      str,
-      (
-          'The ID of the evaluation, which should be unique across all '
-          'evaluations.'
-      ),
-  ]
   root_dir: Annotated[
       str | None,
       (
@@ -61,6 +54,18 @@ class Evaluable(lf.Component):
       int, 'Number of decimals when reporting precision.'
   ] = lf.contextual(default=1)
+  @property
+  @abc.abstractmethod
+  def id(self) -> str:
+    """Returns the ID of the task.
+    Returns:
+      Evaluation task ID. Different evaluation tasks should have unique
+      task IDs, since each task will be stored in a sub-directory identified
+      by its ID. For suites, the ID could be an empty string as they will not
+      produce sub-directories.
+    """
   @property
   def dir(self) -> str | None:
     """Returns the directory for saving results and details."""
@@ -578,12 +583,15 @@ class _LeafNode:
   progress_bar: int | None = None
-@pg.use_init_args(['id', 'children'])
+@pg.use_init_args(['children'])
 class Suite(Evaluable):
   """Evaluation suite."""
   children: Annotated[list[Evaluable], 'Child evaluation sets or suites.']
+  # Use empty ID as suite is just a container of child evaluations.
+  id: str = ''
   __kwargs__: Annotated[
       Any,
       (
@@ -841,8 +849,10 @@ class Evaluation(Evaluable):
       kwargs['evaluation'] = self
     return self.schema_fn(**kwargs)
-  def _formalize_schema(self, annotation) -> lf_structured.Schema:
+  def _formalize_schema(self, annotation) -> lf_structured.Schema | None:
     """Formalizes schema from annotation."""
+    if annotation in (str, None):
+      return None
     if self.method == 'complete':
       if not hasattr(annotation, '__schema__'):
         raise TypeError(
@@ -883,6 +893,14 @@ class Evaluation(Evaluable):
       completion_examples.append(ex)
     return completion_examples
+  @property
+  def id(self) -> str:
+    """Returns the ID of this evaluation."""
+    id_prefix = self.__class__.__name__
+    if not self.is_deterministic:
+      return id_prefix
+    return f'{id_prefix}@{self.hash}'
   @functools.cached_property
   def children(self) -> list['Evaluation']:
     """Returns the trials as child evaluations if this evaluation is a space."""
@@ -892,7 +910,6 @@ class Evaluation(Evaluable):
     for i, child in enumerate(pg.iter(self)):
       child.sym_setparent(self)
       child.sym_setpath(self.sym_path + f'children[{i}]')
-      child.rebind(id=f'{self.id}@{child.hash}', skip_notification=True)
       children.append(child)
     return children
@@ -1004,7 +1021,11 @@ class Evaluation(Evaluable):
       self._reset()
       def _process(example: Any):
-        return self.process(example, **(self.additional_args or {}))
+        # NOTE(daiyip): set the `input` symbol of the globals to None, so LLM
+        # generated code with calls to `input` will raise an error, thus not
+        # blocking the evaluation.
+        with lf_coding.context(input=None):
+          return self.process(example, **(self.additional_args or {}))
       try:
         for example, message, error in lf.concurrent_map(
@@ -1015,10 +1036,7 @@ class Evaluation(Evaluable):
             status_fn=self._status,
         ):
           if error is not None:
-            try:
-              self._failures.append((example, str(error)))
-            except Exception as e:  # pylint: disable=broad-exception-caught
-              self._failures.append((example, str(e)))
+            self._failures.append((example, str(error)))
           else:
             output = message.text if self.schema is None else message.result
             self.audit(example, output, message)
@@ -1521,9 +1539,12 @@ class Summary(pg.Object):
     pivot_field = pivot_field or self.pivot_field
     s = io.StringIO()
     s.write('<html><body>')
-    for task in self.tasks():
+    for task in sorted(self.tasks(), key=lambda cls: cls.__name__):
+      table_id = task.__name__.lower()
       s.write('<div>')
-      s.write(f'<h2>{task.__name__}</h2>')
+      s.write(f'<a id="{table_id}"')
+      s.write(f'<h2><a href="#{table_id}">{task.__name__}</a></h2>')
+      s.write('</a>')
       table = Summary.Table.from_evaluations(
           self.select(task=task).evaluations, pivot_field
       )

{langfun-0.0.2.dev20240327 → langfun-0.0.2.dev20240330}/langfun/core/eval/base_test.py RENAMED Viewed

@@ -70,8 +70,7 @@ def eval_set(
   """Creates an evaluation object for testing."""
   tmp_dir = tempfile.gettempdir()
   return cls(
-      id=eval_id,
-      root_dir=tmp_dir,
+      root_dir=os.path.join(tmp_dir, eval_id),
       inputs=base.as_inputs([
           pg.Dict(question='Compute 1 + 1'),
           pg.Dict(question='Compute 1 + 2'),
@@ -210,7 +209,7 @@ class EvaluationTest(unittest.TestCase):
         s.result,
         dict(
             experiment_setup=dict(
-                id='run_test',
+                id='Evaluation@17915dc6',
                 dir=s.dir,
                 model='StaticSequence',
                 prompt_template='{{example.question}}',
@@ -302,7 +301,6 @@ class EvaluationTest(unittest.TestCase):
         '3',
     ])
     s = base.Evaluation(
-        id='search_space_test',
         root_dir=tempfile.gettempdir(),
         inputs=base.as_inputs([
             pg.Dict(question='Compute 1 + 1'),
@@ -439,7 +437,6 @@ class SuiteTest(unittest.TestCase):
         '3',
     ] * 5)
     s = base.Suite(
-        'suite_run_test',
         [
             eval_set('run_test_1', 'query', schema_fn=answer_schema()),
             # A suite of search space. Two of the sub-experiments are identical,
@@ -548,7 +545,6 @@ class SummaryTest(unittest.TestCase):
   def _eval_set(self, root_dir):
     return base.Suite(id='select_test', children=[
         TaskA(
-            id='task_a',
             inputs=base.as_inputs([
                 pg.Dict(question='Compute 1 + 1'),
             ]),
@@ -569,7 +565,6 @@ class SummaryTest(unittest.TestCase):
             max_workers=1,
         ),
         TaskB(
-            id='task_b',
             inputs=base.as_inputs([
                 pg.Dict(question='Compute 1 + 1'),
             ]),
@@ -650,10 +645,10 @@ class SummaryTest(unittest.TestCase):
         len(base.Summary.from_dirs(root_dir)), 2 * 2 * 2 * 2 + 2 * 1 * 1 * 2
     )
     self.assertEqual(
-        len(base.Summary.from_dirs(root_dir, 'task_b')), 2 * 1 * 1 * 2
+        len(base.Summary.from_dirs(root_dir, 'TaskB')), 2 * 1 * 1 * 2
     )
     self.assertEqual(
-        len(base.Summary.from_dirs(root_dir, ('task_a'))), 2 * 2 * 2 * 2
+        len(base.Summary.from_dirs(root_dir, ('TaskA'))), 2 * 2 * 2 * 2
     )
   def test_monitor(self):

{langfun-0.0.2.dev20240327 → langfun-0.0.2.dev20240330}/langfun/core/eval/matching_test.py RENAMED Viewed

@@ -65,10 +65,8 @@ def eval_set(
     use_cache: bool = True,
 ):
   """Creates an evaluation object for testing."""
-  tmp_dir = tempfile.gettempdir()
   return MyTask(
-      id=eval_id,
-      root_dir=tmp_dir,
+      root_dir=os.path.join(tempfile.gettempdir(), eval_id),
       inputs=base.as_inputs([
           pg.Dict(question='Compute 1 + 1', groundtruth=2),
           pg.Dict(question='Compute 1 + 2', groundtruth=3),
@@ -105,7 +103,7 @@ class MatchingTest(unittest.TestCase):
         s.result,
         dict(
             experiment_setup=dict(
-                id='match_run_test',
+                id='MyTask@3d87f97f',
                 dir=s.dir,
                 model='StaticSequence',
                 prompt_template='{{example.question}}',

{langfun-0.0.2.dev20240327 → langfun-0.0.2.dev20240330}/langfun/core/eval/scoring_test.py RENAMED Viewed

@@ -43,7 +43,6 @@ def constrained_by_upperbound(upper_bound: int):
 class ConstraintFollowing(scoring.Scoring):
-  id = 'constraint_following'
   inputs = constrained_by_upperbound(1)
   prompt = '{{example}}'
   method = 'query'
@@ -82,7 +81,7 @@ class ScoringTest(unittest.TestCase):
         s.result,
         dict(
             experiment_setup=dict(
-                id='constraint_following',
+                id='ConstraintFollowing@9e51bb9e',
                 dir=s.dir,
                 model='StaticSequence',
                 prompt_template='{{example}}',

{langfun-0.0.2.dev20240327 → langfun-0.0.2.dev20240330}/langfun/core/llms/__init__.py RENAMED Viewed

@@ -25,9 +25,11 @@ from langfun.core.llms.fake import StaticResponse
 from langfun.core.llms.fake import StaticSequence
 # Gemini models.
-from langfun.core.llms.gemini import Gemini
-from langfun.core.llms.gemini import GeminiPro
-from langfun.core.llms.gemini import GeminiProVision
+from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import GeminiPro
+from langfun.core.llms.google_genai import GeminiProVision
+from langfun.core.llms.google_genai import Palm2
+from langfun.core.llms.google_genai import Palm2_IT
 # OpenAI models.
 from langfun.core.llms.openai import OpenAI

langfun-0.0.2.dev20240327/langfun/core/llms/gemini.py → langfun-0.0.2.dev20240330/langfun/core/llms/google_genai.py RENAMED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 """Gemini models exposed through Google Generative AI APIs."""
+import abc
 import functools
 import os
 from typing import Annotated, Any, Literal
@@ -20,14 +21,20 @@ from typing import Annotated, Any, Literal
 import google.generativeai as genai
 import langfun.core as lf
 from langfun.core import modalities as lf_modalities
+import pyglove as pg
 @lf.use_init_args(['model'])
-class Gemini(lf.LanguageModel):
-  """Language model served on VertexAI."""
+class GenAI(lf.LanguageModel):
+  """Language models provided by Google GenAI."""
   model: Annotated[
-      Literal['gemini-pro', 'gemini-pro-vision', ''],
+      Literal[
+          'gemini-pro',
+          'gemini-pro-vision',
+          'text-bison-001',
+          'chat-bison-001',
+      ],
       'Model name.',
   ]
@@ -35,7 +42,8 @@ class Gemini(lf.LanguageModel):
       str | None,
       (
           'API key. If None, the key will be read from environment variable '
-          "'GOOGLE_API_KEY'."
+          "'GOOGLE_API_KEY'. "
+          'Get an API key at https://ai.google.dev/tutorials/setup'
       ),
   ] = None
@@ -43,6 +51,9 @@ class Gemini(lf.LanguageModel):
       False
   )
+  # Set the default max concurrency to 8 workers.
+  max_concurrency = 8
   def _on_bound(self):
     super()._on_bound()
     self.__dict__.pop('_api_initialized', None)
@@ -67,7 +78,11 @@ class Gemini(lf.LanguageModel):
     return [
         m.name.lstrip('models/')
         for m in genai.list_models()
-        if 'generateContent' in m.supported_generation_methods
+        if (
+            'generateContent' in m.supported_generation_methods
+            or 'generateText' in m.supported_generation_methods
+            or 'generateMessage' in m.supported_generation_methods
+        )
     ]
   @property
@@ -80,11 +95,6 @@ class Gemini(lf.LanguageModel):
     """Returns a string to identify the resource for rate control."""
     return self.model_id
-  @property
-  def max_concurrency(self) -> int:
-    """Max concurrent requests."""
-    return 8
   def _generation_config(self, options: lf.LMSamplingOptions) -> dict[str, Any]:
     """Creates generation config from langfun sampling options."""
     return genai.GenerationConfig(
@@ -117,7 +127,7 @@ class Gemini(lf.LanguageModel):
     return chunks
   def _response_to_result(
-      self, response: genai.types.GenerateContentResponse
+      self, response: genai.types.GenerateContentResponse | pg.Dict
   ) -> lf.LMSamplingResult:
     """Parses generative response into message."""
     samples = []
@@ -149,17 +159,97 @@ class Gemini(lf.LanguageModel):
     return self._response_to_result(response)
+class _LegacyGenerativeModel(pg.Object):
+  """Base for legacy GenAI generative model."""
+  model: str
+  def generate_content(
+      self,
+      input_content: list[str | genai.types.BlobDict],
+      generation_config: genai.GenerationConfig,
+  ) -> pg.Dict:
+    """Generate content."""
+    segments = []
+    for s in input_content:
+      if not isinstance(s, str):
+        raise ValueError(f'Unsupported modality: {s!r}')
+      segments.append(s)
+    return self.generate(' '.join(segments), generation_config)
+  @abc.abstractmethod
+  def generate(
+      self, prompt: str, generation_config: genai.GenerationConfig) -> pg.Dict:
+    """Generate response based on prompt."""
+class _LegacyCompletionModel(_LegacyGenerativeModel):
+  """Legacy GenAI completion model."""
+  def generate(
+      self, prompt: str, generation_config: genai.GenerationConfig
+  ) -> pg.Dict:
+    completion: genai.types.Completion = genai.generate_text(
+        model=f'models/{self.model}',
+        prompt=prompt,
+        temperature=generation_config.temperature,
+        top_k=generation_config.top_k,
+        top_p=generation_config.top_p,
+        candidate_count=generation_config.candidate_count,
+        max_output_tokens=generation_config.max_output_tokens,
+        stop_sequences=generation_config.stop_sequences,
+    )
+    return pg.Dict(
+        candidates=[
+            pg.Dict(content=pg.Dict(parts=[pg.Dict(text=c['output'])]))
+            for c in completion.candidates
+        ]
+    )
+class _LegacyChatModel(_LegacyGenerativeModel):
+  """Legacy GenAI chat model."""
+  def generate(
+      self, prompt: str, generation_config: genai.GenerationConfig
+  ) -> pg.Dict:
+    response: genai.types.ChatResponse = genai.chat(
+        model=f'models/{self.model}',
+        messages=prompt,
+        temperature=generation_config.temperature,
+        top_k=generation_config.top_k,
+        top_p=generation_config.top_p,
+        candidate_count=generation_config.candidate_count,
+    )
+    return pg.Dict(
+        candidates=[
+            pg.Dict(content=pg.Dict(parts=[pg.Dict(text=c['content'])]))
+            for c in response.candidates
+        ]
+    )
 class _ModelHub:
   """Google Generative AI model hub."""
   def __init__(self):
     self._model_cache = {}
-  def get(self, model_name: str) -> genai.GenerativeModel:
+  def get(
+      self, model_name: str
+  ) -> genai.GenerativeModel | _LegacyGenerativeModel:
     """Gets a generative model by model id."""
     model = self._model_cache.get(model_name, None)
     if model is None:
-      model = genai.GenerativeModel(model_name)
+      model_info = genai.get_model(f'models/{model_name}')
+      if 'generateContent' in model_info.supported_generation_methods:
+        model = genai.GenerativeModel(model_name)
+      elif 'generateText' in model_info.supported_generation_methods:
+        model = _LegacyCompletionModel(model_name)
+      elif 'generateMessage' in model_info.supported_generation_methods:
+        model = _LegacyChatModel(model_name)
+      else:
+        raise ValueError(f'Unsupported model: {model_name!r}')
       self._model_cache[model_name] = model
     return model
@@ -172,14 +262,26 @@ _GOOGLE_GENAI_MODEL_HUB = _ModelHub()
 #
-class GeminiPro(Gemini):
+class GeminiPro(GenAI):
   """Gemini Pro model."""
   model = 'gemini-pro'
-class GeminiProVision(Gemini):
+class GeminiProVision(GenAI):
   """Gemini Pro vision model."""
   model = 'gemini-pro-vision'
   multimodal = True
+class Palm2(GenAI):
+  """PaLM2 model."""
+  model = 'text-bison-001'
+class Palm2_IT(GenAI):  # pylint: disable=invalid-name
+  """PaLM2 instruction-tuned model."""
+  model = 'chat-bison-001'

langfun-0.0.2.dev20240327/langfun/core/llms/gemini_test.py → langfun-0.0.2.dev20240330/langfun/core/llms/google_genai_test.py RENAMED Viewed

@@ -20,7 +20,7 @@ from unittest import mock
 from google import generativeai as genai
 import langfun.core as lf
 from langfun.core import modalities as lf_modalities
-from langfun.core.llms import gemini
+from langfun.core.llms import google_genai
 import pyglove as pg
@@ -36,6 +36,29 @@ example_image = (
 )
+def mock_get_model(model_name, *args, **kwargs):
+  del args, kwargs
+  if 'gemini' in model_name:
+    method = 'generateContent'
+  elif 'chat' in model_name:
+    method = 'generateMessage'
+  else:
+    method = 'generateText'
+  return pg.Dict(supported_generation_methods=[method])
+def mock_generate_text(*, model, prompt, **kwargs):
+  return pg.Dict(
+      candidates=[pg.Dict(output=f'{prompt} to {model} with {kwargs}')]
+  )
+def mock_chat(*, model, messages, **kwargs):
+  return pg.Dict(
+      candidates=[pg.Dict(content=f'{messages} to {model} with {kwargs}')]
+  )
 def mock_generate_content(content, generation_config, **kwargs):
   del kwargs
   c = generation_config
@@ -68,12 +91,12 @@ def mock_generate_content(content, generation_config, **kwargs):
   )
-class GeminiTest(unittest.TestCase):
-  """Tests for Evergreen language model."""
+class GenAITest(unittest.TestCase):
+  """Tests for Google GenAI model."""
   def test_content_from_message_text_only(self):
     text = 'This is a beautiful day'
-    model = gemini.GeminiPro()
+    model = google_genai.GeminiPro()
     chunks = model._content_from_message(lf.UserMessage(text))
     self.assertEqual(chunks, [text])
@@ -85,9 +108,9 @@ class GeminiTest(unittest.TestCase):
     # Non-multimodal model.
     with self.assertRaisesRegex(ValueError, 'Unsupported modality'):
-      gemini.GeminiPro()._content_from_message(message)
+      google_genai.GeminiPro()._content_from_message(message)
-    model = gemini.GeminiProVision()
+    model = google_genai.GeminiProVision()
     chunks = model._content_from_message(message)
     self.maxDiff = None
     self.assertEqual(
@@ -118,7 +141,7 @@ class GeminiTest(unittest.TestCase):
             ],
         ),
     )
-    model = gemini.GeminiProVision()
+    model = google_genai.GeminiProVision()
     result = model._response_to_result(response)
     self.assertEqual(
         result,
@@ -129,26 +152,28 @@ class GeminiTest(unittest.TestCase):
     )
   def test_model_hub(self):
-    model = gemini._GOOGLE_GENAI_MODEL_HUB.get('gemini-pro')
+    model = google_genai._GOOGLE_GENAI_MODEL_HUB.get('gemini-pro')
     self.assertIsNotNone(model)
-    self.assertIs(gemini._GOOGLE_GENAI_MODEL_HUB.get('gemini-pro'), model)
+    self.assertIs(google_genai._GOOGLE_GENAI_MODEL_HUB.get('gemini-pro'), model)
   def test_api_key_check(self):
     with self.assertRaisesRegex(ValueError, 'Please specify `api_key`'):
-      _ = gemini.GeminiPro()._api_initialized
+      _ = google_genai.GeminiPro()._api_initialized
-    self.assertTrue(gemini.GeminiPro(api_key='abc')._api_initialized)
+    self.assertTrue(google_genai.GeminiPro(api_key='abc')._api_initialized)
     os.environ['GOOGLE_API_KEY'] = 'abc'
-    self.assertTrue(gemini.GeminiPro()._api_initialized)
+    self.assertTrue(google_genai.GeminiPro()._api_initialized)
     del os.environ['GOOGLE_API_KEY']
   def test_call(self):
     with mock.patch(
         'google.generativeai.generative_models.GenerativeModel.generate_content'
     ) as mock_generate:
+      orig_get_model = genai.get_model
+      genai.get_model = mock_get_model
       mock_generate.side_effect = mock_generate_content
-      lm = gemini.GeminiPro(api_key='test_key')
+      lm = google_genai.GeminiPro(api_key='test_key')
       self.maxDiff = None
       self.assertEqual(
           lm('hello', temperature=2.0, top_k=20).text,
@@ -157,6 +182,44 @@ class GeminiTest(unittest.TestCase):
               'top_p=None, top_k=20, max_tokens=1024, stop=None.'
           ),
       )
+      genai.get_model = orig_get_model
+  def test_call_with_legacy_completion_model(self):
+    orig_get_model = genai.get_model
+    genai.get_model = mock_get_model
+    orig_generate_text = genai.generate_text
+    genai.generate_text = mock_generate_text
+    lm = google_genai.Palm2(api_key='test_key')
+    self.maxDiff = None
+    self.assertEqual(
+        lm('hello', temperature=2.0, top_k=20).text,
+        (
+            "hello to models/text-bison-001 with {'temperature': 2.0, "
+            "'top_k': 20, 'top_p': None, 'candidate_count': 1, "
+            "'max_output_tokens': 1024, 'stop_sequences': None}"
+        ),
+    )
+    genai.get_model = orig_get_model
+    genai.generate_text = orig_generate_text
+  def test_call_with_legacy_chat_model(self):
+    orig_get_model = genai.get_model
+    genai.get_model = mock_get_model
+    orig_chat = genai.chat
+    genai.chat = mock_chat
+    lm = google_genai.Palm2_IT(api_key='test_key')
+    self.maxDiff = None
+    self.assertEqual(
+        lm('hello', temperature=2.0, top_k=20).text,
+        (
+            "hello to models/chat-bison-001 with {'temperature': 2.0, "
+            "'top_k': 20, 'top_p': None, 'candidate_count': 1}"
+        ),
+    )
+    genai.get_model = orig_get_model
+    genai.chat = orig_chat
 if __name__ == '__main__':

langfun 0.0.2.dev20240327__tar.gz → 0.0.2.dev20240330__tar.gz

langfun 0.0.2.dev20240327.tar.gz → 0.0.2.dev20240330.tar.gz