langfun 0.1.2.dev202511030805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl
This diff shows the content differences between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of langfun has been flagged as potentially problematic.
- langfun/core/agentic/action.py +76 -9
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +10 -3
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/openai.py +17 -7
- langfun/core/eval/base.py +46 -42
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/evaluation.py +59 -13
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +23 -3
- langfun/core/eval/v2/metrics.py +33 -4
- langfun/core/eval/v2/progress.py +9 -1
- langfun/core/eval/v2/reporting.py +15 -1
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/langfunc.py +45 -130
- langfun/core/language_model.py +88 -10
- langfun/core/llms/anthropic.py +27 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +22 -2
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +29 -1
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +9 -1
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +27 -2
- langfun/core/llms/llama_cpp.py +22 -3
- langfun/core/llms/openai.py +29 -1
- langfun/core/llms/openai_compatible.py +18 -6
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +39 -6
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/session.py +90 -10
- langfun/core/mcp/tool.py +83 -23
- langfun/core/memory.py +1 -0
- langfun/core/message.py +75 -11
- langfun/core/message_test.py +9 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +54 -4
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +66 -5
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/structured/completion.py +32 -37
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +70 -15
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +201 -130
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +139 -40
- langfun/core/template_test.py +40 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +77 -77
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
langfun/core/concurrent.py
CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+"""Utilities for concurrency in Langfun."""
 
 import abc
 import collections
@@ -97,7 +97,7 @@ class RetryError(RuntimeError):
 
 
 def with_retry(
-    func: Callable[
+    func: Callable[..., Any],
     retry_on_errors: Union[
         Union[Type[BaseException], Tuple[Type[BaseException], str]],
         Sequence[Union[Type[BaseException], Tuple[Type[BaseException], str]]],
@@ -108,10 +108,25 @@ def with_retry(
     max_retry_interval: int = 300,
     seed: int | None = None,
 ) -> Callable[..., Any]:
-  """
+  """Decorator-like function to add retry mechanism to a function.
+
+  Example:
+
+    ```
+    def flaky_function():
+      if random.random() < 0.5:
+        raise ValueError('error')
+      return 1
+
+    reliable_function = lf.with_retry(
+        flaky_function,
+        retry_on_errors=ValueError,
+        max_attempts=3)
+    reliable_function()
+    ```
 
   Args:
-    func:
+    func: The function to add retry mechanism.
     retry_on_errors: A sequence of exception types or tuples of exception type
       and error messages (described in regular expression) as the desired
       exception types to retry.
@@ -128,8 +143,7 @@ def with_retry(
       determined based on current time.
 
   Returns:
-    A function with the same signature of
-    capability.
+    A function with the same signature of `func`, but with retry capability.
   """
 
   def _func(*args, **kwargs):
@@ -179,6 +193,24 @@ def concurrent_execute(
 ) -> list[Any]:
   """Executes a function concurrently under current component context.
 
+  `lf.concurrent_execute` applies a function to each item in an iterable of
+  inputs in parallel and returns a list of results in the same order as the
+  inputs. It is a convenient wrapper around `lf.concurrent_map` for synchronous
+  bulk processing.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  def square(x):
+    return x ** 2
+
+  results = lf.concurrent_execute(square, [1, 2, 3, 4], max_workers=2)
+  print(results)
+  # Output: [1, 4, 9, 16]
+  ```
+
   Args:
     func: A user function.
     parallel_inputs: The inputs for `func` which will be processed in parallel.
@@ -649,6 +681,38 @@ def concurrent_map(
 ) -> Iterator[Any]:
   """Maps inputs to outptus via func concurrently under current context.
 
+  `lf.concurrent_map` applies a function to each item in an iterable of
+  inputs in parallel and yields `(input, output, error)` tuples as they are
+  completed. It supports features like ordered/unordered results, progress
+  bars, timeouts, and automatic retries for transient errors.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+  import time
+  import random
+
+  def flaky_square(x):
+    time.sleep(random.random())
+    if random.random() < 0.3:
+      raise ValueError("Flaky error")
+    return x ** 2
+
+  # Unordered execution with progress bar and retries
+  for input, output, error in lf.concurrent_map(
+      flaky_square,
+      range(10),
+      max_workers=3,
+      show_progress=True,
+      retry_on_errors=ValueError,
+      max_attempts=3):
+    if error:
+      print(f"Input {input} failed with error: {error}")
+    else:
+      print(f"Input {input} succeeded with output: {output}")
+  ```
+
   Args:
     func: A user function.
     parallel_inputs: The inputs for `func` which will be processed in parallel.
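The `with_retry` signature above also accepts `(ExceptionType, regex)` tuples for `retry_on_errors`, retrying only errors whose message matches the pattern. A minimal sketch of that form (the `fetch` function and its 429 message are hypothetical; `lf.with_retry` and `max_attempts` appear in the diff):

```python
import langfun as lf

def fetch():
  # Hypothetical flaky call whose failure message matters.
  raise TimeoutError('HTTP 429: rate limited')

# Retry only TimeoutErrors whose message matches the regex; other errors
# propagate immediately.
fetch_with_retry = lf.with_retry(
    fetch,
    retry_on_errors=(TimeoutError, '.*429.*'),
    max_attempts=5,
)
```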
langfun/core/console.py
CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+"""Utilities for console output and notebook display."""
 
 import sys
 from typing import Any
langfun/core/data/conversion/anthropic.py
CHANGED
@@ -21,7 +21,14 @@ from langfun.core import modalities as lf_modalities
 
 
 class AnthropicMessageConverter(lf.MessageConverter):
-  """Converter
+  """Converter between Langfun messages and Anthropic API message format.
+
+  This converter translates `lf.Message` objects into the JSON format required
+  by the Anthropic API and vice versa. It handles text and modalities like
+  images and PDFs by encoding them in base64 format as expected by Anthropic.
+  An optional `chunk_preprocessor` can be provided to modify or filter
+  chunks before conversion.
+  """
 
   FORMAT_ID = 'anthropic'
 
@@ -30,12 +37,12 @@ class AnthropicMessageConverter(lf.MessageConverter):
       (
           'Chunk preprocessor for Langfun chunk to Anthropic chunk conversion. '
           'It will be applied before each Langfun chunk is converted. '
-          'If returns None, the chunk will be skipped.'
+          'If it returns None, the chunk will be skipped.'
       )
   ] = None
 
   def to_value(self, message: lf.Message) -> dict[str, Any]:
-    """Converts a Langfun message to
+    """Converts a Langfun message to Anthropic API."""
     content = []
     for chunk in message.chunk():
       if self.chunk_preprocessor:
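A hedged usage sketch for `AnthropicMessageConverter` above: the class and its `to_value` method appear in the diff, while the import path (taken from the file list) and the exact output shape are assumptions:

```python
import langfun as lf
from langfun.core.data.conversion import anthropic  # path from the file list

converter = anthropic.AnthropicMessageConverter()
# Convert a Langfun message into an Anthropic-style message dict.
api_message = converter.to_value(lf.UserMessage('Describe this image.'))
# Expected shape (assumption): {'role': 'user', 'content': [{'type': 'text', ...}]}
```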
langfun/core/data/conversion/gemini.py
CHANGED
@@ -21,7 +21,14 @@ from langfun.core import modalities as lf_modalities
 
 
 class GeminiMessageConverter(lf.MessageConverter):
-  """Converter
+  """Converter between Langfun messages and Gemini API message format.
+
+  This converter translates `lf.Message` objects into the JSON format required
+  by the public Gemini API (e.g., via Vertex AI or Google AI Studio) and
+  vice versa. It handles text and modalities like images, extracting thought
+  chunks if present. An optional `chunk_preprocessor` can be provided to
+  modify or filter chunks before conversion.
+  """
 
   FORMAT_ID = 'gemini'
 
@@ -30,7 +37,7 @@ class GeminiMessageConverter(lf.MessageConverter):
       (
           'Chunk preprocessor for Langfun chunk to Gemini chunk conversion. '
           'It will be applied before each Langfun chunk is converted. '
-          'If returns None, the chunk will be skipped.'
+          'If it returns None, the chunk will be skipped.'
       ),
   ] = None
 
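The `chunk_preprocessor` field documented above can drop or rewrite chunks before conversion; returning `None` skips a chunk. A sketch under those assumptions (the filtering predicate is hypothetical):

```python
from langfun.core.data.conversion import gemini  # path from the file list

def drop_empty_text(chunk):
  # Returning None skips the chunk, per the field docstring above.
  if isinstance(chunk, str) and not chunk.strip():
    return None
  return chunk

converter = gemini.GeminiMessageConverter(chunk_preprocessor=drop_empty_text)
```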
langfun/core/data/conversion/openai.py
CHANGED
@@ -20,9 +20,14 @@ from langfun.core import modalities as lf_modalities
 
 
 class OpenAIChatCompletionAPIMessageConverter(lf.MessageConverter):
-  """Converter
-
-
+  """Converter for OpenAI Chat Completion API.
+
+  This converter translates `lf.Message` objects into the JSON format
+  required by the OpenAI Chat Completions API
+  (https://platform.openai.com/docs/api-reference/chat) and vice versa.
+  It handles text and image modalities, mapping Langfun roles to OpenAI
+  roles ('system', 'user', 'assistant'). An optional `chunk_preprocessor`
+  can be provided to modify or filter chunks before conversion.
   """
 
   FORMAT_ID = 'openai_chat_completion_api'
@@ -32,7 +37,7 @@ class OpenAIChatCompletionAPIMessageConverter(lf.MessageConverter):
       (
           'Chunk preprocessor for Langfun chunk to OpenAI chunk conversion. '
           'It will be applied before each Langfun chunk is converted. '
-          'If returns None, the chunk will be skipped.'
+          'If it returns None, the chunk will be skipped.'
       )
   ] = None
 
@@ -159,9 +164,14 @@ lf.Message.from_openai_chat_completion_api_format = (
 class OpenAIResponsesAPIMessageConverter(
     OpenAIChatCompletionAPIMessageConverter
 ):
-  """Converter
-
-
+  """Converter for OpenAI Responses API.
+
+  This converter translates `lf.Message` objects into the JSON format
+  required by the OpenAI Responses API
+  (https://platform.openai.com/docs/api-reference/responses/create),
+  which is used for human-in-the-loop rating, and vice versa.
+  It extends `OpenAIChatCompletionAPIMessageConverter` but uses different
+  type names for content chunks (e.g., 'input_text', 'output_image').
   """
 
   FORMAT_ID = 'openai_responses_api'
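The diff attaches this converter to messages via `lf.Message.from_openai_chat_completion_api_format`. A round-trip sketch under that assumption (the payload shape follows the OpenAI Chat Completions API and is illustrative, not taken from this diff):

```python
import langfun as lf

# Illustrative payload; the exact schema is an assumption based on the
# OpenAI Chat Completions API.
payload = {'role': 'assistant', 'content': [{'type': 'text', 'text': 'Hi!'}]}
msg = lf.Message.from_openai_chat_completion_api_format(payload)
```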
langfun/core/eval/base.py
CHANGED
@@ -59,18 +59,20 @@ class Evaluable(lf.Component):
   @property
   @abc.abstractmethod
   def id(self) -> str:
-    """Returns the ID of
+    """Returns the ID of this evaluable node.
 
     Returns:
-
-
-
-
+      A string as the ID of this evaluable node.
+      If an evaluable node acts as a container for other evaluable nodes
+      (e.g. `lf.Suite`), its ID could be empty.
+      Leaf evaluable nodes (e.g. `lf.Evaluation`) must have unique IDs
+      under the same container, as their IDs will be used as the directory
+      name for saving their results.
     """
 
   @property
   def dir(self) -> str | None:
-    """Returns the directory for saving results
+    """Returns the directory for saving results."""
     if self.root_dir is None:
       return None
     return os.path.join(self.root_dir, self.id)
@@ -82,18 +84,18 @@ class Evaluable(lf.Component):
 
   @property
   def index_link(self) -> str | None:
-    """Returns the index page."""
+    """Returns the link to the index page."""
     if self.dir is None:
       return None
     return self.link(os.path.join(self.dir, Evaluable.INDEX_HTML))
 
   def summary(self, pivot_field: str = 'lm') -> 'Summary':
-    """Returns a summary for all child evaluations
+    """Returns a summary for all child evaluations."""
     return Summary([pg.Ref(x) for x in self.leaf_nodes], pivot_field)
 
   @property
   def summary_link(self) -> str | None:
-    """Returns the summary page."""
+    """Returns the link to the summary page."""
     if self.root_dir is None:
       return None
     return self.link(os.path.join(self.root_dir, Evaluable.SUMMARY_HTML))
@@ -177,6 +179,7 @@ class Evaluable(lf.Component):
 
   @property
   def is_leaf(self) -> bool:
+    """Returns whether this node is a leaf node."""
     return isinstance(self, Evaluation) and not self.children
 
   @functools.cached_property
@@ -404,7 +407,7 @@ class Evaluable(lf.Component):
       timeout: int | None = None,
       **kwargs,
   ) -> None:
-    """Run the
+    """Run the evaluation and fill `self.result`. Subclass to implement."""
 
   @abc.abstractmethod
   def _completion_status(self, run_status: str) -> str:
@@ -545,6 +548,7 @@ class Evaluable(lf.Component):
   def from_dir(
       cls, maybe_dir: str, load_result: bool = True
   ) -> Optional['Evaluable']:
+    """Loads an evaluable object from a directory."""
     exp_json = os.path.join(maybe_dir, Evaluable.EXPERIMENT_JSON)
     if not pg.io.path_exists(exp_json):
       return None
@@ -558,7 +562,7 @@ class Evaluable(lf.Component):
     return experiment
 
   def try_load_result(self) -> bool:
-    """Try
+    """Try loads result from file if it's not loaded."""
     if self.result is None:
       result_json = os.path.join(self.dir, Evaluable.RESULT_JSON)
       if pg.io.path_exists(result_json):
@@ -604,6 +608,7 @@ class Suite(Evaluable):
 
   @functools.cached_property
   def hash(self) -> str:
+    """Returns the hash of this suite."""
     return hashlib.md5(
         ' '.join(sorted([c.hash for c in self.children])).encode()
     ).hexdigest()[:8]
@@ -619,14 +624,14 @@ class Suite(Evaluable):
 
 
 class Evaluation(Evaluable):
-  """Base class for evaluation
+  """Base class for evaluation sets."""
 
   inputs: pg.typing.Annotated[
       pg.typing.Functor(),
       (
           'A functor that returns a list of user-defined objects as the input '
-          'examples. It
-          '`lf.eval.inputs_from(path)`, from a Python
+          'examples. It can be inputs loaded from a JSON file via '
+          '`lf.eval.inputs_from(path)`, from a Python-coded list via '
          '`lf.eval.as_inputs(values)` or a user-defined functor that '
          'generates input objects at runtime.'
      ),
@@ -648,12 +653,12 @@ class Evaluation(Evaluable):
       pg.typing.Functor().noneable(),
       (
           'A functor that returns a type annotation that will be converted to '
-          '`lf.Schema`, or a tuple of (annotation,
+          '`lf.Schema`, or a tuple of (annotation, few-shot examples). '
           'For "call" method, it could be None, indicating that the raw '
-          'response from the LM will be used as the output, and the
-          'examples will be used for parsing. For "query" and "complete"
-          'must be provided, and the
-          'for prompting. Here
+          'response from the LM will be used as the output, and the few-shot '
+          'examples will be used for parsing. For "query" and "complete" '
+          'methods, it must be provided, and the few-shot examples will be '
+          'used directly for prompting. Here is example code on how the '
           'functors should be defined:'
           + inspect.cleandoc("""
              ```
@@ -693,7 +698,7 @@ class Evaluation(Evaluable):
   completion_prompt_field: Annotated[
       str | None,
       (
-          'A
+          'A string field that will be automatically added to the class of the '
          'input object for `lf.complete`. If None, no field will be added to '
          'the class, instead the prompt will be passed as the first argument '
          'of the input object to complete. Applicable only when `method` is '
@@ -738,7 +743,7 @@ class Evaluation(Evaluable):
 
   @functools.cached_property
   def hash(self) -> str:
-    """Returns the
+    """Returns the semantics-based hash of the evaluation."""
     if self.is_deterministic:
       identity = pg.format(self._identifiers(), compact=True)
     else:
@@ -784,7 +789,7 @@ class Evaluation(Evaluable):
 
   @property
   def complete_rate(self) -> float:
-    """Returns the
+    """Returns the completion rate of examples."""
     return self.num_completed / self.num_examples
 
   #
@@ -837,7 +842,7 @@ class Evaluation(Evaluable):
 
   @functools.cached_property
   def non_oop_failures(self) -> list[tuple[Any, Exception]]:
-    """Returns the OOP failures."""
+    """Returns the non-OOP failures."""
     return [item for item in self.failures
             if not isinstance(item[1], lf_structured.MappingError)]
 
@@ -883,7 +888,7 @@ class Evaluation(Evaluable):
 
   @functools.cached_property
   def schema(self) -> lf_structured.Schema | None:
-    """
+    """Returns the schema for parsing LLM response."""
     if self.schema_fn is None:
       return None
 
@@ -897,7 +902,7 @@ class Evaluation(Evaluable):
 
   @functools.cached_property
   def fewshot_examples(self) -> list[lf.structured.MappingExample] | None:
-    """
+    """Returns the few-shot examples for prompting or parsing."""
     if self.schema_fn is None:
       return None
 
@@ -973,7 +978,7 @@ class Evaluation(Evaluable):
 
   @functools.cached_property
   def children(self) -> list['Evaluation']:
-    """Returns
+    """Returns child evaluations if this evaluation has a parameter space."""
     if self.is_deterministic:
       return []
     children = []
@@ -1023,7 +1028,7 @@ class Evaluation(Evaluable):
 
   @property
   def non_oop_failures_link(self) -> str | None:
-    """Returns the link to
+    """Returns the link to the non-OOP failures page."""
     if self.dir is None:
       return None
     return self.link(os.path.join(self.dir, Evaluation.NON_OOP_FAILURES_HTML))
@@ -1208,10 +1213,10 @@ class Evaluation(Evaluable):
     )
 
   def process_output(self, example: Any, output: lf.Message) -> None:
-    """
+    """Processes the output for an example.
 
     Subclasses can override this method to generate and attach additional
-    metadata for debugging
+    metadata for debugging purposes. For example, draw bounding boxes on the
     input image based on LLM predicted boxes and attach to output_message's
     metadata.
 
@@ -1219,8 +1224,8 @@ class Evaluation(Evaluable):
 
       class BoundingBoxEval(lf.eval.Matching):
         ...
-        def process_output(example, output):
-          output.metadata.image_with_bbox =
+        def process_output(self, example, output):
+          output.metadata.image_with_bbox = draw_bounding_box(
               example.image, output.result)
 
     Args:
@@ -1449,7 +1454,7 @@ class Evaluation(Evaluable):
         trace the LM input, response and parsed structure. If error is raised
         before LLM could return a response, None will be its value.
       error: The exception during processing the example.
-      dryrun: Whether or not
+      dryrun: Whether or not auditing takes place during dryrun.
     """
     if error is not None:
       self._failures.append((example, error))
@@ -1674,7 +1679,7 @@ class Evaluation(Evaluable):
 
   @classmethod
   def visualize(cls, evaluations: list['Evaluation']) -> str | None:
-    """Visualize
+    """Visualize a list of evaluations of this task in HTML."""
     del evaluations
     return None
 
@@ -1810,7 +1815,7 @@ class Summary(pg.Object):
     )
 
   class Table(pg.Object):
-    """A pivot table for
+    """A pivot table for viewing evaluations."""
 
     class Row(pg.Object):
       descriptor: dict[str, Any]
@@ -2013,12 +2018,12 @@ class Summary(pg.Object):
       return self._context.completed
 
     def stop(self) -> 'Summary':
-      """
+      """Signals and waits for the monitor thread to stop."""
      self._context.stopping = True
      return self.join()
 
    def join(self) -> 'Summary':
-      """Waits the monitor thread to complete."""
+      """Waits for the monitor thread to complete."""
      self._thread.join()
      summary = self.summary
      assert summary is not None
@@ -2035,7 +2040,7 @@ class Summary(pg.Object):
       scan_interval: int = 60,
       refresh_when_stop: bool = True,
   ) -> MonitorResult:
-    """
+    """Monitors one or more root directories and save summary periodically."""
     context = pg.Dict(stopping=False, completed=False, summary=None)
 
     def _monitor():
@@ -2187,7 +2192,7 @@ def monitor_async(
     scan_interval: int = 60,
     refresh_when_stop: bool = True,
 ) -> Summary.MonitorResult:
-  """
+  """Asynchronously monitors one or more root directories for summary."""
   return Summary.monitor_async(
       root_dir,
       save_as,
@@ -2365,10 +2370,9 @@ def run(
       a string (for string-based patcher), a `pg.patching.Patcher` object, or
       a rebind function (e.g. `pg.rebind`). See `lf.eval.patch_*` for more
       details.
-    mode: The mode to run the suite
-
-
-      to do nothing.
+    mode: The mode to run the suite: "run" to run with reuse of existing
+      results, "rerun" to force re-evaluation, "dryrun" for a dry run, and
+      "noop" to do nothing.
     debug: Whether to run in debug mode.
     print_definition: Whether to print the experiment definition.
    **kwargs: Additional arguments to be passed to dryrun/run the suite.
langfun/core/eval/matching.py
CHANGED
@@ -38,7 +38,7 @@ class Matching(base.Evaluation):
 
   @abc.abstractmethod
   def answer(self, output: Any, example: Any) -> Any:
-    """Returns the answer from the
+    """Returns the answer from the structured output."""
 
   @property
   def matches(self) -> list[tuple[int, Any, Any, lf.Message]]:
@@ -52,6 +52,7 @@ class Matching(base.Evaluation):
 
   @property
   def match_rate(self) -> float:
+    """Returns the match rate."""
     if self.num_completed == 0:
       return 0.0
     return self.num_matches / self.num_completed
@@ -68,17 +69,19 @@ class Matching(base.Evaluation):
 
   @property
   def mismatch_rate(self) -> float:
+    """Returns the mismatch rate."""
     if self.num_completed == 0:
       return 0.0
     return self.num_mismatches / self.num_completed
 
   @property
   def matches_link(self) -> str:
-    """Returns the matches page."""
+    """Returns the link to the matches page."""
     return self.link(os.path.join(self.dir, Matching.MATCHES_HTML))
 
   @property
   def mismatches_link(self) -> str:
+    """Returns the link to the mismatches page."""
     return self.link(os.path.join(self.dir, Matching.MISMATCHES_HTML))
 
   def _reset(self) -> None:
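A hedged subclass sketch for the abstract `answer` method above. The `groundtruth` counterpart and all field names are assumptions, as they do not appear in this diff:

```python
import langfun as lf

class ArithmeticEval(lf.eval.Matching):  # hypothetical evaluation
  ...

  def groundtruth(self, example):
    # Assumption: Matching also exposes a `groundtruth` hook (not shown here).
    return example.expected_answer  # hypothetical field

  def answer(self, output, example):
    # `output` is the structured output object per the signature above.
    return output.final_answer  # hypothetical field
```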
langfun/core/eval/patching.py
CHANGED
@@ -114,17 +114,17 @@ def model_by_name(name: str) -> lf.LanguageModel:
 
 @pg.patcher(auto_typing=True)
 def lm(unused_eval, models: list[str]):
-  """
+  """Patches the LM used for benchmarking."""
   return patch_lm(pg.oneof([model_by_name(name) for name in models]))
 
 
 @pg.patcher(auto_typing=True)
 def temperature(unused_eval, value: float):
-  """
+  """Patches the temperature used for benchmarking."""
   return patch_member(lf.LMSamplingOptions, "temperature", value)
 
 
 @pg.patcher(auto_typing=True)
 def max_tokens(unused_eval, value: int | None):
-  """
+  """Patches the max_tokens used for benchmarking."""
   return patch_member(lf.LMSamplingOptions, "max_tokens", value)
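The patchers above are registered via `@pg.patcher(auto_typing=True)`, which in PyGlove makes them addressable by string, matching the note in base.py's `run` docstring about string-based patchers. A heavily hedged sketch; `pg.patch` and the URI-like string syntax are assumptions about PyGlove's patching API, and `my_evaluation` is a hypothetical evaluation instance:

```python
import pyglove as pg

# Assumption: a registered patcher can be applied by name through pg.patch,
# with `auto_typing` parsing '0.0' into a float.
patched_eval = pg.patch(my_evaluation, ['temperature?value=0.0'])
```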
langfun/core/eval/scoring.py
CHANGED
@@ -41,18 +41,19 @@ class Scoring(base.Evaluation):
 
   @property
   def score_rate(self) -> float:
-    """Returns the
+    """Returns the rate of scored examples among the completed ones."""
     if self.num_completed == 0:
       return 0.0
     return self.num_scored / self.num_completed
 
   @property
   def scored_link(self) -> str:
-    """Returns the
+    """Returns the scored examples page."""
     return self.link(os.path.join(self.dir, Scoring.SCORED_HTML))
 
   @property
   def avg_score(self) -> float:
+    """Returns the average score of scored examples."""
     if self.num_scored == 0:
       return 0
     return sum([i[2] for i in self._scored]) / self.num_scored
@@ -181,7 +182,7 @@ class Scoring(base.Evaluation):
     super()._render_summary_metrics(s)
 
   def _render_scored(self, s: io.StringIO) -> None:
-    """Formats the
+    """Formats the scored cases into html."""
     s.write('<h2> Scored </h2>')
     s.write('<div style="white-space:pre">\n')
     s.write(
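Illustrative arithmetic for the `avg_score` property above; the tuple layout (score at index 2) is inferred from `sum([i[2] for i in self._scored])` in the source:

```python
# Hypothetical scored items shaped as (example_id, output, score).
scored = [(1, 'output-a', 0.8), (2, 'output-b', 0.6)]
avg_score = sum(item[2] for item in scored) / len(scored)
assert abs(avg_score - 0.7) < 1e-9  # (0.8 + 0.6) / 2
```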
langfun/core/eval/v2/checkpointing.py
CHANGED
@@ -29,7 +29,17 @@ Runner = experiment_lib.Runner
 
 
 class Checkpointer(experiment_lib.Plugin):
-  """Base class for checkpointing evaluation examples."""
+  """Base class for checkpointing evaluation examples.
+
+  `Checkpointer` is a plugin that saves the state of processed examples
+  incrementally during an experiment run, allowing the experiment to be resumed
+  later. When an experiment starts, the checkpointer loads any previously saved
+  examples from an earlier run (or a warm-start run) into `experiment.state`,
+  so the runner can skip processing them again.
+  Subclasses should implement `_list_checkpoint_filenames` to identify
+  checkpoint files to load, and `_save_example` to save a newly processed
+  example.
+  """
 
   checkpoint_filename: Annotated[
       str,
@@ -170,7 +180,12 @@ class Checkpointer(experiment_lib.Plugin):
 
 
 class PerExampleCheckpointer(Checkpointer):
-  """Checkpointer that saves each example to a separate file."""
+  """Checkpointer that saves each example to a separate file.
+
+  This checkpointer saves each processed example to its own checkpoint file,
+  named using the pattern `<checkpoint_filename_prefix>_<example_id>.<ext>`.
+  For example, `checkpoint_1.bagz`, `checkpoint_2.bagz`, etc.
+  """
 
   def _on_bound(self):
     super()._on_bound()
@@ -235,7 +250,13 @@ class PerExampleCheckpointer(Checkpointer):
 
 
 class BulkCheckpointer(Checkpointer):
-  """Checkpointer that saves all examples to a single file."""
+  """Checkpointer that saves all examples of an evaluation to a single file.
+
+  This checkpointer appends newly processed examples of an evaluation to a
+  single sequence file (e.g., `checkpoint.bagz`). This is often more efficient
+  than `PerExampleCheckpointer` when dealing with a large number of examples
+  or when file system overhead is a concern.
+  """
 
   def _on_bound(self):
     super()._on_bound()
@@ -341,7 +362,12 @@ class BulkCheckpointer(Checkpointer):
 
 
 class SequenceWriter:
-  """
+  """A thread-safe writer for sequence files (e.g., Bagz).
+
+  `SequenceWriter` wraps a `pg.io.SequenceWriter` to provide thread-safe
+  `add` and `close` operations, ensuring that examples can be written
+  concurrently from multiple threads without corrupting the sequence file.
+  """
 
   def __init__(self, path: str):
     self._lock = threading.Lock()
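A minimal sketch of the thread-safety contract described in the `SequenceWriter` docstring above. The `add` and `close` methods come from the diff; the constructor argument and payload format are assumptions:

```python
import threading
from langfun.core.eval.v2 import checkpointing  # path from the file list above

writer = checkpointing.SequenceWriter('/tmp/checkpoint.bagz')

def save(blob: str):
  writer.add(blob)  # `add` is guarded by an internal lock

threads = [
    threading.Thread(target=save, args=(f'example-{i}',)) for i in range(4)
]
for t in threads:
  t.start()
for t in threads:
  t.join()
writer.close()  # also lock-guarded; flushes and closes the sequence file
```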
|