PyPI - langfun - Versions diffs - 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl - Mend

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145)

langfun/__init__.py +22 -2
langfun/core/__init__.py +17 -5
langfun/core/agentic/__init__.py +30 -0
langfun/core/agentic/action.py +854 -0
langfun/core/agentic/action_eval.py +150 -0
langfun/core/agentic/action_eval_test.py +109 -0
langfun/core/agentic/action_test.py +136 -0
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +37 -28
langfun/core/coding/python/correction_test.py +29 -3
langfun/core/coding/python/execution.py +40 -216
langfun/core/coding/python/execution_test.py +29 -89
langfun/core/coding/python/generation.py +21 -11
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -193
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +69 -2
langfun/core/component_test.py +54 -0
langfun/core/concurrent.py +414 -117
langfun/core/concurrent_test.py +111 -24
langfun/core/console.py +18 -5
langfun/core/console_test.py +17 -0
langfun/core/eval/__init__.py +17 -0
langfun/core/eval/base.py +767 -140
langfun/core/eval/base_test.py +238 -53
langfun/core/eval/matching.py +80 -76
langfun/core/eval/matching_test.py +19 -9
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +37 -28
langfun/core/eval/scoring_test.py +21 -3
langfun/core/eval/v2/__init__.py +42 -0
langfun/core/eval/v2/checkpointing.py +380 -0
langfun/core/eval/v2/checkpointing_test.py +228 -0
langfun/core/eval/v2/eval_test_helper.py +136 -0
langfun/core/eval/v2/evaluation.py +725 -0
langfun/core/eval/v2/evaluation_test.py +180 -0
langfun/core/eval/v2/example.py +305 -0
langfun/core/eval/v2/example_test.py +128 -0
langfun/core/eval/v2/experiment.py +1048 -0
langfun/core/eval/v2/experiment_test.py +433 -0
langfun/core/eval/v2/metric_values.py +156 -0
langfun/core/eval/v2/metric_values_test.py +80 -0
langfun/core/eval/v2/metrics.py +357 -0
langfun/core/eval/v2/metrics_test.py +203 -0
langfun/core/eval/v2/progress.py +348 -0
langfun/core/eval/v2/progress_test.py +82 -0
langfun/core/eval/v2/progress_tracking.py +210 -0
langfun/core/eval/v2/progress_tracking_test.py +66 -0
langfun/core/eval/v2/reporting.py +270 -0
langfun/core/eval/v2/reporting_test.py +158 -0
langfun/core/eval/v2/runners.py +488 -0
langfun/core/eval/v2/runners_test.py +334 -0
langfun/core/langfunc.py +3 -21
langfun/core/langfunc_test.py +26 -8
langfun/core/language_model.py +686 -48
langfun/core/language_model_test.py +681 -44
langfun/core/llms/__init__.py +100 -12
langfun/core/llms/anthropic.py +488 -0
langfun/core/llms/anthropic_test.py +235 -0
langfun/core/llms/cache/base.py +21 -2
langfun/core/llms/cache/in_memory.py +13 -0
langfun/core/llms/cache/in_memory_test.py +88 -28
langfun/core/llms/compositional.py +101 -0
langfun/core/llms/compositional_test.py +73 -0
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/fake.py +39 -26
langfun/core/llms/fake_test.py +136 -11
langfun/core/llms/gemini.py +507 -0
langfun/core/llms/gemini_test.py +195 -0
langfun/core/llms/google_genai.py +62 -218
langfun/core/llms/google_genai_test.py +9 -197
langfun/core/llms/groq.py +276 -0
langfun/core/llms/groq_test.py +64 -0
langfun/core/llms/llama_cpp.py +15 -40
langfun/core/llms/llama_cpp_test.py +4 -30
langfun/core/llms/openai.py +436 -226
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +35 -174
langfun/core/llms/rest.py +113 -0
langfun/core/llms/rest_test.py +111 -0
langfun/core/llms/vertexai.py +192 -0
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/logging.py +284 -0
langfun/core/logging_test.py +125 -0
langfun/core/message.py +319 -9
langfun/core/message_test.py +190 -13
langfun/core/modalities/__init__.py +6 -2
langfun/core/modalities/audio.py +30 -0
langfun/core/modalities/audio_test.py +63 -0
langfun/core/modalities/image.py +39 -20
langfun/core/modalities/image_test.py +52 -9
langfun/core/modalities/mime.py +206 -29
langfun/core/modalities/mime_test.py +90 -9
langfun/core/modalities/ms_office.py +117 -0
langfun/core/modalities/ms_office_test.py +389 -0
langfun/core/modalities/pdf.py +22 -0
langfun/core/modalities/pdf_test.py +57 -0
langfun/core/modalities/video.py +9 -23
langfun/core/modalities/video_test.py +3 -3
langfun/core/modality.py +26 -3
langfun/core/modality_test.py +2 -2
langfun/core/sampling.py +11 -11
langfun/core/structured/__init__.py +15 -16
langfun/core/structured/completion.py +32 -5
langfun/core/structured/completion_test.py +9 -8
langfun/core/structured/description.py +2 -2
langfun/core/structured/description_test.py +3 -3
langfun/core/structured/function_generation.py +278 -0
langfun/core/structured/function_generation_test.py +399 -0
langfun/core/structured/mapping.py +150 -46
langfun/core/structured/mapping_test.py +105 -0
langfun/core/structured/parsing.py +33 -21
langfun/core/structured/parsing_test.py +71 -22
langfun/core/structured/querying.py +746 -0
langfun/core/structured/{prompting_test.py → querying_test.py} +545 -60
langfun/core/structured/schema.py +208 -99
langfun/core/structured/schema_generation.py +1 -1
langfun/core/structured/schema_generation_test.py +2 -2
langfun/core/structured/schema_test.py +133 -34
langfun/core/structured/scoring.py +125 -19
langfun/core/structured/scoring_test.py +30 -0
langfun/core/structured/tokenization.py +64 -0
langfun/core/structured/tokenization_test.py +48 -0
langfun/core/template.py +240 -11
langfun/core/template_test.py +146 -1
langfun/core/templates/conversation.py +9 -0
langfun/core/templates/conversation_test.py +4 -3
langfun/core/templates/selfplay_test.py +14 -2
langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/structured/prompting.py +0 -217
langfun/core/text_formatting.py +0 -162
langfun/core/text_formatting_test.py +0 -47
langfun-0.0.2.dev20240330.dist-info/METADATA +0 -99
langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0

langfun/core/concurrent.py CHANGED Viewed

@@ -13,17 +13,30 @@
 # limitations under the License.
 """Utility library for handling concurrency in langfun."""
+import abc
 import collections
+from collections.abc import Mapping
 import concurrent.futures
 import dataclasses
+import io
 import random
+import sys
 import threading
 import time
-from typing import Any, Callable, Iterable, Iterator, Literal, Sequence, Tuple, Type, Union
+from typing import Annotated, Any, Callable, Iterable, Iterator, Literal, Sequence, Tuple, Type, Union
 from langfun.core import component
 import pyglove as pg
-from tqdm import auto as tqdm
+progress_bar: Literal['tqdm', 'console', None] = None
+try:
+  from tqdm import auto as tqdm   # pylint: disable=g-import-not-at-top
+  progress_bar = 'tqdm'
+except ImportError:
+  progress_bar = 'console'
+  tqdm = None
 def with_context_access(func: Callable[..., Any]) -> Callable[..., Any]:
@@ -44,7 +57,7 @@ class RetryError(RuntimeError):
   def __init__(
       self,
       func: Callable[..., Any],
-      errors: list[Exception],
+      errors: list[BaseException],
       wait_intervals: list[int],
   ):
     assert len(errors) == len(wait_intervals) + 1
@@ -99,12 +112,13 @@ class RetryError(RuntimeError):
 def with_retry(
     func: Callable[[Any], Any],
     retry_on_errors: Union[
-        Union[Type[Exception], Tuple[Type[Exception], str]],
-        Sequence[Union[Type[Exception], Tuple[Type[Exception], str]]],
+        Union[Type[BaseException], Tuple[Type[BaseException], str]],
+        Sequence[Union[Type[BaseException], Tuple[Type[BaseException], str]]],
     ],
     max_attempts: int,
     retry_interval: int | tuple[int, int] = (5, 60),
     exponential_backoff: bool = True,
+    max_retry_interval: int = 300,
     seed: int | None = None,
 ) -> Callable[..., Any]:
   """Derives a user function with retry on error.
@@ -120,6 +134,9 @@ def with_retry(
       of the tuple.
     exponential_backoff: If True, exponential wait time will be applied on top
       of the base retry interval.
+    max_retry_interval: The max retry interval in seconds. This is useful when
+      the retry interval is exponential, to avoid the wait time to grow
+      exponentially.
     seed: Random seed to generate retry interval. If None, the seed will be
       determined based on current time.
@@ -127,44 +144,33 @@ def with_retry(
     A function with the same signature of the input function, with the retry
     capability.
   """
-  rand = random if seed is None else random.Random(seed)
-  def _func(*args, **kwargs) -> Any:
-    def base_interval() -> int:
-      if isinstance(retry_interval, tuple):
-        return rand.randint(retry_interval[0], retry_interval[1])
-      else:
-        assert isinstance(retry_interval, int)
-        return retry_interval
-    def next_wait_interval(attempt: int) -> float:
-      if not exponential_backoff:
-        attempt = 1
-      return base_interval() * (2 ** (attempt - 1))
-    wait_interval = None
-    wait_intervals = []
-    errors = []
-    while True:
-      with pg.catch_errors(retry_on_errors) as error_context:
-        return func(*args, **kwargs)
+  def _func(*args, **kwargs):
+    job = Job(
+        func,
+        args,
+        kwargs,
+        retry_on_errors=retry_on_errors,
+        max_attempts=max_attempts,
+        retry_interval=retry_interval,
+        exponential_backoff=exponential_backoff,
+        max_retry_interval=max_retry_interval,
+        seed=seed,
+    )
+    job()
+    if job.error:
+      raise job.error
+    return job.result
-      # Branch when errors are met for retry.
-      errors.append(error_context.error)
-      if len(errors) < max_attempts:
-        wait_interval = next_wait_interval(len(errors))
-        wait_intervals.append(wait_interval)
+  return _func
-        pg.logging.warning(
-            f'Calling {func!r} encountered {error_context.error!r} '
-            f'(attempts={len(errors)}), retrying in {wait_interval} seconds...'
-        )
-        time.sleep(wait_interval)
-      else:
-        raise RetryError(func, errors, wait_intervals)
+class RetryEntry(pg.Object):
+  """Retry entry."""
-  return _func
+  call_interval: float
+  error: BaseException | None = None
+  wait_interval: float = 0.
 def concurrent_execute(
@@ -174,13 +180,15 @@ def concurrent_execute(
     executor: Union[concurrent.futures.ThreadPoolExecutor, str, None] = None,
     max_workers: int = 32,
     retry_on_errors: Union[
-        Union[Type[Exception], Tuple[Type[Exception], str]],
-        Sequence[Union[Type[Exception], Tuple[Type[Exception], str]]],
+        Union[Type[BaseException], Tuple[Type[BaseException], str]],
+        Sequence[Union[Type[BaseException], Tuple[Type[BaseException], str]]],
         None,
     ] = None,
     max_attempts: int = 5,
     retry_interval: int | tuple[int, int] = (5, 60),
     exponential_backoff: bool = True,
+    max_retry_interval: int = 300,
+    return_jobs: bool = False,
 ) -> list[Any]:
   """Executes a function concurrently under current component context.
@@ -201,31 +209,55 @@ def concurrent_execute(
       of the tuple.
     exponential_backoff: If True, exponential wait time will be applied on top
       of the base retry interval.
+    max_retry_interval: The max retry interval in seconds. This is useful when
+      the retry interval is exponential, to avoid the wait time to grow
+      exponentially.
+    return_jobs: If True, return a list of `Job` objects. Otherwise, return a
+      list of outputs.
   Returns:
     A list of ouputs. Each is the return value of `func` based on the input
       value. Order is preserved.
   """
-  if retry_on_errors is not None:
-    func = with_retry(
-        func,
-        retry_on_errors,
-        max_attempts=max_attempts,
-        retry_interval=retry_interval,
-        exponential_backoff=exponential_backoff,
+  jobs = []
+  for inputs in parallel_inputs:
+    jobs.append(
+        Job(
+            func,
+            (inputs,),
+            retry_on_errors=retry_on_errors,
+            max_attempts=max_attempts,
+            retry_interval=retry_interval,
+            exponential_backoff=exponential_backoff,
+            max_retry_interval=max_retry_interval,
+        )
     )
   # NOTE(daiyip): when executor is not specified and max_worker is 1,
   # we don't need to create a executor pool. Instead, the inputs will be
   # processed by the user function in sequence within the current thread.
   if executor is None and max_workers == 1:
-    return [func(i) for i in parallel_inputs]
+    for job in jobs:
+      job()
+      if job.error:
+        raise job.error
+    return jobs if return_jobs else [job.result for job in jobs]
   shutdown_after_finish = executor is None
   executor = _executor_pool.executor_from(executor, max_workers=max_workers)
   try:
-    return list(executor.map(with_context_access(func), parallel_inputs))
+    executed_jobs = list(
+        executor.map(
+            lambda job: job(), [with_context_access(job) for job in jobs]
+        )
+    )
+    for job in executed_jobs:
+      if job.error:
+        raise job.error
+    return (
+        executed_jobs if return_jobs else [job.result for job in executed_jobs]
+    )
   finally:
     if shutdown_after_finish:
       # Do not wait threads to finish if they are timed out.
@@ -237,36 +269,139 @@ class Job:
   """Thread pool job."""
   func: Callable[[Any], Any]
-  arg: Any
+  args: Sequence[Any] = ()
+  kwargs: Mapping[str, Any] = dataclasses.field(default_factory=dict)
+  _: dataclasses.KW_ONLY
   result: Any = pg.MISSING_VALUE
-  error: Exception | None = None
-  start_time: float | None = None
-  end_time: float | None = None
+  error: Annotated[
+      BaseException | None,
+      'The non-retryable error encountered during the job execution.',
+  ] = None
+  retry_entries: Annotated[
+      Sequence[RetryEntry], 'Records of retry attempts.'
+  ] = dataclasses.field(default_factory=list)
+  retry_on_errors: Annotated[
+      Sequence[Type[BaseException] | str],
+      (
+          'A sequence of exception types or tuples of exception type and error '
+          'messages (described in regular expression) as the desired exception '
+          'types to retry.'
+      ),
+  ] = ()
+  max_attempts: Annotated[
+      int, 'Max number of attempts if an error to retry is encountered.'
+  ] = 5
+  retry_interval: Annotated[
+      int | tuple[int, int],
+      (
+          'The (base) retry interval in seconds. If a tuple, the retry '
+          'interval will be randomly chosen between the first and the second '
+          'element of the tuple.'
+      ),
+  ] = (5, 60)
+  exponential_backoff: Annotated[
+      bool,
+      (
+          'If True, exponential wait time will be applied on top of the base '
+          'retry interval.'
+      ),
+  ] = True
+  max_retry_interval: Annotated[
+      int,
+      (
+          'The max retry interval in seconds. This is useful when the retry '
+          'interval is exponential, to avoid the wait time to grow '
+          'exponentially.'
+      ),
+  ] = 300
+  seed: Annotated[
+      int | None,
+      (
+          'Random seed to generate retry interval. If None, the seed will be'
+          ' determined based on current time.'
+      ),
+  ] = None
+  timeit: pg.object_utils.TimeIt = dataclasses.field(
+      default_factory=lambda: pg.object_utils.TimeIt('job')
+  )
+  @property
+  def elapse(self) -> float:
+    """Returns the running time in seconds since the job get started."""
+    return self.timeit.elapse
+  def _retry_call(self) -> 'Job':
+    """Retries func call on args."""
+    rand = random if self.seed is None else random.Random(self.seed)
+    def base_interval() -> int:
+      if isinstance(self.retry_interval, tuple):
+        return rand.randint(*self.retry_interval)
+      else:
+        assert isinstance(self.retry_interval, int)
+        return self.retry_interval
+    def next_wait_interval(attempt: int) -> float:
+      if not self.exponential_backoff:
+        attempt = 1
+      return min(
+          self.max_retry_interval, base_interval() * (2 ** (attempt - 1))
+      )
+    retry_entries = []
+    wait_interval = 0
+    while True:
+      with pg.catch_errors(self.retry_on_errors) as error_context:
+        begin_time = time.time()
+        self.result = self.func(*self.args, **self.kwargs)
+      end_time = time.time()
+      retry_entries.append(RetryEntry(
+          call_interval=end_time - begin_time,
+          wait_interval=wait_interval,
+          error=error_context.error,
+      ))
+      if error_context.error is None:
+        self.retry_entries = retry_entries
+        return self
+      # Branch when errors are met for retry.
+      if len(retry_entries) < self.max_attempts:
+        wait_interval = next_wait_interval(len(retry_entries))
+        pg.logging.warning(
+            f'Calling {self.func!r} encountered {error_context.error!r} '
+            f'(attempts={len(retry_entries)}), retrying in '
+            f'{wait_interval} seconds...'
+        )
-  def __call__(self) -> Any:
-    self.start_time = time.time()
+        time.sleep(wait_interval)
+      else:
+        errors = [e.error for e in retry_entries]
+        # First wait interval is 0.
+        wait_intervals = [e.wait_interval for e in retry_entries[1:]]
+        raise RetryError(self.func, errors, wait_intervals)
+  def __call__(self) -> 'Job':
+    if getattr(self, '_has_call', False):
+      raise ValueError('Job can only be called once.')
+    self._has_call = True
     try:
-      self.result = self.func(self.arg)
-      return self.result
-    except Exception as e:  # pylint: disable=broad-exception-caught
+      with self.timeit:
+        if self.retry_on_errors:
+          return self._retry_call()
+        self.result = self.func(*self.args, **self.kwargs)
+    except BaseException as e:  # pylint: disable=broad-exception-caught
       self.error = e
-      return e
-    finally:
-      self.end_time = time.time()
+    return self
-  def mark_canceled(self, error: Exception) -> None:
+  def mark_canceled(self, error: BaseException) -> None:
     """Marks the job as canceled."""
+    self.timeit.end(error)
     self.error = error
-    self.end_time = time.time()
-  @property
-  def elapse(self) -> float:
-    """Returns the running time in seconds since the job get started."""
-    if self.start_time is None:
-      return 0.0
-    if self.end_time is None:
-      return time.time() - self.start_time
-    return self.end_time - self.start_time
 @dataclasses.dataclass
@@ -276,9 +411,12 @@ class Progress:
   _succeeded: int = 0
   _failed: int = 0
-  _last_error: Exception | None = None
+  _last_error: BaseException | None = None
   _total_duration: float = 0.0
   _job: Job | None = None
+  _timeit_summary: pg.object_utils.TimeIt.StatusSummary = dataclasses.field(
+      default_factory=pg.object_utils.TimeIt.StatusSummary
+  )
   @property
   def succeeded(self) -> int:
@@ -296,7 +434,7 @@ class Progress:
     return self.succeeded + self.failed
   @property
-  def last_error(self) -> Exception | None:
+  def last_error(self) -> BaseException | None:
     """Returns last error."""
     return self._last_error
@@ -326,6 +464,28 @@ class Progress:
       return 0.0
     return self._total_duration / self.completed
+  @property
+  def timeit_summary(self) -> pg.object_utils.TimeIt.StatusSummary:
+    """Returns the aggregated summary for each `pg.timeit`."""
+    return self._timeit_summary
+  def timeit_summary_str(self) -> str | None:
+    if not self.timeit_summary:
+      return None
+    return ', '.join([
+        '%s (%.2fs, %d/%d)' % (
+            k.lstrip('job.'), v.avg_duration, v.num_ended, v.num_started
+        ) for k, v in self.timeit_summary.breakdown.items() if k != 'job'
+    ])
+  def last_error_str(self) -> str | None:
+    if self.last_error is None:
+      return None
+    error_text = repr(self.last_error)
+    if len(error_text) >= 64:
+      error_text = error_text[:64] + '...'
+    return error_text
   def update(self, job: Job) -> None:
     """Mark a job as completed."""
     self._job = job
@@ -335,6 +495,7 @@ class Progress:
       self._failed += 1
       self._last_error = job.error
     self._total_duration += job.elapse
+    self._timeit_summary.aggregate(job.timeit.status())
 class ProgressBar:
@@ -356,17 +517,17 @@ class ProgressBar:
     label: str | None
     total: int
     color: str | None = None
-    postfix: dict[str, str] | None = None
+    status: dict[str, Any] | None = None
   @dataclasses.dataclass
   class Update:
     """Progress bar update."""
     bar_id: int
     delta: int
-    postfix: Union[dict[str, str], str, None] = None
+    status: Union[dict[str, Any], str, None] = None
     color: str | None = None
-  _progress_bars: dict[int, tqdm.tqdm] = {}
+  _progress_bars: dict[int, '_ProgressControl'] = {}
   _install_requests: list[tuple[int, Settings]] = []
   _updates: collections.deque[Update] = collections.deque()
   _uninstall_requests: list[int] = []
@@ -378,11 +539,11 @@ class ProgressBar:
       label: str | None,
       total: int,
       color: str | None = None,
-      postfix: dict[str, str] | None = None,
+      status: dict[str, Any] | None = None,
       ) -> int:
     """Installs a progress bar and returns a reference id."""
     with cls._lock:
-      settings = ProgressBar.Settings(label, total, color, postfix)
+      settings = ProgressBar.Settings(label, total, color, status)
       bar_id = id(settings)
       cls._install_requests.append((bar_id, settings))
       return bar_id
@@ -392,15 +553,17 @@ class ProgressBar:
       cls,
       bar_id: int,
       delta: int = 0,
-      postfix: Union[dict[str, str], str, None] = None,
+      status: Union[dict[str, Any], str, None] = None,
       color: str | None = None,
       refresh: bool = True,
       ) -> None:
     """Report the progress for a label."""
+    if status is not None and not isinstance(status, (str, dict)):
+      raise ValueError(f'Unsupported status: {status}')
     with cls._lock:
       cls._updates.append(
           ProgressBar.Update(
-              bar_id=bar_id, delta=delta, postfix=postfix, color=color,
+              bar_id=bar_id, delta=delta, status=status, color=color,
           )
       )
     if refresh:
@@ -422,11 +585,11 @@ class ProgressBar:
       # Process install requests.
       if cls._install_requests:
         for bar_id, settings in cls._install_requests:
-          cls._progress_bars[bar_id] = tqdm.tqdm(
+          cls._progress_bars[bar_id] = _progress_control(
               total=settings.total,
-              desc=settings.label,
-              colour=settings.color,
-              postfix=settings.postfix)
+              label=settings.label,
+              color=settings.color,
+              status=settings.status)
         cls._install_requests.clear()
       # Process updates.
@@ -441,15 +604,11 @@ class ProgressBar:
         if update.delta > 0:
           bar.update(update.delta)
-        if isinstance(update.postfix, str):
-          bar.set_postfix_str(update.postfix, refresh=False)
-        elif isinstance(update.postfix, dict):
-          bar.set_postfix(update.postfix, refresh=False)
-        elif update.postfix is not None:
-          raise ValueError(f'Unsupported postfix: {update.postfix}')
+        if update.status is not None:
+          bar.set_status(update.status)
         if update.color is not None:
-          bar.colour = update.color
+          bar.set_color(update.color)
         updated_bars.add(bar)
       # Refresh each updated bar just once.
@@ -459,7 +618,9 @@ class ProgressBar:
       # Process uninstall requests.
       if cls._uninstall_requests:
         for bar_id in cls._uninstall_requests:
-          cls._progress_bars.pop(bar_id, None)
+          bar = cls._progress_bars.pop(bar_id, None)
+          if bar is not None:
+            bar.close()
         cls._uninstall_requests.clear()
@@ -486,17 +647,18 @@ def concurrent_map(
     status_fn: Callable[[Progress], dict[str, Any]] | None = None,
     timeout: int | None = None,
     silence_on_errors: Union[
-        Type[Exception], Tuple[Type[Exception], ...], None
+        Type[BaseException], Tuple[Type[BaseException], ...], None
     ] = Exception,
     retry_on_errors: Union[
-        Type[Exception],
-        Tuple[Type[Exception], ...],
+        Type[BaseException],
+        Tuple[Type[BaseException], ...],
         None,
     ] = None,
     max_attempts: int = 5,
     retry_interval: int | tuple[int, int] = (5, 60),
     exponential_backoff: bool = True,
-) -> Iterator[tuple[Any, Any, Exception | None]]:
+    return_jobs: bool = False,
+) -> Iterator[Any]:
   """Maps inputs to outptus via func concurrently under current context.
   Args:
@@ -539,9 +701,10 @@ def concurrent_map(
       of the tuple.
     exponential_backoff: If True, exponential wait time will be applied on top
       of the base retry interval.
+    return_jobs: If True, the returned iterator will emit `Job` objects.
   Yields:
-    An iterator of (input, output, error).
+    An iterator of (input, output, error) or Job object.
   Raises:
     Exception: Errors that are not in `silence_on_errors` or `retry_on_errors`,
@@ -551,15 +714,6 @@ def concurrent_map(
   """
     # Internal usage logging.
-  if retry_on_errors:
-    func = with_retry(
-        func,
-        retry_on_errors,
-        max_attempts=max_attempts,
-        retry_interval=retry_interval,
-        exponential_backoff=exponential_backoff,
-    )
   status_fn = status_fn or (lambda p: {   # pylint: disable=g-long-lambda
       'Succeeded': '%.2f%% (%d/%d)' % (
           p.success_rate * 100, p.succeeded, p.completed),
@@ -574,7 +728,14 @@ def concurrent_map(
   pending_futures = []
   total = 0
   for inputs in parallel_inputs:
-    job = Job(func, inputs)
+    job = Job(
+        func,
+        (inputs,),
+        retry_on_errors=retry_on_errors,
+        max_attempts=max_attempts,
+        retry_interval=retry_interval,
+        exponential_backoff=exponential_backoff,
+    )
     future = executor.submit(
         with_context_access(job),
     )
@@ -596,14 +757,14 @@ def concurrent_map(
     if show_progress:
       status = status_fn(progress)
       status.update({
-          'AvgDuration': '%.2f seconds' % progress.avg_duration
+          'AvgDuration': '%.2fs' % progress.avg_duration
       })
       if progress.last_error is not None:
-        error_text = repr(progress.last_error)
-        if len(error_text) >= 64:
-          error_text = error_text[:64] + '...'
-        status['LastError'] = error_text
-      ProgressBar.update(bar_id, delta=1, postfix=status)
+        status['LastError'] = progress.last_error_str()
+      if progress.timeit_summary:
+        status['TimeIt'] = progress.timeit_summary_str()
+      ProgressBar.update(bar_id, delta=1, status=status)
   try:
     if ordered:
@@ -627,7 +788,7 @@ def concurrent_map(
                 silence_on_errors and isinstance(job.error, silence_on_errors)):
               raise job.error   # pylint: disable=g-doc-exception
-            yield job.arg, job.result, job.error
+            yield job if return_jobs else job.args[0], job.result, job.error
             progress.update(job)
             update_progress_bar(progress)
             ProgressBar.refresh()
@@ -648,7 +809,7 @@ def concurrent_map(
             if job.error is not None and not (
                 silence_on_errors and isinstance(job.error, silence_on_errors)):
               raise job.error   # pylint: disable=g-doc-exception
-            yield job.arg, job.result, job.error
+            yield job if return_jobs else job.args[0], job.result, job.error
             progress.update(job)
             update_progress_bar(progress)
             completed_batch.add(future)
@@ -671,7 +832,7 @@ def concurrent_map(
                       and isinstance(job.error, silence_on_errors)):
                 raise job.error  # pylint: disable=g-doc-exception
-            yield job.arg, job.result, job.error
+            yield job.args[0], job.result, job.error
             progress.update(job)
             update_progress_bar(progress)
           else:
@@ -729,5 +890,141 @@ class ExecutorPool:
       raise ValueError(f'Unsupported value: {maybe_executor}.')
+class _ProgressControl(pg.Object):
+  """Abstract progress control."""
+  # Disable symbolic comparison so the hash is based on object address.
+  use_symbolic_comparison = False
+  total: int
+  label: str | None
+  color: str | None
+  status: str | dict[str, Any] | None
+  def set_color(self, color: str | None):
+    with pg.notify_on_change(False):
+      self.rebind(color=color)
+  def set_status(self, status: str | dict[str, Any] | None):
+    with pg.notify_on_change(False):
+      self.rebind(status=status)
+  @abc.abstractmethod
+  def update(self, delta):
+    """Update progress."""
+  @abc.abstractmethod
+  def refresh(self) -> None:
+    """Refresh progress bar."""
+  @abc.abstractmethod
+  def close(self) -> None:
+    """Close progress bar."""
+class _TqdmProgressControl(_ProgressControl):
+  """Tqdm-based progress control."""
+  def _on_bound(self):
+    super()._on_bound()
+    assert tqdm is not None
+    self._tqdm = tqdm.tqdm(
+        total=self.total,
+        desc=self.label,
+        colour=self.color,
+        postfix=self.status,
+    )
+  def update(self, delta: int) -> None:
+    self._tqdm.update(delta)
+  def refresh(self):
+    self._tqdm.set_description(self.label, refresh=False)
+    if isinstance(self.status, str):
+      self._tqdm.set_postfix_str(self.status, refresh=False)
+    else:
+      self._tqdm.set_postfix(self.status, refresh=False)
+    self._tqdm.colour = self.color
+    self._tqdm.refresh()
+  def close(self):
+    self._tqdm.close()
+class _ConsoleProgressControl(_ProgressControl):
+  """Simple progress control by printing the status to the console."""
+  def _on_bound(self):
+    super()._on_bound()
+    self._progress = 0
+  def update(self, delta: int) -> None:
+    self._progress += delta
+  def refresh(self):
+    s = io.StringIO()
+    if self.label is not None:
+      s.write(pg.colored(self.label, 'red', styles=['bold']))
+      s.write(': ')
+    s.write(
+        pg.colored(
+            '%d%% (%d/%d)' %
+            (
+                self._progress * 100 // self.total,
+                self._progress,
+                self.total,
+            ),
+            color=self.color or 'green'
+        )
+    )
+    if self.status is not None:
+      status = repr(self.status) if isinstance(
+          self.status, dict) else self.status
+      s.write(f' : {status}')
+    sys.stderr.write(s.getvalue() + '\n')
+  def close(self):
+    sys.stderr.flush()
+class _NoopProgressControl(_ProgressControl):
+  """No-op progress control."""
+  def update(self, delta: int) -> None:
+    pass
+  def refresh(self) -> None:
+    pass
+  def close(self) -> None:
+    pass
+def _progress_control(
+    total: int,
+    label: str | None,
+    color: str | None,
+    status: str | dict[str, Any] | None,
+) -> _ProgressControl:
+  """Creates a process control."""
+  if progress_bar == 'tqdm':
+    if not tqdm:
+      raise RuntimeError(
+          'Please install package "tqdm" to use `tqdm` progress bar.'
+      )
+    return _TqdmProgressControl(total, label, color, status)
+  elif progress_bar == 'console':
+    return _ConsoleProgressControl(total, label, color, status)
+  elif progress_bar is None:
+    return _NoopProgressControl(total, label, color, status)
+  else:
+    raise ValueError(f'Unsupported progress bar type: {progress_bar}')
+def get_executor(
+    resource_id: str,
+    max_workers: int | None = None) -> concurrent.futures.ThreadPoolExecutor:
+  """Gets a thread pool executor associated with a resource id."""
+  return _executor_pool.get(resource_id, max_workers)
 # The global executor pool based on resource IDs.
 _executor_pool = ExecutorPool()

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl