langfun 0.1.2.dev202511030805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)
  1. langfun/core/agentic/action.py +76 -9
  2. langfun/core/agentic/action_eval.py +9 -2
  3. langfun/core/async_support.py +32 -3
  4. langfun/core/coding/python/correction.py +19 -9
  5. langfun/core/coding/python/execution.py +14 -12
  6. langfun/core/coding/python/generation.py +21 -16
  7. langfun/core/coding/python/sandboxing.py +23 -3
  8. langfun/core/component.py +42 -3
  9. langfun/core/concurrent.py +70 -6
  10. langfun/core/console.py +1 -1
  11. langfun/core/data/conversion/anthropic.py +10 -3
  12. langfun/core/data/conversion/gemini.py +9 -2
  13. langfun/core/data/conversion/openai.py +17 -7
  14. langfun/core/eval/base.py +46 -42
  15. langfun/core/eval/matching.py +5 -2
  16. langfun/core/eval/patching.py +3 -3
  17. langfun/core/eval/scoring.py +4 -3
  18. langfun/core/eval/v2/checkpointing.py +30 -4
  19. langfun/core/eval/v2/evaluation.py +59 -13
  20. langfun/core/eval/v2/example.py +22 -11
  21. langfun/core/eval/v2/experiment.py +51 -8
  22. langfun/core/eval/v2/metric_values.py +23 -3
  23. langfun/core/eval/v2/metrics.py +33 -4
  24. langfun/core/eval/v2/progress.py +9 -1
  25. langfun/core/eval/v2/reporting.py +15 -1
  26. langfun/core/eval/v2/runners.py +27 -7
  27. langfun/core/langfunc.py +45 -130
  28. langfun/core/language_model.py +88 -10
  29. langfun/core/llms/anthropic.py +27 -2
  30. langfun/core/llms/azure_openai.py +29 -17
  31. langfun/core/llms/cache/base.py +22 -2
  32. langfun/core/llms/cache/in_memory.py +48 -7
  33. langfun/core/llms/compositional.py +25 -1
  34. langfun/core/llms/deepseek.py +29 -1
  35. langfun/core/llms/fake.py +32 -1
  36. langfun/core/llms/gemini.py +9 -1
  37. langfun/core/llms/google_genai.py +29 -1
  38. langfun/core/llms/groq.py +27 -2
  39. langfun/core/llms/llama_cpp.py +22 -3
  40. langfun/core/llms/openai.py +29 -1
  41. langfun/core/llms/openai_compatible.py +18 -6
  42. langfun/core/llms/rest.py +12 -1
  43. langfun/core/llms/vertexai.py +39 -6
  44. langfun/core/logging.py +1 -1
  45. langfun/core/mcp/client.py +77 -22
  46. langfun/core/mcp/session.py +90 -10
  47. langfun/core/mcp/tool.py +83 -23
  48. langfun/core/memory.py +1 -0
  49. langfun/core/message.py +75 -11
  50. langfun/core/message_test.py +9 -0
  51. langfun/core/modalities/audio.py +21 -1
  52. langfun/core/modalities/image.py +19 -1
  53. langfun/core/modalities/mime.py +54 -4
  54. langfun/core/modalities/pdf.py +19 -1
  55. langfun/core/modalities/video.py +21 -1
  56. langfun/core/modality.py +66 -5
  57. langfun/core/natural_language.py +1 -1
  58. langfun/core/sampling.py +4 -4
  59. langfun/core/structured/completion.py +32 -37
  60. langfun/core/structured/description.py +54 -50
  61. langfun/core/structured/function_generation.py +29 -12
  62. langfun/core/structured/mapping.py +70 -15
  63. langfun/core/structured/parsing.py +90 -74
  64. langfun/core/structured/parsing_test.py +0 -3
  65. langfun/core/structured/querying.py +201 -130
  66. langfun/core/structured/schema.py +70 -10
  67. langfun/core/structured/schema_generation.py +33 -14
  68. langfun/core/structured/scoring.py +45 -34
  69. langfun/core/structured/tokenization.py +24 -9
  70. langfun/core/subscription.py +2 -2
  71. langfun/core/template.py +139 -40
  72. langfun/core/template_test.py +40 -0
  73. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
  74. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +77 -77
  75. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
  76. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
  77. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py CHANGED
@@ -478,7 +478,7 @@ class UsageNotAvailable(LMSamplingUsage):
 
 
 class LMSamplingResult(pg.Object):
-  """Language model response."""
+  """The result from a language model sampling."""
 
   samples: Annotated[
       list[LMSample],
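
For orientation, here is a minimal sketch of how a sampling result is typically consumed. It assumes the `lm.sample()` API and the `Echo` fake model documented later in this diff, and that each `LMSample` exposes its message via a `response` field:

```python
# Sketch: consuming an LMSamplingResult (sample/response shape assumed).
import langfun as lf

lm = lf.llms.Echo()               # Fake model; echoes the prompt back.
results = lm.sample(['1 + 1 ='])  # One LMSamplingResult per prompt.
sample = results[0].samples[0]    # First LMSample from `samples`.
print(sample.response.text)       # -> '1 + 1 ='
```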
@@ -681,13 +681,91 @@ class LMDebugMode(enum.IntFlag):
 
 
 class LanguageModel(component.Component):
-  """Interface of a language model.
-
-  Language models are at the center of LLM-based agents. ``LanguageModel``
-  is the interface to interact with different language modles.
-
-  In langfun, users can use different language models with the same agents,
-  allowing fast prototype, as well as side-by-side comparisons.
+  """Interface for a language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` and `sample` methods generate text
+    completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options such as `temperature` and `max_tokens` can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """
 
   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -1448,7 +1526,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency concurrency based on the rate limits."""
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
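
To make the estimate concrete, here is a small sketch of the arithmetic the NOTE describes, under its stated assumptions of roughly 250 tokens and one second per request; the function name and rounding choices are illustrative, not langfun's actual implementation:

```python
# Illustrative sketch of rate-limit-based concurrency estimation.
def estimate_max_concurrency(
    max_tokens_per_minute: int | None,
    max_requests_per_minute: int | None,
    average_tokens_per_request: int = 250,
) -> int | None:
  candidates = []
  if max_tokens_per_minute:
    # At ~1s per request, one concurrent slot sends ~60 requests/minute,
    # i.e. ~60 * average_tokens_per_request tokens/minute.
    candidates.append(
        max_tokens_per_minute // (60 * average_tokens_per_request))
  if max_requests_per_minute:
    # One slot issues ~60 requests per minute.
    candidates.append(max_requests_per_minute // 60)
  if not candidates:
    return None  # No rate-limit information available.
  return max(1, min(candidates))


# 600k tokens/min caps concurrency at 40 slots; 3k requests/min at 50 -> 40.
assert estimate_max_concurrency(600_000, 3_000) == 40
```

Taking the minimum of the two caps keeps concurrency within whichever rate limit binds first.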
@@ -1521,7 +1599,7 @@ class _ConcurrencyControl:
 
 
 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage sumary."""
+  """Usage summary."""
 
   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
langfun/core/llms/anthropic.py CHANGED
@@ -650,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic LLMs (Claude) through REST APIs.
+  """Anthropic Claude models.
 
-  See https://docs.anthropic.com/claude/reference/messages_post
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+  ```
+  2. Via environment variable `ANTHROPIC_API_KEY`.
+
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """
 
   model: pg.typing.Annotated[
langfun/core/llms/azure_openai.py CHANGED
@@ -23,23 +23,35 @@ import pyglove as pg
 
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI model service.
-
-  This service interacts with the Azure OpenAI API to generate chat completions.
-  It uses the deployment_name and API version to construct the endpoint, and
-  authenticates using an API key provided via parameter or the
-  AZURE_OPENAI_API_KEY environment variable.
-
-  Example:
-    lm = AzureOpenAI(
-        model='gpt-4o',
-        deployment_name='gpt-4o',
-        api_version='2024-08-01-preview',
-        azure_endpoint='https://trackname.openai.azure.com/',
-        api_key='token'
-    )
-    response = lf.query(prompt="what the capital of France", lm=lm)
-    print(response)
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. Via environment variable `AZURE_OPENAI_API_KEY`.
   """
 
   deployment_name: Annotated[
langfun/core/llms/cache/base.py CHANGED
@@ -22,13 +22,33 @@ import langfun.core as lf
 
 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """LM cache entry."""
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None
 
 
 class LMCacheBase(lf.LMCache):
-  """The common LMCache base."""
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """
 
   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
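
To make the subclass contract concrete, here is a minimal sketch of a dict-backed cache. The hook signatures are inferred from the docstring above rather than copied from langfun's source, so treat them as assumptions:

```python
# Sketch of an LMCacheBase subclass (hook signatures assumed, not verified):
# a cache that keeps entries in a plain in-process dict.
from typing import Any

from langfun.core.llms.cache import base


class DictCache(base.LMCacheBase):
  """Toy cache storing (model_id, key) -> LMCacheEntry in a dict."""

  def _on_bound(self):
    super()._on_bound()
    self._store: dict[Any, base.LMCacheEntry] = {}

  def _get(self, model_id: str, key: Any) -> base.LMCacheEntry | None:
    return self._store.get((model_id, key))

  def _put(self, model_id: str, key: Any, entry: base.LMCacheEntry) -> None:
    self._store[(model_id, key)] = entry

  def _delete(self, model_id: str, key: Any) -> None:
    self._store.pop((model_id, key), None)
```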
langfun/core/llms/cache/in_memory.py CHANGED
@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """In memory cache."""
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+  ```python
+  import langfun as lf
+  # Using in-memory cache without persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+  r = lm.query('hello')
+
+  # Using in-memory cache with persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+  r = lm.query('hello')
+  lm.cache.save()
+  ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs under the context.
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-  If LMs under the context manager have explicitly specified cache, they will
-  use their own cache. Otherwise they will use the cache created by the context
-  manager.
+  ```python
+  import langfun as lf
+  with lf.lm_cache('my_cache.json'):
+    # LMs created here will use 'my_cache.json' for caching.
+    lm = lf.llms.GeminiPro()
+    print(lm.query('hello'))
+  ```
 
   Args:
-    filename: If not None, JSON file to load and save the cache.
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-    A cache object created.
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:
langfun/core/llms/compositional.py CHANGED
@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """Random choice of a list of LLM models."""
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+  ```python
+  import langfun as lf
+
+  lm = lf.llms.RandomChoice([
+      lf.llms.GeminiPro(),
+      lf.llms.Gpt4(),
+  ])
+
+  # This call will be handled by either GeminiPro or Gpt4, chosen randomly.
+  r = lm.sample('hello')
+  ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],
langfun/core/llms/deepseek.py CHANGED
@@ -94,7 +94,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 # Reference: https://api-docs.deepseek.com/
 @lf.use_init_args(['model'])
 class DeepSeek(openai_compatible.OpenAIChatCompletionAPI):
-  """DeepSeek model."""
+  """DeepSeek models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call DeepSeek-V3 using API key from environment variable
+  # 'DEEPSEEK_API_KEY'.
+  lm = lf.llms.DeepSeekV3()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The DeepSeek API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.DeepSeekV3(api_key='MY_API_KEY')
+  ```
+  2. Via environment variable `DEEPSEEK_API_KEY`.
+
+  **References:**
+
+  * https://api-docs.deepseek.com/
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/fake.py CHANGED
@@ -20,7 +20,38 @@ import langfun.core as lf
 
 
 class Fake(lf.LanguageModel):
-  """The base class for all fake language models."""
+  """Base class for fake language models, used for testing.
+
+  Fake models simulate the behavior of real language models but return
+  pre-defined responses, making them useful for testing prompts,
+  data processing logic, and agent behavior without incurring API costs
+  or relying on external services.
+
+  Langfun provides several fake models:
+  * `lf.llms.Echo`: Echoes the prompt back as the response.
+  * `lf.llms.StaticResponse`: Returns a fixed, pre-defined response for
+    any prompt.
+  * `lf.llms.StaticMapping`: Returns responses based on a prompt-to-response
+    dictionary.
+  * `lf.llms.StaticSequence`: Returns responses from a pre-defined sequence
+    in order.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Use Echo model for testing
+  lm = lf.llms.Echo()
+  response = lm('hello')
+  assert response.text == 'hello'
+
+  # Use StaticResponse model
+  lm = lf.llms.StaticResponse('world')
+  response = lm('hello')
+  assert response.text == 'world'
+  ```
+  """
 
   def _score(self, prompt: lf.Message| list[lf.Message],
              completions: list[lf.Message]):
langfun/core/llms/gemini.py CHANGED
@@ -696,7 +696,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """Language models provided by Google GenAI."""
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/google_genai.py CHANGED
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """Language models provided by Google GenAI."""
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+  ```
+  2. Via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/groq.py CHANGED
@@ -260,9 +260,34 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Groq(openai_compatible.OpenAIChatCompletionAPI):
-  """Groq LLMs through REST APIs (OpenAI compatible).
+  """Groq models.
 
-  See https://platform.openai.com/docs/api-reference/chat
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+  ```
+  2. Via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """
 
   model: pg.typing.Annotated[
langfun/core/llms/llama_cpp.py CHANGED
@@ -21,10 +21,29 @@ import pyglove as pg
 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
-  """The remote LLaMA C++ model.
+  """LLaMA C++ models served via a remote server.
 
-  The Remote LLaMA C++ models can be launched via
-  https://github.com/ggerganov/llama.cpp/tree/master/examples/server
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If the model name is not specified, the server's default will be used.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
langfun/core/llms/openai.py CHANGED
@@ -1032,7 +1032,35 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class OpenAI(openai_compatible.OpenAIResponsesAPI):
-  """OpenAI model."""
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+  ```
+  2. Via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/openai_compatible.py CHANGED
@@ -24,11 +24,16 @@ import pyglove as pg
 
 @lf.use_init_args(['api_endpoint', 'model'])
 class OpenAIChatCompletionAPI(rest.REST):
-  """Base for OpenAI compatible models based on ChatCompletion API.
+  """Base class for models compatible with OpenAI's Chat Completion API.
 
-  See https://platform.openai.com/docs/api-reference/chat
-  As of 2025-10-23, OpenAI is migrating from ChatCompletion API to Responses
-  API.
+  This class provides a common interface for language models that adhere to
+  the OpenAI Chat Completion API format, which is used by providers like
+  Groq, DeepSeek, and others. It standardizes request formatting and
+  response parsing for these models.
+
+  **References:**
+
+  * https://platform.openai.com/docs/api-reference/chat
   """
 
   model: Annotated[
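
Because `api_endpoint` and `model` are exposed as init args above, this base class can plausibly be pointed at any OpenAI-compatible server directly. A minimal sketch; the endpoint URL and model name below are placeholders, not values from this diff:

```python
# Sketch: targeting a self-hosted OpenAI-compatible server (e.g. vLLM or
# llama.cpp). The endpoint URL and model name are placeholders.
from langfun.core.llms import openai_compatible

lm = openai_compatible.OpenAIChatCompletionAPI(
    api_endpoint='http://localhost:8000/v1/chat/completions',
    model='my-local-model',
)
print(lm('Say hi in one word.'))
```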
@@ -196,9 +201,16 @@ class OpenAIChatCompletionAPI(rest.REST):
 
 
 class OpenAIResponsesAPI(OpenAIChatCompletionAPI):
-  """Base for OpenAI compatible models based on Responses API.
+  """Base class for models compatible with OpenAI's Responses API.
+
+  This class provides a common interface for language models that adhere to
+  the new OpenAI Responses API format. It standardizes request formatting
+  and response parsing for these models, including handling instructions
+  (system messages) and structured outputs.
+
+  **References:**
 
-  https://platform.openai.com/docs/api-reference/responses/create
+  * https://platform.openai.com/docs/api-reference/responses
   """
 
   def _request_args(
langfun/core/llms/rest.py CHANGED
@@ -22,7 +22,18 @@ import requests
 
 
 class REST(lf.LanguageModel):
-  """REST-based language model."""
+  """Base class for language models accessed via REST APIs.
+
+  The `REST` class provides a foundation for implementing language models
+  that are accessed through RESTful endpoints. It handles the details of
+  making HTTP requests, managing sessions, and handling common errors like
+  timeouts and connection issues.
+
+  Subclasses need to implement the `request` and `result` methods to
+  convert Langfun messages to API-specific request formats and to parse
+  API responses back into `LMSamplingResult` objects. They also need to
+  provide the `api_endpoint` and can override `headers` for authentication.
+  """
 
   api_endpoint: Annotated[
       str,
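
To illustrate that contract, here is a minimal sketch of a `REST` subclass for a hypothetical JSON completion API; the `request`/`result` signatures and the `headers` override are assumptions based on the docstring above, not verified against langfun's source:

```python
# Illustrative sketch only: a toy REST-backed model for a hypothetical
# endpoint that accepts {'prompt': ...} and returns {'text': ...}.
import langfun as lf
from langfun.core.llms import rest


class MyRESTModel(rest.REST):
  """Toy model for a hypothetical completion endpoint."""

  @property
  def headers(self) -> dict[str, str]:
    # Illustrative auth header; real subclasses read a configured api_key.
    return {'Authorization': 'Bearer MY_API_KEY'}

  def request(self, prompt: lf.Message, sampling_options) -> dict:
    # Convert the Langfun message into the API's JSON request body.
    return {
        'prompt': prompt.text,
        'temperature': sampling_options.temperature,
    }

  def result(self, json: dict) -> lf.LMSamplingResult:
    # Parse the API's JSON response back into an LMSamplingResult.
    return lf.LMSamplingResult(
        [lf.LMSample(lf.AIMessage(json['text']))]
    )


lm = MyRESTModel(api_endpoint='https://example.com/v1/complete')
```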