langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501150804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. langfun/__init__.py +20 -2
  2. langfun/core/__init__.py +16 -5
  3. langfun/core/agentic/__init__.py +30 -0
  4. langfun/core/agentic/action.py +854 -0
  5. langfun/core/agentic/action_eval.py +150 -0
  6. langfun/core/agentic/action_eval_test.py +109 -0
  7. langfun/core/agentic/action_test.py +136 -0
  8. langfun/core/coding/python/__init__.py +5 -11
  9. langfun/core/coding/python/correction.py +37 -21
  10. langfun/core/coding/python/correction_test.py +29 -3
  11. langfun/core/coding/python/execution.py +40 -216
  12. langfun/core/coding/python/execution_test.py +29 -89
  13. langfun/core/coding/python/generation.py +21 -11
  14. langfun/core/coding/python/generation_test.py +2 -2
  15. langfun/core/coding/python/parsing.py +108 -193
  16. langfun/core/coding/python/parsing_test.py +2 -105
  17. langfun/core/component.py +63 -2
  18. langfun/core/component_test.py +53 -0
  19. langfun/core/concurrent.py +414 -117
  20. langfun/core/concurrent_test.py +111 -24
  21. langfun/core/console.py +17 -5
  22. langfun/core/console_test.py +17 -0
  23. langfun/core/eval/__init__.py +16 -1
  24. langfun/core/eval/base.py +622 -174
  25. langfun/core/eval/base_test.py +200 -54
  26. langfun/core/eval/matching.py +63 -76
  27. langfun/core/eval/matching_test.py +17 -8
  28. langfun/core/eval/patching.py +130 -0
  29. langfun/core/eval/patching_test.py +170 -0
  30. langfun/core/eval/scoring.py +26 -26
  31. langfun/core/eval/scoring_test.py +19 -2
  32. langfun/core/eval/v2/__init__.py +42 -0
  33. langfun/core/eval/v2/checkpointing.py +380 -0
  34. langfun/core/eval/v2/checkpointing_test.py +228 -0
  35. langfun/core/eval/v2/eval_test_helper.py +136 -0
  36. langfun/core/eval/v2/evaluation.py +725 -0
  37. langfun/core/eval/v2/evaluation_test.py +180 -0
  38. langfun/core/eval/v2/example.py +305 -0
  39. langfun/core/eval/v2/example_test.py +128 -0
  40. langfun/core/eval/v2/experiment.py +1048 -0
  41. langfun/core/eval/v2/experiment_test.py +433 -0
  42. langfun/core/eval/v2/metric_values.py +156 -0
  43. langfun/core/eval/v2/metric_values_test.py +80 -0
  44. langfun/core/eval/v2/metrics.py +357 -0
  45. langfun/core/eval/v2/metrics_test.py +203 -0
  46. langfun/core/eval/v2/progress.py +348 -0
  47. langfun/core/eval/v2/progress_test.py +82 -0
  48. langfun/core/eval/v2/progress_tracking.py +210 -0
  49. langfun/core/eval/v2/progress_tracking_test.py +66 -0
  50. langfun/core/eval/v2/reporting.py +270 -0
  51. langfun/core/eval/v2/reporting_test.py +158 -0
  52. langfun/core/eval/v2/runners.py +488 -0
  53. langfun/core/eval/v2/runners_test.py +334 -0
  54. langfun/core/langfunc.py +4 -17
  55. langfun/core/langfunc_test.py +22 -6
  56. langfun/core/language_model.py +577 -39
  57. langfun/core/language_model_test.py +470 -56
  58. langfun/core/llms/__init__.py +87 -16
  59. langfun/core/llms/anthropic.py +312 -87
  60. langfun/core/llms/anthropic_test.py +71 -3
  61. langfun/core/llms/cache/base.py +21 -2
  62. langfun/core/llms/cache/in_memory.py +13 -0
  63. langfun/core/llms/cache/in_memory_test.py +53 -2
  64. langfun/core/llms/compositional.py +101 -0
  65. langfun/core/llms/compositional_test.py +73 -0
  66. langfun/core/llms/deepseek.py +117 -0
  67. langfun/core/llms/deepseek_test.py +61 -0
  68. langfun/core/llms/fake.py +11 -7
  69. langfun/core/llms/fake_test.py +14 -0
  70. langfun/core/llms/gemini.py +507 -0
  71. langfun/core/llms/gemini_test.py +195 -0
  72. langfun/core/llms/google_genai.py +62 -218
  73. langfun/core/llms/google_genai_test.py +9 -202
  74. langfun/core/llms/groq.py +160 -144
  75. langfun/core/llms/groq_test.py +31 -137
  76. langfun/core/llms/llama_cpp.py +15 -42
  77. langfun/core/llms/llama_cpp_test.py +4 -30
  78. langfun/core/llms/openai.py +395 -203
  79. langfun/core/llms/openai_compatible.py +179 -0
  80. langfun/core/llms/openai_compatible_test.py +495 -0
  81. langfun/core/llms/openai_test.py +30 -395
  82. langfun/core/llms/rest.py +113 -0
  83. langfun/core/llms/rest_test.py +111 -0
  84. langfun/core/llms/vertexai.py +192 -0
  85. langfun/core/llms/vertexai_test.py +52 -0
  86. langfun/core/logging.py +284 -0
  87. langfun/core/logging_test.py +125 -0
  88. langfun/core/message.py +319 -9
  89. langfun/core/message_test.py +190 -13
  90. langfun/core/modalities/__init__.py +6 -2
  91. langfun/core/modalities/audio.py +30 -0
  92. langfun/core/modalities/audio_test.py +63 -0
  93. langfun/core/modalities/image.py +39 -20
  94. langfun/core/modalities/image_test.py +52 -9
  95. langfun/core/modalities/mime.py +206 -29
  96. langfun/core/modalities/mime_test.py +90 -9
  97. langfun/core/modalities/ms_office.py +117 -0
  98. langfun/core/modalities/ms_office_test.py +389 -0
  99. langfun/core/modalities/pdf.py +22 -0
  100. langfun/core/modalities/pdf_test.py +57 -0
  101. langfun/core/modalities/video.py +9 -26
  102. langfun/core/modalities/video_test.py +3 -3
  103. langfun/core/modality.py +26 -3
  104. langfun/core/modality_test.py +2 -2
  105. langfun/core/sampling.py +11 -11
  106. langfun/core/structured/__init__.py +12 -16
  107. langfun/core/structured/completion.py +32 -5
  108. langfun/core/structured/completion_test.py +7 -6
  109. langfun/core/structured/description.py +2 -2
  110. langfun/core/structured/description_test.py +3 -3
  111. langfun/core/structured/function_generation.py +60 -27
  112. langfun/core/structured/function_generation_test.py +72 -2
  113. langfun/core/structured/mapping.py +97 -47
  114. langfun/core/structured/mapping_test.py +90 -2
  115. langfun/core/structured/parsing.py +33 -21
  116. langfun/core/structured/parsing_test.py +53 -9
  117. langfun/core/structured/querying.py +746 -0
  118. langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
  119. langfun/core/structured/schema.py +204 -97
  120. langfun/core/structured/schema_generation.py +1 -1
  121. langfun/core/structured/schema_test.py +130 -29
  122. langfun/core/structured/scoring.py +125 -19
  123. langfun/core/structured/scoring_test.py +30 -0
  124. langfun/core/structured/tokenization.py +64 -0
  125. langfun/core/structured/tokenization_test.py +48 -0
  126. langfun/core/template.py +115 -1
  127. langfun/core/template_test.py +71 -1
  128. langfun/core/templates/conversation.py +9 -0
  129. langfun/core/templates/conversation_test.py +4 -3
  130. langfun/core/templates/selfplay_test.py +10 -2
  131. langfun-0.1.2.dev202501150804.dist-info/METADATA +225 -0
  132. langfun-0.1.2.dev202501150804.dist-info/RECORD +153 -0
  133. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/WHEEL +1 -1
  134. langfun/core/coding/python/errors.py +0 -108
  135. langfun/core/coding/python/errors_test.py +0 -99
  136. langfun/core/coding/python/permissions.py +0 -90
  137. langfun/core/coding/python/permissions_test.py +0 -86
  138. langfun/core/structured/prompting.py +0 -238
  139. langfun/core/text_formatting.py +0 -162
  140. langfun/core/text_formatting_test.py +0 -47
  141. langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
  142. langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
  143. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/LICENSE +0 -0
  144. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/top_level.txt +0 -0

langfun/core/llms/groq_test.py
@@ -11,89 +11,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """Tests for Groq models."""
-
  import os
- from typing import Any
  import unittest
- from unittest import mock
- from langfun.core import modalities as lf_modalities
+ import langfun.core as lf
  from langfun.core.llms import groq
- import pyglove as pg
- import requests
-
-
- def mock_requests_post(url: str, json: dict[str, Any], **kwargs):
-   del url, kwargs
-
-   response = requests.Response()
-   response.status_code = 200
-   response._content = pg.to_json_str({
-       'choices': [{
-           'message': {
-               'content': [{
-                   'type': 'text',
-                   'text': (
-                       f'hello with temperature={json.get("temperature")}, '
-                       f'top_p={json.get("top_p")}, '
-                       f'max_tokens={json.get("max_tokens")}, '
-                       f'stop={json.get("stop")}.'
-                   ),
-               }],
-           }
-       }],
-       'usage': {
-           'prompt_tokens': 2,
-           'completion_tokens': 1,
-           'total_tokens': 3,
-       },
-   }).encode()
-   return response
-
-
- def mock_mm_requests_post(url: str, json: dict[str, Any], **kwargs):
-   del url, kwargs
-   v = json['messages'][0]['content'][0]
-   image = lf_modalities.Image.from_uri(v['image_url'])
-
-   response = requests.Response()
-   response.status_code = 200
-   response._content = pg.to_json_str({
-       'choices': [
-           {
-               'message': {
-                   'content': [{
-                       'type': 'text',
-                       'text': image.uri,
-                   }],
-               }
-           }
-       ],
-       'usage': {
-           'prompt_tokens': 2,
-           'completion_tokens': 1,
-           'total_tokens': 3,
-       },
-   }).encode()
-   return response
-
-
- def mock_requests_post_error(status_code, error_type, error_message):
-   def _mock_requests(url: str, json: dict[str, Any], **kwargs):
-     del url, json, kwargs
-     response = requests.Response()
-     response.status_code = status_code
-     response._content = pg.to_json_str(
-         {
-             'error': {
-                 'type': error_type,
-                 'message': error_message,
-             }
-         }
-     ).encode()
-     return response
-
-   return _mock_requests


  class AuthropicTest(unittest.TestCase):
@@ -101,69 +22,42 @@ class AuthropicTest(unittest.TestCase):
    def test_basics(self):
      self.assertEqual(groq.GroqMistral_8x7B().model_id, 'mixtral-8x7b-32768')
      self.assertEqual(groq.GroqMistral_8x7B().max_concurrency, 16)
+     self.assertEqual(groq.GroqMistral_8x7B().estimate_cost(100, 100), 4.8e-5)
+
+   def test_request_args(self):
+     args = groq.GroqMistral_8x7B()._request_args(
+         lf.LMSamplingOptions(
+             temperature=1.0, stop=['\n'], n=1, random_seed=123,
+             logprobs=True, top_logprobs=True
+         )
+     )
+     self.assertNotIn('logprobs', args)
+     self.assertNotIn('top_logprobs', args)

    def test_api_key(self):
      lm = groq.GroqMistral_8x7B()
      with self.assertRaisesRegex(ValueError, 'Please specify `api_key`'):
-       lm('hi')
-
-     with mock.patch('requests.Session.post') as mock_request:
-       mock_request.side_effect = mock_requests_post
-
-       lm = groq.GroqMistral_8x7B(api_key='fake key')
-       self.assertRegex(lm('hi').text, 'hello.*')
-
-       os.environ['GROQ_API_KEY'] = 'abc'
-       lm = groq.GroqMistral_8x7B()
-       self.assertRegex(lm('hi').text, 'hello.*')
-       del os.environ['GROQ_API_KEY']
-
-   def test_call(self):
-     with mock.patch('requests.Session.post') as mock_request:
-       mock_request.side_effect = mock_requests_post
-       lm = groq.GroqLlama3_70B(api_key='fake_key')
-       response = lm(
-           'hello',
-           temperature=0.0,
-           max_tokens=1024,
-           top_k=0.1,
-           top_p=0.2,
-           stop=['\n'],
-       )
-       self.assertEqual(
-           response.text,
-           (
-               'hello with temperature=0.0, top_p=0.2, '
-               "max_tokens=1024, stop=['\\n']."
-           ),
-       )
-       self.assertIsNotNone(response.usage)
-       self.assertIsNotNone(response.usage.prompt_tokens, 2)
-       self.assertIsNotNone(response.usage.completion_tokens, 1)
-       self.assertIsNotNone(response.usage.total_tokens, 3)
+       _ = lm.headers

-   def test_mm_call(self):
-     with mock.patch('requests.Session.post') as mock_mm_request:
-       mock_mm_request.side_effect = mock_mm_requests_post
-       lm = groq.GroqLlama3_70B(multimodal=True, api_key='fake_key')
-       response = lm(lf_modalities.Image.from_uri('https://fake/image.jpg'))
-       self.assertEqual(response.text, 'https://fake/image.jpg')
+     lm = groq.GroqMistral_8x7B(api_key='fake key')
+     self.assertEqual(
+         lm.headers,
+         {
+             'Content-Type': 'application/json',
+             'Authorization': 'Bearer fake key',
+         }
+     )

-   def test_call_errors(self):
-     for status_code, error_type, error_message in [
-         (429, 'rate_limit', 'Rate limit exceeded.'),
-         (503, 'service_unavailable', 'Service unavailable.'),
-         (500, 'bad_request', 'Bad request.'),
-     ]:
-       with mock.patch('requests.Session.post') as mock_mm_request:
-         mock_mm_request.side_effect = mock_requests_post_error(
-             status_code, error_type, error_message
-         )
-         lm = groq.GroqLlama3_70B(api_key='fake_key')
-         with self.assertRaisesRegex(
-             Exception, f'{status_code}:.*{error_type}'
-         ):
-           lm('hello', lm=lm, max_attempts=1)
+     os.environ['GROQ_API_KEY'] = 'abc'
+     lm = groq.GroqMistral_8x7B()
+     self.assertEqual(
+         lm.headers,
+         {
+             'Content-Type': 'application/json',
+             'Authorization': 'Bearer abc',
+         }
+     )
+     del os.environ['GROQ_API_KEY']


  if __name__ == '__main__':
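
Note: the rewritten Groq test no longer mocks HTTP round-trips; it only checks header and request-argument construction. A minimal usage sketch consistent with the assertions above (the key value is a placeholder; only the header shape, the `GROQ_API_KEY` fallback, and the dropped logprobs fields are taken from the test):

    import os
    import langfun.core as lf
    from langfun.core.llms import groq

    os.environ['GROQ_API_KEY'] = 'my-key'   # or pass api_key=... at construction
    lm = groq.GroqMistral_8x7B()
    assert lm.headers['Authorization'] == 'Bearer my-key'

    # Sampling options are mapped to request args; per the test above, the
    # logprobs fields are stripped before the request is sent.
    args = lm._request_args(lf.LMSamplingOptions(temperature=1.0, stop=['\n']))
    assert 'logprobs' not in args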

langfun/core/llms/llama_cpp.py
@@ -14,61 +14,34 @@
  """Language models from llama.cpp."""

  from typing import Annotated
+ from langfun.core.llms import openai_compatible
+ import pyglove as pg

- import langfun.core as lf
- import requests

-
- @lf.use_init_args(["url"])
- class LlamaCppRemote(lf.LanguageModel):
+ @pg.use_init_args(['url', 'model'])
+ @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
+ class LlamaCppRemote(openai_compatible.OpenAICompatible):
    """The remote LLaMA C++ model.

    The Remote LLaMA C++ models can be launched via
    https://github.com/ggerganov/llama.cpp/tree/master/examples/server
    """
-
    url: Annotated[
        str,
-       "The name of the model to use.",
-   ] = ""
+       'The URL of the LLaMA C++ server.',
+   ]

-   name: Annotated[
+   model: Annotated[
        str,
-       "The abbreviation for the LLaMA CPP-based model name.",
-   ] = ""
+       'The name of the model to use.',
+   ] = ''
+
+   @property
+   def api_endpoint(self) -> str:
+     return self.url + '/completion'

    @property
    def model_id(self) -> str:
      """Returns a string to identify the model."""
-     return f"LLaMAC++({self.name})"
-
-   def _sample(self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
-     def _complete_fn(cur_prompts):
-       results = []
-       for prompt in cur_prompts:
-         result = lf.LMSamplingResult()
-         for _ in range(self.sampling_options.n or 1):
-           data = {
-               "prompt": prompt.text,
-               "n_predict": self.sampling_options.max_tokens,
-               "top_k": self.sampling_options.top_k or 50,
-               "top_p": self.sampling_options.top_p or 0.95,
-           }
-           if self.sampling_options.temperature is not None:
-             data["temperature"] = self.sampling_options.temperature
-
-           response = requests.post(
-               f"{self.url}/completion",
-               json=data,
-               headers={"Content-Type": "application/json"},
-               timeout=self.timeout,
-           )
-           decoded_response = response.json()
-           response = decoded_response["content"]
-           result.samples.append(lf.LMSample(response, score=0.0))
-         results.append(result)
-       return results
+     return f'LLaMAC++({self.model or ""})'

-     return self._parallel_execute_with_currency_control(
-         _complete_fn, [prompts]
-     )[0]
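
After this refactor, `LlamaCppRemote` no longer issues HTTP requests itself; it inherits the request/response handling from `openai_compatible.OpenAICompatible` and only fixes the endpoint. A minimal construction sketch consistent with the new class (the server URL and model name are placeholders):

    from langfun.core.llms import llama_cpp

    # Point at a llama.cpp server started via examples/server.
    lm = llama_cpp.LlamaCppRemote('http://127.0.0.1:8080', model='llama-3-8b')
    assert lm.api_endpoint == 'http://127.0.0.1:8080/completion'
    assert lm.model_id == 'LLaMAC++(llama-3-8b)'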

langfun/core/llms/llama_cpp_test.py
@@ -11,44 +11,18 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """Tests for llama cpp models."""
-
- import typing
  import unittest
- from unittest import mock
-
- import langfun.core as lf
  from langfun.core.llms import llama_cpp


- def mock_requests_post(url: str, json: typing.Dict[str, typing.Any], **kwargs):
-   del kwargs
-
-   class TEMP:
-
-     def json(self):
-       return {"content": json["prompt"] + "\n" + url}
-
-   return TEMP()
-
-
  class LlamaCppRemoteTest(unittest.TestCase):
    """Tests for the LlamaCppRemote model."""

-   def test_call_completion(self):
-     with mock.patch("requests.post") as mock_request:
-       mock_request.side_effect = mock_requests_post
-       lm = llama_cpp.LlamaCppRemote(url="http://127.0.0.1:8080")
-       response = lm("hello", sampling_options=lf.LMSamplingOptions(n=1))
-       self.assertEqual(
-           response.text,
-           "hello\nhttp://127.0.0.1:8080/completion",
-       )
-
-   def test_name(self):
-     lm = llama_cpp.LlamaCppRemote()
+   def test_basics(self):
+     lm = llama_cpp.LlamaCppRemote("http://127.0.0.1:8080")
+     self.assertEqual(lm.api_endpoint, "http://127.0.0.1:8080/completion")
      self.assertEqual(lm.model_id, "LLaMAC++()")
-     lm = llama_cpp.LlamaCppRemote(url="xxx", name="x")
+     lm = llama_cpp.LlamaCppRemote("xxx", model="x")
      self.assertEqual(lm.model_id, "LLaMAC++(x)")
