langfun-0.0.2.dev20240429-py3-none-any.whl → langfun-0.0.2.dev20240501-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/eval/__init__.py +14 -1
- langfun/core/eval/base.py +490 -105
- langfun/core/eval/base_test.py +185 -53
- langfun/core/eval/matching.py +22 -21
- langfun/core/eval/matching_test.py +23 -2
- langfun/core/eval/patching.py +130 -0
- langfun/core/eval/patching_test.py +170 -0
- langfun/core/eval/scoring.py +4 -4
- langfun/core/eval/scoring_test.py +19 -2
- langfun/core/llms/openai.py +1 -1
- langfun/core/llms/openai_test.py +2 -1
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/METADATA +1 -2
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/RECORD +16 -14
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/WHEEL +0 -0
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Tests for evaluation patching."""
|
15
|
+
|
16
|
+
import unittest
|
17
|
+
from langfun.core import llms as lf_llms
|
18
|
+
from langfun.core.eval import base
|
19
|
+
from langfun.core.eval import patching
|
20
|
+
import pyglove as pg
|
21
|
+
|
22
|
+
|
23
|
+
class PatchingCommonTest(unittest.TestCase):
|
24
|
+
|
25
|
+
def test_patch_member(self):
|
26
|
+
class A(pg.Object):
|
27
|
+
x: int = 1
|
28
|
+
|
29
|
+
class B(pg.Object):
|
30
|
+
a: A
|
31
|
+
|
32
|
+
b = B(A())
|
33
|
+
pg.patch(b, [patching.patch_member(A, 'x', 2)])
|
34
|
+
self.assertEqual(b, B(A(2)))
|
35
|
+
|
36
|
+
def test_patch_args(self):
|
37
|
+
s = base.Suite(
|
38
|
+
[base.Evaluation(inputs=base.as_inputs([1]))],
|
39
|
+
additional_args=dict(x=1, y=2),
|
40
|
+
)
|
41
|
+
pg.patch(s, [patching.patch_additional_args(x=3, z=4)])
|
42
|
+
self.assertTrue(
|
43
|
+
pg.eq(
|
44
|
+
s,
|
45
|
+
base.Suite(
|
46
|
+
[
|
47
|
+
base.Evaluation(
|
48
|
+
inputs=base.as_inputs([1]),
|
49
|
+
additional_args=dict(x=3, y=2, z=4),
|
50
|
+
)
|
51
|
+
],
|
52
|
+
additional_args=dict(x=3, y=2, z=4),
|
53
|
+
),
|
54
|
+
)
|
55
|
+
)
|
56
|
+
|
57
|
+
def test_patch_lm(self):
|
58
|
+
s = base.Suite(
|
59
|
+
[base.Evaluation(inputs=base.as_inputs([1]))],
|
60
|
+
lm=lf_llms.Gpt35Turbo(),
|
61
|
+
)
|
62
|
+
pg.patch(
|
63
|
+
s, [patching.patch_lm(pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]))]
|
64
|
+
)
|
65
|
+
self.assertTrue(
|
66
|
+
pg.eq(
|
67
|
+
s,
|
68
|
+
base.Suite(
|
69
|
+
[
|
70
|
+
base.Evaluation(
|
71
|
+
inputs=base.as_inputs([1]),
|
72
|
+
lm=pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]),
|
73
|
+
)
|
74
|
+
],
|
75
|
+
lm=pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]),
|
76
|
+
),
|
77
|
+
)
|
78
|
+
)
|
79
|
+
|
80
|
+
def test_patch_parsing_lm(self):
|
81
|
+
s = base.Suite(
|
82
|
+
[base.Evaluation(inputs=base.as_inputs([1]))],
|
83
|
+
lm=lf_llms.Gpt4(),
|
84
|
+
)
|
85
|
+
pg.patch(s, [patching.patch_parsing_lm(lf_llms.Gpt35Turbo())])
|
86
|
+
self.assertTrue(
|
87
|
+
pg.eq(
|
88
|
+
s,
|
89
|
+
base.Suite(
|
90
|
+
[
|
91
|
+
base.Evaluation(
|
92
|
+
inputs=base.as_inputs([1]),
|
93
|
+
lm=lf_llms.Gpt4(),
|
94
|
+
parsing_lm=lf_llms.Gpt35Turbo(),
|
95
|
+
)
|
96
|
+
],
|
97
|
+
# NOTE(daiyip): Suite does not have `parsing_lm` as one of its
|
98
|
+
# variable keyword fields yet, so patching does not add to it.
|
99
|
+
# This is okay since we only care about the leaf nodes.
|
100
|
+
lm=lf_llms.Gpt4(),
|
101
|
+
),
|
102
|
+
)
|
103
|
+
)
|
104
|
+
|
105
|
+
def test_patch_prompt(self):
|
106
|
+
e = base.Evaluation(inputs=base.as_inputs([1]))
|
107
|
+
pg.patch(e, [patching.patch_prompt('Q: {{example.question}}')])
|
108
|
+
self.assertTrue(
|
109
|
+
pg.eq(
|
110
|
+
e,
|
111
|
+
base.Evaluation(
|
112
|
+
inputs=base.as_inputs([1]),
|
113
|
+
prompt='Q: {{example.question}}',
|
114
|
+
),
|
115
|
+
)
|
116
|
+
)
|
117
|
+
|
118
|
+
def test_patch_inputs(self):
|
119
|
+
e = base.Evaluation(inputs=base.as_inputs([1]))
|
120
|
+
pg.patch(e, [patching.patch_inputs(base.as_inputs([2]))])
|
121
|
+
self.assertTrue(
|
122
|
+
pg.eq(
|
123
|
+
e,
|
124
|
+
base.Evaluation(
|
125
|
+
inputs=base.as_inputs([2]),
|
126
|
+
),
|
127
|
+
)
|
128
|
+
)
|
129
|
+
|
130
|
+
def test_patch_schema_fn(self):
|
131
|
+
@pg.functor()
|
132
|
+
def int_schema():
|
133
|
+
return int
|
134
|
+
|
135
|
+
e = base.Evaluation(inputs=base.as_inputs([1]))
|
136
|
+
pg.patch(e, [patching.patch_schema_fn(int_schema())])
|
137
|
+
self.assertTrue(
|
138
|
+
pg.eq(
|
139
|
+
e,
|
140
|
+
base.Evaluation(
|
141
|
+
inputs=base.as_inputs([1]),
|
142
|
+
schema_fn=int_schema(),
|
143
|
+
),
|
144
|
+
)
|
145
|
+
)
|
146
|
+
|
147
|
+
|
148
|
+
class StringPatcheTest(unittest.TestCase):
|
149
|
+
|
150
|
+
def test_lm(self):
|
151
|
+
target = pg.patch(
|
152
|
+
base.Evaluation(inputs=base.as_inputs([1])),
|
153
|
+
['lm?haiku:gpt4', 'max_tokens?1024', 'temperature?0.7'],
|
154
|
+
)
|
155
|
+
self.assertEqual(
|
156
|
+
target.lm,
|
157
|
+
pg.oneof([
|
158
|
+
lf_llms.Claude3Haiku(temperature=0.7, max_tokens=1024),
|
159
|
+
lf_llms.Gpt4(temperature=0.7, max_tokens=1024),
|
160
|
+
]),
|
161
|
+
)
|
162
|
+
with self.assertRaisesRegex(ValueError, 'Unknown model name'):
|
163
|
+
pg.patch(
|
164
|
+
base.Evaluation(inputs=base.as_inputs([1])),
|
165
|
+
['lm?gpt2'],
|
166
|
+
)
|
167
|
+
|
168
|
+
|
169
|
+
if __name__ == '__main__':
|
170
|
+
unittest.main()
|
langfun/core/eval/scoring.py
CHANGED
@@ -113,8 +113,8 @@ class Scoring(base.Evaluation):
|
|
113
113
|
m.total,
|
114
114
|
)
|
115
115
|
|
116
|
-
def
|
117
|
-
result = super().
|
116
|
+
def finalize(self) -> pg.Dict:
|
117
|
+
result = super().finalize()
|
118
118
|
result.metrics.update(
|
119
119
|
num_scored=self.num_scored,
|
120
120
|
score_rate=self.score_rate,
|
@@ -168,7 +168,7 @@ class Scoring(base.Evaluation):
|
|
168
168
|
)
|
169
169
|
)
|
170
170
|
|
171
|
-
def
|
171
|
+
def _render_summary_metrics(self, s: io.StringIO) -> None:
|
172
172
|
"""Renders metrics in HTML."""
|
173
173
|
assert self.result is not None
|
174
174
|
m = self.result.metrics
|
@@ -182,7 +182,7 @@ class Scoring(base.Evaluation):
|
|
182
182
|
)
|
183
183
|
)
|
184
184
|
s.write(' | ')
|
185
|
-
super().
|
185
|
+
super()._render_summary_metrics(s)
|
186
186
|
|
187
187
|
def _render_scored(self, s: io.StringIO) -> None:
|
188
188
|
"""Formats the matched cases into html."""
|
@@ -98,6 +98,11 @@ class ScoringTest(unittest.TestCase):
|
|
98
98
|
total=2,
|
99
99
|
failures=0,
|
100
100
|
failure_rate=0.0,
|
101
|
+
oop_failures=0,
|
102
|
+
oop_failure_rate=0.0,
|
103
|
+
non_oop_failures=0,
|
104
|
+
non_oop_failure_rate=0.0,
|
105
|
+
failure_breakdown={},
|
101
106
|
num_scored=2,
|
102
107
|
score_rate=1.0,
|
103
108
|
avg_score=0.5,
|
@@ -124,7 +129,12 @@ class ScoringTest(unittest.TestCase):
|
|
124
129
|
)
|
125
130
|
self.assertTrue(
|
126
131
|
os.path.exists(
|
127
|
-
os.path.join(s.dir, scoring.Scoring.
|
132
|
+
os.path.join(s.dir, scoring.Scoring.OOP_FAILURES_JSON)
|
133
|
+
)
|
134
|
+
)
|
135
|
+
self.assertTrue(
|
136
|
+
os.path.exists(
|
137
|
+
os.path.join(s.dir, scoring.Scoring.NON_OOP_FAILURES_JSON)
|
128
138
|
)
|
129
139
|
)
|
130
140
|
self.assertTrue(
|
@@ -143,7 +153,14 @@ class ScoringTest(unittest.TestCase):
|
|
143
153
|
self.assertTrue(
|
144
154
|
os.path.exists(
|
145
155
|
os.path.join(
|
146
|
-
s.dir, scoring.Scoring.
|
156
|
+
s.dir, scoring.Scoring.OOP_FAILURES_HTML
|
157
|
+
)
|
158
|
+
)
|
159
|
+
)
|
160
|
+
self.assertTrue(
|
161
|
+
os.path.exists(
|
162
|
+
os.path.join(
|
163
|
+
s.dir, scoring.Scoring.NON_OOP_FAILURES_HTML
|
147
164
|
)
|
148
165
|
)
|
149
166
|
)
|
langfun/core/llms/openai.py
CHANGED
@@ -234,7 +234,7 @@ class OpenAI(lf.LanguageModel):
|
|
234
234
|
if isinstance(chunk, str):
|
235
235
|
item = dict(type='text', text=chunk)
|
236
236
|
elif isinstance(chunk, lf_modalities.Image) and chunk.uri:
|
237
|
-
item = dict(type='image_url', image_url=chunk.uri)
|
237
|
+
item = dict(type='image_url', image_url=dict(url=chunk.uri))
|
238
238
|
else:
|
239
239
|
raise ValueError(f'Unsupported modality object: {chunk!r}.')
|
240
240
|
content.append(item)
|
langfun/core/llms/openai_test.py
CHANGED
@@ -66,7 +66,8 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
|
|
66
66
|
del kwargs
|
67
67
|
choices = []
|
68
68
|
urls = [
|
69
|
-
c['image_url']
|
69
|
+
c['image_url']['url']
|
70
|
+
for c in messages[0]['content'] if c['type'] == 'image_url'
|
70
71
|
]
|
71
72
|
for k in range(n):
|
72
73
|
choices.append(pg.Dict(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: langfun
|
3
|
-
Version: 0.0.2.
|
3
|
+
Version: 0.0.2.dev20240501
|
4
4
|
Summary: Langfun: Language as Functions.
|
5
5
|
Home-page: https://github.com/google/langfun
|
6
6
|
Author: Langfun Authors
|
@@ -21,7 +21,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
21
|
Classifier: Topic :: Software Development :: Libraries
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
License-File: LICENSE
|
24
|
-
Requires-Dist: absl-py >=1.0.0
|
25
24
|
Requires-Dist: google-generativeai >=0.3.2
|
26
25
|
Requires-Dist: jinja2 >=3.1.2
|
27
26
|
Requires-Dist: openai ==0.27.2
|
@@ -39,13 +39,15 @@ langfun/core/coding/python/parsing.py,sha256=uyvI1c5OLZhMVK2Oltkl3oJxSLlG0wadlpQ
|
|
39
39
|
langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-deRl1QMmNERfAA,7386
|
40
40
|
langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
|
41
41
|
langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
|
42
|
-
langfun/core/eval/__init__.py,sha256=
|
43
|
-
langfun/core/eval/base.py,sha256=
|
44
|
-
langfun/core/eval/base_test.py,sha256=
|
45
|
-
langfun/core/eval/matching.py,sha256=
|
46
|
-
langfun/core/eval/matching_test.py,sha256=
|
47
|
-
langfun/core/eval/
|
48
|
-
langfun/core/eval/
|
42
|
+
langfun/core/eval/__init__.py,sha256=Evt-E4FEhZF2tXL6-byh_AyA7Cc_ZoGmvnN7vkAZedk,1898
|
43
|
+
langfun/core/eval/base.py,sha256=VgHdnfkHeGPp0XjIGHw9LDZsR0Z4-yuWIkzn4pqJj3Y,73967
|
44
|
+
langfun/core/eval/base_test.py,sha256=cHOTIWVW4Dp8gKKIKcZrAcJ-w84j2GIozTzJoiAX7p4,26743
|
45
|
+
langfun/core/eval/matching.py,sha256=Y4vFoNTQEOwko6IA8l9OZ52-vt52e3VGmcTtvLA67wM,9782
|
46
|
+
langfun/core/eval/matching_test.py,sha256=f7iVyXH5KGJBWt4Wp14Bt9J3X59A6Ayfog9MbuFvPew,5532
|
47
|
+
langfun/core/eval/patching.py,sha256=R0s2eAd1m97exQt06dmUL0V_MBG0W2Hxg7fhNB7cXW0,3866
|
48
|
+
langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrCG1L6w,4775
|
49
|
+
langfun/core/eval/scoring.py,sha256=1J7IATo-8FXUR0SBqk9icztHiM0lWkBFcWUo-vUURgQ,6376
|
50
|
+
langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se2SXM,4546
|
49
51
|
langfun/core/llms/__init__.py,sha256=1bPg1QI8duOZCYINm-jWi094x0JtLmsk4KX60qIC_gs,3245
|
50
52
|
langfun/core/llms/anthropic.py,sha256=7W9YdPN3SlAFhAIQlihMkrpo7tTY_4NvD0KIlCrqcsk,8505
|
51
53
|
langfun/core/llms/anthropic_test.py,sha256=TMM30myyEhwF99Le4RvJEXOn8RYl0q1FRkt9Q9nl1jk,5540
|
@@ -57,8 +59,8 @@ langfun/core/llms/groq.py,sha256=NaGItVL_pkOpqPpI4bPGU27xLFRoaeizZ49v2s-4ERs,784
|
|
57
59
|
langfun/core/llms/groq_test.py,sha256=M6GtlrsOvDun_j-sR8cPh4W_moHWZNSTiThu3kuwbbc,5281
|
58
60
|
langfun/core/llms/llama_cpp.py,sha256=Y_KkMUf3Xfac49koMUtUslKl3h-HWp3-ntq7Jaa3bdo,2385
|
59
61
|
langfun/core/llms/llama_cpp_test.py,sha256=ZxC6defGd_HX9SFRU9U4cJiQnBKundbOrchbXuC1Z2M,1683
|
60
|
-
langfun/core/llms/openai.py,sha256=
|
61
|
-
langfun/core/llms/openai_test.py,sha256=
|
62
|
+
langfun/core/llms/openai.py,sha256=rPwO4qPGEwbB4O7TaQD0spg_PXIfF2ioRI_ilE3Pg6Y,13257
|
63
|
+
langfun/core/llms/openai_test.py,sha256=asSA1sVy_7hnXioD_2HTxtSDpVTKBUO_EjZuyHpwbn0,14854
|
62
64
|
langfun/core/llms/cache/__init__.py,sha256=QAo3InUMDM_YpteNnVCSejI4zOsnjSMWKJKzkb3VY64,993
|
63
65
|
langfun/core/llms/cache/base.py,sha256=cFfYvOIUae842pncqCAsRvqXCk2AnAsRYVx0mcIoAeY,3338
|
64
66
|
langfun/core/llms/cache/in_memory.py,sha256=YfFyJEhLs73cUiB0ZfhMxYpdE8Iuxxw-dvMFwGHTSHw,4742
|
@@ -101,8 +103,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
|
|
101
103
|
langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
|
102
104
|
langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
|
103
105
|
langfun/core/templates/selfplay_test.py,sha256=DYVrkk7uNKCqJGEHH31HssU2BPuMItU1vJLzfcXIlYg,2156
|
104
|
-
langfun-0.0.2.
|
105
|
-
langfun-0.0.2.
|
106
|
-
langfun-0.0.2.
|
107
|
-
langfun-0.0.2.
|
108
|
-
langfun-0.0.2.
|
106
|
+
langfun-0.0.2.dev20240501.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
107
|
+
langfun-0.0.2.dev20240501.dist-info/METADATA,sha256=SUhJ4RRQcyqLKu16sGip7Z2D875PI5EarCo3VDAGxuQ,3405
|
108
|
+
langfun-0.0.2.dev20240501.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
109
|
+
langfun-0.0.2.dev20240501.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
|
110
|
+
langfun-0.0.2.dev20240501.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|