freeplay 0.3.12__tar.gz → 0.3.15__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {freeplay-0.3.12 → freeplay-0.3.15}/PKG-INFO +2 -2
- {freeplay-0.3.12 → freeplay-0.3.15}/pyproject.toml +1 -1
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/__init__.py +2 -1
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/freeplay.py +2 -0
- freeplay-0.3.15/src/freeplay/model.py +64 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/resources/prompts.py +2 -2
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/resources/recordings.py +63 -14
- freeplay-0.3.15/src/freeplay/resources/test_cases.py +55 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/support.py +45 -1
- freeplay-0.3.12/src/freeplay/model.py +0 -18
- {freeplay-0.3.12 → freeplay-0.3.15}/LICENSE +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/README.md +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/api_support.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/errors.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/freeplay_cli.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/llm_parameters.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/py.typed +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/resources/__init__.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/resources/customer_feedback.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/resources/sessions.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/resources/test_runs.py +0 -0
- {freeplay-0.3.12 → freeplay-0.3.15}/src/freeplay/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
from .freeplay import Freeplay
|
2
2
|
from .resources.prompts import PromptInfo
|
3
|
-
from .resources.recordings import CallInfo, ResponseInfo, RecordPayload, TestRunInfo
|
3
|
+
from .resources.recordings import CallInfo, ResponseInfo, RecordPayload, TestRunInfo, UsageTokens
|
4
4
|
from .resources.sessions import SessionInfo, TraceInfo
|
5
5
|
|
6
6
|
__all__ = [
|
@@ -12,4 +12,5 @@ __all__ = [
|
|
12
12
|
'SessionInfo',
|
13
13
|
'TestRunInfo',
|
14
14
|
'TraceInfo',
|
15
|
+
'UsageTokens',
|
15
16
|
]
|
@@ -5,6 +5,7 @@ from freeplay.resources.customer_feedback import CustomerFeedback
|
|
5
5
|
from freeplay.resources.prompts import Prompts, APITemplateResolver, TemplateResolver
|
6
6
|
from freeplay.resources.recordings import Recordings
|
7
7
|
from freeplay.resources.sessions import Sessions
|
8
|
+
from freeplay.resources.test_cases import TestCases
|
8
9
|
from freeplay.resources.test_runs import TestRuns
|
9
10
|
from freeplay.support import CallSupport
|
10
11
|
|
@@ -38,3 +39,4 @@ class Freeplay:
|
|
38
39
|
self.recordings = Recordings(self.call_support)
|
39
40
|
self.sessions = Sessions(self.call_support)
|
40
41
|
self.test_runs = TestRuns(self.call_support)
|
42
|
+
self.test_cases = TestCases(self.call_support)
|
@@ -0,0 +1,64 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import List, Union, Any, Dict, Mapping, TypedDict, Literal
|
3
|
+
|
4
|
+
InputValue = Union[str, int, bool, float, Dict[str, Any], List[Any]]
|
5
|
+
InputVariables = Mapping[str, InputValue]
|
6
|
+
TestRunInput = Mapping[str, InputValue]
|
7
|
+
FeedbackValue = Union[bool, str, int, float]
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
|
11
|
+
class TestRun:
|
12
|
+
id: str
|
13
|
+
inputs: List[TestRunInput]
|
14
|
+
|
15
|
+
|
16
|
+
class OpenAIFunctionCall(TypedDict):
|
17
|
+
name: str
|
18
|
+
arguments: str
|
19
|
+
|
20
|
+
|
21
|
+
@dataclass
|
22
|
+
class TextBlock:
|
23
|
+
text: str
|
24
|
+
type: Literal["text"] = "text"
|
25
|
+
|
26
|
+
|
27
|
+
@dataclass
|
28
|
+
class ToolResultBlock:
|
29
|
+
# AKA tool_use_id -- the ID of the tool call that this message is responding to.
|
30
|
+
tool_call_id: str
|
31
|
+
content: Union[str, List[TextBlock]]
|
32
|
+
type: Literal["tool_result"] = "tool_result"
|
33
|
+
|
34
|
+
|
35
|
+
@dataclass
|
36
|
+
class ToolCallBlock:
|
37
|
+
id: str
|
38
|
+
name: str
|
39
|
+
arguments: Any
|
40
|
+
type: Literal["tool_call"] = "tool_call"
|
41
|
+
|
42
|
+
|
43
|
+
ContentBlock = Union[TextBlock, ToolResultBlock, ToolCallBlock]
|
44
|
+
|
45
|
+
|
46
|
+
@dataclass
|
47
|
+
class UserMessage:
|
48
|
+
content: Union[str, List[ContentBlock]]
|
49
|
+
role: Literal["user"] = "user"
|
50
|
+
|
51
|
+
|
52
|
+
@dataclass
|
53
|
+
class SystemMessage:
|
54
|
+
content: str
|
55
|
+
role: Literal["system"] = "system"
|
56
|
+
|
57
|
+
|
58
|
+
@dataclass
|
59
|
+
class AssistantMessage:
|
60
|
+
content: Union[str, List[ContentBlock]]
|
61
|
+
role: Literal["assistant"] = "assistant"
|
62
|
+
|
63
|
+
# Largely used for history in dataset test cases presently
|
64
|
+
NormalizedMessage = Union[UserMessage, SystemMessage, AssistantMessage]
|
@@ -35,14 +35,14 @@ class UnsupportedToolSchemaError(FreeplayConfigurationError):
|
|
35
35
|
|
36
36
|
# A content block a la OpenAI or Anthropic. Intentionally over-permissive to allow schema evolution by the providers.
|
37
37
|
@runtime_checkable
|
38
|
-
class
|
38
|
+
class ProviderMessageContentBlock(Protocol):
|
39
39
|
def model_dump(self) -> Dict[str, Any]:
|
40
40
|
pass
|
41
41
|
|
42
42
|
|
43
43
|
# A content/role pair with a type-safe content for common provider recording. If not using a common provider,
|
44
44
|
# use {'content': str, 'role': str} to record. If using a common provider, this is usually the `.content` field.
|
45
|
-
GenericProviderMessage = Union[Dict[str, Any],
|
45
|
+
GenericProviderMessage = Union[Dict[str, Any], ProviderMessageContentBlock]
|
46
46
|
|
47
47
|
|
48
48
|
# SDK-Exposed Classes
|
@@ -17,6 +17,12 @@ from freeplay.support import CallSupport
|
|
17
17
|
logger = logging.getLogger(__name__)
|
18
18
|
|
19
19
|
|
20
|
+
@dataclass
|
21
|
+
class UsageTokens:
|
22
|
+
prompt_tokens: int
|
23
|
+
completion_tokens: int
|
24
|
+
|
25
|
+
|
20
26
|
@dataclass
|
21
27
|
class CallInfo:
|
22
28
|
provider: str
|
@@ -25,9 +31,15 @@ class CallInfo:
|
|
25
31
|
end_time: float
|
26
32
|
model_parameters: LLMParameters
|
27
33
|
provider_info: Optional[Dict[str, Any]] = None
|
34
|
+
usage: Optional[UsageTokens] = None
|
28
35
|
|
29
36
|
@staticmethod
|
30
|
-
def from_prompt_info(
|
37
|
+
def from_prompt_info(
|
38
|
+
prompt_info: PromptInfo,
|
39
|
+
start_time: float,
|
40
|
+
end_time: float,
|
41
|
+
usage: Optional[UsageTokens] = None
|
42
|
+
) -> 'CallInfo':
|
31
43
|
return CallInfo(
|
32
44
|
provider=prompt_info.provider,
|
33
45
|
model=prompt_info.model,
|
@@ -35,6 +47,7 @@ class CallInfo:
|
|
35
47
|
end_time=end_time,
|
36
48
|
model_parameters=prompt_info.model_parameters,
|
37
49
|
provider_info=prompt_info.provider_info,
|
50
|
+
usage=usage
|
38
51
|
)
|
39
52
|
|
40
53
|
|
@@ -68,6 +81,14 @@ class RecordPayload:
|
|
68
81
|
completion_id: Optional[UUID] = None
|
69
82
|
|
70
83
|
|
84
|
+
@dataclass
|
85
|
+
class RecordUpdatePayload:
|
86
|
+
project_id: str
|
87
|
+
completion_id: str
|
88
|
+
new_messages: Optional[List[Dict[str, Any]]] = None
|
89
|
+
eval_results: Optional[Dict[str, Union[bool, float]]] = None
|
90
|
+
|
91
|
+
|
71
92
|
@dataclass
|
72
93
|
class RecordResponse:
|
73
94
|
completion_id: str
|
@@ -77,7 +98,7 @@ class Recordings:
|
|
77
98
|
def __init__(self, call_support: CallSupport):
|
78
99
|
self.call_support = call_support
|
79
100
|
|
80
|
-
def create(self, record_payload: RecordPayload) -> RecordResponse:
|
101
|
+
def create(self, record_payload: RecordPayload) -> RecordResponse: # type: ignore
|
81
102
|
if len(record_payload.all_messages) < 1:
|
82
103
|
raise FreeplayClientError("Messages list must have at least one message. "
|
83
104
|
"The last message should be the current response.")
|
@@ -130,6 +151,12 @@ class Recordings:
|
|
130
151
|
"trace_id": record_payload.trace_info.trace_id
|
131
152
|
}
|
132
153
|
|
154
|
+
if record_payload.call_info.usage is not None:
|
155
|
+
record_api_payload['call_info']['usage'] = {
|
156
|
+
"prompt_tokens": record_payload.call_info.usage.prompt_tokens,
|
157
|
+
"completion_tokens": record_payload.call_info.usage.completion_tokens,
|
158
|
+
}
|
159
|
+
|
133
160
|
try:
|
134
161
|
recorded_response = api_support.post_raw(
|
135
162
|
api_key=self.call_support.freeplay_api_key,
|
@@ -143,18 +170,7 @@ class Recordings:
|
|
143
170
|
message = f'There was an error recording to Freeplay. Call will not be logged. ' \
|
144
171
|
f'Status: {e.response.status_code}. '
|
145
172
|
|
146
|
-
|
147
|
-
try:
|
148
|
-
content = e.response.content
|
149
|
-
json_body = json.loads(content)
|
150
|
-
if 'message' in json_body:
|
151
|
-
message += json_body['message']
|
152
|
-
except:
|
153
|
-
pass
|
154
|
-
else:
|
155
|
-
message += f'{e.__class__}'
|
156
|
-
|
157
|
-
raise FreeplayError(message) from e
|
173
|
+
self.__handle_and_raise_api_error(e, message)
|
158
174
|
|
159
175
|
except Exception as e:
|
160
176
|
status_code = -1
|
@@ -165,3 +181,36 @@ class Recordings:
|
|
165
181
|
f'Status: {status_code}. {e.__class__}'
|
166
182
|
|
167
183
|
raise FreeplayError(message) from e
|
184
|
+
|
185
|
+
def update(self, record_update_payload: RecordUpdatePayload) -> RecordResponse: # type: ignore
|
186
|
+
record_update_api_payload: Dict[str, Any] = {
|
187
|
+
"new_messages": record_update_payload.new_messages,
|
188
|
+
"eval_results": record_update_payload.eval_results,
|
189
|
+
}
|
190
|
+
|
191
|
+
try:
|
192
|
+
record_update_response = api_support.post_raw(
|
193
|
+
api_key=self.call_support.freeplay_api_key,
|
194
|
+
url=f'{self.call_support.api_base}/v2/projects/{record_update_payload.project_id}/completions/{record_update_payload.completion_id}',
|
195
|
+
payload=record_update_api_payload
|
196
|
+
)
|
197
|
+
record_update_response.raise_for_status()
|
198
|
+
json_dom = record_update_response.json()
|
199
|
+
return RecordResponse(completion_id=str(json_dom['completion_id']))
|
200
|
+
except HTTPError as e:
|
201
|
+
message = f'There was an error updating the completion. Status: {e.response.status_code}.'
|
202
|
+
self.__handle_and_raise_api_error(e, message)
|
203
|
+
|
204
|
+
@staticmethod
|
205
|
+
def __handle_and_raise_api_error(e: HTTPError, messages: str) -> None:
|
206
|
+
if e.response.content:
|
207
|
+
try:
|
208
|
+
content = e.response.content
|
209
|
+
json_body = json.loads(content)
|
210
|
+
if 'message' in json_body:
|
211
|
+
messages += json_body['message']
|
212
|
+
except:
|
213
|
+
pass
|
214
|
+
else:
|
215
|
+
messages += f'{e.__class__}'
|
216
|
+
raise FreeplayError(messages) from e
|
@@ -0,0 +1,55 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import List, Optional, Dict, Any
|
3
|
+
|
4
|
+
from freeplay.model import InputVariables, NormalizedMessage
|
5
|
+
from freeplay.support import CallSupport, DatasetTestCaseRequest, DatasetTestCasesRetrievalResponse
|
6
|
+
|
7
|
+
|
8
|
+
@dataclass
|
9
|
+
class DatasetTestCase:
|
10
|
+
def __init__(
|
11
|
+
self,
|
12
|
+
inputs: InputVariables,
|
13
|
+
output: Optional[str],
|
14
|
+
history: Optional[List[NormalizedMessage]] = None,
|
15
|
+
metadata: Optional[Dict[str, str]] = None,
|
16
|
+
id: Optional[str] = None, # Only set on retrieval
|
17
|
+
):
|
18
|
+
self.inputs = inputs
|
19
|
+
self.output = output
|
20
|
+
self.history = history
|
21
|
+
self.metadata = metadata
|
22
|
+
self.id = id
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
@dataclass
|
27
|
+
class Dataset:
|
28
|
+
def __init__(self, dataset_id: str, test_cases: List[DatasetTestCase]):
|
29
|
+
self.dataset_id = dataset_id
|
30
|
+
self.test_cases = test_cases
|
31
|
+
|
32
|
+
|
33
|
+
@dataclass
|
34
|
+
class DatasetResults:
|
35
|
+
def __init__(self, dataset_id: str, test_cases: List[DatasetTestCase]) -> None:
|
36
|
+
self.dataset_id = dataset_id
|
37
|
+
self.test_cases = test_cases
|
38
|
+
|
39
|
+
class TestCases:
|
40
|
+
def __init__(self, call_support: CallSupport) -> None:
|
41
|
+
self.call_support = call_support
|
42
|
+
|
43
|
+
def create(self, project_id: str, dataset_id: str, test_case: DatasetTestCase) -> Dataset:
|
44
|
+
return self.create_many(project_id, dataset_id, [test_case])
|
45
|
+
|
46
|
+
def create_many(self, project_id: str, dataset_id: str, test_cases: List[DatasetTestCase]) -> Dataset:
|
47
|
+
dataset_test_cases = [DatasetTestCaseRequest(test_case.history, test_case.inputs, test_case.metadata, test_case.output) for test_case in test_cases]
|
48
|
+
self.call_support.create_test_cases(project_id, dataset_id, dataset_test_cases)
|
49
|
+
return Dataset(dataset_id, test_cases)
|
50
|
+
|
51
|
+
def get(self, project_id: str, dataset_id: str) -> DatasetResults:
|
52
|
+
test_case_results: DatasetTestCasesRetrievalResponse = self.call_support.get_test_cases(project_id, dataset_id)
|
53
|
+
dataset_test_cases = test_case_results.test_cases
|
54
|
+
|
55
|
+
return DatasetResults(dataset_id, [DatasetTestCase(id=test_case.id, history=test_case.history, output=test_case.output, inputs=test_case.values, metadata=test_case.metadata) for test_case in dataset_test_cases])
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import json
|
1
2
|
from dataclasses import dataclass
|
2
3
|
from json import JSONEncoder
|
3
4
|
from typing import Optional, Dict, Any, List, Union
|
@@ -5,7 +6,7 @@ from typing import Optional, Dict, Any, List, Union
|
|
5
6
|
from freeplay import api_support
|
6
7
|
from freeplay.api_support import try_decode
|
7
8
|
from freeplay.errors import freeplay_response_error, FreeplayServerError
|
8
|
-
from freeplay.model import InputVariables, FeedbackValue
|
9
|
+
from freeplay.model import InputVariables, FeedbackValue, NormalizedMessage
|
9
10
|
|
10
11
|
|
11
12
|
@dataclass
|
@@ -87,6 +88,28 @@ class TestRunRetrievalResponse:
|
|
87
88
|
human_evaluation=summary_statistics['human_evaluation']
|
88
89
|
)
|
89
90
|
|
91
|
+
class DatasetTestCaseRequest:
|
92
|
+
def __init__(self, history: Optional[List[NormalizedMessage]], inputs: InputVariables, metadata: Optional[Dict[str, str]], output: Optional[str]) -> None:
|
93
|
+
self.history: Optional[List[NormalizedMessage]] = history
|
94
|
+
self.inputs: InputVariables = inputs
|
95
|
+
self.metadata: Optional[Dict[str, str]] = metadata
|
96
|
+
self.output: Optional[str] = output
|
97
|
+
|
98
|
+
|
99
|
+
class DatasetTestCaseResponse:
|
100
|
+
def __init__(self, test_case: Dict[str, Any]):
|
101
|
+
self.values: InputVariables = test_case['values']
|
102
|
+
self.id: str = test_case['id']
|
103
|
+
self.output: Optional[str] = test_case.get('output')
|
104
|
+
self.history: Optional[List[NormalizedMessage]] = test_case.get('history')
|
105
|
+
self.metadata: Optional[Dict[str, str]] = test_case.get('metadata')
|
106
|
+
|
107
|
+
class DatasetTestCasesRetrievalResponse:
|
108
|
+
def __init__(self, test_cases: List[Dict[str, Any]]) -> None:
|
109
|
+
self.test_cases = [
|
110
|
+
DatasetTestCaseResponse(test_case)
|
111
|
+
for test_case in test_cases
|
112
|
+
]
|
90
113
|
|
91
114
|
class CallSupport:
|
92
115
|
def __init__(
|
@@ -253,3 +276,24 @@ class CallSupport:
|
|
253
276
|
if response.status_code != 201:
|
254
277
|
raise freeplay_response_error('Error while deleting session.', response)
|
255
278
|
|
279
|
+
def create_test_cases(self, project_id: str, dataset_id: str, test_cases: List[DatasetTestCaseRequest]) -> None:
|
280
|
+
examples = [{"history": test_case.history, "output": test_case.output, "metadata": test_case.metadata, "inputs": test_case.inputs} for test_case in test_cases]
|
281
|
+
payload: Dict[str, Any] = {"examples": examples}
|
282
|
+
url = f'{self.api_base}/v2/projects/{project_id}/datasets/id/{dataset_id}/test-cases'
|
283
|
+
|
284
|
+
response = api_support.post_raw(self.freeplay_api_key, url, payload)
|
285
|
+
if response.status_code != 201:
|
286
|
+
raise freeplay_response_error('Error while creating test cases.', response)
|
287
|
+
|
288
|
+
def get_test_cases(self, project_id: str, dataset_id: str) -> DatasetTestCasesRetrievalResponse:
|
289
|
+
url = f'{self.api_base}/v2/projects/{project_id}/datasets/id/{dataset_id}/test-cases'
|
290
|
+
response = api_support.get_raw(self.freeplay_api_key, url)
|
291
|
+
|
292
|
+
if response.status_code != 200:
|
293
|
+
raise freeplay_response_error('Error while getting test cases.', response)
|
294
|
+
|
295
|
+
json_dom = response.json()
|
296
|
+
|
297
|
+
return DatasetTestCasesRetrievalResponse(
|
298
|
+
test_cases=[{"history": jsn["history"], "id": jsn["id"], "output": jsn["output"], "values": jsn["values"], "metadata": jsn["metadata"] if 'metadata' in jsn.keys() else None} for jsn in json_dom]
|
299
|
+
)
|
@@ -1,18 +0,0 @@
|
|
1
|
-
from dataclasses import dataclass
|
2
|
-
from typing import List, Union, Any, Dict, Mapping, TypedDict
|
3
|
-
|
4
|
-
InputValue = Union[str, int, bool, float, Dict[str, Any], List[Any]]
|
5
|
-
InputVariables = Mapping[str, InputValue]
|
6
|
-
TestRunInput = Mapping[str, InputValue]
|
7
|
-
FeedbackValue = Union[bool, str, int, float]
|
8
|
-
|
9
|
-
|
10
|
-
@dataclass
|
11
|
-
class TestRun:
|
12
|
-
id: str
|
13
|
-
inputs: List[TestRunInput]
|
14
|
-
|
15
|
-
|
16
|
-
class OpenAIFunctionCall(TypedDict):
|
17
|
-
name: str
|
18
|
-
arguments: str
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|