freeplay 0.3.16__tar.gz → 0.3.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {freeplay-0.3.16 → freeplay-0.3.18}/PKG-INFO +1 -1
- {freeplay-0.3.16 → freeplay-0.3.18}/pyproject.toml +1 -1
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/__init__.py +2 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/prompts.py +77 -28
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/sessions.py +40 -7
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/support.py +41 -6
- {freeplay-0.3.16 → freeplay-0.3.18}/LICENSE +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/README.md +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/api_support.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/errors.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/freeplay.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/freeplay_cli.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/llm_parameters.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/model.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/py.typed +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/__init__.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/customer_feedback.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/recordings.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/test_cases.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/resources/test_runs.py +0 -0
- {freeplay-0.3.16 → freeplay-0.3.18}/src/freeplay/utils.py +0 -0
@@ -2,9 +2,11 @@ from .freeplay import Freeplay
|
|
2
2
|
from .resources.prompts import PromptInfo
|
3
3
|
from .resources.recordings import CallInfo, ResponseInfo, RecordPayload, TestRunInfo, UsageTokens
|
4
4
|
from .resources.sessions import SessionInfo, TraceInfo
|
5
|
+
from .support import CustomMetadata
|
5
6
|
|
6
7
|
__all__ = [
|
7
8
|
'CallInfo',
|
9
|
+
'CustomMetadata',
|
8
10
|
'Freeplay',
|
9
11
|
'PromptInfo',
|
10
12
|
'RecordPayload',
|
@@ -1,16 +1,37 @@
|
|
1
1
|
import copy
|
2
2
|
import json
|
3
3
|
import logging
|
4
|
+
import warnings
|
4
5
|
from abc import ABC, abstractmethod
|
5
6
|
from dataclasses import asdict, dataclass
|
6
7
|
from pathlib import Path
|
7
|
-
from typing import
|
8
|
-
|
9
|
-
|
8
|
+
from typing import (
|
9
|
+
Any,
|
10
|
+
Dict,
|
11
|
+
List,
|
12
|
+
Optional,
|
13
|
+
Protocol,
|
14
|
+
Sequence,
|
15
|
+
TypedDict,
|
16
|
+
Union,
|
17
|
+
cast,
|
18
|
+
runtime_checkable,
|
19
|
+
)
|
20
|
+
|
21
|
+
from freeplay.errors import (
|
22
|
+
FreeplayClientError,
|
23
|
+
FreeplayConfigurationError,
|
24
|
+
log_freeplay_client_warning,
|
25
|
+
)
|
10
26
|
from freeplay.llm_parameters import LLMParameters
|
11
27
|
from freeplay.model import InputVariables
|
12
|
-
from freeplay.support import
|
13
|
-
|
28
|
+
from freeplay.support import (
|
29
|
+
CallSupport,
|
30
|
+
PromptTemplate,
|
31
|
+
PromptTemplateMetadata,
|
32
|
+
PromptTemplates,
|
33
|
+
ToolSchema,
|
34
|
+
)
|
14
35
|
from freeplay.utils import bind_template_variables, convert_provider_message_to_dict
|
15
36
|
|
16
37
|
logger = logging.getLogger(__name__)
|
@@ -33,16 +54,30 @@ class UnsupportedToolSchemaError(FreeplayConfigurationError):
|
|
33
54
|
|
34
55
|
# Models ==
|
35
56
|
|
36
|
-
# A content block
|
57
|
+
# A content block compatible with stainless generated SDKs (such as Anthropic
|
58
|
+
# and OpenAI). This lets us generate a dictionary from the stainless classes
|
59
|
+
# correctly. Intentionally over-permissive to allow schema evolution by the
|
60
|
+
# providers.
|
37
61
|
@runtime_checkable
|
38
|
-
class
|
62
|
+
class ProviderMessageProtocol(Protocol):
|
39
63
|
def model_dump(self) -> Dict[str, Any]:
|
40
64
|
pass
|
41
65
|
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
67
|
+
class MessageDict(TypedDict):
|
68
|
+
role: str
|
69
|
+
content: Any
|
70
|
+
|
71
|
+
|
72
|
+
# This type represents a struct or dict containing a role and content. The role
|
73
|
+
# should be one of user, assistant or system. This type should be compatible
|
74
|
+
# with OpenAI and Anthropic's message format, as well as most other SDKs. If
|
75
|
+
# not using a common provider, use {'content': str, 'role': str} to record. If
|
76
|
+
# using a common provider, this is usually the `.content` field.
|
77
|
+
ProviderMessage = Union[MessageDict, Dict[str, Any], ProviderMessageProtocol]
|
78
|
+
|
79
|
+
# DEPRECATED: Use ProviderMessage instead
|
80
|
+
GenericProviderMessage = ProviderMessage
|
46
81
|
|
47
82
|
|
48
83
|
# SDK-Exposed Classes
|
@@ -69,7 +104,7 @@ class FormattedPrompt:
|
|
69
104
|
formatted_prompt_text: Optional[str] = None,
|
70
105
|
tool_schema: Optional[List[Dict[str, Any]]] = None
|
71
106
|
):
|
72
|
-
# These two definitions allow us to operate on typed fields
|
107
|
+
# These two definitions allow us to operate on typed fields until we expose them as Any for client use.
|
73
108
|
self._llm_prompt = formatted_prompt
|
74
109
|
self._tool_schema = tool_schema
|
75
110
|
|
@@ -81,11 +116,18 @@ class FormattedPrompt:
|
|
81
116
|
(message['content'] for message in messages if message['role'] == 'system'), None)
|
82
117
|
self.system_content = maybe_system_content
|
83
118
|
|
84
|
-
|
85
|
-
|
119
|
+
self._messages = messages
|
120
|
+
|
121
|
+
@property
|
122
|
+
def messages(self) -> List[Dict[str, str]]:
|
123
|
+
warnings.warn(
|
124
|
+
"The 'messages' attribute is deprecated and will be removed in a future version. It is not formatted for the provider. Use 'llm_prompt' instead.",
|
125
|
+
DeprecationWarning,
|
126
|
+
stacklevel=2,
|
127
|
+
)
|
128
|
+
return self._messages
|
86
129
|
|
87
130
|
@property
|
88
|
-
# We know this is a list of dict[str,str], but we use Any to avoid typing issues with client SDK libraries, which require strict TypedDict.
|
89
131
|
def llm_prompt(self) -> Any:
|
90
132
|
return self._llm_prompt
|
91
133
|
|
@@ -93,12 +135,9 @@ class FormattedPrompt:
|
|
93
135
|
def tool_schema(self) -> Any:
|
94
136
|
return self._tool_schema
|
95
137
|
|
96
|
-
def all_messages(
|
97
|
-
self,
|
98
|
-
new_message: GenericProviderMessage
|
99
|
-
) -> List[Dict[str, Any]]:
|
138
|
+
def all_messages(self, new_message: ProviderMessage) -> List[Dict[str, Any]]:
|
100
139
|
converted_message = convert_provider_message_to_dict(new_message)
|
101
|
-
return self.
|
140
|
+
return self._messages + [converted_message]
|
102
141
|
|
103
142
|
|
104
143
|
class BoundPrompt:
|
@@ -117,7 +156,13 @@ class BoundPrompt:
|
|
117
156
|
flavor_name: str,
|
118
157
|
messages: List[Dict[str, str]]
|
119
158
|
) -> Union[str, List[Dict[str, str]]]:
|
120
|
-
if flavor_name in [
|
159
|
+
if flavor_name in [
|
160
|
+
'azure_openai_chat',
|
161
|
+
'openai_chat',
|
162
|
+
'baseten_mistral_chat',
|
163
|
+
'mistral_chat',
|
164
|
+
'perplexity_chat'
|
165
|
+
]:
|
121
166
|
# We need a deepcopy here to avoid referential equality with the llm_prompt
|
122
167
|
return copy.deepcopy(messages)
|
123
168
|
elif flavor_name == 'anthropic_chat':
|
@@ -211,7 +256,11 @@ class TemplatePrompt:
|
|
211
256
|
self.tool_schema = tool_schema
|
212
257
|
self.messages = messages
|
213
258
|
|
214
|
-
def bind(
|
259
|
+
def bind(
|
260
|
+
self,
|
261
|
+
variables: InputVariables,
|
262
|
+
history: Optional[Sequence[ProviderMessage]] = None,
|
263
|
+
) -> BoundPrompt:
|
215
264
|
# check history for a system message
|
216
265
|
history_clean = []
|
217
266
|
if history:
|
@@ -521,13 +570,13 @@ class Prompts:
|
|
521
570
|
return TemplatePrompt(prompt_info, prompt.content, prompt.tool_schema)
|
522
571
|
|
523
572
|
def get_formatted(
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
573
|
+
self,
|
574
|
+
project_id: str,
|
575
|
+
template_name: str,
|
576
|
+
environment: str,
|
577
|
+
variables: InputVariables,
|
578
|
+
history: Optional[Sequence[ProviderMessage]] = None,
|
579
|
+
flavor_name: Optional[str] = None,
|
531
580
|
) -> FormattedPrompt:
|
532
581
|
bound_prompt = self.get(
|
533
582
|
project_id=project_id,
|
@@ -3,9 +3,7 @@ from dataclasses import dataclass
|
|
3
3
|
from typing import Optional, Dict, Union
|
4
4
|
|
5
5
|
from freeplay.errors import FreeplayClientError
|
6
|
-
from freeplay.support import CallSupport
|
7
|
-
|
8
|
-
CustomMetadata = Optional[Dict[str, Union[str, int, float, bool]]]
|
6
|
+
from freeplay.support import CallSupport, CustomMetadata
|
9
7
|
|
10
8
|
|
11
9
|
@dataclass
|
@@ -18,6 +16,8 @@ class TraceInfo:
|
|
18
16
|
session_id: str
|
19
17
|
trace_id: str
|
20
18
|
input: Optional[str] = None
|
19
|
+
agent_name: Optional[str] = None
|
20
|
+
custom_metadata: CustomMetadata = None
|
21
21
|
_call_support: CallSupport
|
22
22
|
|
23
23
|
def __init__(
|
@@ -26,16 +26,34 @@ class TraceInfo:
|
|
26
26
|
session_id: str,
|
27
27
|
_call_support: CallSupport,
|
28
28
|
input: Optional[str] = None,
|
29
|
+
agent_name: Optional[str] = None,
|
30
|
+
custom_metadata: CustomMetadata = None,
|
29
31
|
):
|
30
32
|
self.trace_id = trace_id
|
31
33
|
self.session_id = session_id
|
32
34
|
self.input = input
|
35
|
+
self.agent_name = agent_name
|
36
|
+
self.custom_metadata = custom_metadata
|
33
37
|
self._call_support = _call_support
|
34
38
|
|
35
|
-
def record_output(
|
39
|
+
def record_output(
|
40
|
+
self,
|
41
|
+
project_id: str,
|
42
|
+
output: str,
|
43
|
+
eval_results: Optional[Dict[str, Union[bool, float]]] = None
|
44
|
+
) -> None:
|
36
45
|
if self.input is None:
|
37
46
|
raise FreeplayClientError("Input must be set before recording output")
|
38
|
-
self._call_support.record_trace(
|
47
|
+
self._call_support.record_trace(
|
48
|
+
project_id,
|
49
|
+
self.session_id,
|
50
|
+
self.trace_id,
|
51
|
+
self.input,
|
52
|
+
output,
|
53
|
+
agent_name=self.agent_name,
|
54
|
+
custom_metadata=self.custom_metadata,
|
55
|
+
eval_results=eval_results
|
56
|
+
)
|
39
57
|
|
40
58
|
|
41
59
|
@dataclass
|
@@ -53,19 +71,34 @@ class Session:
|
|
53
71
|
def session_info(self) -> SessionInfo:
|
54
72
|
return self._session_info
|
55
73
|
|
56
|
-
def create_trace(
|
74
|
+
def create_trace(
|
75
|
+
self,
|
76
|
+
input: str,
|
77
|
+
agent_name: Optional[str] = None,
|
78
|
+
custom_metadata: CustomMetadata = None
|
79
|
+
) -> TraceInfo:
|
57
80
|
return TraceInfo(
|
58
81
|
trace_id=str(uuid.uuid4()),
|
59
82
|
session_id=self.session_id,
|
60
83
|
input=input,
|
84
|
+
agent_name=agent_name,
|
85
|
+
custom_metadata=custom_metadata,
|
61
86
|
_call_support=self._call_support
|
62
87
|
)
|
63
88
|
|
64
|
-
def restore_trace(
|
89
|
+
def restore_trace(
|
90
|
+
self,
|
91
|
+
trace_id: uuid.UUID,
|
92
|
+
input: Optional[str],
|
93
|
+
agent_name: Optional[str] = None,
|
94
|
+
custom_metadata: CustomMetadata = None
|
95
|
+
) -> TraceInfo:
|
65
96
|
return TraceInfo(
|
66
97
|
trace_id=str(trace_id),
|
67
98
|
session_id=self.session_id,
|
68
99
|
input=input,
|
100
|
+
agent_name=agent_name,
|
101
|
+
custom_metadata=custom_metadata,
|
69
102
|
_call_support=self._call_support
|
70
103
|
)
|
71
104
|
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import json
|
2
1
|
from dataclasses import dataclass
|
3
2
|
from json import JSONEncoder
|
4
3
|
from typing import Optional, Dict, Any, List, Union
|
@@ -8,6 +7,8 @@ from freeplay.api_support import try_decode
|
|
8
7
|
from freeplay.errors import freeplay_response_error, FreeplayServerError
|
9
8
|
from freeplay.model import InputVariables, FeedbackValue, NormalizedMessage
|
10
9
|
|
10
|
+
CustomMetadata = Optional[Dict[str, Union[str, int, float, bool]]]
|
11
|
+
|
11
12
|
|
12
13
|
@dataclass
|
13
14
|
class PromptTemplateMetadata:
|
@@ -17,12 +18,14 @@ class PromptTemplateMetadata:
|
|
17
18
|
params: Optional[Dict[str, Any]] = None
|
18
19
|
provider_info: Optional[Dict[str, Any]] = None
|
19
20
|
|
21
|
+
|
20
22
|
@dataclass
|
21
23
|
class ToolSchema:
|
22
24
|
name: str
|
23
25
|
description: str
|
24
26
|
parameters: Dict[str, Any]
|
25
27
|
|
28
|
+
|
26
29
|
@dataclass
|
27
30
|
class PromptTemplate:
|
28
31
|
prompt_template_id: str
|
@@ -40,6 +43,7 @@ class PromptTemplate:
|
|
40
43
|
class PromptTemplates:
|
41
44
|
prompt_templates: List[PromptTemplate]
|
42
45
|
|
46
|
+
|
43
47
|
@dataclass
|
44
48
|
class SummaryStatistics:
|
45
49
|
auto_evaluation: Dict[str, Any]
|
@@ -88,8 +92,10 @@ class TestRunRetrievalResponse:
|
|
88
92
|
human_evaluation=summary_statistics['human_evaluation']
|
89
93
|
)
|
90
94
|
|
95
|
+
|
91
96
|
class DatasetTestCaseRequest:
|
92
|
-
def __init__(self, history: Optional[List[NormalizedMessage]], inputs: InputVariables,
|
97
|
+
def __init__(self, history: Optional[List[NormalizedMessage]], inputs: InputVariables,
|
98
|
+
metadata: Optional[Dict[str, str]], output: Optional[str]) -> None:
|
93
99
|
self.history: Optional[List[NormalizedMessage]] = history
|
94
100
|
self.inputs: InputVariables = inputs
|
95
101
|
self.metadata: Optional[Dict[str, str]] = metadata
|
@@ -104,6 +110,7 @@ class DatasetTestCaseResponse:
|
|
104
110
|
self.history: Optional[List[NormalizedMessage]] = test_case.get('history')
|
105
111
|
self.metadata: Optional[Dict[str, str]] = test_case.get('metadata')
|
106
112
|
|
113
|
+
|
107
114
|
class DatasetTestCasesRetrievalResponse:
|
108
115
|
def __init__(self, test_cases: List[Dict[str, Any]]) -> None:
|
109
116
|
self.test_cases = [
|
@@ -111,6 +118,7 @@ class DatasetTestCasesRetrievalResponse:
|
|
111
118
|
for test_case in test_cases
|
112
119
|
]
|
113
120
|
|
121
|
+
|
114
122
|
class CallSupport:
|
115
123
|
def __init__(
|
116
124
|
self,
|
@@ -256,13 +264,26 @@ class CallSupport:
|
|
256
264
|
summary_statistics=json_dom['summary_statistics']
|
257
265
|
)
|
258
266
|
|
259
|
-
def record_trace(
|
267
|
+
def record_trace(
|
268
|
+
self,
|
269
|
+
project_id: str,
|
270
|
+
session_id: str,
|
271
|
+
trace_id: str,
|
272
|
+
input: str,
|
273
|
+
output: str,
|
274
|
+
agent_name: Optional[str] = None,
|
275
|
+
custom_metadata: CustomMetadata = None,
|
276
|
+
eval_results: Optional[Dict[str, Union[bool, float]]] = None
|
277
|
+
) -> None:
|
260
278
|
response = api_support.post_raw(
|
261
279
|
self.freeplay_api_key,
|
262
280
|
f'{self.api_base}/v2/projects/{project_id}/sessions/{session_id}/traces/id/{trace_id}',
|
263
281
|
{
|
282
|
+
'agent_name': agent_name,
|
264
283
|
'input': input,
|
265
|
-
'output': output
|
284
|
+
'output': output,
|
285
|
+
'custom_metadata': custom_metadata,
|
286
|
+
'eval_results': eval_results,
|
266
287
|
}
|
267
288
|
)
|
268
289
|
if response.status_code != 201:
|
@@ -277,7 +298,13 @@ class CallSupport:
|
|
277
298
|
raise freeplay_response_error('Error while deleting session.', response)
|
278
299
|
|
279
300
|
def create_test_cases(self, project_id: str, dataset_id: str, test_cases: List[DatasetTestCaseRequest]) -> None:
|
280
|
-
examples = [
|
301
|
+
examples = [
|
302
|
+
{
|
303
|
+
"history": test_case.history,
|
304
|
+
"output": test_case.output,
|
305
|
+
"metadata": test_case.metadata,
|
306
|
+
"inputs": test_case.inputs
|
307
|
+
} for test_case in test_cases]
|
281
308
|
payload: Dict[str, Any] = {"examples": examples}
|
282
309
|
url = f'{self.api_base}/v2/projects/{project_id}/datasets/id/{dataset_id}/test-cases'
|
283
310
|
|
@@ -295,5 +322,13 @@ class CallSupport:
|
|
295
322
|
json_dom = response.json()
|
296
323
|
|
297
324
|
return DatasetTestCasesRetrievalResponse(
|
298
|
-
test_cases=[
|
325
|
+
test_cases=[
|
326
|
+
{
|
327
|
+
"history": jsn["history"],
|
328
|
+
"id": jsn["id"],
|
329
|
+
"output": jsn["output"],
|
330
|
+
"values": jsn["values"],
|
331
|
+
"metadata": jsn["metadata"] if 'metadata' in jsn.keys() else None
|
332
|
+
} for jsn in json_dom
|
333
|
+
]
|
299
334
|
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|