latitude-sdk 2.1.1__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/PKG-INFO +1 -1
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/pyproject.toml +1 -1
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/client/client.py +1 -1
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/client/payloads.py +10 -17
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/client/router.py +34 -31
- latitude_sdk-3.0.0/src/latitude_sdk/sdk/evaluations.py +58 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/latitude.py +1 -2
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/types.py +11 -23
- latitude_sdk-2.1.1/tests/evaluations/create_result_test.py → latitude_sdk-3.0.0/tests/evaluations/annotate_test.py +15 -12
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/utils/fixtures.py +66 -36
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/uv.lock +176 -175
- latitude_sdk-2.1.1/src/latitude_sdk/sdk/evaluations.py +0 -70
- latitude_sdk-2.1.1/tests/evaluations/trigger_test.py +0 -52
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/.gitignore +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/.python-version +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/README.md +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/scripts/format.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/scripts/lint.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/scripts/test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/client/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/env/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/env/env.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/py.typed +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/errors.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/logs.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/prompts.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/util/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/util/utils.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/evaluations/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/logs/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/logs/create_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/chat_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/get_all_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/get_or_create_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/get_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/render_chain_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/render_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/prompts/run_test.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/utils/__init__.py +0 -0
- {latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/utils/utils.py +0 -0
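The headline change in 3.0.0 is a rework of the evaluations API: the old trigger/create-result flow (the TriggerEvaluation* and CreateEvaluationResult* types and methods) is replaced by a single annotate call. A minimal before/after sketch in Python — the 2.1.1 call is reconstructed from the removed test, so its exact argument order is an assumption:

# latitude-sdk 2.1.1 (removed in 3.0.0; signature assumed from the old test)
result = await sdk.evaluations.create_result(conversation_uuid, evaluation_uuid, True, options)

# latitude-sdk 3.0.0 (signature taken from the new sdk/evaluations.py below)
result = await sdk.evaluations.annotate(
    conversation_uuid,
    evaluation_uuid,
    1,  # integer score replaces the old str/bool/int result
    AnnotateEvaluationOptions(reason="Because Yes"),
)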
{latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: latitude-sdk
-Version: 2.1.1
+Version: 3.0.0
 Summary: Latitude SDK for Python
 Project-URL: repository, https://github.com/latitude-dev/latitude-llm/tree/main/packages/sdks/python
 Project-URL: homepage, https://github.com/latitude-dev/latitude-llm/tree/main/packages/sdks/python#readme
{latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/client/payloads.py

@@ -75,21 +75,17 @@ class EvaluationRequestParams(Model):
     conversation_uuid: str
 
 
-class TriggerEvaluationRequestParams(EvaluationRequestParams, Model):
-    pass
-
-
-class TriggerEvaluationRequestBody(Model):
-    evaluation_uuids: Optional[List[str]] = Field(default=None, alias=str("evaluationUuids"))
+class AnnotateEvaluationRequestParams(EvaluationRequestParams, Model):
+    evaluation_uuid: str
 
 
-class CreateEvaluationResultRequestParams(EvaluationRequestParams, Model):
-    evaluation_uuid: str
+class AnnotateEvaluationRequestBody(Model):
+    score: int
 
+    class Metadata(Model):
+        reason: str
 
-class CreateEvaluationResultRequestBody(Model):
-    result: Union[str, bool, int]
-    reason: str
+    metadata: Optional[Metadata] = None
 
 
 RequestParams = Union[
@@ -99,8 +95,7 @@ RequestParams = Union[
     RunPromptRequestParams,
     ChatPromptRequestParams,
     CreateLogRequestParams,
-    TriggerEvaluationRequestParams,
-    CreateEvaluationResultRequestParams,
+    AnnotateEvaluationRequestParams,
 ]
 
 
@@ -109,8 +104,7 @@ RequestBody = Union[
     RunPromptRequestBody,
     ChatPromptRequestBody,
     CreateLogRequestBody,
-    TriggerEvaluationRequestBody,
-    CreateEvaluationResultRequestBody,
+    AnnotateEvaluationRequestBody,
 ]
 
 
@@ -121,5 +115,4 @@ class RequestHandler(StrEnum):
     RunPrompt = "RUN_PROMPT"
     ChatPrompt = "CHAT_PROMPT"
     CreateLog = "CREATE_LOG"
-    TriggerEvaluation = "TRIGGER_EVALUATION"
-    CreateEvaluationResult = "CREATE_EVALUATION_RESULT"
+    AnnotateEvaluation = "ANNOTATE_EVALUATION"
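Assuming Model wraps a pydantic v2 BaseModel (the Field(alias=...) pattern above suggests it), the new body type serializes to the wire format the annotate endpoint expects. A sketch, not code from the package:

body = AnnotateEvaluationRequestBody(
    score=1,
    metadata=AnnotateEvaluationRequestBody.Metadata(reason="Because Yes"),
)
# With by_alias=True the dump matches the HTTP body asserted in the new tests:
# {"score": 1, "metadata": {"reason": "Because Yes"}}
print(body.model_dump(by_alias=True))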
{latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/client/router.py

@@ -1,8 +1,8 @@
 from typing import Callable, Optional, Tuple
 
 from latitude_sdk.client.payloads import (
+    AnnotateEvaluationRequestParams,
     ChatPromptRequestParams,
-    CreateEvaluationResultRequestParams,
     CreateLogRequestParams,
     GetAllPromptRequestParams,
     GetOrCreatePromptRequestParams,
@@ -10,7 +10,6 @@ from latitude_sdk.client.payloads import (
     RequestHandler,
     RequestParams,
     RunPromptRequestParams,
-    TriggerEvaluationRequestParams,
 )
 from latitude_sdk.sdk.types import GatewayOptions
 from latitude_sdk.util import Model
@@ -40,26 +39,35 @@ class Router:
         if handler == RequestHandler.GetAllPrompts:
             assert isinstance(params, GetAllPromptRequestParams)
 
-            return (
-                "GET",
-                self.prompts(project_id=params.project_id, version_uuid=params.version_uuid).all_prompts,
-            )
+            return (
+                "GET",
+                self.prompts(
+                    project_id=params.project_id,
+                    version_uuid=params.version_uuid,
+                ).all_prompts,
+            )
 
         elif handler == RequestHandler.GetOrCreatePrompt:
             assert isinstance(params, GetOrCreatePromptRequestParams)
 
-            return (
-                "POST",
-                self.prompts(project_id=params.project_id, version_uuid=params.version_uuid).get_or_create,
-            )
+            return (
+                "POST",
+                self.prompts(
+                    project_id=params.project_id,
+                    version_uuid=params.version_uuid,
+                ).get_or_create,
+            )
 
         elif handler == RequestHandler.RunPrompt:
             assert isinstance(params, RunPromptRequestParams)
 
-            return (
-                "POST",
-                self.prompts(project_id=params.project_id, version_uuid=params.version_uuid).run,
-            )
+            return (
+                "POST",
+                self.prompts(
+                    project_id=params.project_id,
+                    version_uuid=params.version_uuid,
+                ).run,
+            )
 
         elif handler == RequestHandler.ChatPrompt:
             assert isinstance(params, ChatPromptRequestParams)
@@ -69,36 +77,31 @@ class Router:
         elif handler == RequestHandler.CreateLog:
             assert isinstance(params, CreateLogRequestParams)
 
-            return (
-                "POST",
-                self.prompts(project_id=params.project_id, version_uuid=params.version_uuid).logs,
-            )
-
-        elif handler == RequestHandler.TriggerEvaluation:
-            assert isinstance(params, TriggerEvaluationRequestParams)
-
-            return "POST", self.conversations().evaluate(params.conversation_uuid)
+            return (
+                "POST",
+                self.prompts(
+                    project_id=params.project_id,
+                    version_uuid=params.version_uuid,
+                ).logs,
+            )
 
-        elif handler == RequestHandler.CreateEvaluationResult:
-            assert isinstance(params, CreateEvaluationResultRequestParams)
+        elif handler == RequestHandler.AnnotateEvaluation:
+            assert isinstance(params, AnnotateEvaluationRequestParams)
 
-            return "POST", self.conversations().evaluation_result(params.conversation_uuid, params.evaluation_uuid)
+            return "POST", self.conversations().annotate(params.conversation_uuid, params.evaluation_uuid)
 
         raise TypeError(f"Unknown handler: {handler}")
 
     class Conversations(Model):
         chat: Callable[[str], str]
-        evaluate: Callable[[str], str]
-        evaluation_result: Callable[[str, str], str]
+        annotate: Callable[[str, str], str]
 
     def conversations(self) -> Conversations:
        base_url = f"{self.options.gateway.base_url}/conversations"
 
        return self.Conversations(
            chat=lambda uuid: f"{base_url}/{uuid}/chat",
-            evaluate=lambda uuid: f"{base_url}/{uuid}/evaluate",
-            evaluation_result=lambda conversation_uuid,
-            evaluation_uuid: f"{base_url}/{conversation_uuid}/evaluations/{evaluation_uuid}/evaluation-results",
+            annotate=lambda uuid, evaluation_uuid: f"{base_url}/{uuid}/evaluations/{evaluation_uuid}/annotate",
        )
 
    class Prompts(Model):
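Net effect on the gateway routes (the evaluation-results path is verbatim from the removed lambda; the /evaluate path is inferred from the removed evaluate callable):

# 2.1.1 (removed):
#   POST {base_url}/conversations/{uuid}/evaluate
#   POST {base_url}/conversations/{conversation_uuid}/evaluations/{evaluation_uuid}/evaluation-results
# 3.0.0:
#   POST {base_url}/conversations/{uuid}/evaluations/{evaluation_uuid}/annotate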
latitude_sdk-3.0.0/src/latitude_sdk/sdk/evaluations.py

@@ -0,0 +1,58 @@
+from datetime import datetime
+from typing import Any, Optional, Union
+
+from latitude_sdk.client import (
+    AnnotateEvaluationRequestBody,
+    AnnotateEvaluationRequestParams,
+    Client,
+    RequestHandler,
+)
+from latitude_sdk.sdk.types import SdkOptions
+from latitude_sdk.util import Field, Model
+
+
+class AnnotateEvaluationOptions(Model):
+    reason: str
+
+
+class AnnotateEvaluationResult(Model):
+    uuid: str
+    score: int
+    normalized_score: int = Field(alias=str("normalizedScore"))
+    metadata: dict[str, Any]
+    has_passed: bool = Field(alias=str("hasPassed"))
+    created_at: datetime = Field(alias=str("createdAt"))
+    updated_at: datetime = Field(alias=str("updatedAt"))
+    version_uuid: str = Field(alias=str("versionUuid"))
+    error: Optional[Union[str, None]] = None
+
+
+class Evaluations:
+    _options: SdkOptions
+    _client: Client
+
+    def __init__(self, client: Client, options: SdkOptions):
+        self._options = options
+        self._client = client
+
+    async def annotate(
+        self,
+        uuid: str,
+        evaluation_uuid: str,
+        score: int,
+        options: Optional[AnnotateEvaluationOptions] = None,
+    ) -> AnnotateEvaluationResult:
+        options = AnnotateEvaluationOptions(**{**dict(self._options), **dict(options or {})})
+
+        async with self._client.request(
+            handler=RequestHandler.AnnotateEvaluation,
+            params=AnnotateEvaluationRequestParams(
+                conversation_uuid=uuid,
+                evaluation_uuid=evaluation_uuid,
+            ),
+            body=AnnotateEvaluationRequestBody(
+                score=score,
+                metadata=(AnnotateEvaluationRequestBody.Metadata(reason=options.reason) if options.reason else None),
+            ),
+        ) as response:
+            return AnnotateEvaluationResult.model_validate_json(response.content)
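A minimal usage sketch for the new Evaluations.annotate. The setup uses hypothetical credentials, and Latitude/LatitudeOptions come from elsewhere in the package (not part of this diff):

import asyncio

from latitude_sdk import AnnotateEvaluationOptions, Latitude, LatitudeOptions


async def main():
    # Hypothetical setup; replace with a real API key and project id.
    sdk = Latitude("my-api-key", LatitudeOptions(project_id=1))

    result = await sdk.evaluations.annotate(
        "conversation-uuid",  # conversation being scored
        "evaluation-uuid",  # evaluation to attach the score to
        1,  # integer score
        AnnotateEvaluationOptions(reason="Human review: answer is correct"),
    )
    print(result.score, result.normalized_score, result.has_passed)


asyncio.run(main())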
{latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/latitude.py

@@ -34,7 +34,7 @@ DEFAULT_INTERNAL_OPTIONS = InternalOptions(
     source=LogSources.Api,
     retries=3,
     delay=0.5,
-    timeout=
+    timeout=None,
 )
 
 
@@ -63,7 +63,6 @@ class Latitude:
         assert self._options.internal.source is not None
         assert self._options.internal.retries is not None
         assert self._options.internal.delay is not None
-        assert self._options.internal.timeout is not None
 
         self._client = Client(
             ClientOptions(
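This is a behavior change: the default internal timeout is now None (no client-side timeout), and the matching assertion is dropped. Callers that relied on a finite default may want to set one explicitly. A sketch, assuming InternalOptions is importable and reachable through LatitudeOptions(internal=...) — the diff itself only shows options.internal.timeout:

from latitude_sdk import InternalOptions, Latitude, LatitudeOptions

# Hypothetical: opt back into a finite 30-second HTTP timeout.
sdk = Latitude("my-api-key", LatitudeOptions(internal=InternalOptions(timeout=30)))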
{latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/src/latitude_sdk/sdk/types.py

@@ -1,5 +1,15 @@
 from datetime import datetime
-from typing import Any, Callable, List, Literal, Optional, Protocol, Sequence, Union, runtime_checkable
+from typing import (
+    Any,
+    Callable,
+    List,
+    Literal,
+    Optional,
+    Protocol,
+    Sequence,
+    Union,
+    runtime_checkable,
+)
 
 from promptl_ai import Message, MessageLike
 
@@ -230,28 +240,6 @@ class Log(Model):
     updated_at: datetime = Field(alias=str("updatedAt"))
 
 
-class EvaluationResultType(StrEnum):
-    Boolean = "evaluation_resultable_booleans"
-    Text = "evaluation_resultable_texts"
-    Number = "evaluation_resultable_numbers"
-
-
-class EvaluationResult(Model):
-    id: int
-    uuid: str
-    evaluation_id: int = Field(alias=str("evaluationId"))
-    document_log_id: int = Field(alias=str("documentLogId"))
-    evaluated_provider_log_id: Optional[int] = Field(default=None, alias=str("evaluatedProviderLogId"))
-    evaluation_provider_log_id: Optional[int] = Field(default=None, alias=str("evaluationProviderLogId"))
-    resultable_type: Optional[EvaluationResultType] = Field(default=None, alias=str("resultableType"))
-    resultable_id: Optional[int] = Field(default=None, alias=str("resultableId"))
-    result: Optional[Union[str, bool, int]] = None
-    source: Optional[LogSources] = None
-    reason: Optional[str] = None
-    created_at: datetime = Field(alias=str("createdAt"))
-    updated_at: datetime = Field(alias=str("updatedAt"))
-
-
 class StreamCallbacks(Model):
     @runtime_checkable
     class OnEvent(Protocol):
latitude_sdk-2.1.1/tests/evaluations/create_result_test.py → latitude_sdk-3.0.0/tests/evaluations/annotate_test.py

@@ -2,7 +2,7 @@ from typing import List, cast
 
 import httpx
 
-from latitude_sdk import CreateEvaluationResultOptions, CreateEvaluationResultResult
+from latitude_sdk import AnnotateEvaluationOptions, AnnotateEvaluationResult
 from tests.utils import TestCase, fixtures
 
 
@@ -10,13 +10,13 @@ class TestCreateEvaluationResult(TestCase):
     async def test_success(self):
         conversation_uuid = "conversation-uuid"
         evaluation_uuid = "evaluation-uuid"
-        options = CreateEvaluationResultOptions(reason="Because Yes")
-        endpoint = f"/conversations/{conversation_uuid}/evaluations/{evaluation_uuid}/evaluation-results"
+        options = AnnotateEvaluationOptions(reason="Because Yes")
+        endpoint = f"/conversations/{conversation_uuid}/evaluations/{evaluation_uuid}/annotate"
         endpoint_mock = self.gateway_mock.post(endpoint).mock(
             return_value=httpx.Response(200, json=fixtures.EVALUATION_RESULT_RESPONSE)
         )
 
-        result = await self.sdk.evaluations.create_result(conversation_uuid, evaluation_uuid, True, options)
+        result = await self.sdk.evaluations.annotate(conversation_uuid, evaluation_uuid, 1, options)
         request, _ = endpoint_mock.calls.last
 
         self.assert_requested(
@@ -24,24 +24,27 @@ class TestCreateEvaluationResult(TestCase):
             method="POST",
             endpoint=endpoint,
             body={
-                "result": True,
-                "reason": options.reason,
+                "score": 1,
+                "metadata": {"reason": options.reason},
             },
         )
         self.assertEqual(endpoint_mock.call_count, 1)
-        self.assertEqual(result, CreateEvaluationResultResult(**dict(fixtures.EVALUATION_RESULT)))
+        self.assertEqual(
+            result,
+            AnnotateEvaluationResult(**dict(fixtures.EVALUATION_RESULT)),
+        )
 
     async def test_fails(self):
         conversation_uuid = "conversation-uuid"
         evaluation_uuid = "evaluation-uuid"
-        options = CreateEvaluationResultOptions(reason="Because Yes")
-        endpoint = f"/conversations/{conversation_uuid}/evaluations/{evaluation_uuid}/evaluation-results"
+        options = AnnotateEvaluationOptions(reason="Because Yes")
+        endpoint = f"/conversations/{conversation_uuid}/evaluations/{evaluation_uuid}/annotate"
         endpoint_mock = self.gateway_mock.post(endpoint).mock(
             return_value=httpx.Response(500, json=fixtures.ERROR_RESPONSE)
         )
 
         with self.assertRaisesRegex(type(fixtures.ERROR), fixtures.ERROR.message):
-            await self.sdk.evaluations.create_result(conversation_uuid, evaluation_uuid, True, options)
+            await self.sdk.evaluations.annotate(conversation_uuid, evaluation_uuid, 1, options)
         requests = cast(List[httpx.Request], [request for request, _ in endpoint_mock.calls])  # type: ignore
 
         [
@@ -50,8 +53,8 @@ class TestCreateEvaluationResult(TestCase):
             method="POST",
             endpoint=endpoint,
             body={
-                "result": True,
-                "reason": options.reason,
+                "score": 1,
+                "metadata": {"reason": options.reason},
             },
         )
         for request in requests
{latitude_sdk-2.1.1 → latitude_sdk-3.0.0}/tests/utils/fixtures.py

@@ -16,6 +16,7 @@ from promptl_ai import (
 
 from latitude_sdk import (
     AGENT_END_TOOL_NAME,
+    AnnotateEvaluationResult,
     ApiError,
     ApiErrorCodes,
     ChainError,
@@ -29,8 +30,6 @@ from latitude_sdk import (
     ChainEventStepStarted,
     ChainEventToolsRequested,
     ChainTextResponse,
-    EvaluationResult,
-    EvaluationResultType,
     FinishedResult,
     FinishReason,
     Log,
@@ -173,36 +172,28 @@ EVALUATIONS = [
 ]
 
 EVALUATION_RESULT_RESPONSE: dict[str, Any] = {
-    "id": 31,
     "uuid": "e25a317b-c682-4c25-a704-a87ac79507c4",
-    "evaluationId": 31,
-    "documentLogId": 31,
-    "evaluatedProviderLogId": 31,
-    "evaluationProviderLogId": 31,
-    "resultableType": "evaluation_resultable_booleans",
-    "resultableId": 31,
-    "result": True,
-    "source": "api",
-    "reason": "Because Yes",
+    "score": 1,
+    "normalizedScore": 1,
+    "metadata": {"reason": "Because Yes"},
+    "hasPassed": True,
     "createdAt": "2025-01-01 00:00:00.000",
     "updatedAt": "2025-01-01 00:00:00.000",
+    "versionUuid": "e25a317b-c682-4c25-a704-a87ac79507c4",
+    "error": None,
 }
 
 
-EVALUATION_RESULT = EvaluationResult(
-    id=31,
+EVALUATION_RESULT = AnnotateEvaluationResult(
     uuid="e25a317b-c682-4c25-a704-a87ac79507c4",
-    evaluation_id=31,
-    document_log_id=31,
-    evaluated_provider_log_id=31,
-    evaluation_provider_log_id=31,
-    resultable_type=EvaluationResultType.Boolean,
-    resultable_id=31,
-    result=True,
-    source=LogSources.Api,
-    reason="Because Yes",
+    score=1,
+    normalized_score=1,
+    metadata={"reason": "Because Yes"},
+    has_passed=True,
     created_at=datetime(2025, 1, 1, 0, 0, 0, 0),
     updated_at=datetime(2025, 1, 1, 0, 0, 0, 0),
+    version_uuid="e25a317b-c682-4c25-a704-a87ac79507c4",
+    error=None,
 )
 
 CONVERSATION_EVENTS_STREAM: list[str] = [
@@ -843,7 +834,10 @@ CONVERSATION_EVENTS: list[StreamEvent] = [
         "finishReason": "stop",
         "isContinued": False,
         "experimental_providerMetadata": {"openai": {"reasoningTokens": 0, "cachedPromptTokens": 0}},
-        "response": {"timestamp": "2025-01-02T12:29:13.000Z", "modelId": "gpt-4o-mini-latest"},
+        "response": {
+            "timestamp": "2025-01-02T12:29:13.000Z",
+            "modelId": "gpt-4o-mini-latest",
+        },
         "usage": {"promptTokens": 31, "completionTokens": 9, "totalTokens": 40},
     },
     {
@@ -851,7 +845,10 @@ CONVERSATION_EVENTS: list[StreamEvent] = [
         "type": "finish",
         "finishReason": "stop",
         "experimental_providerMetadata": {"openai": {"reasoningTokens": 0, "cachedPromptTokens": 0}},
-        "response": {"timestamp": "2025-01-02T12:29:13.000Z", "modelId": "gpt-4o-mini-latest"},
+        "response": {
+            "timestamp": "2025-01-02T12:29:13.000Z",
+            "modelId": "gpt-4o-mini-latest",
+        },
         "usage": {"promptTokens": 31, "completionTokens": 9, "totalTokens": 40},
     },
     ChainEventProviderCompleted(
@@ -956,7 +953,10 @@ CONVERSATION_EVENTS: list[StreamEvent] = [
         "finishReason": "tool-calls",
         "isContinued": False,
         "experimental_providerMetadata": {"openai": {"reasoningTokens": 0, "cachedPromptTokens": 0}},
-        "response": {"timestamp": "2025-01-02T12:29:16.000Z", "modelId": "gpt-4o-mini-latest"},
+        "response": {
+            "timestamp": "2025-01-02T12:29:16.000Z",
+            "modelId": "gpt-4o-mini-latest",
+        },
         "usage": {"promptTokens": 61, "completionTokens": 9, "totalTokens": 70},
     },
     {
@@ -964,7 +964,10 @@ CONVERSATION_EVENTS: list[StreamEvent] = [
         "type": "finish",
         "finishReason": "tool-calls",
         "experimental_providerMetadata": {"openai": {"reasoningTokens": 0, "cachedPromptTokens": 0}},
-        "response": {"timestamp": "2025-01-02T12:29:16.000Z", "modelId": "gpt-4o-mini-latest"},
+        "response": {
+            "timestamp": "2025-01-02T12:29:16.000Z",
+            "modelId": "gpt-4o-mini-latest",
+        },
         "usage": {"promptTokens": 61, "completionTokens": 9, "totalTokens": 70},
     },
     ChainEventProviderCompleted(
@@ -1033,9 +1036,15 @@ CONVERSATION_EVENTS: list[StreamEvent] = [
             AssistantMessage(content=[TextContent(text="I should look at their decimals.")]),
         ],
         tools=[
-            ToolCall(id="toolu_01ARatRfRidTDshkg1UuQhW2", name="calculator", arguments={"expression": "9.9 > 9.11"}),
             ToolCall(
-                id="toolu_B0398l23AOdTDshkg1UuQhZ3", name="calculator", arguments={"expression": "9.9 less than 9.11"}
+                id="toolu_01ARatRfRidTDshkg1UuQhW2",
+                name="calculator",
+                arguments={"expression": "9.9 > 9.11"},
+            ),
+            ToolCall(
+                id="toolu_B0398l23AOdTDshkg1UuQhZ3",
+                name="calculator",
+                arguments={"expression": "9.9 less than 9.11"},
             ),
             ToolCall(
                 id="toolu_K12398312kjadbsadZ77JAS4",
@@ -1173,11 +1182,26 @@ CONVERSATION_FINISHED_RESULT_RESPONSE: dict[str, Any] = {
         },
     ],
     "conversation": [
-        {"role": "system", "content": [{"type": "text", "text": "Reason before answering."}]},
-        {"role": "user", "content": [{"type": "text", "text": "My question is: Is 9.9 greater than 9.11?"}]},
-        {"role": "assistant", "content": [{"type": "text", "text": "I should look at their decimals."}]},
-        {"role": "system", "content": [{"type": "text", "text": "Now answer succinctly."}]},
-        {"role": "user", "content": [{"type": "text", "text": "My question was: Is 9.9 greater than 9.11?"}]},
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "Reason before answering."}],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "My question is: Is 9.9 greater than 9.11?"}],
+        },
+        {
+            "role": "assistant",
+            "content": [{"type": "text", "text": "I should look at their decimals."}],
+        },
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "Now answer succinctly."}],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "My question was: Is 9.9 greater than 9.11?"}],
+        },
         {
             "role": "assistant",
             "content": [
@@ -1522,7 +1546,10 @@ FOLLOW_UP_CONVERSATION_EVENTS: list[StreamEvent] = [
         "finishReason": "stop",
         "isContinued": False,
         "experimental_providerMetadata": {"openai": {"reasoningTokens": 0, "cachedPromptTokens": 0}},
-        "response": {"timestamp": "2025-01-02T12:29:13.000Z", "modelId": "gpt-4o-mini-latest"},
+        "response": {
+            "timestamp": "2025-01-02T12:29:13.000Z",
+            "modelId": "gpt-4o-mini-latest",
+        },
         "usage": {"promptTokens": 77, "completionTokens": 3, "totalTokens": 80},
     },
     {
@@ -1530,7 +1557,10 @@ FOLLOW_UP_CONVERSATION_EVENTS: list[StreamEvent] = [
         "type": "finish",
         "finishReason": "stop",
         "experimental_providerMetadata": {"openai": {"reasoningTokens": 0, "cachedPromptTokens": 0}},
-        "response": {"timestamp": "2025-01-02T12:29:13.000Z", "modelId": "gpt-4o-mini-latest"},
+        "response": {
+            "timestamp": "2025-01-02T12:29:13.000Z",
+            "modelId": "gpt-4o-mini-latest",
+        },
         "usage": {"promptTokens": 77, "completionTokens": 3, "totalTokens": 80},
     },
     ChainEventProviderCompleted(
|