codemie-test-harness 0.1.136__py3-none-any.whl → 0.1.137__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of codemie-test-harness might be problematic. Click here for more details.
- codemie_test_harness/tests/__init__.py +1 -0
- codemie_test_harness/tests/conftest.py +17 -0
- codemie_test_harness/tests/service/test_assistant_service.py +349 -379
- codemie_test_harness/tests/service/test_datasource_service.py +276 -292
- codemie_test_harness/tests/service/test_integration_service.py +133 -122
- codemie_test_harness/tests/service/test_llm_service.py +16 -17
- codemie_test_harness/tests/service/test_task_service.py +108 -120
- codemie_test_harness/tests/service/test_user_service.py +36 -19
- codemie_test_harness/tests/service/test_workflow_execution_service.py +142 -169
- codemie_test_harness/tests/service/test_workflow_service.py +145 -144
- codemie_test_harness/tests/utils/assistant_utils.py +39 -4
- codemie_test_harness/tests/utils/llm_utils.py +9 -0
- codemie_test_harness/tests/utils/search_utils.py +11 -5
- codemie_test_harness/tests/utils/user_utils.py +9 -0
- codemie_test_harness/tests/utils/workflow_utils.py +34 -6
- {codemie_test_harness-0.1.136.dist-info → codemie_test_harness-0.1.137.dist-info}/METADATA +2 -2
- {codemie_test_harness-0.1.136.dist-info → codemie_test_harness-0.1.137.dist-info}/RECORD +19 -17
- {codemie_test_harness-0.1.136.dist-info → codemie_test_harness-0.1.137.dist-info}/WHEEL +0 -0
- {codemie_test_harness-0.1.136.dist-info → codemie_test_harness-0.1.137.dist-info}/entry_points.txt +0 -0
|
@@ -1,339 +1,306 @@
|
|
|
1
|
-
"""Integration tests for AssistantService."""
|
|
2
|
-
|
|
3
1
|
import os
|
|
4
2
|
import uuid
|
|
5
3
|
from datetime import datetime
|
|
6
4
|
|
|
7
5
|
import pytest
|
|
8
|
-
|
|
9
|
-
from codemie_sdk import CodeMieClient
|
|
10
6
|
from codemie_sdk.models.assistant import (
|
|
11
7
|
AssistantBase,
|
|
12
8
|
Assistant,
|
|
13
|
-
AssistantCreateRequest,
|
|
14
9
|
AssistantUpdateRequest,
|
|
15
10
|
AssistantEvaluationRequest,
|
|
16
11
|
ToolKitDetails,
|
|
17
12
|
ToolDetails,
|
|
18
|
-
AssistantChatRequest,
|
|
19
13
|
ChatMessage,
|
|
20
14
|
ChatRole,
|
|
21
15
|
BaseModelResponse,
|
|
22
16
|
)
|
|
23
|
-
from
|
|
17
|
+
from hamcrest import (
|
|
18
|
+
assert_that,
|
|
19
|
+
has_property,
|
|
20
|
+
has_item,
|
|
21
|
+
greater_than,
|
|
22
|
+
is_not,
|
|
23
|
+
all_of,
|
|
24
|
+
instance_of,
|
|
25
|
+
has_length,
|
|
26
|
+
equal_to,
|
|
27
|
+
starts_with,
|
|
28
|
+
is_,
|
|
29
|
+
any_of,
|
|
30
|
+
contains_string,
|
|
31
|
+
greater_than_or_equal_to,
|
|
32
|
+
less_than_or_equal_to,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
from codemie_test_harness.tests import PROJECT
|
|
24
36
|
from codemie_test_harness.tests.utils.base_utils import get_random_name
|
|
25
37
|
|
|
26
38
|
|
|
27
|
-
def
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
39
|
+
def validate_assistant_full_response(assistant):
|
|
40
|
+
assert_that(
|
|
41
|
+
assistant,
|
|
42
|
+
all_of(
|
|
43
|
+
instance_of(Assistant),
|
|
44
|
+
has_property("system_prompt"),
|
|
45
|
+
has_property("project"),
|
|
46
|
+
has_property("name"),
|
|
47
|
+
has_property("description"),
|
|
48
|
+
has_property("shared", instance_of(bool)),
|
|
49
|
+
has_property("is_react", instance_of(bool)),
|
|
50
|
+
has_property("is_global", instance_of(bool)),
|
|
51
|
+
has_property("system_prompt_history", instance_of(list)),
|
|
52
|
+
has_property("user_prompts", instance_of(list)),
|
|
53
|
+
has_property("context", instance_of(list)),
|
|
54
|
+
has_property("toolkits", instance_of(list)),
|
|
55
|
+
),
|
|
56
|
+
)
|
|
57
|
+
|
|
31
58
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
59
|
+
@pytest.mark.regression
|
|
60
|
+
def test_get_tools(assistant_utils):
|
|
61
|
+
toolkits = assistant_utils.get_assistant_tools()
|
|
62
|
+
|
|
63
|
+
assert_that(toolkits, instance_of(list))
|
|
64
|
+
assert_that(toolkits, has_length(greater_than(0)))
|
|
35
65
|
|
|
36
|
-
# Verify toolkit structure
|
|
37
66
|
toolkit = toolkits[0]
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
67
|
+
assert_that(
|
|
68
|
+
toolkit,
|
|
69
|
+
all_of(
|
|
70
|
+
instance_of(ToolKitDetails),
|
|
71
|
+
has_property("toolkit"),
|
|
72
|
+
has_property("tools", instance_of(list)),
|
|
73
|
+
),
|
|
74
|
+
)
|
|
42
75
|
|
|
43
|
-
# Verify tools have required fields
|
|
44
76
|
if toolkit.tools:
|
|
45
77
|
tool = toolkit.tools[0]
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
with pytest.raises(Exception):
|
|
55
|
-
invalid_client = CodeMieClient(
|
|
56
|
-
auth_server_url=os.getenv("AUTH_SERVER_URL"),
|
|
57
|
-
auth_client_id=os.getenv("AUTH_CLIENT_ID"),
|
|
58
|
-
auth_client_secret=os.getenv("AUTH_CLIENT_SECRET"),
|
|
59
|
-
auth_realm_name=os.getenv("AUTH_REALM_NAME"),
|
|
60
|
-
codemie_api_domain=os.getenv("CODEMIE_API_DOMAIN"),
|
|
61
|
-
verify_ssl=os.getenv("VERIFY_SSL").lower() == "true",
|
|
62
|
-
username="invalid",
|
|
63
|
-
password="invalid",
|
|
78
|
+
assert_that(
|
|
79
|
+
tool,
|
|
80
|
+
all_of(
|
|
81
|
+
instance_of(ToolDetails),
|
|
82
|
+
has_property("name"),
|
|
83
|
+
has_property("label"),
|
|
84
|
+
has_property("settings_config", instance_of(bool)),
|
|
85
|
+
),
|
|
64
86
|
)
|
|
65
|
-
invalid_client.assistants.get_tools()
|
|
66
|
-
|
|
67
87
|
|
|
68
|
-
def test_list_assistants_minimal(client: CodeMieClient):
|
|
69
|
-
"""Test listing assistants with minimal response."""
|
|
70
|
-
# Get list of assistants with minimal response (default)
|
|
71
|
-
assistants = client.assistants.list()
|
|
72
88
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
89
|
+
def test_list_assistants_minimal_response(assistant_utils):
|
|
90
|
+
assistants = assistant_utils.get_assistants()
|
|
91
|
+
assert_that(assistants, all_of(instance_of(list), has_length(greater_than(0))))
|
|
76
92
|
|
|
77
|
-
# Verify each assistant has the correct type and required fields
|
|
78
93
|
for assistant in assistants:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
94
|
+
assert_that(
|
|
95
|
+
assistant,
|
|
96
|
+
all_of(
|
|
97
|
+
instance_of(AssistantBase),
|
|
98
|
+
has_property("id"),
|
|
99
|
+
has_property("name"),
|
|
100
|
+
has_property("description"),
|
|
101
|
+
),
|
|
102
|
+
)
|
|
84
103
|
|
|
85
|
-
def test_list_assistants_full(client: CodeMieClient):
|
|
86
|
-
"""Test listing assistants with full response."""
|
|
87
|
-
# Get list of assistants with full response
|
|
88
|
-
assistants = client.assistants.list(minimal_response=False)
|
|
89
104
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
105
|
+
def test_list_assistants_full_response(assistant_utils):
|
|
106
|
+
assistants = assistant_utils.get_assistants(minimal_response=False)
|
|
107
|
+
assert_that(assistants, all_of(instance_of(list), has_length(greater_than(0))))
|
|
93
108
|
|
|
94
|
-
# Verify each assistant has the correct type and required fields
|
|
95
109
|
for assistant in assistants:
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
assert assistant.creator is not None
|
|
109
|
-
assert isinstance(assistant.user_abilities, list)
|
|
110
|
-
|
|
111
|
-
# Verify nested structures
|
|
112
|
-
assert isinstance(assistant.toolkits, list)
|
|
113
|
-
assert isinstance(assistant.context, list)
|
|
114
|
-
assert isinstance(assistant.system_prompt_history, list)
|
|
115
|
-
assert isinstance(assistant.user_prompts, list)
|
|
116
|
-
|
|
117
|
-
# Verify creator structure if present
|
|
118
|
-
if assistant.created_by:
|
|
119
|
-
assert assistant.created_by.user_id is not None
|
|
120
|
-
assert assistant.created_by.username is not None
|
|
121
|
-
assert assistant.created_by.name is not None
|
|
110
|
+
assert_that(
|
|
111
|
+
assistant,
|
|
112
|
+
all_of(
|
|
113
|
+
instance_of(Assistant),
|
|
114
|
+
has_property("id"),
|
|
115
|
+
has_property("llm_model_type"),
|
|
116
|
+
has_property("creator"),
|
|
117
|
+
has_property("user_abilities", instance_of(list)),
|
|
118
|
+
has_property("created_date", instance_of(datetime)),
|
|
119
|
+
),
|
|
120
|
+
"Assistant should have valid core properties",
|
|
121
|
+
)
|
|
122
122
|
|
|
123
|
-
|
|
124
|
-
assert isinstance(assistant.created_date, datetime)
|
|
123
|
+
validate_assistant_full_response(assistant)
|
|
125
124
|
|
|
125
|
+
if assistant.created_by:
|
|
126
|
+
assert_that(
|
|
127
|
+
assistant.created_by,
|
|
128
|
+
all_of(
|
|
129
|
+
has_property("user_id", is_not(None)),
|
|
130
|
+
has_property("username", is_not(None)),
|
|
131
|
+
has_property("name", is_not(None)),
|
|
132
|
+
),
|
|
133
|
+
"created_by should have valid properties when present",
|
|
134
|
+
)
|
|
126
135
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
# Get list of assistants with filters
|
|
136
|
+
|
|
137
|
+
def test_list_assistants_with_filters(assistant_utils):
|
|
130
138
|
filters = {"project": PROJECT, "shared": False}
|
|
131
|
-
assistants =
|
|
139
|
+
assistants = assistant_utils.get_assistants(minimal_response=False, filters=filters)
|
|
132
140
|
|
|
133
|
-
|
|
134
|
-
assert isinstance(assistants, list)
|
|
141
|
+
assert_that(assistants, instance_of(list))
|
|
135
142
|
|
|
136
|
-
# Verify each assistant matches the filter criteria
|
|
137
143
|
for assistant in assistants:
|
|
138
|
-
|
|
139
|
-
|
|
144
|
+
assert_that(assistant.project, equal_to(PROJECT))
|
|
145
|
+
assert_that(assistant.shared, is_(False))
|
|
140
146
|
|
|
141
147
|
|
|
142
|
-
def
|
|
143
|
-
"""Test assistant listing with pagination."""
|
|
148
|
+
def test_list_assistants_with_pagination(assistant_utils):
|
|
144
149
|
# Get first page with 5 items
|
|
145
|
-
page_1 =
|
|
146
|
-
|
|
150
|
+
page_1 = assistant_utils.get_assistants(page=0, per_page=5)
|
|
151
|
+
assert_that(len(page_1), less_than_or_equal_to(5))
|
|
147
152
|
|
|
148
153
|
# Get second page with 5 items
|
|
149
|
-
page_2 =
|
|
150
|
-
|
|
154
|
+
page_2 = assistant_utils.get_assistants(page=1, per_page=5)
|
|
155
|
+
assert_that(len(page_2), less_than_or_equal_to(5))
|
|
151
156
|
|
|
152
157
|
# Verify pages contain different assistants
|
|
153
158
|
if page_2: # Only if there are items on second page
|
|
154
|
-
|
|
159
|
+
assert_that(page_1[0].id, is_not(page_2[0].id))
|
|
155
160
|
|
|
156
161
|
|
|
157
|
-
def
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
visible_assistants = client.assistants.list(scope="visible_to_user")
|
|
161
|
-
assert isinstance(visible_assistants, list)
|
|
162
|
+
def test_list_assistants_with_different_scopes(assistant_utils):
|
|
163
|
+
visible_assistants = assistant_utils.get_assistants(scope="visible_to_user")
|
|
164
|
+
assert_that(visible_assistants, instance_of(list))
|
|
162
165
|
|
|
163
|
-
|
|
164
|
-
created_assistants
|
|
165
|
-
assert isinstance(created_assistants, list)
|
|
166
|
+
created_assistants = assistant_utils.get_assistants(scope="created_by_user")
|
|
167
|
+
assert_that(created_assistants, instance_of(list))
|
|
166
168
|
|
|
167
169
|
|
|
168
|
-
def
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
assistants = client.assistants.list()
|
|
172
|
-
assert len(assistants) > 0
|
|
170
|
+
def test_get_assistant_by_id(assistant_utils):
|
|
171
|
+
assistants = assistant_utils.get_assistants()
|
|
172
|
+
assert_that(assistants, has_length(greater_than(0)))
|
|
173
173
|
test_assistant_id = assistants[0].id
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
assistant
|
|
177
|
-
|
|
178
|
-
# Verify the response type and essential fields
|
|
179
|
-
assert isinstance(assistant, Assistant)
|
|
180
|
-
assert assistant.id == test_assistant_id
|
|
181
|
-
assert assistant.name is not None
|
|
182
|
-
assert assistant.description is not None
|
|
183
|
-
assert assistant.system_prompt is not None
|
|
184
|
-
assert assistant.project is not None
|
|
175
|
+
assistant = assistant_utils.get_assistant_by_id(test_assistant_id)
|
|
176
|
+
assert_that(assistant.id, equal_to(test_assistant_id))
|
|
177
|
+
validate_assistant_full_response(assistant)
|
|
185
178
|
|
|
186
|
-
# Verify required structures
|
|
187
|
-
assert isinstance(assistant.toolkits, list)
|
|
188
|
-
assert isinstance(assistant.context, list)
|
|
189
|
-
assert isinstance(assistant.system_prompt_history, list)
|
|
190
|
-
assert isinstance(assistant.user_prompts, list)
|
|
191
179
|
|
|
192
|
-
|
|
193
|
-
assert isinstance(assistant.shared, bool)
|
|
194
|
-
assert isinstance(assistant.is_react, bool)
|
|
195
|
-
assert isinstance(assistant.is_global, bool)
|
|
196
|
-
|
|
197
|
-
# Verify date fields
|
|
198
|
-
assert isinstance(assistant.created_date, datetime)
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
@pytest.mark.skip(reason="Need to fix API to return 404")
|
|
202
|
-
def test_get_assistant_not_found(client: CodeMieClient):
|
|
180
|
+
def test_get_assistant_not_found(assistant_utils):
|
|
203
181
|
"""Test getting a non-existent assistant."""
|
|
204
182
|
with pytest.raises(Exception) as exc_info:
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
exc_info.value
|
|
183
|
+
assistant_utils.get_assistant_by_id("1234")
|
|
184
|
+
assert_that(
|
|
185
|
+
str(exc_info.value).lower(),
|
|
186
|
+
any_of(contains_string("service unavailable"), contains_string("404")),
|
|
208
187
|
)
|
|
209
188
|
|
|
210
189
|
|
|
211
|
-
def test_get_assistant_by_slug(
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
190
|
+
def test_get_assistant_by_slug(assistant_utils, default_llm):
|
|
191
|
+
toolkits = assistant_utils.get_assistant_tools()
|
|
192
|
+
assert_that(
|
|
193
|
+
toolkits,
|
|
194
|
+
has_length(greater_than(0)),
|
|
195
|
+
"At least one toolkit is required for testing",
|
|
196
|
+
)
|
|
216
197
|
|
|
217
198
|
first_toolkit = toolkits[0]
|
|
218
|
-
|
|
199
|
+
assert_that(
|
|
200
|
+
first_toolkit.tools,
|
|
201
|
+
has_length(greater_than(0)),
|
|
202
|
+
"No tools in the first toolkit",
|
|
203
|
+
)
|
|
219
204
|
first_tool = first_toolkit.tools[0]
|
|
220
|
-
|
|
221
|
-
# Step 2: Create assistant with a specific slug
|
|
222
|
-
assistant_project = PROJECT
|
|
223
205
|
test_slug = get_random_name()
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
create_request = AssistantCreateRequest(
|
|
227
|
-
name=assistant_name,
|
|
228
|
-
slug=test_slug, # Set specific slug for testing
|
|
229
|
-
description="Test assistant for slug retrieval",
|
|
230
|
-
system_prompt="You are a helpful test assistant",
|
|
206
|
+
created = assistant_utils.create_assistant(
|
|
231
207
|
llm_model_type=default_llm.base_name,
|
|
232
|
-
project=assistant_project,
|
|
233
208
|
toolkits=[
|
|
234
209
|
ToolKitDetails(
|
|
235
210
|
toolkit=first_toolkit.toolkit, tools=[ToolDetails(name=first_tool.name)]
|
|
236
211
|
)
|
|
237
212
|
],
|
|
213
|
+
system_prompt="You are a helpful test assistant",
|
|
214
|
+
description="Test assistant for slug retrieval",
|
|
215
|
+
slug=test_slug,
|
|
216
|
+
project_name=PROJECT,
|
|
238
217
|
)
|
|
239
218
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
"
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
assert len(retrieved.toolkits) > 0
|
|
261
|
-
assert retrieved.toolkits[0].toolkit == first_toolkit.toolkit
|
|
262
|
-
assert retrieved.toolkits[0].tools[0].name == first_tool.name
|
|
263
|
-
|
|
264
|
-
# Verify other fields
|
|
265
|
-
assert isinstance(retrieved.created_date, datetime)
|
|
266
|
-
assert isinstance(retrieved.shared, bool)
|
|
267
|
-
assert isinstance(retrieved.is_react, bool)
|
|
268
|
-
assert retrieved.system_prompt is not None
|
|
269
|
-
|
|
270
|
-
finally:
|
|
271
|
-
# Step 5: Clean up - delete created assistant
|
|
272
|
-
try:
|
|
273
|
-
client.assistants.delete(retrieved.id)
|
|
274
|
-
|
|
275
|
-
# Verify deletion - try to get by slug should fail
|
|
276
|
-
with pytest.raises(Exception) as exc_info:
|
|
277
|
-
client.assistants.get_by_slug(test_slug)
|
|
278
|
-
assert (
|
|
279
|
-
"404" in str(exc_info.value)
|
|
280
|
-
or "not found" in str(exc_info.value).lower()
|
|
281
|
-
)
|
|
219
|
+
assert_that(created, is_not(None), "Failed to create test assistant")
|
|
220
|
+
full_assistant = assistant_utils.get_assistant_by_id(created.id)
|
|
221
|
+
retrieved = assistant_utils.get_assistant_by_slug(test_slug)
|
|
222
|
+
|
|
223
|
+
assert_that(
|
|
224
|
+
retrieved,
|
|
225
|
+
all_of(
|
|
226
|
+
instance_of(Assistant),
|
|
227
|
+
has_property("slug", test_slug),
|
|
228
|
+
has_property("name", full_assistant.name),
|
|
229
|
+
has_property("description", "Test assistant for slug retrieval"),
|
|
230
|
+
has_property("project", PROJECT),
|
|
231
|
+
has_property("system_prompt", is_not(None)),
|
|
232
|
+
has_property("created_date", instance_of(datetime)),
|
|
233
|
+
has_property("shared", instance_of(bool)),
|
|
234
|
+
has_property("is_react", instance_of(bool)),
|
|
235
|
+
has_property("toolkits", instance_of(list)),
|
|
236
|
+
),
|
|
237
|
+
"Retrieved assistant should have all expected properties",
|
|
238
|
+
)
|
|
282
239
|
|
|
283
|
-
|
|
284
|
-
|
|
240
|
+
assert_that(retrieved.toolkits, has_length(greater_than(0)))
|
|
241
|
+
assert_that(retrieved.toolkits[0].toolkit, equal_to(first_toolkit.toolkit))
|
|
242
|
+
assert_that(retrieved.toolkits[0].tools[0].name, equal_to(first_tool.name))
|
|
285
243
|
|
|
286
244
|
|
|
287
|
-
def
|
|
288
|
-
"""Test getting a non-existent assistant by slug."""
|
|
245
|
+
def test_get_assistant_by_slug_non_found(assistant_utils):
|
|
289
246
|
with pytest.raises(Exception) as exc_info:
|
|
290
|
-
|
|
291
|
-
|
|
247
|
+
assistant_utils.get_assistant_by_slug("non-existent-assistant-slug")
|
|
248
|
+
assert_that(
|
|
249
|
+
str(exc_info.value).lower(),
|
|
250
|
+
any_of(contains_string("404"), contains_string("not found")),
|
|
251
|
+
)
|
|
292
252
|
|
|
293
253
|
|
|
294
|
-
def
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
254
|
+
def test_assistant_full_lifecycle(assistant_utils, default_llm):
|
|
255
|
+
toolkits = assistant_utils.get_assistant_tools()
|
|
256
|
+
assert_that(
|
|
257
|
+
toolkits,
|
|
258
|
+
has_length(greater_than_or_equal_to(2)),
|
|
259
|
+
"At least two toolkits are required for testing",
|
|
260
|
+
)
|
|
299
261
|
|
|
300
262
|
# Get first toolkit and its first tool for initial creation
|
|
301
263
|
first_toolkit = toolkits[0]
|
|
302
|
-
|
|
264
|
+
assert_that(
|
|
265
|
+
first_toolkit.tools,
|
|
266
|
+
has_length(greater_than(0)),
|
|
267
|
+
"No tools in the first toolkit",
|
|
268
|
+
)
|
|
303
269
|
first_tool = first_toolkit.tools[0]
|
|
304
270
|
|
|
305
271
|
# Get second toolkit and its tool for update
|
|
306
272
|
second_toolkit = toolkits[1]
|
|
307
|
-
|
|
273
|
+
assert_that(
|
|
274
|
+
second_toolkit.tools,
|
|
275
|
+
has_length(greater_than(0)),
|
|
276
|
+
"No tools in the second toolkit",
|
|
277
|
+
)
|
|
308
278
|
second_tool = second_toolkit.tools[0]
|
|
309
279
|
|
|
310
280
|
# Step 2: Create assistant with first toolkit/tool
|
|
311
|
-
|
|
312
|
-
assistant_name = get_random_name()
|
|
313
|
-
request = AssistantCreateRequest(
|
|
314
|
-
name=assistant_name,
|
|
315
|
-
slug=assistant_name,
|
|
316
|
-
description="Integration test assistant",
|
|
317
|
-
system_prompt="You are a helpful integration test assistant",
|
|
281
|
+
created = assistant_utils.create_assistant(
|
|
318
282
|
llm_model_type=default_llm.base_name,
|
|
319
|
-
project=assistant_project,
|
|
320
283
|
toolkits=[
|
|
321
284
|
ToolKitDetails(
|
|
322
285
|
toolkit=first_toolkit.toolkit, tools=[ToolDetails(name=first_tool.name)]
|
|
323
286
|
)
|
|
324
287
|
],
|
|
288
|
+
system_prompt="You are a helpful integration test assistant",
|
|
289
|
+
description="Integration test assistant",
|
|
290
|
+
project_name=PROJECT,
|
|
325
291
|
)
|
|
326
292
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
assert created is not None
|
|
293
|
+
assert_that(created, is_not(None))
|
|
294
|
+
assistant_name = created.name
|
|
330
295
|
|
|
331
296
|
# Step 3: Verify assistant exists in the list
|
|
332
|
-
filters = {"project":
|
|
333
|
-
assistants =
|
|
297
|
+
filters = {"project": PROJECT, "shared": False}
|
|
298
|
+
assistants = assistant_utils.get_assistants(filters=filters)
|
|
334
299
|
found_assistant = next((a for a in assistants if a.name == assistant_name), None)
|
|
335
|
-
|
|
336
|
-
|
|
300
|
+
assert_that(
|
|
301
|
+
found_assistant,
|
|
302
|
+
is_not(None),
|
|
303
|
+
f"Created assistant '{assistant_name}' not found in list",
|
|
337
304
|
)
|
|
338
305
|
|
|
339
306
|
# Step 4: Update the assistant with second toolkit/tool
|
|
@@ -349,67 +316,81 @@ def test_assistant_lifecycle(client: CodeMieClient, default_llm):
|
|
|
349
316
|
tools=[ToolDetails(name=second_tool.name)],
|
|
350
317
|
)
|
|
351
318
|
],
|
|
352
|
-
project=
|
|
319
|
+
project=PROJECT,
|
|
353
320
|
is_react=True,
|
|
354
321
|
shared=False,
|
|
355
322
|
)
|
|
356
323
|
|
|
357
|
-
updated =
|
|
358
|
-
|
|
324
|
+
updated = assistant_utils.update_assistant(found_assistant.id, update_request)
|
|
325
|
+
assert_that(updated, is_not(None))
|
|
359
326
|
|
|
360
327
|
# Verify update in the list
|
|
361
|
-
assistants_after_update =
|
|
328
|
+
assistants_after_update = assistant_utils.get_assistants(
|
|
362
329
|
minimal_response=False, filters=filters
|
|
363
330
|
)
|
|
364
331
|
found_updated = next(
|
|
365
332
|
(a for a in assistants_after_update if a.id == found_assistant.id), None
|
|
366
333
|
)
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
334
|
+
|
|
335
|
+
expected_name = f"{assistant_name} Updated"
|
|
336
|
+
expected_description = f"{updated_name} description"
|
|
337
|
+
expected_system_prompt = "You are an updated integration test assistant"
|
|
338
|
+
|
|
339
|
+
assert_that(found_updated, is_not(None), "Updated assistant should exist")
|
|
340
|
+
|
|
341
|
+
# Verify core assistant properties
|
|
342
|
+
assert_that(
|
|
343
|
+
found_updated,
|
|
344
|
+
all_of(
|
|
345
|
+
has_property("name", equal_to(expected_name)),
|
|
346
|
+
has_property("description", equal_to(expected_description)),
|
|
347
|
+
has_property("system_prompt", equal_to(expected_system_prompt)),
|
|
348
|
+
has_property("llm_model_type", equal_to(default_llm.base_name)),
|
|
349
|
+
),
|
|
350
|
+
"Updated assistant should have correct basic properties",
|
|
372
351
|
)
|
|
373
|
-
assert found_updated.llm_model_type == default_llm.base_name
|
|
374
|
-
assert found_updated.toolkits[0].toolkit == second_toolkit.toolkit
|
|
375
|
-
assert found_updated.toolkits[0].tools[0].name == second_tool.name
|
|
376
352
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
353
|
+
expected_toolkit = all_of(
|
|
354
|
+
has_property("toolkit", equal_to(second_toolkit.toolkit)),
|
|
355
|
+
has_property(
|
|
356
|
+
"tools",
|
|
357
|
+
has_item(has_property("name", equal_to(second_tool.name))),
|
|
358
|
+
),
|
|
359
|
+
)
|
|
380
360
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
pytest.fail(f"Failed to clean up assistant: {str(e)}")
|
|
361
|
+
assert_that(
|
|
362
|
+
found_updated.toolkits,
|
|
363
|
+
all_of(
|
|
364
|
+
has_length(greater_than(0)),
|
|
365
|
+
has_item(expected_toolkit),
|
|
366
|
+
),
|
|
367
|
+
"Updated assistant should have correct toolkit configuration",
|
|
368
|
+
)
|
|
390
369
|
|
|
391
370
|
|
|
392
|
-
def
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
371
|
+
def test_assistant_full_chat_functionality(assistant_utils, default_llm):
|
|
372
|
+
toolkits = assistant_utils.get_assistant_tools()
|
|
373
|
+
assert_that(
|
|
374
|
+
toolkits,
|
|
375
|
+
has_length(greater_than(0)),
|
|
376
|
+
"At least one toolkit is required for testing",
|
|
377
|
+
)
|
|
397
378
|
|
|
398
379
|
# Get first toolkit and its first tool
|
|
399
380
|
first_toolkit = toolkits[0]
|
|
400
|
-
|
|
381
|
+
assert_that(
|
|
382
|
+
first_toolkit.tools,
|
|
383
|
+
has_length(greater_than(0)),
|
|
384
|
+
"No tools in the first toolkit",
|
|
385
|
+
)
|
|
401
386
|
first_tool = first_toolkit.tools[0]
|
|
402
387
|
|
|
403
|
-
#
|
|
404
|
-
|
|
405
|
-
assistant_name = get_random_name()
|
|
406
|
-
request = AssistantCreateRequest(
|
|
407
|
-
name=assistant_name,
|
|
408
|
-
slug=assistant_name,
|
|
388
|
+
# Create assistant
|
|
389
|
+
created = assistant_utils.create_assistant(
|
|
409
390
|
description="Integration test assistant for chat",
|
|
410
391
|
system_prompt="You are a helpful integration test assistant. Always respond with 'Test response: ' prefix.",
|
|
411
392
|
llm_model_type=default_llm.base_name,
|
|
412
|
-
|
|
393
|
+
project_name=PROJECT,
|
|
413
394
|
toolkits=[
|
|
414
395
|
ToolKitDetails(
|
|
415
396
|
toolkit=first_toolkit.toolkit, tools=[ToolDetails(name=first_tool.name)]
|
|
@@ -417,149 +398,138 @@ def test_assistant_chat(client: CodeMieClient, default_llm):
|
|
|
417
398
|
],
|
|
418
399
|
)
|
|
419
400
|
|
|
420
|
-
# Create assistant
|
|
421
|
-
created = client.assistants.create(request)
|
|
422
401
|
assert created is not None
|
|
402
|
+
assistant_name = created.name
|
|
423
403
|
|
|
424
|
-
#
|
|
425
|
-
filters = {"project":
|
|
426
|
-
assistants =
|
|
404
|
+
# Find assistant in the list
|
|
405
|
+
filters = {"project": PROJECT, "shared": False}
|
|
406
|
+
assistants = assistant_utils.get_assistants(filters=filters)
|
|
427
407
|
found_assistant = next((a for a in assistants if a.name == assistant_name), None)
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
# Step 4: Test chat functionality
|
|
434
|
-
chat_request = AssistantChatRequest(
|
|
435
|
-
text="Hello, this is a test message",
|
|
436
|
-
conversation_id=str(uuid.uuid4()),
|
|
437
|
-
history=[
|
|
438
|
-
ChatMessage(role=ChatRole.USER, message="Hi there"),
|
|
439
|
-
ChatMessage(
|
|
440
|
-
role=ChatRole.ASSISTANT, message="Hello! How can I help you?"
|
|
441
|
-
),
|
|
442
|
-
],
|
|
443
|
-
stream=False,
|
|
444
|
-
metadata={"langfuse_traces_enabled": LANGFUSE_TRACES_ENABLED},
|
|
445
|
-
)
|
|
408
|
+
assert_that(
|
|
409
|
+
found_assistant,
|
|
410
|
+
is_not(None),
|
|
411
|
+
f"Created assistant '{assistant_name}' not found in list",
|
|
412
|
+
)
|
|
446
413
|
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
414
|
+
# Test chat functionality
|
|
415
|
+
response = assistant_utils.ask_assistant(
|
|
416
|
+
assistant=found_assistant,
|
|
417
|
+
user_prompt="Hello, this is a test message",
|
|
418
|
+
minimal_response=False,
|
|
419
|
+
conversation_id=str(uuid.uuid4()),
|
|
420
|
+
history=[
|
|
421
|
+
ChatMessage(role=ChatRole.USER, message="Hi there"),
|
|
422
|
+
ChatMessage(role=ChatRole.ASSISTANT, message="Hello! How can I help you?"),
|
|
423
|
+
],
|
|
424
|
+
)
|
|
451
425
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
stream=True,
|
|
465
|
-
metadata={"langfuse_traces_enabled": LANGFUSE_TRACES_ENABLED},
|
|
466
|
-
)
|
|
426
|
+
assert_that(
|
|
427
|
+
response,
|
|
428
|
+
all_of(
|
|
429
|
+
instance_of(BaseModelResponse),
|
|
430
|
+
has_property(
|
|
431
|
+
"generated", all_of(is_not(None), starts_with("Test response:"))
|
|
432
|
+
),
|
|
433
|
+
has_property("time_elapsed", is_not(None)),
|
|
434
|
+
has_property("tokens_used", is_(None)),
|
|
435
|
+
),
|
|
436
|
+
"Chat response should be valid BaseModelResponse with expected properties",
|
|
437
|
+
)
|
|
467
438
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
439
|
+
# Test streaming
|
|
440
|
+
stream_response = assistant_utils.ask_assistant(
|
|
441
|
+
assistant=found_assistant,
|
|
442
|
+
user_prompt="Hello, this is a streaming test",
|
|
443
|
+
conversation_id=str(uuid.uuid4()),
|
|
444
|
+
stream=True,
|
|
445
|
+
minimal_response=False,
|
|
446
|
+
)
|
|
472
447
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
assert len(received_chunks) > 0
|
|
479
|
-
finally:
|
|
480
|
-
# Clean up - delete created assistant
|
|
481
|
-
try:
|
|
482
|
-
client.assistants.delete(found_assistant.id)
|
|
483
|
-
|
|
484
|
-
# Verify deletion
|
|
485
|
-
assistants_after = client.assistants.list(
|
|
486
|
-
minimal_response=False, filters=filters
|
|
487
|
-
)
|
|
488
|
-
assert not any(a.name == assistant_name for a in assistants_after), (
|
|
489
|
-
f"Assistant '{assistant_name}' still exists after deletion"
|
|
490
|
-
)
|
|
491
|
-
except Exception as e:
|
|
492
|
-
pytest.fail(f"Failed to clean up assistant: {str(e)}")
|
|
448
|
+
received_chunks = []
|
|
449
|
+
for chunk in stream_response:
|
|
450
|
+
assert_that(chunk, is_not(None))
|
|
451
|
+
received_chunks.append(chunk)
|
|
452
|
+
assert_that(received_chunks, has_length(greater_than(0)))
|
|
493
453
|
|
|
494
454
|
|
|
495
|
-
def test_get_prebuilt_assistants(
|
|
496
|
-
|
|
497
|
-
# Step 1: Get list of prebuilt assistants
|
|
498
|
-
prebuilt_assistants = client.assistants.get_prebuilt()
|
|
455
|
+
def test_get_prebuilt_assistants(assistant_utils):
|
|
456
|
+
prebuilt_assistants = assistant_utils.get_prebuilt_assistant()
|
|
499
457
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
458
|
+
assert_that(prebuilt_assistants, instance_of(list))
|
|
459
|
+
assert_that(
|
|
460
|
+
prebuilt_assistants, has_length(greater_than(0)), "No prebuilt assistants found"
|
|
461
|
+
)
|
|
503
462
|
|
|
504
|
-
#
|
|
463
|
+
# Verify the first assistant by slug
|
|
505
464
|
first_assistant = prebuilt_assistants[0]
|
|
506
|
-
|
|
465
|
+
assert_that(first_assistant.slug, is_not(None), "Prebuilt assistant has no slug")
|
|
507
466
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
# Step 4: Compare assistants
|
|
512
|
-
assert assistant_by_slug is not None, "Failed to get assistant by slug"
|
|
513
|
-
assert assistant_by_slug.id == first_assistant.id, "Assistant IDs don't match"
|
|
514
|
-
assert assistant_by_slug.slug == first_assistant.slug, "Assistant slugs don't match"
|
|
515
|
-
assert assistant_by_slug.name == first_assistant.name, "Assistant names don't match"
|
|
516
|
-
assert assistant_by_slug.description == first_assistant.description, (
|
|
517
|
-
"Assistant descriptions don't match"
|
|
467
|
+
assistant_by_slug = assistant_utils.get_prebuilt_assistant_by_slug(
|
|
468
|
+
first_assistant.slug
|
|
518
469
|
)
|
|
519
|
-
|
|
520
|
-
|
|
470
|
+
|
|
471
|
+
# Compare assistant details
|
|
472
|
+
assert_that(
|
|
473
|
+
assistant_by_slug,
|
|
474
|
+
all_of(
|
|
475
|
+
instance_of(Assistant),
|
|
476
|
+
has_property("id", equal_to(first_assistant.id)),
|
|
477
|
+
has_property("slug", equal_to(first_assistant.slug)),
|
|
478
|
+
has_property("name", equal_to(first_assistant.name)),
|
|
479
|
+
has_property("description", equal_to(first_assistant.description)),
|
|
480
|
+
has_property("system_prompt", equal_to(first_assistant.system_prompt)),
|
|
481
|
+
),
|
|
521
482
|
)
|
|
522
483
|
|
|
523
|
-
# Compare toolkits
|
|
524
|
-
|
|
525
|
-
|
|
484
|
+
# Compare assistant toolkits
|
|
485
|
+
assert_that(
|
|
486
|
+
assistant_by_slug.toolkits, has_length(equal_to(len(first_assistant.toolkits)))
|
|
526
487
|
)
|
|
527
488
|
for toolkit1, toolkit2 in zip(assistant_by_slug.toolkits, first_assistant.toolkits):
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
489
|
+
assert_that(
|
|
490
|
+
toolkit1,
|
|
491
|
+
all_of(
|
|
492
|
+
has_property("toolkit", equal_to(toolkit2.toolkit)),
|
|
493
|
+
has_property("tools", has_length(equal_to(len(toolkit2.tools)))),
|
|
494
|
+
),
|
|
531
495
|
)
|
|
532
496
|
for tool1, tool2 in zip(toolkit1.tools, toolkit2.tools):
|
|
533
|
-
|
|
534
|
-
f"Tool names don't match in toolkit {toolkit1.toolkit}"
|
|
535
|
-
)
|
|
497
|
+
assert_that(tool1, has_property("name", equal_to(tool2.name)))
|
|
536
498
|
|
|
537
499
|
|
|
538
|
-
|
|
500
|
+
@pytest.mark.skipif(
|
|
501
|
+
os.getenv("ENV") != "preview", reason="valid_assistant_id is for preview env"
|
|
502
|
+
)
|
|
503
|
+
def test_assistant_evaluate(assistant_utils):
|
|
504
|
+
valid_assistant_id = "05959338-06de-477d-9cc3-08369f858057"
|
|
505
|
+
valid_dataset_id = "codemie-faq-basic"
|
|
539
506
|
evaluation_request = AssistantEvaluationRequest(
|
|
540
|
-
dataset_id=
|
|
507
|
+
dataset_id=valid_dataset_id, experiment_name=f"Eval {uuid.uuid4()}"
|
|
541
508
|
)
|
|
542
509
|
|
|
543
510
|
# Execute evaluation with minimal request
|
|
544
|
-
|
|
545
|
-
|
|
511
|
+
|
|
512
|
+
result = assistant_utils.send_evaluate_assistant_request(
|
|
513
|
+
valid_assistant_id, evaluation_request
|
|
546
514
|
)
|
|
547
515
|
|
|
548
516
|
# Verify response structure
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
print(result)
|
|
517
|
+
assert_that(result, is_not(None))
|
|
518
|
+
assert_that(result, instance_of(dict))
|
|
552
519
|
|
|
553
520
|
|
|
554
|
-
def test_assistant_evaluate_not_found(
|
|
555
|
-
|
|
521
|
+
def test_assistant_evaluate_not_found(assistant_utils):
|
|
522
|
+
invalid_assistant_id = "non-existent-assistant-id"
|
|
523
|
+
valid_dataset_id = "test-dataset-999"
|
|
556
524
|
evaluation_request = AssistantEvaluationRequest(
|
|
557
|
-
dataset_id=
|
|
525
|
+
dataset_id=valid_dataset_id, experiment_name="Error Test"
|
|
558
526
|
)
|
|
559
527
|
|
|
560
528
|
# Test with non-existent assistant ID
|
|
561
529
|
with pytest.raises(Exception) as exc_info:
|
|
562
|
-
|
|
530
|
+
assistant_utils.send_evaluate_assistant_request(
|
|
531
|
+
invalid_assistant_id, evaluation_request
|
|
532
|
+
)
|
|
563
533
|
|
|
564
534
|
# Verify it's a proper error response
|
|
565
|
-
|
|
535
|
+
assert_that(exc_info.value, is_not(None))
|