codemie-test-harness 0.1.180__py3-none-any.whl → 0.1.182__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of codemie-test-harness might be problematic. Click here for more details.
- codemie_test_harness/tests/assistant/datasource/test_code_datasource.py +17 -4
- codemie_test_harness/tests/assistant/datasource/test_confluence_datasource.py +17 -4
- codemie_test_harness/tests/assistant/datasource/test_file_indexing.py +17 -5
- codemie_test_harness/tests/assistant/datasource/test_google_datasource.py +18 -4
- codemie_test_harness/tests/assistant/datasource/test_jira_datasource.py +19 -6
- codemie_test_harness/tests/assistant/default_integrations/test_default_integrations_for_tool.py +21 -5
- codemie_test_harness/tests/assistant/default_integrations/test_default_integrations_for_tool_kit.py +21 -5
- codemie_test_harness/tests/assistant/default_integrations/test_default_integrations_for_tool_with_datasource.py +21 -5
- codemie_test_harness/tests/assistant/test_assistants.py +31 -13
- codemie_test_harness/tests/assistant/tools/access_management/test_keycloak_tool.py +6 -1
- codemie_test_harness/tests/assistant/tools/ado/test_assistant_for_ado_test_plan_tools.py +37 -9
- codemie_test_harness/tests/assistant/tools/ado/test_assistant_for_ado_wiki_tools.py +25 -6
- codemie_test_harness/tests/assistant/tools/ado/test_assistant_for_ado_work_item_tools.py +25 -5
- codemie_test_harness/tests/assistant/tools/cloud/test_cloud_tools.py +8 -2
- codemie_test_harness/tests/assistant/tools/codebase/test_codebase_tools.py +11 -2
- codemie_test_harness/tests/assistant/tools/datamanagement/test_assistant_with_data_management_tools.py +30 -9
- codemie_test_harness/tests/assistant/tools/filemanagement/test_assistant_with_file_management_tools.py +33 -8
- codemie_test_harness/tests/assistant/tools/git/test_assistant_with_git_tools.py +41 -10
- codemie_test_harness/tests/assistant/tools/mcp/test_cli_mcp_server.py +14 -7
- codemie_test_harness/tests/assistant/tools/mcp/test_mcp_servers.py +12 -6
- codemie_test_harness/tests/assistant/tools/notification/test_assistant_notification_tools.py +9 -2
- codemie_test_harness/tests/assistant/tools/openapi/test_assistant_with_open_api_tools.py +5 -1
- codemie_test_harness/tests/assistant/tools/plugin/test_assistant_with_development_plugin.py +25 -6
- codemie_test_harness/tests/assistant/tools/plugin/test_assistant_with_plugin_and_mcp_servers.py +15 -3
- codemie_test_harness/tests/assistant/tools/plugin/test_single_assistant_dual_time_plugins.py +11 -2
- codemie_test_harness/tests/assistant/tools/project_management/test_assistant_pm_tools.py +30 -11
- codemie_test_harness/tests/assistant/tools/report_portal/test_assistant_report_portal_tools.py +6 -1
- codemie_test_harness/tests/assistant/tools/research/test_assistant_research_tools.py +15 -3
- codemie_test_harness/tests/assistant/tools/servicenow/test_servicenow_tools.py +5 -1
- codemie_test_harness/tests/assistant/tools/vcs/test_assistant_with_vcs_tools.py +6 -1
- codemie_test_harness/tests/enums/tools.py +32 -0
- codemie_test_harness/tests/test_data/file_test_data.py +17 -0
- codemie_test_harness/tests/utils/assistant_utils.py +34 -1
- codemie_test_harness/tests/utils/base_utils.py +61 -0
- codemie_test_harness/tests/workflow/assistant_tools/mcp/test_workflow_with_assistant_with_mcp_server.py +3 -5
- codemie_test_harness/tests/workflow/virtual_assistant_tools/mcp/test_workflow_with_mcp_server.py +3 -4
- {codemie_test_harness-0.1.180.dist-info → codemie_test_harness-0.1.182.dist-info}/METADATA +2 -2
- {codemie_test_harness-0.1.180.dist-info → codemie_test_harness-0.1.182.dist-info}/RECORD +40 -40
- {codemie_test_harness-0.1.180.dist-info → codemie_test_harness-0.1.182.dist-info}/WHEEL +0 -0
- {codemie_test_harness-0.1.180.dist-info → codemie_test_harness-0.1.182.dist-info}/entry_points.txt +0 -0
|
@@ -2,12 +2,15 @@ import os
|
|
|
2
2
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
|
-
from codemie_test_harness.tests.enums.tools import Toolkit
|
|
5
|
+
from codemie_test_harness.tests.enums.tools import Toolkit, PluginTool
|
|
6
6
|
from codemie_test_harness.tests.test_data.plugin_tools_test_data import (
|
|
7
7
|
list_files_plugin_tools_test_data,
|
|
8
8
|
CREATE_READ_DELETE_FILE_TEST_DATA,
|
|
9
9
|
)
|
|
10
|
-
from codemie_test_harness.tests.utils.base_utils import get_random_name
|
|
10
|
+
from codemie_test_harness.tests.utils.base_utils import (
|
|
11
|
+
get_random_name,
|
|
12
|
+
assert_tool_triggered,
|
|
13
|
+
)
|
|
11
14
|
from codemie_test_harness.tests.utils.constants import TESTS_PATH
|
|
12
15
|
|
|
13
16
|
|
|
@@ -31,7 +34,12 @@ def test_assistant_with_list_files_plugin_tools(
|
|
|
31
34
|
):
|
|
32
35
|
assistant = assistant(Toolkit.PLUGIN, Toolkit.PLUGIN, settings=development_plugin)
|
|
33
36
|
|
|
34
|
-
response = assistant_utils.ask_assistant(
|
|
37
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
38
|
+
assistant, prompt, minimal_response=False
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
42
|
+
|
|
35
43
|
similarity_check.check_similarity(response, expected_response)
|
|
36
44
|
|
|
37
45
|
|
|
@@ -51,12 +59,16 @@ def test_assistant_with_modify_files_plugin_tools(
|
|
|
51
59
|
Toolkit.PLUGIN, Toolkit.PLUGIN, settings=development_plugin
|
|
52
60
|
)
|
|
53
61
|
|
|
54
|
-
response = assistant_utils.ask_assistant(
|
|
62
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
55
63
|
assistant,
|
|
56
64
|
CREATE_READ_DELETE_FILE_TEST_DATA["create_file_prompt"].format(
|
|
57
65
|
file_name, file_name
|
|
58
66
|
),
|
|
67
|
+
minimal_response=False,
|
|
59
68
|
)
|
|
69
|
+
|
|
70
|
+
assert_tool_triggered(PluginTool.WRITE_FILE_TO_FILE_SYSTEM, triggered_tools)
|
|
71
|
+
|
|
60
72
|
similarity_check.check_similarity(
|
|
61
73
|
response,
|
|
62
74
|
CREATE_READ_DELETE_FILE_TEST_DATA["create_file_response"].format(
|
|
@@ -64,21 +76,28 @@ def test_assistant_with_modify_files_plugin_tools(
|
|
|
64
76
|
),
|
|
65
77
|
)
|
|
66
78
|
|
|
67
|
-
response = assistant_utils.ask_assistant(
|
|
79
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
68
80
|
assistant,
|
|
69
81
|
CREATE_READ_DELETE_FILE_TEST_DATA["git_command_prompt"].format(file_name),
|
|
82
|
+
minimal_response=False,
|
|
70
83
|
)
|
|
84
|
+
assert_tool_triggered(PluginTool.GENERIC_GIT_TOOL, triggered_tools)
|
|
85
|
+
|
|
71
86
|
similarity_check.check_similarity(
|
|
72
87
|
response,
|
|
73
88
|
CREATE_READ_DELETE_FILE_TEST_DATA["git_command_response"].format(file_name),
|
|
74
89
|
)
|
|
75
90
|
|
|
76
|
-
response = assistant_utils.ask_assistant(
|
|
91
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
77
92
|
assistant,
|
|
78
93
|
CREATE_READ_DELETE_FILE_TEST_DATA["show_file_content_prompt"].format(
|
|
79
94
|
file_name
|
|
80
95
|
),
|
|
96
|
+
minimal_response=False,
|
|
81
97
|
)
|
|
98
|
+
|
|
99
|
+
assert_tool_triggered(PluginTool.READ_FILE_FROM_FILE_SYSTEM, triggered_tools)
|
|
100
|
+
|
|
82
101
|
similarity_check.check_similarity(
|
|
83
102
|
response,
|
|
84
103
|
CREATE_READ_DELETE_FILE_TEST_DATA["show_file_content_response"].format(
|
codemie_test_harness/tests/assistant/tools/plugin/test_assistant_with_plugin_and_mcp_servers.py
CHANGED
|
@@ -2,12 +2,13 @@ import os
|
|
|
2
2
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
|
-
from codemie_test_harness.tests.enums.tools import Toolkit
|
|
5
|
+
from codemie_test_harness.tests.enums.tools import Toolkit, CliMcpServer, PluginTool
|
|
6
6
|
from codemie_test_harness.tests.test_data.mcp_server_test_data import (
|
|
7
7
|
cli_mcp_server_with_plugin_test_data,
|
|
8
8
|
filesystem_mcp_server_with_plugin_test_data,
|
|
9
9
|
)
|
|
10
10
|
from codemie_test_harness.tests.utils.constants import TESTS_PATH
|
|
11
|
+
from codemie_test_harness.tests.utils.base_utils import assert_tool_triggered
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
@pytest.mark.assistant
|
|
@@ -29,7 +30,12 @@ def test_assistant_with_plugin_and_cli_mcp_server(
|
|
|
29
30
|
):
|
|
30
31
|
assistant = assistant(Toolkit.PLUGIN, Toolkit.PLUGIN, settings=cli_server)
|
|
31
32
|
|
|
32
|
-
response = assistant_utils.ask_assistant(
|
|
33
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
34
|
+
assistant, prompt, minimal_response=False
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
assert_tool_triggered(CliMcpServer.RUN_COMMAND, triggered_tools)
|
|
38
|
+
|
|
33
39
|
similarity_check.check_similarity(response, expected_response)
|
|
34
40
|
|
|
35
41
|
|
|
@@ -56,7 +62,13 @@ def test_assistant_with_plugin_and_filesystem_mcp_server(
|
|
|
56
62
|
Toolkit.PLUGIN, Toolkit.PLUGIN, settings=filesystem_server
|
|
57
63
|
)
|
|
58
64
|
|
|
59
|
-
response = assistant_utils.ask_assistant(
|
|
65
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
66
|
+
assistant, prompt, minimal_response=False
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
if tool_name != PluginTool.READ_FILE:
|
|
70
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
71
|
+
|
|
60
72
|
similarity_check.check_similarity(response, expected_response)
|
|
61
73
|
finally:
|
|
62
74
|
file_to_remove = f"{str(TESTS_PATH / 'sdk_tests')}.properties"
|
codemie_test_harness/tests/assistant/tools/plugin/test_single_assistant_dual_time_plugins.py
CHANGED
|
@@ -13,13 +13,14 @@ from typing import Tuple, List
|
|
|
13
13
|
|
|
14
14
|
import pytest
|
|
15
15
|
|
|
16
|
-
from codemie_test_harness.tests.enums.tools import Toolkit
|
|
16
|
+
from codemie_test_harness.tests.enums.tools import Toolkit, McpServerTime
|
|
17
17
|
from codemie_sdk.models.integration import CredentialTypes
|
|
18
18
|
from codemie_test_harness.tests.conftest import cleanup_plugin_process
|
|
19
19
|
from codemie_test_harness.tests.test_data.plugin_tools_test_data import (
|
|
20
20
|
dual_time_plugin_test_data,
|
|
21
21
|
)
|
|
22
22
|
from codemie_test_harness.tests.utils.credentials_manager import CredentialsManager
|
|
23
|
+
from codemie_test_harness.tests.utils.base_utils import assert_tool_triggered
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
CACHE_DIR = Path.home() / ".cache" / "uv" / "archive-v0"
|
|
@@ -162,5 +163,13 @@ def test_single_assistant_dual_time_plugins(
|
|
|
162
163
|
Toolkit.PLUGIN, Toolkit.PLUGIN, settings=dual_time_plugins
|
|
163
164
|
)
|
|
164
165
|
|
|
165
|
-
response = assistant_utils.ask_assistant(
|
|
166
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
167
|
+
assistant_instance, prompt, minimal_response=False
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
if "tools" in prompt.lower():
|
|
171
|
+
pass
|
|
172
|
+
else:
|
|
173
|
+
assert_tool_triggered(McpServerTime.GET_CURRENT_TIME, triggered_tools)
|
|
174
|
+
|
|
166
175
|
similarity_check.check_similarity(response, expected_response)
|
|
@@ -14,7 +14,10 @@ from codemie_test_harness.tests.test_data.project_management_test_data import (
|
|
|
14
14
|
pm_tools_test_data,
|
|
15
15
|
)
|
|
16
16
|
from codemie_test_harness.tests.utils.credentials_manager import CredentialsManager
|
|
17
|
-
from codemie_test_harness.tests.utils.base_utils import credentials_to_dict
|
|
17
|
+
from codemie_test_harness.tests.utils.base_utils import (
|
|
18
|
+
credentials_to_dict,
|
|
19
|
+
assert_tool_triggered,
|
|
20
|
+
)
|
|
18
21
|
from codemie_test_harness.tests.utils.constants import (
|
|
19
22
|
project_management_integrations,
|
|
20
23
|
)
|
|
@@ -48,7 +51,10 @@ def test_assistant_with_project_management_tools(
|
|
|
48
51
|
settings=integration,
|
|
49
52
|
)
|
|
50
53
|
|
|
51
|
-
response = assistant_utils.ask_assistant(
|
|
54
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
55
|
+
assistant, prompt, minimal_response=False
|
|
56
|
+
)
|
|
57
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
52
58
|
similarity_check.check_similarity(response, expected_response)
|
|
53
59
|
|
|
54
60
|
|
|
@@ -72,9 +78,10 @@ def test_assistant_with_jira_tool_and_integration_id_in_chat(
|
|
|
72
78
|
settings=invalid_settings,
|
|
73
79
|
)
|
|
74
80
|
|
|
75
|
-
response = assistant_utils.ask_assistant(
|
|
76
|
-
assistant, JIRA_TOOL_PROMPT, tools_config=[tool_config]
|
|
81
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
82
|
+
assistant, JIRA_TOOL_PROMPT, tools_config=[tool_config], minimal_response=False
|
|
77
83
|
)
|
|
84
|
+
assert_tool_triggered(ProjectManagementTool.JIRA, triggered_tools)
|
|
78
85
|
similarity_check.check_similarity(response, RESPONSE_FOR_JIRA_TOOL)
|
|
79
86
|
|
|
80
87
|
|
|
@@ -95,9 +102,10 @@ def test_assistant_with_jira_tool_and_credentials_in_chat(
|
|
|
95
102
|
ProjectManagementTool.JIRA,
|
|
96
103
|
)
|
|
97
104
|
|
|
98
|
-
response = assistant_utils.ask_assistant(
|
|
99
|
-
assistant, JIRA_TOOL_PROMPT, tools_config=[tool_config]
|
|
105
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
106
|
+
assistant, JIRA_TOOL_PROMPT, tools_config=[tool_config], minimal_response=False
|
|
100
107
|
)
|
|
108
|
+
assert_tool_triggered(ProjectManagementTool.JIRA, triggered_tools)
|
|
101
109
|
similarity_check.check_similarity(response, RESPONSE_FOR_JIRA_TOOL)
|
|
102
110
|
|
|
103
111
|
|
|
@@ -125,9 +133,13 @@ def test_assistant_with_confluence_tool_and_integration_id_in_chat(
|
|
|
125
133
|
settings=invalid_settings,
|
|
126
134
|
)
|
|
127
135
|
|
|
128
|
-
response = assistant_utils.ask_assistant(
|
|
129
|
-
assistant,
|
|
136
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
137
|
+
assistant,
|
|
138
|
+
CONFLUENCE_TOOL_PROMPT,
|
|
139
|
+
tools_config=[tool_config],
|
|
140
|
+
minimal_response=False,
|
|
130
141
|
)
|
|
142
|
+
assert_tool_triggered(ProjectManagementTool.CONFLUENCE, triggered_tools)
|
|
131
143
|
similarity_check.check_similarity(response, RESPONSE_FOR_CONFLUENCE_TOOL)
|
|
132
144
|
|
|
133
145
|
|
|
@@ -145,9 +157,13 @@ def test_assistant_with_confluence_tool_and_credentials_in_chat(
|
|
|
145
157
|
|
|
146
158
|
assistant = assistant(Toolkit.PROJECT_MANAGEMENT, ProjectManagementTool.CONFLUENCE)
|
|
147
159
|
|
|
148
|
-
response = assistant_utils.ask_assistant(
|
|
149
|
-
assistant,
|
|
160
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
161
|
+
assistant,
|
|
162
|
+
CONFLUENCE_TOOL_PROMPT,
|
|
163
|
+
tools_config=[tool_config],
|
|
164
|
+
minimal_response=False,
|
|
150
165
|
)
|
|
166
|
+
assert_tool_triggered(ProjectManagementTool.CONFLUENCE, triggered_tools)
|
|
151
167
|
similarity_check.check_similarity(response, RESPONSE_FOR_CONFLUENCE_TOOL)
|
|
152
168
|
|
|
153
169
|
|
|
@@ -161,7 +177,10 @@ def test_assistant_with_confluence_tool_and_without_credentials(
|
|
|
161
177
|
):
|
|
162
178
|
assistant = assistant(Toolkit.PROJECT_MANAGEMENT, ProjectManagementTool.CONFLUENCE)
|
|
163
179
|
|
|
164
|
-
response = assistant_utils.ask_assistant(
|
|
180
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
181
|
+
assistant, CONFLUENCE_TOOL_PROMPT, minimal_response=False
|
|
182
|
+
)
|
|
183
|
+
assert_tool_triggered(ProjectManagementTool.CONFLUENCE, triggered_tools)
|
|
165
184
|
similarity_check.check_similarity(
|
|
166
185
|
response, RESPONSE_FOR_CONFLUENCE_TOOL_UNAUTHORIZED
|
|
167
186
|
)
|
codemie_test_harness/tests/assistant/tools/report_portal/test_assistant_report_portal_tools.py
CHANGED
|
@@ -3,6 +3,7 @@ import pytest
|
|
|
3
3
|
from codemie_test_harness.tests.test_data.report_portal_tools_test_data import (
|
|
4
4
|
rp_test_data,
|
|
5
5
|
)
|
|
6
|
+
from codemie_test_harness.tests.utils.base_utils import assert_tool_triggered
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
@pytest.mark.assistant
|
|
@@ -28,5 +29,9 @@ def test_assistant_with_report_portal_tools(
|
|
|
28
29
|
tool_name,
|
|
29
30
|
settings=report_portal_integration,
|
|
30
31
|
)
|
|
31
|
-
response = assistant_utils.ask_assistant(
|
|
32
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
33
|
+
assistant, prompt, minimal_response=False
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
32
37
|
similarity_check.check_similarity(response, expected_response)
|
|
@@ -6,7 +6,10 @@ from codemie_test_harness.tests.test_data.research_tools_test_data import (
|
|
|
6
6
|
search_tools_test_data,
|
|
7
7
|
interactions_tools_test_data,
|
|
8
8
|
)
|
|
9
|
-
from codemie_test_harness.tests.utils.base_utils import percent_of_relevant_titles
|
|
9
|
+
from codemie_test_harness.tests.utils.base_utils import (
|
|
10
|
+
percent_of_relevant_titles,
|
|
11
|
+
assert_tool_triggered,
|
|
12
|
+
)
|
|
10
13
|
|
|
11
14
|
|
|
12
15
|
@pytest.mark.assistant
|
|
@@ -26,7 +29,11 @@ def test_assistant_with_search_tools(
|
|
|
26
29
|
):
|
|
27
30
|
assistant = assistant(Toolkit.RESEARCH, tool_name)
|
|
28
31
|
|
|
29
|
-
response = assistant_utils.ask_assistant(
|
|
32
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
33
|
+
assistant, prompt, minimal_response=False
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
30
37
|
|
|
31
38
|
percent = percent_of_relevant_titles(response)
|
|
32
39
|
|
|
@@ -55,5 +62,10 @@ def test_assistant_with_interaction_tools(
|
|
|
55
62
|
):
|
|
56
63
|
assistant = assistant(Toolkit.RESEARCH, tool_name)
|
|
57
64
|
|
|
58
|
-
response = assistant_utils.ask_assistant(
|
|
65
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
66
|
+
assistant, prompt, minimal_response=False
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
70
|
+
|
|
59
71
|
similarity_check.check_similarity(response, expected_response)
|
|
@@ -5,6 +5,7 @@ from codemie_test_harness.tests.test_data.servicenow_tools_test_data import (
|
|
|
5
5
|
PROMPT,
|
|
6
6
|
EXPECTED_RESPONSE,
|
|
7
7
|
)
|
|
8
|
+
from codemie_test_harness.tests.utils.base_utils import assert_tool_triggered
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
@pytest.mark.assistant
|
|
@@ -20,6 +21,9 @@ def test_assistant_with_servicenow_tools(
|
|
|
20
21
|
Toolkit.SERVICENOW, ServiceNowTool.SERVICE_NOW, settings=service_now_integration
|
|
21
22
|
)
|
|
22
23
|
|
|
23
|
-
response = assistant_utils.ask_assistant(
|
|
24
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
25
|
+
servicenow_assistant, PROMPT, minimal_response=False
|
|
26
|
+
)
|
|
24
27
|
|
|
28
|
+
assert_tool_triggered(ServiceNowTool.SERVICE_NOW, triggered_tools)
|
|
25
29
|
similarity_check.check_similarity(response, EXPECTED_RESPONSE, 80)
|
|
@@ -6,6 +6,7 @@ from codemie_test_harness.tests.enums.tools import (
|
|
|
6
6
|
from codemie_test_harness.tests.test_data.vcs_tools_test_data import (
|
|
7
7
|
vcs_tools_test_data,
|
|
8
8
|
)
|
|
9
|
+
from codemie_test_harness.tests.utils.base_utils import assert_tool_triggered
|
|
9
10
|
from codemie_test_harness.tests.utils.constants import vcs_integrations
|
|
10
11
|
|
|
11
12
|
|
|
@@ -31,6 +32,10 @@ def test_create_assistant_with_vcs_tool(
|
|
|
31
32
|
|
|
32
33
|
assistant = assistant(Toolkit.VCS, tool_name, settings=integration)
|
|
33
34
|
|
|
34
|
-
response = assistant_utils.ask_assistant(
|
|
35
|
+
response, triggered_tools = assistant_utils.ask_assistant(
|
|
36
|
+
assistant, prompt, minimal_response=False
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
assert_tool_triggered(tool_name, triggered_tools)
|
|
35
40
|
|
|
36
41
|
similarity_check.check_similarity(response, expected_response)
|
|
@@ -199,3 +199,35 @@ class ReportPortalTool(str, Enum):
|
|
|
199
199
|
GET_LOGS_FOR_TEST_ITEM = "get_logs_for_test_item"
|
|
200
200
|
GET_USER_INFORMATION = "get_user_information"
|
|
201
201
|
GET_DASHBOARD_DATA = "get_dashboard_data"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class McpServerTime(str, Enum):
|
|
205
|
+
"""Enum for Time MCP Server tool names."""
|
|
206
|
+
|
|
207
|
+
GET_CURRENT_TIME = "get_current_time"
|
|
208
|
+
CONVERT_TIME = "convert_time"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class CliMcpServer(str, Enum):
|
|
212
|
+
"""Enum for Cli MCP Server tool names."""
|
|
213
|
+
|
|
214
|
+
RUN_COMMAND = "run command"
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class McpServerFetch(str, Enum):
|
|
218
|
+
"""Enum for MCP Server Fetch tool names."""
|
|
219
|
+
|
|
220
|
+
FETCH = "fetch"
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class Default(str, Enum):
|
|
224
|
+
"""Enum for tools that attached automatically e.g. on added datasource or file uploaded."""
|
|
225
|
+
|
|
226
|
+
GET_REPOSITORY_FILE_TREE = "get_repository_file_tree"
|
|
227
|
+
SEARCH_KB = "search_kb"
|
|
228
|
+
FILE_ANALYSIS = "file_analysis"
|
|
229
|
+
DOCX_TOOL = "docx_tool"
|
|
230
|
+
EXCEL_TOOL = "excel_tool"
|
|
231
|
+
PYTHON_REPL_AST = "python_repl_ast"
|
|
232
|
+
PPTX_TOOL = "pptx_tool"
|
|
233
|
+
PDF_TOOL = "pdf_tool"
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from codemie_test_harness.tests.enums.tools import Default
|
|
2
|
+
|
|
1
3
|
file_test_data = [
|
|
2
4
|
(
|
|
3
5
|
"test.docx",
|
|
@@ -25,6 +27,7 @@ file_test_data = [
|
|
|
25
27
|
• Chinese Yuan Renminbi
|
|
26
28
|
The system should display costs in the chosen currency, along with the currency code and numeric identifier, to help travelers more accurately budget and decide on accommodations. For example, AUD 36 Australian dollar.
|
|
27
29
|
""",
|
|
30
|
+
Default.DOCX_TOOL,
|
|
28
31
|
),
|
|
29
32
|
(
|
|
30
33
|
"test.xlsx",
|
|
@@ -34,6 +37,7 @@ file_test_data = [
|
|
|
34
37
|
Data Cats 111
|
|
35
38
|
None None Travellers
|
|
36
39
|
""",
|
|
40
|
+
Default.EXCEL_TOOL,
|
|
37
41
|
),
|
|
38
42
|
(
|
|
39
43
|
"test.vtt",
|
|
@@ -46,6 +50,7 @@ file_test_data = [
|
|
|
46
50
|
00:00:02.500 --> 00:00:04.300
|
|
47
51
|
and the way we access it is changing
|
|
48
52
|
""",
|
|
53
|
+
Default.FILE_ANALYSIS,
|
|
49
54
|
),
|
|
50
55
|
(
|
|
51
56
|
"test.ini",
|
|
@@ -61,6 +66,7 @@ file_test_data = [
|
|
|
61
66
|
rp_project = epm-cdme
|
|
62
67
|
rp_launch = Pytest Regression
|
|
63
68
|
""",
|
|
69
|
+
Default.FILE_ANALYSIS,
|
|
64
70
|
),
|
|
65
71
|
(
|
|
66
72
|
"test.csv",
|
|
@@ -73,10 +79,12 @@ file_test_data = [
|
|
|
73
79
|
|
|
74
80
|
1918: 1991, track A: track D
|
|
75
81
|
""",
|
|
82
|
+
Default.PYTHON_REPL_AST,
|
|
76
83
|
),
|
|
77
84
|
(
|
|
78
85
|
"test.json",
|
|
79
86
|
"The content in the context is from a source document titled 'Automation Test' and contains the word 'Test'.json",
|
|
87
|
+
Default.FILE_ANALYSIS,
|
|
80
88
|
),
|
|
81
89
|
(
|
|
82
90
|
"test.yaml",
|
|
@@ -103,6 +111,7 @@ file_test_data = [
|
|
|
103
111
|
and form data specifying the file's path and MIME type. The expected response for a successful upload is a status code of `200`
|
|
104
112
|
with a response body containing the word "Success".
|
|
105
113
|
""",
|
|
114
|
+
Default.FILE_ANALYSIS,
|
|
106
115
|
),
|
|
107
116
|
(
|
|
108
117
|
"test.xml",
|
|
@@ -121,6 +130,7 @@ file_test_data = [
|
|
|
121
130
|
</body>
|
|
122
131
|
</request>
|
|
123
132
|
""",
|
|
133
|
+
Default.FILE_ANALYSIS,
|
|
124
134
|
),
|
|
125
135
|
(
|
|
126
136
|
"test.pptx",
|
|
@@ -149,6 +159,7 @@ file_test_data = [
|
|
|
149
159
|
#### Conclusion
|
|
150
160
|
The document concludes by emphasizing that software testing is crucial for ensuring applications work as intended, thereby reducing bugs and enhancing quality.pptx
|
|
151
161
|
""",
|
|
162
|
+
Default.PPTX_TOOL,
|
|
152
163
|
),
|
|
153
164
|
(
|
|
154
165
|
"test.pdf",
|
|
@@ -156,6 +167,7 @@ file_test_data = [
|
|
|
156
167
|
It contains a simple message stating "This file is for test purpose."
|
|
157
168
|
followed by some whitespace and a separator line.
|
|
158
169
|
""",
|
|
170
|
+
Default.PDF_TOOL,
|
|
159
171
|
),
|
|
160
172
|
(
|
|
161
173
|
"test.txt",
|
|
@@ -165,6 +177,7 @@ file_test_data = [
|
|
|
165
177
|
**Source:** test.txt
|
|
166
178
|
**File Content:** This file is for test purpose.
|
|
167
179
|
""",
|
|
180
|
+
Default.FILE_ANALYSIS,
|
|
168
181
|
),
|
|
169
182
|
(
|
|
170
183
|
"test.gif",
|
|
@@ -176,6 +189,7 @@ file_test_data = [
|
|
|
176
189
|
The dog's expression may appear as if it's imitating a human smile,
|
|
177
190
|
which often results in comical interpretations in GIFs and memes.
|
|
178
191
|
""",
|
|
192
|
+
(),
|
|
179
193
|
),
|
|
180
194
|
(
|
|
181
195
|
"test.jpeg",
|
|
@@ -187,6 +201,7 @@ file_test_data = [
|
|
|
187
201
|
The lighting gives a warm tone to the image,
|
|
188
202
|
possibly indicating natural light coming in from a nearby window.
|
|
189
203
|
""",
|
|
204
|
+
(),
|
|
190
205
|
),
|
|
191
206
|
(
|
|
192
207
|
"test.jpg",
|
|
@@ -198,6 +213,7 @@ file_test_data = [
|
|
|
198
213
|
The lighting gives a warm tone to the image,
|
|
199
214
|
possibly indicating natural light coming in from a nearby window.
|
|
200
215
|
""",
|
|
216
|
+
(),
|
|
201
217
|
),
|
|
202
218
|
(
|
|
203
219
|
"test.png",
|
|
@@ -209,6 +225,7 @@ file_test_data = [
|
|
|
209
225
|
The lighting gives a warm tone to the image,
|
|
210
226
|
possibly indicating natural light coming in from a nearby window.
|
|
211
227
|
""",
|
|
228
|
+
(),
|
|
212
229
|
),
|
|
213
230
|
]
|
|
214
231
|
|
|
@@ -125,7 +125,40 @@ class AssistantUtils(BaseUtils):
|
|
|
125
125
|
assistant_id=assistant.id, request=chat_request
|
|
126
126
|
)
|
|
127
127
|
|
|
128
|
-
|
|
128
|
+
if minimal_response:
|
|
129
|
+
return response.generated
|
|
130
|
+
else:
|
|
131
|
+
# Extract triggered tools from response thoughts
|
|
132
|
+
triggered_tools = self._extract_triggered_tools(response)
|
|
133
|
+
return response.generated, triggered_tools
|
|
134
|
+
|
|
135
|
+
def _extract_triggered_tools(self, response):
|
|
136
|
+
"""
|
|
137
|
+
Extract triggered tools from response thoughts.
|
|
138
|
+
|
|
139
|
+
Filters out 'Codemie Thoughts' entries and error entries,
|
|
140
|
+
returning a list of tool names in lowercase.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
response: The assistant response containing thoughts
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
list: List of triggered tool names in lowercase
|
|
147
|
+
"""
|
|
148
|
+
triggered_tools = []
|
|
149
|
+
|
|
150
|
+
# Check if response has thoughts attribute
|
|
151
|
+
if hasattr(response, "thoughts") and response.thoughts:
|
|
152
|
+
for thought in response.thoughts:
|
|
153
|
+
# Filter out 'Codemie Thoughts' and error entries
|
|
154
|
+
if thought.get("author_name") != "Codemie Thoughts" and not thought.get(
|
|
155
|
+
"error", False
|
|
156
|
+
):
|
|
157
|
+
author_name = thought.get("author_name", "")
|
|
158
|
+
if author_name:
|
|
159
|
+
triggered_tools.append(author_name.lower())
|
|
160
|
+
|
|
161
|
+
return triggered_tools
|
|
129
162
|
|
|
130
163
|
def send_chat_request(
|
|
131
164
|
self,
|
|
@@ -173,3 +173,64 @@ def assert_error_details(response, status_code, message):
|
|
|
173
173
|
|
|
174
174
|
def credentials_to_dict(credentials):
|
|
175
175
|
return {cred.key: cred.value for cred in credentials}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def assert_tool_triggered(tool_name, triggered_tools):
|
|
179
|
+
"""
|
|
180
|
+
Assert that the expected tool(s) were triggered during assistant interaction.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
tool_name: Either a single tool enum or a tuple of tool enums that should be triggered
|
|
184
|
+
triggered_tools: List of tools that were actually triggered
|
|
185
|
+
|
|
186
|
+
Raises:
|
|
187
|
+
AssertionError: If any of the expected tools were not found in triggered_tools
|
|
188
|
+
(for tuples, ALL tools must be present)
|
|
189
|
+
"""
|
|
190
|
+
# Handle both single tools and tuples of tools
|
|
191
|
+
if isinstance(tool_name, tuple):
|
|
192
|
+
tools_to_check = tool_name
|
|
193
|
+
else:
|
|
194
|
+
tools_to_check = (tool_name,)
|
|
195
|
+
|
|
196
|
+
# Check each expected tool
|
|
197
|
+
found_tools = []
|
|
198
|
+
missing_tools = []
|
|
199
|
+
|
|
200
|
+
for tool in tools_to_check:
|
|
201
|
+
tool_value_lower = tool.value.lower()
|
|
202
|
+
tool_value_with_spaces = tool.value.replace("_", " ").lower()
|
|
203
|
+
|
|
204
|
+
# Check if this specific tool was triggered
|
|
205
|
+
tool_found = False
|
|
206
|
+
for triggered_tool in triggered_tools:
|
|
207
|
+
if (
|
|
208
|
+
triggered_tool.lower() == tool_value_lower
|
|
209
|
+
or triggered_tool.lower() == tool_value_with_spaces
|
|
210
|
+
or tool_value_with_spaces in triggered_tool.lower()
|
|
211
|
+
):
|
|
212
|
+
found_tools.append(tool.value)
|
|
213
|
+
tool_found = True
|
|
214
|
+
break
|
|
215
|
+
|
|
216
|
+
if not tool_found:
|
|
217
|
+
missing_tools.append(tool.value)
|
|
218
|
+
|
|
219
|
+
# Assert that ALL expected tools were found
|
|
220
|
+
if missing_tools:
|
|
221
|
+
expected_tools = [tool.value for tool in tools_to_check]
|
|
222
|
+
|
|
223
|
+
if len(tools_to_check) == 1:
|
|
224
|
+
assert False, (
|
|
225
|
+
f"Tool validation failed:\n"
|
|
226
|
+
f"Expected tool '{expected_tools[0]}' to be triggered\n"
|
|
227
|
+
f"But it was not found in triggered tools: {triggered_tools}\n"
|
|
228
|
+
)
|
|
229
|
+
else:
|
|
230
|
+
assert False, (
|
|
231
|
+
f"Tool validation failed:\n"
|
|
232
|
+
f"Expected ALL of these tools to be triggered: {expected_tools}\n"
|
|
233
|
+
f"Missing tools: {missing_tools}\n"
|
|
234
|
+
f"Found tools: {found_tools}\n"
|
|
235
|
+
f"Actually triggered: {triggered_tools}\n"
|
|
236
|
+
)
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from codemie_test_harness.tests.utils.env_resolver import EnvironmentResolver
|
|
2
|
-
|
|
3
1
|
import pytest
|
|
4
2
|
|
|
5
3
|
from codemie_test_harness.tests.test_data.mcp_server_test_data import (
|
|
@@ -15,9 +13,9 @@ from codemie_test_harness.tests.test_data.mcp_server_test_data import (
|
|
|
15
13
|
CLI_MCP_SERVER,
|
|
16
14
|
)
|
|
17
15
|
|
|
18
|
-
pytestmark = pytest.mark.skipif(
|
|
19
|
-
EnvironmentResolver.is_localhost(), reason="Skipping this test on local environment"
|
20
|
-
)
|
|
16
|
+
# pytestmark = pytest.mark.skipif(
|
|
17
|
+
# EnvironmentResolver.is_localhost(), reason="Skipping this test on local environment"
|
|
18
|
+
# )
|
|
21
19
|
|
|
22
20
|
|
|
23
21
|
@pytest.mark.workflow
|
codemie_test_harness/tests/workflow/virtual_assistant_tools/mcp/test_workflow_with_mcp_server.py
CHANGED
|
@@ -13,11 +13,10 @@ from codemie_test_harness.tests.test_data.mcp_server_test_data import (
|
|
|
13
13
|
CLI_MCP_SERVER,
|
|
14
14
|
)
|
|
15
15
|
from codemie_test_harness.tests.utils.base_utils import get_random_name
|
|
16
|
-
from codemie_test_harness.tests.utils.env_resolver import EnvironmentResolver
|
|
17
16
|
|
|
18
|
-
pytestmark = pytest.mark.skipif(
|
|
19
|
-
EnvironmentResolver.is_localhost(), reason="Skipping this test on local environment"
|
20
|
-
)
|
|
17
|
+
# pytestmark = pytest.mark.skipif(
|
|
18
|
+
# EnvironmentResolver.is_localhost(), reason="Skipping this test on local environment"
|
|
19
|
+
# )
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
@pytest.mark.workflow
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: codemie-test-harness
|
|
3
|
-
Version: 0.1.180
|
|
3
|
+
Version: 0.1.182
|
|
4
4
|
Summary: Autotest for CodeMie backend and UI
|
|
5
5
|
Author: Anton Yeromin
|
|
6
6
|
Author-email: anton_yeromin@epam.com
|
|
@@ -13,7 +13,7 @@ Requires-Dist: aws-assume-role-lib (>=2.10.0,<3.0.0)
|
|
|
13
13
|
Requires-Dist: boto3 (>=1.39.8,<2.0.0)
|
|
14
14
|
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
15
15
|
Requires-Dist: codemie-plugins (>=0.1.123,<0.2.0)
|
|
16
|
-
Requires-Dist: codemie-sdk-python (==0.1.180)
|
|
16
|
+
Requires-Dist: codemie-sdk-python (==0.1.182)
|
|
17
17
|
Requires-Dist: pytest (>=8.4.1,<9.0.0)
|
|
18
18
|
Requires-Dist: pytest-playwright (>=0.7.0,<0.8.0)
|
|
19
19
|
Requires-Dist: pytest-reportportal (>=5.5.2,<6.0.0)
|