agentscope-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. agentscope_runtime/__init__.py +4 -0
  2. agentscope_runtime/engine/__init__.py +9 -0
  3. agentscope_runtime/engine/agents/__init__.py +2 -0
  4. agentscope_runtime/engine/agents/agentscope_agent/__init__.py +6 -0
  5. agentscope_runtime/engine/agents/agentscope_agent/agent.py +342 -0
  6. agentscope_runtime/engine/agents/agentscope_agent/hooks.py +156 -0
  7. agentscope_runtime/engine/agents/agno_agent.py +220 -0
  8. agentscope_runtime/engine/agents/base_agent.py +29 -0
  9. agentscope_runtime/engine/agents/langgraph_agent.py +59 -0
  10. agentscope_runtime/engine/agents/llm_agent.py +51 -0
  11. agentscope_runtime/engine/deployers/__init__.py +3 -0
  12. agentscope_runtime/engine/deployers/adapter/__init__.py +0 -0
  13. agentscope_runtime/engine/deployers/adapter/a2a/__init__.py +2 -0
  14. agentscope_runtime/engine/deployers/adapter/a2a/a2a_adapter_utils.py +425 -0
  15. agentscope_runtime/engine/deployers/adapter/a2a/a2a_agent_adapter.py +69 -0
  16. agentscope_runtime/engine/deployers/adapter/a2a/a2a_protocol_adapter.py +60 -0
  17. agentscope_runtime/engine/deployers/adapter/protocol_adapter.py +24 -0
  18. agentscope_runtime/engine/deployers/base.py +17 -0
  19. agentscope_runtime/engine/deployers/local_deployer.py +586 -0
  20. agentscope_runtime/engine/helpers/helper.py +127 -0
  21. agentscope_runtime/engine/llms/__init__.py +3 -0
  22. agentscope_runtime/engine/llms/base_llm.py +60 -0
  23. agentscope_runtime/engine/llms/qwen_llm.py +47 -0
  24. agentscope_runtime/engine/misc/__init__.py +0 -0
  25. agentscope_runtime/engine/runner.py +186 -0
  26. agentscope_runtime/engine/schemas/__init__.py +0 -0
  27. agentscope_runtime/engine/schemas/agent_schemas.py +551 -0
  28. agentscope_runtime/engine/schemas/context.py +54 -0
  29. agentscope_runtime/engine/services/__init__.py +9 -0
  30. agentscope_runtime/engine/services/base.py +77 -0
  31. agentscope_runtime/engine/services/context_manager.py +129 -0
  32. agentscope_runtime/engine/services/environment_manager.py +50 -0
  33. agentscope_runtime/engine/services/manager.py +174 -0
  34. agentscope_runtime/engine/services/memory_service.py +270 -0
  35. agentscope_runtime/engine/services/sandbox_service.py +198 -0
  36. agentscope_runtime/engine/services/session_history_service.py +256 -0
  37. agentscope_runtime/engine/tracing/__init__.py +40 -0
  38. agentscope_runtime/engine/tracing/base.py +309 -0
  39. agentscope_runtime/engine/tracing/local_logging_handler.py +356 -0
  40. agentscope_runtime/engine/tracing/tracing_metric.py +69 -0
  41. agentscope_runtime/engine/tracing/wrapper.py +321 -0
  42. agentscope_runtime/sandbox/__init__.py +14 -0
  43. agentscope_runtime/sandbox/box/__init__.py +0 -0
  44. agentscope_runtime/sandbox/box/base/__init__.py +0 -0
  45. agentscope_runtime/sandbox/box/base/base_sandbox.py +37 -0
  46. agentscope_runtime/sandbox/box/base/box/__init__.py +0 -0
  47. agentscope_runtime/sandbox/box/browser/__init__.py +0 -0
  48. agentscope_runtime/sandbox/box/browser/box/__init__.py +0 -0
  49. agentscope_runtime/sandbox/box/browser/browser_sandbox.py +176 -0
  50. agentscope_runtime/sandbox/box/dummy/__init__.py +0 -0
  51. agentscope_runtime/sandbox/box/dummy/dummy_sandbox.py +26 -0
  52. agentscope_runtime/sandbox/box/filesystem/__init__.py +0 -0
  53. agentscope_runtime/sandbox/box/filesystem/box/__init__.py +0 -0
  54. agentscope_runtime/sandbox/box/filesystem/filesystem_sandbox.py +87 -0
  55. agentscope_runtime/sandbox/box/sandbox.py +115 -0
  56. agentscope_runtime/sandbox/box/shared/__init__.py +0 -0
  57. agentscope_runtime/sandbox/box/shared/app.py +44 -0
  58. agentscope_runtime/sandbox/box/shared/dependencies/__init__.py +5 -0
  59. agentscope_runtime/sandbox/box/shared/dependencies/deps.py +22 -0
  60. agentscope_runtime/sandbox/box/shared/routers/__init__.py +12 -0
  61. agentscope_runtime/sandbox/box/shared/routers/generic.py +173 -0
  62. agentscope_runtime/sandbox/box/shared/routers/mcp.py +207 -0
  63. agentscope_runtime/sandbox/box/shared/routers/mcp_utils.py +153 -0
  64. agentscope_runtime/sandbox/box/shared/routers/runtime_watcher.py +187 -0
  65. agentscope_runtime/sandbox/box/shared/routers/workspace.py +325 -0
  66. agentscope_runtime/sandbox/box/training_box/__init__.py +0 -0
  67. agentscope_runtime/sandbox/box/training_box/base.py +120 -0
  68. agentscope_runtime/sandbox/box/training_box/env_service.py +752 -0
  69. agentscope_runtime/sandbox/box/training_box/environments/__init__.py +0 -0
  70. agentscope_runtime/sandbox/box/training_box/environments/appworld/appworld_env.py +987 -0
  71. agentscope_runtime/sandbox/box/training_box/registry.py +54 -0
  72. agentscope_runtime/sandbox/box/training_box/src/trajectory.py +278 -0
  73. agentscope_runtime/sandbox/box/training_box/training_box.py +219 -0
  74. agentscope_runtime/sandbox/build.py +213 -0
  75. agentscope_runtime/sandbox/client/__init__.py +5 -0
  76. agentscope_runtime/sandbox/client/http_client.py +527 -0
  77. agentscope_runtime/sandbox/client/training_client.py +265 -0
  78. agentscope_runtime/sandbox/constant.py +5 -0
  79. agentscope_runtime/sandbox/custom/__init__.py +16 -0
  80. agentscope_runtime/sandbox/custom/custom_sandbox.py +40 -0
  81. agentscope_runtime/sandbox/custom/example.py +37 -0
  82. agentscope_runtime/sandbox/enums.py +68 -0
  83. agentscope_runtime/sandbox/manager/__init__.py +4 -0
  84. agentscope_runtime/sandbox/manager/collections/__init__.py +22 -0
  85. agentscope_runtime/sandbox/manager/collections/base_mapping.py +20 -0
  86. agentscope_runtime/sandbox/manager/collections/base_queue.py +25 -0
  87. agentscope_runtime/sandbox/manager/collections/base_set.py +25 -0
  88. agentscope_runtime/sandbox/manager/collections/in_memory_mapping.py +22 -0
  89. agentscope_runtime/sandbox/manager/collections/in_memory_queue.py +28 -0
  90. agentscope_runtime/sandbox/manager/collections/in_memory_set.py +27 -0
  91. agentscope_runtime/sandbox/manager/collections/redis_mapping.py +26 -0
  92. agentscope_runtime/sandbox/manager/collections/redis_queue.py +27 -0
  93. agentscope_runtime/sandbox/manager/collections/redis_set.py +23 -0
  94. agentscope_runtime/sandbox/manager/container_clients/__init__.py +8 -0
  95. agentscope_runtime/sandbox/manager/container_clients/base_client.py +39 -0
  96. agentscope_runtime/sandbox/manager/container_clients/docker_client.py +170 -0
  97. agentscope_runtime/sandbox/manager/sandbox_manager.py +694 -0
  98. agentscope_runtime/sandbox/manager/server/__init__.py +0 -0
  99. agentscope_runtime/sandbox/manager/server/app.py +194 -0
  100. agentscope_runtime/sandbox/manager/server/config.py +68 -0
  101. agentscope_runtime/sandbox/manager/server/models.py +17 -0
  102. agentscope_runtime/sandbox/manager/storage/__init__.py +10 -0
  103. agentscope_runtime/sandbox/manager/storage/data_storage.py +16 -0
  104. agentscope_runtime/sandbox/manager/storage/local_storage.py +44 -0
  105. agentscope_runtime/sandbox/manager/storage/oss_storage.py +89 -0
  106. agentscope_runtime/sandbox/manager/utils.py +78 -0
  107. agentscope_runtime/sandbox/mcp_server.py +192 -0
  108. agentscope_runtime/sandbox/model/__init__.py +12 -0
  109. agentscope_runtime/sandbox/model/api.py +16 -0
  110. agentscope_runtime/sandbox/model/container.py +72 -0
  111. agentscope_runtime/sandbox/model/manager_config.py +158 -0
  112. agentscope_runtime/sandbox/registry.py +129 -0
  113. agentscope_runtime/sandbox/tools/__init__.py +12 -0
  114. agentscope_runtime/sandbox/tools/base/__init__.py +8 -0
  115. agentscope_runtime/sandbox/tools/base/tool.py +52 -0
  116. agentscope_runtime/sandbox/tools/browser/__init__.py +57 -0
  117. agentscope_runtime/sandbox/tools/browser/tool.py +597 -0
  118. agentscope_runtime/sandbox/tools/filesystem/__init__.py +32 -0
  119. agentscope_runtime/sandbox/tools/filesystem/tool.py +319 -0
  120. agentscope_runtime/sandbox/tools/function_tool.py +321 -0
  121. agentscope_runtime/sandbox/tools/mcp_tool.py +191 -0
  122. agentscope_runtime/sandbox/tools/sandbox_tool.py +104 -0
  123. agentscope_runtime/sandbox/tools/tool.py +123 -0
  124. agentscope_runtime/sandbox/tools/utils.py +68 -0
  125. agentscope_runtime/version.py +2 -0
  126. agentscope_runtime-0.1.0.dist-info/METADATA +327 -0
  127. agentscope_runtime-0.1.0.dist-info/RECORD +131 -0
  128. agentscope_runtime-0.1.0.dist-info/WHEEL +5 -0
  129. agentscope_runtime-0.1.0.dist-info/entry_points.txt +4 -0
  130. agentscope_runtime-0.1.0.dist-info/licenses/LICENSE +202 -0
  131. agentscope_runtime-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,987 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ This file is part of https://github.com/StonyBrookNLP/appworld
4
+
5
+ Copyright 2024 StonyBrookNLP
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ """
18
+
19
+ from typing import Dict, List
20
+ import re
21
+ import json
22
+ from copy import deepcopy
23
+
24
+ from fastapi.encoders import ENCODERS_BY_TYPE
25
+ from jinja2 import Template
26
+
27
+ from appworld import AppWorld
28
+ from appworld.evaluator import evaluate_task
29
+ from appworld import load_task_ids
30
+
31
+ from training_box.base import BaseEnv
32
+ from training_box.registry import Registry
33
+ from training_box.src.trajectory import StateMessage, ActionMessage, ToolCall
34
+
35
+
36
# Register JSON encoders for sentinel objects so FastAPI's encoder maps them
# to None.
ENCODERS_BY_TYPE[type(...)] = lambda _: None  # Ellipsis -> None

# The openai sentinel types are imported one at a time: with a combined
# `from ... import A, B`, a single missing name raises ImportError and
# neither encoder would be registered.
# NOTE(review): `NotGivenType` may not be exported by every openai release
# -- confirm against the pinned version.
try:  # openai-python >= 1.0
    from openai._types import NotGiven  # type: ignore
except ImportError:  # openai 0.x, or openai not installed
    pass
else:
    ENCODERS_BY_TYPE[NotGiven] = lambda _: None

try:
    from openai._types import NotGivenType  # type: ignore
except ImportError:  # name not provided by this openai version
    pass
else:
    ENCODERS_BY_TYPE[NotGivenType] = lambda _: None
44
+
45
+ # Prompts adapted from https://github.com/StonyBrookNLP/appworld
46
+
47
+ Env_prompt = """
48
+ Tools:
49
+ appworld
50
+
51
+ Arguments:
52
+ code
53
+
54
+ Examples:
55
+ "arguments": "{\"code\": \"print(apis.api_docs.show_app_descriptions())\"}",
56
+ "name": "appworld"
57
+
58
+ Descriptions:
59
+
60
+ """
61
+
62
+ SUPER_API_PROMPT = """
63
+
64
+ Here are three key APIs that you need to know to get more information
65
+
66
+ # To get a list of apps that are available to you.
67
+ print(apis.api_docs.show_app_descriptions())
68
+
69
+ # To get the list of apis under any app listed above, e.g. supervisor
70
+ print(apis.api_docs.show_api_descriptions(app_name='supervisor'))
71
+
72
+ # To get the specification of a particular api, e.g. supervisor app's
73
+ # show_account_passwords
74
+ print(apis.api_docs.show_api_doc(app_name='supervisor',
75
+ api_name='show_account_passwords'))
76
+
77
+ Each code execution will produce an output that you can use in
78
+ subsequent calls. Using these APIs, you can now generate code, that
79
+ the environment will execute, to solve the task.
80
+ """
81
+
82
# Pattern for a complete fenced block: lazily captures the text between a
# "```python" line and the next "```".
# NOTE(review): whether `.` spans newlines depends on the flags (e.g.
# re.DOTALL) used at the call site, which is not visible here.
full_code_regex = r"```python\n(.*?)```"
# Pattern for an unterminated fenced block: captures whatever follows a
# "```python" line when no closing fence is required.
partial_code_regex = r".*```python\n(.*)"
84
+
85
+ # Prompt adapted from https://github.com/StonyBrookNLP/appworld/blob/
86
+ # main/experiments/prompts/react.txt
87
+ PROMPT_TEMPLATE = """
88
+ USER:
89
+ I am your supervisor and you are a super intelligent AI Assistant
90
+ whose job is to achieve my day-to-day tasks completely autonomously.
91
+
92
+ To do this, you will need to interact with app/s (e.g., spotify,
93
+ venmo etc) using their associated APIs on my behalf. For this you
94
+ will undertake a *multi-step conversation* using a python REPL
95
+ environment. That is, you will write the python code and the
96
+ environment will execute it and show you the result, based on
97
+ which, you will write python code for the next step and so on,
98
+ until you've achieved the goal. This environment will let you
99
+ interact with app/s using their associated APIs on my behalf.
100
+
101
+ Here are three key APIs that you need to know to get more
102
+ information
103
+
104
+ # To get a list of apps that are available to you.
105
+
106
+ ```python
107
+ print(apis.api_docs.show_app_descriptions())
108
+ ```
109
+
110
+ # To get the list of apis under any app listed above, e.g. spotify
111
+
112
+ ```python
113
+ print(apis.api_docs.show_api_descriptions(app_name='spotify'))
114
+ ```
115
+
116
+ # To get the specification of a particular api, e.g.
117
+ # spotify app's login api
118
+
119
+ ```python
120
+ print(apis.api_docs.show_api_doc(app_name='spotify', api_name='login'))
121
+ ```
122
+
123
+ Each code execution will produce an output that you can use
124
+ in subsequent calls.
125
+ Using these APIs, you can now generate code, that I will
126
+ execute, to solve the task.
127
+ Let's start with the task
128
+
129
+ My name is: {{ main_user.first_name }}
130
+ {{ main_user.last_name }}.
131
+ My personal email is {{ main_user.email }} and phone number
132
+ is {{ main_user.phone_number }}.
133
+ Task: How many playlists do I have in Spotify?
134
+
135
+ ASSISTANT:
136
+ Okay. Lets first find which APIs are available to use in Spotify.
137
+ Code:
138
+ ```python
139
+ print(apis.api_docs.show_api_descriptions(app_name='spotify'))
140
+ ```
141
+
142
+ USER:
143
+ Output:
144
+ ```
145
+ [
146
+ ...
147
+ "login : Login to your account.",
148
+ "logout : Logout from your account.",
149
+ ...
150
+ ]
151
+ ```
152
+
153
+
154
+ ASSISTANT:
155
+ Okay. Looks like I can use the `login` api. Lets find its specifications.
156
+
157
+ Code:
158
+ ```python
159
+ print(apis.api_docs.show_api_doc(app_name='spotify', api_name='login'))
160
+ ```
161
+
162
+
163
+ USER:
164
+ Output:
165
+ ```
166
+ {
167
+ "app_name": "spotify",
168
+ "api_name": "login",
169
+ "path": "/auth/token",
170
+ "method": "POST",
171
+ "description": "Login to your account.",
172
+ "parameters": [
173
+ {
174
+ "name": "username",
175
+ "type": "string",
176
+ "required": true,
177
+ "description": "Your account email.",
178
+ "default": null,
179
+ "constraints": []
180
+ },
181
+ {
182
+ "name": "password",
183
+ "type": "string",
184
+ "required": true,
185
+ "description": "Your account password.",
186
+ "default": null,
187
+ "constraints": []
188
+ }
189
+ ],
190
+ "response_schemas": {
191
+ "success": {
192
+ "token_type": "string",
193
+ "access_token": "string"
194
+ },
195
+ "failure": {
196
+ "message": "string"
197
+ }
198
+ }
199
+ }
200
+ ```
201
+
202
+
203
+ ASSISTANT:
204
+ I need the supervisor's username and password.
205
+ Lets see if any app can help me get that.
206
+
207
+ Code:
208
+ ```python
209
+ print(apis.api_docs.show_app_descriptions())
210
+ ```
211
+
212
+
213
+ USER:
214
+ Output:
215
+ ```
216
+ {{ app_descriptions }}
217
+ ```
218
+
219
+
220
+ ASSISTANT:
221
+ Looks like the supervisor app could help me with that.
222
+
223
+ Code:
224
+ ```python
225
+ print(apis.api_docs.show_api_descriptions(app_name='supervisor'))
226
+ ```
227
+
228
+
229
+ USER:
230
+ Output:
231
+ ```
232
+ [
233
+ ...
234
+ "show_account_passwords : Show your supervisor's account passwords."
235
+ ...
236
+ ]
237
+ ```
238
+
239
+
240
+ ASSISTANT:
241
+ I can use `show_account_passwords` to get the passwords.
242
+ So I will call that and save it in a variable for future reference.
243
+ Code:
244
+ ```python
245
+ passwords=apis.supervisor.show_account_passwords()
246
+ print(passwords)
247
+ ```
248
+
249
+
250
+ USER:
251
+ Output:
252
+ ```
253
+ [
254
+ {
255
+ "account_name": "spotify",
256
+ "password": "dummy_spotify_pass"
257
+ },
258
+ {
259
+ "account_name": "file_system",
260
+ "password": "dummy_fs_pass"
261
+ }
262
+ ]
263
+ ```
264
+
265
+
266
+ ASSISTANT:
267
+ So the Spotify password is an entry in the `passwords`
268
+ list with the account_name=spotify.
269
+ I can use it to login now with the provided email address:
270
+ {{ main_user.email }}.
271
+
272
+ Code:
273
+ ```python
274
+ spotify_password = [account_password["account_name"] == "spotify" for
275
+ account_password in passwords][0]["password"]
276
+ login_result = apis.spotify.login(username='{{ main_user.email }}',
277
+ password=spotify_password)
278
+ print(login_result)
279
+ ```
280
+
281
+
282
+ USER:
283
+ Output:
284
+ ```
285
+ {
286
+ "token_type": "Bearer",
287
+ "access_token": "fake_access_token"
288
+ }
289
+ ```
290
+
291
+
292
+ ASSISTANT:
293
+ First, let's check the available APIs for the Spotify app.
294
+ Code:
295
+ ```python
296
+ print(apis.api_docs.show_api_descriptions(app_name='spotify'))
297
+ ```
298
+
299
+
300
+ USER:
301
+ Output:
302
+ ```
303
+ [
304
+ ...
305
+ {
306
+ "name": "show_playlist_library",
307
+ "description": "Get a list of playlists in the user's playlist library."
308
+ },
309
+ {
310
+ "name": "show_playlist",
311
+ "description": "Get detailed information about a specific playlist.
312
+ You can view your own playlists or others' playlists if they are public."
313
+ },
314
+ ...
315
+ ]
316
+ ```
317
+
318
+
319
+ ASSISTANT:
320
+ The `show_playlist_library` API seems to be the one we need.
321
+ Let's check its specifications.
322
+ Code:
323
+ ```python
324
+ print(apis.api_docs.show_api_doc(app_name='spotify',
325
+ api_name='show_playlist_library'))
326
+ ```
327
+
328
+
329
+ USER:
330
+ Output:
331
+ ```
332
+ {
333
+ "app_name": "spotify",
334
+ "api_name": "show_playlist_library",
335
+ "path": "/private_playlists",
336
+ "method": "GET",
337
+ "description": "Get a list of playlists in the user's playlist library.",
338
+ "parameters": [
339
+ {
340
+ "name": "access_token",
341
+ "type": "string",
342
+ "required": true,
343
+ "description": "Access token obtained from spotify app login.",
344
+ "default": null,
345
+ "constraints": []
346
+ },
347
+ {
348
+ "name": "page_index",
349
+ "type": "integer",
350
+ "required": false,
351
+ "description": "The index of the page to retrieve.",
352
+ "default": 0,
353
+ "constraints": [
354
+ "value >= 0.0"
355
+ ]
356
+ },
357
+ {
358
+ "name": "page_limit",
359
+ "type": "integer",
360
+ "required": false,
361
+ "description": "The maximum number of results to return per page.",
362
+ "default": 5,
363
+ "constraints": [
364
+ "value >= 1.0, <= 20.0"
365
+ ]
366
+ },
367
+ {
368
+ "name": "is_public",
369
+ "type": "boolean",
370
+ "required": false,
371
+ "description": "Whether to show public playlists or private playlists.",
372
+ "default": null,
373
+ "constraints": []
374
+ }
375
+ ],
376
+ "response_schema": [
377
+ {
378
+ "title": "string",
379
+ "created_at": "2019-01-01T00:00:00",
380
+ "is_public": true,
381
+ "rating": 0.0,
382
+ "like_count": 1,
383
+ "owner_email": "user@example.com",
384
+ "playlist_id": 1,
385
+ "song_ids": [
386
+ 1
387
+ ]
388
+ }
389
+ ]
390
+ }
391
+ ```
392
+
393
+
394
+ ASSISTANT:
395
+ I need to page through all the playlists to get the list of playlists
396
+ and save it in `playlists`.
397
+ Code:
398
+ ```python
399
+ page_index = 0
400
+ playlists = []
401
+ while page_index < 10:
402
+ playlist_page = apis.spotify.show_playlist_library(
403
+ access_token=spotify_access_token,
404
+ page_index=page_index)
405
+ if playlist_page:
406
+ playlists.extend(playlist_page)
407
+ page_index += 1
408
+ else:
409
+ break
410
+ num_playlists = len(playlists)
411
+ print(num_playlists)
412
+
413
+ ```
414
+
415
+ USER:
416
+ Output:
417
+ ```
418
+ 23
419
+ ```
420
+
421
+
422
+ ASSISTANT:
423
+ Now that the task is completed, I can call apis.supervisor.complete_task().
424
+ Since this task has an answer to be returned, I will pass that as an argument.
425
+
426
+ Code:
427
+ ```python
428
+ apis.supervisor.complete_task(answer=num_playlists)
429
+ ```
430
+
431
+
432
+ USER:
433
+ Output:
434
+ Marked the active task complete.
435
+
436
+
437
+ ----------------------------------------------
438
+
439
+ USER:
440
+ **Key instructions**:
441
+ (1) Make sure to end code blocks with ``` followed by a newline(\n).
442
+
443
+ (2) Remember you can use the variables in your code in subsequent
444
+ code blocks.
445
+
446
+ (3) Remember that the email addresses, access tokens and variables
447
+ (e.g. spotify_password) in the example above are not valid anymore.
448
+
449
+ (4) You can use the "supervisor" app to get information about my accounts
450
+ and use the "phone" app to get information about friends and family.
451
+
452
+ (5) Always look at API specifications (using apis.api_docs.show_api_doc)
453
+ before calling an API.
454
+
455
+ (6) Write small chunks of code and only one chunk of code in every step.
456
+ Make sure everything is working correctly before making
457
+ any irreversible change.
458
+
459
+ (7) Many APIs return items in "pages". Make sure to run through all
460
+ the pages by looping over `page_index`.
461
+
462
+ (8) Once you have completed the task, make sure to
463
+ call apis.supervisor.complete_task().
464
+ If the task asked for some information, return it as the answer argument,
465
+ i.e. call apis.supervisor.complete_task(answer=<answer>).
466
+ Many tasks do not require an answer, so in those cases,
467
+ just call apis.supervisor.complete_task()
468
+ i.e. do not pass any argument.
469
+
470
+ USER:
471
+ Using these APIs, now generate code to solve the actual task:
472
+
473
+ My name is: {{ main_user.first_name }} {{ main_user.last_name }}.
474
+ My personal email is {{ main_user.email }}
475
+ and phone number is {{ main_user.phone_number }}.
476
+
477
+ """
478
+
479
+ # The final line of the original prompt is intentionally omitted here:
480
+ # Task: {{ input_str }}
481
+
482
+
483
+ # https://github.com/StonyBrookNLP/appworld/blob/main/notebooks/
484
+ # minimal_agent.ipynb
485
+ # This is a basic prompt template containing all the necessary onboarding
486
+ # information to solve AppWorld tasks. It explains the role of the agent
487
+ # and the supervisor, how to explore the API documentation, how to operate
488
+ # the interactive coding environment and call APIs via a simple task, and
489
+ # provides key instructions and disclaimers.
490
+
491
+ # You can adapt it as needed by your agent. You can also choose to bypass
492
+ # API docs app and build your own API retrieval, e.g., for FullCodeRefl,
493
+ # IPFunCall, etc, we asked an LLM to predict relevant APIs separately and
494
+ # put its documentation directly in the prompt.
495
+ simple_prompt = """
496
+ USER:
497
+ I am your supervisor and you are a super intelligent AI Assistant whose
498
+ job is to achieve my day-to-day tasks completely autonomously.
499
+
500
+ To do this, you will need to interact with app/s (e.g., spotify, venmo,
501
+ etc) using their associated APIs on my behalf. For this you will undertake
502
+ a *multi-step conversation* using a python REPL environment. That is,
503
+ you will write the python code and the environment will execute it and
504
+ show you the result, based on which, you will write python code for the
505
+ next step and so on, until you've achieved the goal. This environment will
506
+ let you interact with app/s using their associated APIs on my behalf.
507
+
508
+ Here are three key APIs that you need to know to get more information
509
+
510
+ # To get a list of apps that are available to you.
511
+ print(apis.api_docs.show_app_descriptions())
512
+
513
+ # To get the list of apis under any app listed above, e.g. supervisor
514
+ print(apis.api_docs.show_api_descriptions(app_name='supervisor'))
515
+
516
+ # To get the specification of a particular api, e.g. supervisor app's
517
+ # show_account_passwords
518
+ print(apis.api_docs.show_api_doc(app_name='supervisor',
519
+ api_name='show_account_passwords'))
520
+
521
+ Each code execution will produce an output that you can use in
522
+ subsequent calls. Using these APIs, you can now generate code,
523
+ that the environment will execute, to solve the task.
524
+
525
+ For example, consider the task:
526
+
527
+ My name is: {{ supervisor.first_name }} {{ supervisor.last_name }}.
528
+ My personal email is {{ supervisor.email }} and phone number is
529
+ {{ supervisor.phone_number }}.
530
+
531
+ Task:
532
+
533
+ What is the password for my Spotify account?
534
+
535
+ ASSISTANT:
536
+ # Okay. Lets first find which apps are available to get the password
537
+ # by looking at the app descriptions.
538
+ print(apis.api_docs.show_app_descriptions())
539
+
540
+ USER:
541
+ [
542
+ {
543
+ "name": "api_docs",
544
+ "description": "An app to search and explore API documentation."
545
+ },
546
+ {
547
+ "name": "supervisor",
548
+ "description": "An app to access supervisor's personal information,
549
+ account credentials, addresses, payment cards, and manage the
550
+ assigned task."
551
+ },
552
+ ...
553
+ {
554
+ "name": "spotify",
555
+ "description": "A music streaming app to stream songs and manage song,
556
+ album and playlist libraries."
557
+ },
558
+ {
559
+ "name": "venmo",
560
+ "description": "A social payment app to send, receive and request
561
+ money to and from others."
562
+ },
563
+ ...
564
+ ]
565
+
566
+
567
+ ASSISTANT:
568
+ # Looks like the supervisor app could help me with that. Lets see what
569
+ # apis are available under this app.
570
+ print(apis.api_docs.show_api_descriptions(app_name='supervisor'))
571
+
572
+
573
+ USER:
574
+ [
575
+ ...
576
+ "show_account_passwords : Show your supervisor's account passwords."
577
+ ...
578
+ ]
579
+
580
+
581
+ ASSISTANT:
582
+ # I can use `show_account_passwords` to get the passwords. Let me
583
+ # see its detailed specification to understand its arguments and
584
+ # output structure.
585
+ print(apis.api_docs.show_api_doc(app_name='supervisor',
586
+ api_name='show_account_passwords'))
587
+
588
+ USER:
589
+ {
590
+ 'app_name': 'supervisor',
591
+ 'api_name': 'show_account_passwords',
592
+ 'path': '/account_passwords',
593
+ 'method': 'GET',
594
+ 'description': "Show your supervisor's app account passwords.",
595
+ 'parameters': [],
596
+ 'response_schemas': {
597
+ 'success': [{'account_name': 'string', 'password': 'string'}],
598
+ 'failure': {'message': 'string'}
599
+ }
600
+ }
601
+
602
+ ASSISTANT:
603
+ # Okay, it requires no arguments. So I can just call it directly.
604
+ print(apis.supervisor.show_account_passwords())
605
+
606
+ USER:
607
+ [
608
+ {
609
+ "account_name": "spotify",
610
+ "password": "dummy_spotify_pass"
611
+ },
612
+ {
613
+ "account_name": "file_system",
614
+ "password": "dummy_fs_pass"
615
+ },
616
+ ...
617
+ ]
618
+
619
+
620
+ ASSISTANT:
621
+ # So the Spotify password is an entry in the `passwords` list with
622
+ # the account_name=spotify.
623
+ spotify_password = [account_password["account_name"] == "spotify"
624
+ for account_password in passwords][0]["password"]
625
+ print(spotify_password)
626
+
627
+
628
+ USER:
629
+ dummy_spotify_pass
630
+
631
+ ASSISTANT:
632
+ # When the task is completed, I need to call apis.supervisor.complete_task().
633
+ # If there is an answer, I need to pass it as an argument `answer`. I will
634
+ # pass the spotify_password as an answer.
635
+ apis.supervisor.complete_task(answer=spotify_password)
636
+
637
+
638
+ USER:
639
+ Marked the active task complete.
640
+
641
+
642
+ ----------------------------------------------
643
+
644
+ USER:
645
+ **Key instructions and disclaimers**:
646
+
647
+ 1. The email addresses, access tokens and variables (e.g. spotify_password)
648
+ in the example above were only for demonstration. Obtain the correct
649
+ information by calling relevant APIs yourself.
650
+ 2. Only generate valid code blocks, i.e., do not put them in ```...```
651
+ or add any extra formatting. Any thoughts should be put as code comments.
652
+ 3. You can use the variables from the previous code blocks in the
653
+ subsequent code blocks.
654
+ 4. Write small chunks of code and only one chunk of code in every step.
655
+ Make sure everything is working correctly before making any irreversible
656
+ change.
657
+ 5. The provided Python environment has access to its standard library.
658
+ But modules and functions that have a risk of affecting the underlying OS,
659
+ file system or process are disabled. You will get an error if do call them.
660
+ 6. Any reference to a file system in the task instructions means the file
661
+ system *app*, operable via given APIs, and not the actual file system the
662
+ code is running on. So do not write code making calls to os-level modules
663
+ and functions.
664
+ 7. To interact with apps, only use the provided APIs, and not the
665
+ corresponding
666
+ Python packages. E.g., do NOT use `spotipy` for Spotify. Remember, the
667
+ environment only has the standard library.
668
+ 8. The provided API documentation has both the input arguments and the
669
+ output JSON schemas. All calls to APIs and parsing its outputs must be
670
+ as per this documentation.
671
+ 9. For APIs that return results in "pages", make sure to consider all pages.
672
+ 10. To obtain current date or time, use Python functions like `datetime.now()`
673
+ or obtain it from the phone app. Do not rely on your existing knowledge of
674
+ what the current date or time is.
675
+ 11. For all temporal requests, use proper time boundaries, e.g., if I ask for
676
+ something that happened yesterday, make sure to consider the time between
677
+ 00:00:00 and 23:59:59. All requests are concerning a single, default
678
+ (no) time zone.
679
+ 12. Any reference to my friends, family or any other person or relation refers
680
+ to the people in my phone's contacts list.
681
+ 13. All my personal information, and information about my app account
682
+ credentials, physical addresses and owned payment cards are stored in the
683
+ "supervisor" app. You can access them via the APIs provided by the
684
+ supervisor app.
685
+ 14. Once you have completed the task, call `apis.supervisor.complete_task()`.
686
+ If the task asks for some information, return it as the answer argument, i.e.
687
+ call `apis.supervisor.complete_task(answer=<answer>)`. For tasks that do not
688
+ require an answer, just skip the answer argument or pass it as None.
689
+ 15. The answers, when given, should be just entity or number, not full
690
+ sentences,
691
+ e.g., `answer=10` for "How many songs are in the Spotify queue?". When an
692
+ answer is a number, it should be in numbers, not in words, e.g.,
693
+ "10" and not "ten".
694
+ 16. You can also pass `status="fail"` in the complete_task API if you are
695
+ sure you cannot solve it and want to exit.
696
+ 17. You must make all decisions completely autonomously and not ask for any
697
+ clarifications or confirmations from me or anyone else.
698
+
699
+ USER:
700
+ Using these APIs, now generate code to solve the actual task:
701
+
702
+ My name is: {{ supervisor.first_name }} {{ supervisor.last_name }}.
703
+ My personal email is {{ supervisor.email }} and phone number
704
+ is {{ supervisor.phone_number }}.
705
+
706
+
707
+ """
708
+ # The final line of the original prompt is intentionally omitted here:
709
+ # Task:{{ instruction }}
710
+
711
+
712
+ @Registry.register("appworld")
713
+ class AppworldEnv(BaseEnv):
714
def __init__(
    self,
    task_id: str = None,
    instance_id: str = None,
    params: Dict = None,
):
    """Record the task/instance identifiers and set default state.

    Args:
        task_id: Identifier of the task; stored as ``self.task_id``.
        instance_id: Identifier of this environment instance; stored as
            ``self.instance_id``.
        params: Optional configuration mapping; a falsy value is replaced
            by an empty dict.
    """
    # Configuration first, falling back to a fresh dict when none is given.
    self.params = params if params else {}

    # Defaults: no world has been created yet and the "simple" prompt
    # variant is off.
    self.world = None
    self.simple = False

    self.instance_id = instance_id
    self.task_id = task_id
725
+
726
def get_init_state(self, params: Dict = None):
    """Create the AppWorld instance and build the initial conversation.

    Args:
        params: Optional mapping of options. A boolean under ``"simple"``
            overrides ``self.simple``; the mapping is also forwarded to
            ``_get_query``.

    Returns:
        A dict with two keys: ``"state"``, a list holding a system message
        (the rendered prompt) followed by a user message (the task
        instruction), and ``"info"``, which carries the instance and task
        identifiers.
    """
    options = params if params else {}

    self.world = AppWorld(
        task_id=self.task_id,
        experiment_name=self.instance_id,
    )

    # Only a genuine bool may override the prompt variant; anything else
    # (including a missing key) leaves the current setting untouched.
    simple_flag = options.get("simple")
    if isinstance(simple_flag, bool):
        self.simple = simple_flag

    self.tools_info = Env_prompt + SUPER_API_PROMPT
    self.query = self._get_query(options)

    messages = [
        {"role": "system", "content": self.query},
        {"role": "user", "content": self.world.task.instruction},
    ]
    info = {"instance_id": self.instance_id, "task_id": self.task_id}
    return {"state": messages, "info": info}
746
+
747
def _get_query(self, params) -> str:
    """Render the prompt text used as the system message.

    In simple mode ``simple_prompt`` is rendered with the task supervisor;
    otherwise ``PROMPT_TEMPLATE`` is rendered with the supervisor, the app
    descriptions (as a JSON list) and the ground-truth required APIs.

    Args:
        params: Settings dict. When ``params["prompt"]`` is exactly the
            boolean ``False``, only the text after the last ``"USER:"``
            marker of the rendered prompt is returned.

    Returns:
        The rendered prompt, or its trailing ``USER:`` section.
    """
    prompt_flag = params["prompt"] if "prompt" in params else None
    user_section_only = isinstance(prompt_flag, bool) and not prompt_flag

    if self.simple:
        context = {"supervisor": self.world.task.supervisor}
        rendered = Template(simple_prompt.lstrip()).render(context)
    else:
        full_template = Template(PROMPT_TEMPLATE)
        described_apps = [
            {"name": app_name, "description": app_text}
            for app_name, app_text in self.world.task.app_descriptions.items()
        ]
        context = {
            "main_user": self.world.task.supervisor,
            "app_descriptions": json.dumps(described_apps, indent=1),
            "relevant_apis": str(
                self.world.task.ground_truth.required_apis,
            ),
        }
        rendered = full_template.render(context)

    if user_section_only:
        return rendered.split("USER:")[-1]
    return rendered
790
+
791
def step(self, action: Dict, params: Dict = None):
    """Apply one agent action and report the resulting state and reward.

    Args:
        action: Keyword arguments used to build an ``ActionMessage``.
        params: Optional settings forwarded to ``evaluate`` once the task
            has terminated (e.g. the ``"sparse"`` flag read there).

    Returns:
        A dict with ``"state"`` (a one-element list holding the state
        message's ``simple_dict``), ``"reward"`` (``0.0`` until the task is
        completed), ``"is_terminated"`` and an empty ``"info"`` dict.
    """
    params = params or {}
    action_msg = ActionMessage(**action)

    state_message = self.transition(action_msg)

    is_terminated = self.world.task_completed()

    # evaluate() takes (messages, params); pass params by keyword so it is
    # not silently bound to `messages`, which would drop the caller's options.
    reward = self.evaluate(params=params) if is_terminated else 0.0

    return {
        "state": [
            state_message.simple_dict,
        ],
        "reward": reward,
        "is_terminated": is_terminated,
        "info": {},
    }
809
+
810
def evaluate(self, messages: Dict = None, params: Dict = None) -> float:
    """Score the current task via ``evaluate_task``.

    Args:
        messages: Accepted for interface compatibility; not used here.
        params: Optional settings. When ``params["sparse"]`` is exactly the
            boolean ``True`` the score is all-or-nothing.

    Returns:
        In sparse mode, ``1.0`` when the tracker reports no failures and
        ``0.0`` otherwise. In dense mode, the fraction of passes among
        passes and failures, or ``0.0`` when the tracker reports neither.
    """
    params = params or {}
    sparse = params.get("sparse") is True

    tracker = evaluate_task(
        task_id=self.task_id,
        experiment_name=self.instance_id,
        suppress_errors=True,
        save_report=False,
    )
    num_passes = len(tracker.passes)
    num_failures = len(tracker.failures)
    if sparse:
        return float(num_failures == 0)

    total = num_passes + num_failures
    if total == 0:
        # Nothing was checked: avoid ZeroDivisionError and award no credit.
        return 0.0
    return num_passes / total
830
+
831
def get_info(self):
    """Return the value currently stored in ``self.tools_info``."""
    info = self.tools_info
    return info
833
+
834
def close(self):
    """Close the AppWorld session if one is set; otherwise do nothing."""
    session = self.world
    if not session:
        return
    session.close()
837
+
838
def transition(
    self,
    action_msg: ActionMessage,
) -> StateMessage:
    """Execute the code carried by an action and collect the observations.

    When the action has no tool calls, code is extracted from its content
    with ``extract_code_and_fix_content``. If none is found and the
    environment is in simple mode, the raw content is executed instead.
    Otherwise every tool call is processed in order: calls whose name is
    not ``"appworld"`` get a "tool not found" result, the others execute
    the ``"code"`` entry of their JSON ``arguments``.

    Args:
        action_msg: The agent's action message.

    Returns:
        A ``StateMessage`` whose content is the newline-joined results of
        all processed calls (an empty string when nothing was executed).
    """
    new_tool_calls: List[ToolCall] = []

    if len(action_msg.tool_calls) == 0:
        code, _ = self.extract_code_and_fix_content(action_msg.content)
        if len(code) > 0:
            to_run = code
        elif self.simple:
            # No extractable code: simple mode runs the raw content. Record
            # that content as the arguments (the extracted code is empty
            # on this path).
            to_run = action_msg.content
        else:
            to_run = None

        if to_run is not None:
            tool_call_raw_result = self.appworld_excute(action=to_run)
            new_tool_calls.append(
                ToolCall(
                    id="",
                    index=0,
                    name="appworld",
                    arguments=to_run,
                    type="tool",
                    result=tool_call_raw_result,
                ),
            )

    for tool_call in action_msg.tool_calls:
        new_tool_call = deepcopy(tool_call)
        tool_name = tool_call.name

        if tool_name != "appworld":
            # NOTE(review): leading whitespace inside this literal cannot be
            # recovered from the diff view; confirm against the packaged file.
            new_tool_call.result = f"""
tools not found in the existing tool list,
please check the tool name of {tool_name}
"""
        else:
            try:
                # Parsing sits inside the try so malformed JSON or a missing
                # "code" key is reported back as an observation instead of
                # aborting the whole step.
                args = json.loads(tool_call.arguments)["code"]
                new_tool_call.result = self.appworld_excute(action=args)
            except Exception as e:
                # NOTE(review): get_tool_info is not defined in the visible
                # part of this class; presumably inherited - verify.
                new_tool_call.result = (
                    f"tool call error of {tool_name}, "
                    f"whose description is "
                    f"{self.get_tool_info(tool_name=tool_name)}. "
                    f"Error is " + str(e)
                )

        new_tool_calls.append(new_tool_call)

    # Joining an empty list already yields "", so no special case is needed.
    merged_result = "\n".join(
        str(tc.result) if tc.result is not None else ""
        for tc in new_tool_calls
    )
    return StateMessage(content=merged_result)
911
+
912
def extract_code_and_fix_content(
    self,
    text: str,
    ignore_multiple_calls=True,
) -> tuple[str, str]:
    """Pull code out of ``text`` and return it with the adjusted text.

    Matches of ``full_code_regex`` are scanned in order. With
    ``ignore_multiple_calls`` set, the first match wins and the text is
    cut right after it. Otherwise every match is collected (one per line),
    and the remainder after the last match is tried against
    ``partial_code_regex``; on a hit its group is appended and a closing
    fence is added to the text (presumably the patterns describe fenced
    code blocks - they are defined outside this method).

    Args:
        text: The message content to scan.
        ignore_multiple_calls: Stop at the first complete match.

    Returns:
        ``(code, text)``; ``code`` is ``""`` when nothing matched.
    """
    complete_snippets = []
    resume_at = 0
    for found in re.finditer(full_code_regex, text, flags=re.DOTALL):
        snippet = found.group(1).strip()
        if ignore_multiple_calls:
            return snippet, text[: found.end()]
        complete_snippets.append(snippet + "\n")
        resume_at = found.end()

    collected = "".join(complete_snippets)
    fixed_text = text
    trailing = re.match(
        partial_code_regex,
        text[resume_at:],
        flags=re.DOTALL,
    )
    if trailing is not None:
        collected += trailing.group(1).strip()
        # The block was left open: terminate it in the returned text.
        if not fixed_text.endswith("\n"):
            fixed_text += "\n"
        fixed_text += "```"
    return collected, fixed_text
945
+
946
def text_to_messages(self, input_str: str) -> list[dict]:
    """Split a role-tagged transcript into chat messages.

    A message starts at each ``USER:``, ``ASSISTANT:`` or ``SYSTEM:``
    marker (case-insensitive) that is immediately followed by a newline,
    and runs up to the next marker or the end of the input.

    Args:
        input_str: The transcript text.

    Returns:
        A list of ``{"role": <lower-cased role>, "content": <text>}`` dicts
        in order of appearance; an empty list for empty input.

    Raises:
        ValueError: If non-empty text precedes the first role marker, or
            the input is non-empty and contains no marker at all.
    """
    messages_json = []
    last_start = 0
    for m in re.finditer(
        "(USER|ASSISTANT|SYSTEM):\n",
        input_str,
        flags=re.IGNORECASE,
    ):
        marker_start, marker_end = m.span()
        if messages_json:
            # Close the previous message at the start of this marker.
            messages_json[-1]["content"] = input_str[last_start:marker_start]
        elif marker_start != 0:
            raise ValueError(
                f"Start of the prompt has no assigned "
                f"role: {input_str[:marker_start]}",
            )
        messages_json.append({"role": m.group(1).lower(), "content": None})
        last_start = marker_end

    if not messages_json:
        # No marker found: previously an IndexError on messages_json[-1].
        if input_str:
            raise ValueError(
                f"Start of the prompt has no assigned "
                f"role: {input_str}",
            )
        return []

    messages_json[-1]["content"] = input_str[last_start:]
    return messages_json
968
+
969
def appworld_excute(self, action: str):
    """Run ``action`` in the AppWorld session and return the observation.

    Args:
        action: The text handed to ``self.world.execute``.

    Returns:
        The raw output in simple mode; otherwise the output wrapped in an
        ``Output:`` header and a fenced block.

    Raises:
        ValueError: If ``action`` is not a string.
    """
    if not isinstance(action, str):
        raise ValueError(
            "must be a string and contains 'python'",
        )

    raw_output = self.world.execute(action)
    if self.simple:
        return raw_output
    return "Output:\n```\n" + raw_output + "```\n\n"
984
+
985
@staticmethod
def get_query_list(split: str = "train"):
    """Return the result of ``load_task_ids`` for the given ``split``."""
    task_ids = load_task_ids(split)
    return task_ids