adhoc-api 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adhoc_api-0.1.0/PKG-INFO +54 -0
- adhoc_api-0.1.0/README.md +36 -0
- adhoc_api-0.1.0/adhoc_api/__init__.py +1 -0
- adhoc_api-0.1.0/adhoc_api/agent.py +205 -0
- adhoc_api-0.1.0/adhoc_api/api_agent.yaml +160 -0
- adhoc_api-0.1.0/adhoc_api/default_payload.json +1 -0
- adhoc_api-0.1.0/adhoc_api/test.py +29 -0
- adhoc_api-0.1.0/adhoc_api/tests/queries_(processed).txt +8 -0
- adhoc_api-0.1.0/adhoc_api/tests/queries_(raw).txt +8 -0
- adhoc_api-0.1.0/adhoc_api/tool.py +72 -0
- adhoc_api-0.1.0/pyproject.toml +19 -0
adhoc_api-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: adhoc-api
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Agent automatically figures out how to make API requests given API docs and user query in plain text
|
|
5
|
+
Author: David Samson
|
|
6
|
+
Author-email: david.andrew.engineer@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Dist: archytas (>=1.2.1,<2.0.0)
|
|
13
|
+
Requires-Dist: easyrepl (>=0.1.3,<0.2.0)
|
|
14
|
+
Requires-Dist: google-generativeai (>=0.8.2,<0.9.0)
|
|
15
|
+
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
|
16
|
+
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# Ad-Hoc API
|
|
20
|
+
An [Archytas](https://github.com/jataware/archytas) tool that uses LLMs to interact with APIs given documentation. The user explains what they want in plain English, and then the agent (using the API's docs for context) writes python code to complete the task.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
```bash
|
|
24
|
+
pip install adhoc-api
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
This is designed to be paired with an Archytas agent. You may omit the python tool, and the agent should instead return the source code to you rather than running it.
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from adhoc_api import AdhocApi
|
|
33
|
+
from archytas.react import ReActAgent, FailedTaskError
|
|
34
|
+
from archytas.tools import PythonTool
|
|
35
|
+
from easyrepl import REPL
|
|
36
|
+
|
|
37
|
+
def main():
|
|
38
|
+
python = PythonTool()
|
|
39
|
+
adhoc_api = AdhocApi(run_code=python.run)
|
|
40
|
+
tools = [adhoc_api, python]
|
|
41
|
+
agent = ReActAgent(model='gpt-4o', tools=tools, verbose=True)
|
|
42
|
+
|
|
43
|
+
# REPL to interact with agent
|
|
44
|
+
for query in REPL(history_file='.chat'):
|
|
45
|
+
try:
|
|
46
|
+
answer = agent.react(query)
|
|
47
|
+
print(answer)
|
|
48
|
+
except FailedTaskError as e:
|
|
49
|
+
print(f"Error: {e}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
if __name__ == "__main__":
|
|
53
|
+
main()
|
|
54
|
+
```
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Ad-Hoc API
|
|
2
|
+
An [Archytas](https://github.com/jataware/archytas) tool that uses LLMs to interact with APIs given documentation. The user explains what they want in plain English, and then the agent (using the API's docs for context) writes python code to complete the task.
|
|
3
|
+
|
|
4
|
+
## Installation
|
|
5
|
+
```bash
|
|
6
|
+
pip install adhoc-api
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
This is designed to be paired with an Archytas agent. You may omit the python tool, and the agent should instead return the source code to you rather than running it.
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from adhoc_api import AdhocApi
|
|
15
|
+
from archytas.react import ReActAgent, FailedTaskError
|
|
16
|
+
from archytas.tools import PythonTool
|
|
17
|
+
from easyrepl import REPL
|
|
18
|
+
|
|
19
|
+
def main():
|
|
20
|
+
python = PythonTool()
|
|
21
|
+
adhoc_api = AdhocApi(run_code=python.run)
|
|
22
|
+
tools = [adhoc_api, python]
|
|
23
|
+
agent = ReActAgent(model='gpt-4o', tools=tools, verbose=True)
|
|
24
|
+
|
|
25
|
+
# REPL to interact with agent
|
|
26
|
+
for query in REPL(history_file='.chat'):
|
|
27
|
+
try:
|
|
28
|
+
answer = agent.react(query)
|
|
29
|
+
print(answer)
|
|
30
|
+
except FailedTaskError as e:
|
|
31
|
+
print(f"Error: {e}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
if __name__ == "__main__":
|
|
35
|
+
main()
|
|
36
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .tool import AdhocApi
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import yaml
|
|
3
|
+
from google import generativeai as genai
|
|
4
|
+
from google.generativeai import caching
|
|
5
|
+
import pathlib
|
|
6
|
+
import datetime
|
|
7
|
+
from archytas.agent import Agent
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Callable
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
import pdb
|
|
15
|
+
|
|
16
|
+
here = Path(__file__).resolve().parent
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def simple_info(info: dict):
|
|
21
|
+
print('INFO', info)
|
|
22
|
+
|
|
23
|
+
def simple_error(error: dict):
|
|
24
|
+
print('ERROR', error)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class GeminiDrafter:
|
|
30
|
+
def __init__(self, info: Callable[[dict], None]=simple_info, error: Callable[[dict], None]=simple_error):
|
|
31
|
+
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
|
32
|
+
self.cache = APICache(f'{here}/api_agent.yaml')
|
|
33
|
+
self.info = info
|
|
34
|
+
self.error = error
|
|
35
|
+
|
|
36
|
+
def draft_request(self, api: str, query: str) -> str:
|
|
37
|
+
"""Using Gemini, draft source code to make a request to the specified API that fulfills the query."""
|
|
38
|
+
self.info({'api': api, 'goal': query})
|
|
39
|
+
|
|
40
|
+
if api not in self.cache.loaded_apis():
|
|
41
|
+
self.info({'cache': f'api is not loaded: {api}'})
|
|
42
|
+
if api not in self.cache.available_apis():
|
|
43
|
+
self.info({'cache': f'api does not exist: {api}'})
|
|
44
|
+
return f"The selected API was not in the following list: {self.cache.available_apis()}. Please use one of those."
|
|
45
|
+
self.info({'cache': f'loading api: {api}'})
|
|
46
|
+
self.cache.load_api(api)
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
agent_response = self.cache.chats[api].send_message(query).text
|
|
50
|
+
prefixes = ['```python', '```']
|
|
51
|
+
suffixes = ['```', '```\n']
|
|
52
|
+
for prefix in prefixes:
|
|
53
|
+
if agent_response.startswith(prefix):
|
|
54
|
+
agent_response = agent_response[len(prefix):]
|
|
55
|
+
for suffix in suffixes:
|
|
56
|
+
if agent_response.endswith(suffix):
|
|
57
|
+
agent_response = agent_response[:-len(suffix)]
|
|
58
|
+
agent_response = '\n'.join([
|
|
59
|
+
'import pandas as pd',
|
|
60
|
+
'import os',
|
|
61
|
+
'import json',
|
|
62
|
+
'import requests',
|
|
63
|
+
agent_response
|
|
64
|
+
])
|
|
65
|
+
except Exception as e:
|
|
66
|
+
self.error({'error': str(e)})
|
|
67
|
+
return f"The agent failed to produce valid code: {str(e)}"
|
|
68
|
+
|
|
69
|
+
return agent_response
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GPTCodeFinalizer:
|
|
74
|
+
def __init__(self, info: Callable[[dict], None]=simple_info, error: Callable[[dict], None]=simple_error):
|
|
75
|
+
self.cache = APICache(f'{here}/api_agent.yaml')
|
|
76
|
+
self.info = info
|
|
77
|
+
self.error = error
|
|
78
|
+
|
|
79
|
+
async def proofread_code(self, api: str, code: str, agent: Agent) -> str:
|
|
80
|
+
"""Proofreads the code, making syntax corrections, and adjusting according to specific notes about the API"""
|
|
81
|
+
transformed_code = code
|
|
82
|
+
syntax_check_prompt: str = self.cache.config.get('syntax_check_prompt','')
|
|
83
|
+
|
|
84
|
+
if syntax_check_prompt != '':
|
|
85
|
+
transformed_code = await agent.query(syntax_check_prompt.format(code=transformed_code))
|
|
86
|
+
if transformed_code.strip() != code.strip():
|
|
87
|
+
self.info({
|
|
88
|
+
"message": "GPT has changed the code output from Gemini in the syntax fix step.",
|
|
89
|
+
"gpt": transformed_code,
|
|
90
|
+
"gemini": code}
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
additional_pass_prompt: str = self.cache.cache[api].get('gpt_additional_pass', '')
|
|
94
|
+
if additional_pass_prompt != '':
|
|
95
|
+
prior_code = transformed_code
|
|
96
|
+
transformed_code = await agent.query(
|
|
97
|
+
additional_pass_prompt.format(code=transformed_code)
|
|
98
|
+
.format_map(self.cache.cache[api])
|
|
99
|
+
)
|
|
100
|
+
if transformed_code.strip() != prior_code.strip():
|
|
101
|
+
self.info({
|
|
102
|
+
"message": "GPT has changed the code output from Gemini or the syntax check in the additional pass step.",
|
|
103
|
+
"gpt": transformed_code,
|
|
104
|
+
"prior": prior_code}
|
|
105
|
+
)
|
|
106
|
+
return transformed_code
|
|
107
|
+
|
|
108
|
+
# requires gemini API key initialization done beforehand
|
|
109
|
+
class APICache:
|
|
110
|
+
|
|
111
|
+
def __init__(self, api_definition_filepath: str, info: Callable[[dict], None]=simple_info, error: Callable[[dict], None]=simple_error):
|
|
112
|
+
self.cache: dict[str, dict] = {}
|
|
113
|
+
self.chats: dict[str, genai.ChatSession] = {}
|
|
114
|
+
self.models: dict[str, genai.GenerativeModel] = {}
|
|
115
|
+
self.config: dict[str, dict] = {}
|
|
116
|
+
self.info = info
|
|
117
|
+
self.error = error
|
|
118
|
+
|
|
119
|
+
with open(api_definition_filepath, 'r') as f:
|
|
120
|
+
try:
|
|
121
|
+
contents = yaml.safe_load(f)
|
|
122
|
+
self.config = contents['config']
|
|
123
|
+
api_definitions = contents['apis']
|
|
124
|
+
except Exception as e:
|
|
125
|
+
print(f"failed to load API definitions file properly. check filepath and/or format: {str(e)}")
|
|
126
|
+
return
|
|
127
|
+
self.cache['default'] = dict(api_definitions['default'])
|
|
128
|
+
for api_name, definition in api_definitions.items():
|
|
129
|
+
if api_name == 'default':
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
# merge w/ overwriting defaults
|
|
133
|
+
self.cache[api_name] = copy.deepcopy(self.cache['default'])
|
|
134
|
+
for key, value in definition.items():
|
|
135
|
+
if isinstance(value, str | int | list | bool):
|
|
136
|
+
self.cache[api_name][key] = value
|
|
137
|
+
elif isinstance(value, dict):
|
|
138
|
+
self.cache[api_name][key] |= value
|
|
139
|
+
|
|
140
|
+
if self.cache[api_name]['disabled']:
|
|
141
|
+
del self.cache[api_name]
|
|
142
|
+
continue
|
|
143
|
+
|
|
144
|
+
# fill docs body
|
|
145
|
+
try:
|
|
146
|
+
root_folder = pathlib.Path(__file__).resolve().parent
|
|
147
|
+
filepath = '/'.join([
|
|
148
|
+
str(root_folder),
|
|
149
|
+
self.config["documentation_root"],
|
|
150
|
+
self.cache[api_name]["documentation_file"]
|
|
151
|
+
])
|
|
152
|
+
with open(filepath, 'r') as f:
|
|
153
|
+
self.cache[api_name]['docs'] = f.read()
|
|
154
|
+
except Exception as e:
|
|
155
|
+
raise ValueError(f"failed to open docs for api {api_name}: file path {self.cache[api_name]['documentation_file']}: {str(e)}")
|
|
156
|
+
|
|
157
|
+
# formatting interpolations - don't format API docs though.
|
|
158
|
+
self.cache[api_name] = {
|
|
159
|
+
k: v.format_map(self.cache[api_name])
|
|
160
|
+
if isinstance(v, str) else v
|
|
161
|
+
for k, v in self.cache[api_name].items()
|
|
162
|
+
if k not in self.config['deferred_formatting_fields']
|
|
163
|
+
}
|
|
164
|
+
del self.cache['default']
|
|
165
|
+
|
|
166
|
+
def available_apis(self) -> dict[str, str]:
|
|
167
|
+
"""Returns a mapping of available APIs to their descriptions and full, human readable names."""
|
|
168
|
+
return {key: f"{self.cache[key]['name']}: {self.cache[key]['description']}" for key in self.cache.keys()}
|
|
169
|
+
|
|
170
|
+
def available_api_context(self) -> str:
|
|
171
|
+
"""Nicer formatting for a system prompt of APIs and their descriptions."""
|
|
172
|
+
return "\n".join([f' - {k}: {v}' for k, v in self.available_apis().items()])
|
|
173
|
+
|
|
174
|
+
def loaded_apis(self):
|
|
175
|
+
"""Returns a list of loaded APIs."""
|
|
176
|
+
return self.chats.keys()
|
|
177
|
+
|
|
178
|
+
def load_api(self, api_name: str):
|
|
179
|
+
if api_name not in self.available_apis():
|
|
180
|
+
raise ValueError("requested API is not in available APIs - check definitions file and API name")
|
|
181
|
+
content = caching.CachedContent.list()
|
|
182
|
+
is_cached = False
|
|
183
|
+
for cache_object in content:
|
|
184
|
+
if cache_object.display_name == self.cache[api_name]['cache']['key']:
|
|
185
|
+
is_cached = True
|
|
186
|
+
self.info({'cache': f'found cached content for {api_name}'})
|
|
187
|
+
break
|
|
188
|
+
if not is_cached:
|
|
189
|
+
cache_object = self.build_cache(api_name)
|
|
190
|
+
self.models[api_name] = genai.GenerativeModel.from_cached_content(cached_content=cache_object)
|
|
191
|
+
self.chats[api_name] = self.models[api_name].start_chat()
|
|
192
|
+
|
|
193
|
+
def build_cache(self, api_name):
|
|
194
|
+
if api_name not in self.available_apis():
|
|
195
|
+
raise ValueError("requested API is not in available APIs - check definitions file and API name")
|
|
196
|
+
self.info({'cache': f'building cache for {api_name}'})
|
|
197
|
+
api = self.cache[api_name]
|
|
198
|
+
cache = caching.CachedContent.create(
|
|
199
|
+
model=api['cache']['model'],
|
|
200
|
+
display_name=api['cache']['key'],
|
|
201
|
+
contents=[api['cache_body']],
|
|
202
|
+
ttl=datetime.timedelta(minutes=api['cache']['ttl']),
|
|
203
|
+
system_instruction=api['system_prompt']
|
|
204
|
+
)
|
|
205
|
+
return cache
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
config:
|
|
2
|
+
# prepended with the containing folder path - ./ resolves to the current working directory of this file
|
|
3
|
+
documentation_root: "./api_documentation"
|
|
4
|
+
# fields that need interpolation at runtime versus at load time
|
|
5
|
+
# e.g. prompts that will have variadic input.
|
|
6
|
+
deferred_formatting_fields: ['docs', 'gpt_additional_pass', 'syntax_check_prompt']
|
|
7
|
+
# can be disabled by being left blank or omitted.
|
|
8
|
+
# use {code} for the secondary agent output.
|
|
9
|
+
syntax_check_prompt: |
|
|
10
|
+
The code you received will be listed below the line of dashes.
|
|
11
|
+
Please fix the python code for syntax errors and only return the python code with fixed syntax errors.
|
|
12
|
+
Ensure the output has no formatting and return just the code, please.
|
|
13
|
+
|
|
14
|
+
If the output has formatting like backticks or a language specifier, be sure to remove all formatting
|
|
15
|
+
and return nothing but the code itself with no additional text.
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
Input:
|
|
19
|
+
```python
|
|
20
|
+
print(a)
|
|
21
|
+
```
|
|
22
|
+
Output:
|
|
23
|
+
print(a)
|
|
24
|
+
Input:
|
|
25
|
+
```python
|
|
26
|
+
de f fn_b(b):
|
|
27
|
+
print(this is an unescaped string)
|
|
28
|
+
def fn_a(a):
|
|
29
|
+
print(a)
|
|
30
|
+
```
|
|
31
|
+
This code has been fixed to correctly solve the task.
|
|
32
|
+
Output:
|
|
33
|
+
def fn_b(b):
|
|
34
|
+
print("this is an unescaped string")
|
|
35
|
+
def fn_a(a):
|
|
36
|
+
print(a)
|
|
37
|
+
----------
|
|
38
|
+
{code}
|
|
39
|
+
|
|
40
|
+
apis:
|
|
41
|
+
# if a field is not specified, the default field will be used.
|
|
42
|
+
# fields can be referenced by python f-string syntax for interpolation.
|
|
43
|
+
# be sure to use | for block strings so that new lines are given to the prompt.
|
|
44
|
+
default:
|
|
45
|
+
# user-visible name
|
|
46
|
+
name: "Default API"
|
|
47
|
+
# description provided to the agent
|
|
48
|
+
description: ''
|
|
49
|
+
# should this API be disabled
|
|
50
|
+
# (the default value for any given user-defined api is to be enabled
|
|
51
|
+
# - here, default is handled specially as if the "api" was disabled.)
|
|
52
|
+
disabled: false
|
|
53
|
+
cache:
|
|
54
|
+
# key -- no default specified! it is required to be manually filled.
|
|
55
|
+
model: "models/gemini-1.5-flash-001"
|
|
56
|
+
ttl: 30 # in minutes
|
|
57
|
+
# relative to config.documentation_root
|
|
58
|
+
documentation_file: "api_docs.md"
|
|
59
|
+
# docs is a special field added at load time, populated with the contents of documentation above.
|
|
60
|
+
docs: ""
|
|
61
|
+
system_prompt: |
|
|
62
|
+
You are an assistant who will help me query the {name} API.
|
|
63
|
+
You should write clean python code to solve specific queries I pose.
|
|
64
|
+
You should write it as though it will be directly used in a Jupyter notebook.
|
|
65
|
+
You should not include backticks or 'python' at the top of the code blocks, that is unnecessary.
|
|
66
|
+
You should not provide explanation unless I ask a follow up question.
|
|
67
|
+
Assume pandas is installed and is imported with `import pandas as pd`.
|
|
68
|
+
Also assume `requests`, `json`, and `os` are imported properly.
|
|
69
|
+
cache_body: |
|
|
70
|
+
You will be given the entire API documentation.
|
|
71
|
+
When you write code against this API, you should avail yourself of the appropriate query parameters,
|
|
72
|
+
your understanding of the response model, and be cognizant that not all data is public and thus may require a token, etc.
|
|
73
|
+
Unless you receive a 403 forbidden, assume the endpoints are unauthenticated.
|
|
74
|
+
If the user says the API does not require authentication, OMIT code about tokens and token handling and token headers.
|
|
75
|
+
When you are downloading, communicate via stdout that something has been downloaded.
|
|
76
|
+
When you are doing complex things try to break them down step by step and implement appropriate exception handling.
|
|
77
|
+
{additional_cache_body}
|
|
78
|
+
Here is the documentation.
|
|
79
|
+
{docs}
|
|
80
|
+
# inserted after preamble but above full documentation.
|
|
81
|
+
# useful for slight changes without fully replacing the text.
|
|
82
|
+
additional_cache_body: ''
|
|
83
|
+
# if present and a non-empty string, instructions for gpt to observe code output before running to fix enum mistakes
|
|
84
|
+
# use {code} to interpolate the output. this code can also interpolate other things from the API.
|
|
85
|
+
gpt_additional_pass: ''
|
|
86
|
+
|
|
87
|
+
gdc:
|
|
88
|
+
name: "Genomics Data Commons"
|
|
89
|
+
description: |
|
|
90
|
+
The NCI's Genomic Data Commons (GDC) provides the cancer research community with a repository and computational
|
|
91
|
+
platform for cancer researchers who need to understand cancer, its clinical progression, and response to therapy.
|
|
92
|
+
The GDC supports several cancer genome programs at the NCI Center for Cancer Genomics (CCG),
|
|
93
|
+
including The Cancer Genome Atlas (TCGA) and Therapeutically Applicable Research to Generate Effective Treatments (TARGET).
|
|
94
|
+
documentation_file: "gdc.md"
|
|
95
|
+
cache:
|
|
96
|
+
key: "api_assistant_gdc"
|
|
97
|
+
# for brevity, we define `disease_types` below and use it to avoid repetition.
|
|
98
|
+
gpt_additional_pass: |
|
|
99
|
+
You will be provided generated code that often has a problem with an invalid filter for disease types.
|
|
100
|
+
Please correct the usage of the disease type filters to one of the closest valid disease types
|
|
101
|
+
if it is not in the below list.
|
|
102
|
+
|
|
103
|
+
The list is provided between the lines.
|
|
104
|
+
---------------
|
|
105
|
+
{disease_types}
|
|
106
|
+
---------------
|
|
107
|
+
|
|
108
|
+
The code will now be provided below.
|
|
109
|
+
|
|
110
|
+
{code}
|
|
111
|
+
# custom variables can be defined for formatting.
|
|
112
|
+
disease_types: |
|
|
113
|
+
- adenomas and adenocarcinomas
|
|
114
|
+
- ductal and lobular neoplasms
|
|
115
|
+
- myeloid leukemias
|
|
116
|
+
- epithelial neoplasms, nos
|
|
117
|
+
- squamous cell neoplasms
|
|
118
|
+
- gliomas
|
|
119
|
+
- lymphoid leukemias
|
|
120
|
+
- cystic, mucinous and serous neoplasms
|
|
121
|
+
- nevi and melanomas
|
|
122
|
+
- neuroepitheliomatous neoplasms
|
|
123
|
+
- acute lymphoblastic leukemia
|
|
124
|
+
- plasma cell tumors
|
|
125
|
+
- complex mixed and stromal neoplasms
|
|
126
|
+
- mature b-cell lymphomas
|
|
127
|
+
- transitional cell papillomas and carcinomas
|
|
128
|
+
- not applicable
|
|
129
|
+
- osseous and chondromatous neoplasms
|
|
130
|
+
- germ cell neoplasms
|
|
131
|
+
- mesothelial neoplasms
|
|
132
|
+
- not reported
|
|
133
|
+
- acinar cell neoplasms
|
|
134
|
+
- paragangliomas and glomus tumors
|
|
135
|
+
- chronic myeloproliferative disorders
|
|
136
|
+
- neoplasms, nos
|
|
137
|
+
- thymic epithelial neoplasms
|
|
138
|
+
- myomatous neoplasms
|
|
139
|
+
- complex epithelial neoplasms
|
|
140
|
+
- soft tissue tumors and sarcomas, nos
|
|
141
|
+
- lipomatous neoplasms
|
|
142
|
+
- meningiomas
|
|
143
|
+
- fibromatous neoplasms
|
|
144
|
+
- specialized gonadal neoplasms
|
|
145
|
+
- unknown
|
|
146
|
+
- miscellaneous tumors
|
|
147
|
+
- adnexal and skin appendage neoplasms
|
|
148
|
+
- basal cell neoplasms
|
|
149
|
+
- mucoepidermoid neoplasms
|
|
150
|
+
- myelodysplastic syndromes
|
|
151
|
+
- nerve sheath tumors
|
|
152
|
+
- leukemias, nos
|
|
153
|
+
- synovial-like neoplasms
|
|
154
|
+
- fibroepithelial neoplasms
|
|
155
|
+
- miscellaneous bone tumors
|
|
156
|
+
- blood vessel tumors
|
|
157
|
+
- mature t- and nk-cell lymphomas
|
|
158
|
+
- _missing
|
|
159
|
+
|
|
160
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from archytas.react import ReActAgent, FailedTaskError
|
|
2
|
+
from archytas.tools import PythonTool
|
|
3
|
+
from easyrepl import REPL
|
|
4
|
+
from .tool import AdhocApi
|
|
5
|
+
|
|
6
|
+
import pdb
|
|
7
|
+
|
|
8
|
+
def main():
|
|
9
|
+
python = PythonTool()
|
|
10
|
+
adhoc_api = AdhocApi(run_code=python.run)
|
|
11
|
+
|
|
12
|
+
tools = [adhoc_api, python]
|
|
13
|
+
agent = ReActAgent(model='gpt-4o', tools=tools, verbose=True)
|
|
14
|
+
print(agent.prompt)
|
|
15
|
+
|
|
16
|
+
# REPL to interact with agent
|
|
17
|
+
for query in REPL(history_file='.chat'):
|
|
18
|
+
try:
|
|
19
|
+
answer = agent.react(query)
|
|
20
|
+
print(answer)
|
|
21
|
+
except FailedTaskError as e:
|
|
22
|
+
print(f"Error: {e}")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
if __name__ == "__main__":
|
|
28
|
+
main()
|
|
29
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Find gene expression data (raw microarray data, bulk RNA-seq, or scRNA-seq data) from pheochromocytoma and paraganglioma cancer samples. Download metadata about these patient cases, specimens, microarray data files, and sequencing data files.
|
|
2
|
+
Find WGS and WXS data (aligned reads) from normal tissue samples from patients with neuroblastomas. Include patients with ganglioneuroblastomas. Download metadata about these patient cases, specimens, and sequencing data files.
|
|
3
|
+
Find lung cancer specimens with processed gene copy number and gene expression quantification data. Download metadata about the patient cases, specimens, gene copy number, and gene expression data files.
|
|
4
|
+
Find gene expression data (single-cell RNA-seq, bulk RNA-seq, or RNA microarray data) from glioma patient samples. Download metadata about these patient cases, specimens, and sequencing data files.
|
|
5
|
+
Find gene expression data (RNA-seq, scRNA-seq, or RNA microarray data) from colorectal cancer samples. Include cancer samples from sites within the colon and rectum. Download metadata about these patient cases, specimens, and sequencing data files.
|
|
6
|
+
Find patient cases with whole genome sequencing data, whole exome sequencing data, and/or RNA sequencing data from both normal tissue and tumor samples from patients diagnosed with neuroblastoma. Download metadata about these patient cases, specimens, and sequencing data files. Include only raw and aligned sequencing read data files. Include only patients with a diagnosis of neuroblastoma (including Central neuroblastoma and Neuroblastoma, NOS) and do not include patients diagnosed with ganglioneuroblastomas and cases with neuroblastoma in combination with other cancer types.
|
|
7
|
+
Find patient cases with cancer samples with BAM files from whole transcriptome (RNA-Seq), whole exome (WESeq), and/or whole genome (WGSeq) sequencing data for the following pediatric cancer types: Acute Lymphoblastic Leukemia (ALL), Acute Myeloid Leukemia (AML), Neuroblastoma (NBL), Kidney Tumors (WT, RT, CCSK), and Osteosarcoma (OS). Include only pediatric cases, where age at diagnosis is 20 years old or less, if that data is available. Download metadata about these patient cases, specimens, and BAM sequencing data files. Include BAM sequencing files from normal tissue samples from these patients if available, in addition to those from cancer samples.
|
|
8
|
+
Find patient cases with tumor samples with both processed protein abundance data (this does not need to include abundance data for phosphorylated or acetylated forms) and genomic data for identifying single nucleotide polymorphisms (either whole genome sequencing data or genotyping array data). Download metadata about these patient cases, specimens, processed protein abundance data files, and genomic data files.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
I need more gene expression data to supplement the data from the small number of samples in my study. I want microarray and/or RNA-seq data from Pheochromocytomas and Paragangliomas samples. I will use this data to identify cancer subtypes and genes that are differentially expressed between metastatic and non-metastatic cases.
|
|
2
|
+
I want to identify germline variants associated with neuroblastoma cases. I will identify SNPs and insertion/deletion variants in cancer predisposition genes. For this, I want WGS or WES data from normal tissue samples taken from patients with neuroblastomas.
|
|
3
|
+
I want to understand the role of the protein MCL-1 in lung cancer, and would like to investigate the relationship between MCL-1 gene copy number and mRNA expression levels in lung cancer samples.
|
|
4
|
+
In two types of brain stem cell lines, we observed differential gene expression of FOSL1. We would like to test whether this pattern is also observed between brain cancer types in patient samples. For this, we want gene expression data from glioma patient samples.
|
|
5
|
+
We plan to use machine learning to identify critical differentially expressed genes in colorectal cancer. We want whole-transcriptome profiling datasets from CRC samples. We would also like patient data if available to characterize our study sample.
|
|
6
|
+
I am looking for whole genome or whole exome sequencing data, as well as RNA sequencing data, from normal tissue and tumor samples from Neuroblastoma patient cases. I am investigating germline DNA variations in cancer predisposition genes associated with Neuroblastoma risk.
|
|
7
|
+
I want access to BAM files from whole transcriptome (RNA-Seq), whole exome (WESeq), and whole genome (WGSeq) sequencing data across all pediatric cancer types, including Acute Lymphoblastic Leukemia (ALL), Acute Myeloid Leukemia (AML), Neuroblastoma (NBL), Kidney Tumors (WT, RT, CCSK), and Osteosarcoma (OS). I plan to use this data to detect expression and splice junctions in pediatric tumors and identify genetic variants affecting splicing, with the aim of studying associations with clinical factors and outcomes.
|
|
8
|
+
We are seeking genomic variation (SNP data) and protein abundance data from cancer samples from the same patients to conduct genome-wide association studies (GWAS) and generate protein quantitative trait loci (pQTLs). Ultimately, we aim to utilize proteome-wide Mendelian randomization to investigate potential causal links between various proteins and the development or progression of cancer.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# import asyncio
|
|
2
|
+
|
|
3
|
+
from archytas.tool_utils import AgentRef, LoopControllerRef, ReactContextRef, tool
|
|
4
|
+
from .agent import GeminiDrafter, GPTCodeFinalizer
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Callable, Any
|
|
7
|
+
|
|
8
|
+
here = Path(__file__).resolve().parent
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def simple_info(info: dict):
|
|
12
|
+
print('INFO', info)
|
|
13
|
+
|
|
14
|
+
def simple_error(error: dict):
|
|
15
|
+
print('ERROR', error)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AdhocApi:
|
|
21
|
+
def __init__(self, *, run_code: Callable[[str], Any]|None=None, info: Callable[[dict], None]=simple_info, error: Callable[[dict], None]=simple_error):
|
|
22
|
+
self.info = info
|
|
23
|
+
self.error = error
|
|
24
|
+
self.run_code = run_code
|
|
25
|
+
|
|
26
|
+
self.drafter = GeminiDrafter(info, error)
|
|
27
|
+
self.finalizer = GPTCodeFinalizer(info, error)
|
|
28
|
+
|
|
29
|
+
@tool
|
|
30
|
+
async def list_apis(self) -> str:
|
|
31
|
+
"""
|
|
32
|
+
This tool lists all the APIs available to you.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
str: A list of the APIs and descriptions
|
|
36
|
+
"""
|
|
37
|
+
return self.drafter.cache.available_api_context()
|
|
38
|
+
|
|
39
|
+
@tool
|
|
40
|
+
async def use_api(self, api: str, goal: str, agent: AgentRef) -> str:#, loop: LoopControllerRef, react_context: ReactContextRef) -> str:
|
|
41
|
+
"""
|
|
42
|
+
This tool provides interaction with external APIs with a second agent.
|
|
43
|
+
You will query external APIs through this tool.
|
|
44
|
+
Based on what that code returns and the user's goal, continue to interact with the API to get to that goal.
|
|
45
|
+
|
|
46
|
+
The output will either be a summary of the code output or an error.
|
|
47
|
+
If it is an error, see if you can modify the code to get it to work, and try running it again.
|
|
48
|
+
|
|
49
|
+
Consult the APIs available to you when specifying which to use.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
api (str): The API to query. Must be one of the available APIs.
|
|
53
|
+
goal (str): The task given to the second agent. If the user states the API is unauthenticated, relay that information here.
|
|
54
|
+
Returns:
|
|
55
|
+
str: A summary of the current step being run, along with the collected stdout, stderr, returned result, display_data items, and any
|
|
56
|
+
errors that may have occurred, or just an error.
|
|
57
|
+
|
|
58
|
+
"""
|
|
59
|
+
draft_code = self.drafter.draft_request(api, goal)
|
|
60
|
+
fixed_code = await self.finalizer.proofread_code(api, draft_code, agent)
|
|
61
|
+
|
|
62
|
+
if self.run_code is None:
|
|
63
|
+
return fixed_code
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
evaluation = self.run_code(fixed_code)
|
|
67
|
+
except Exception as e:
|
|
68
|
+
self.error({'error': str(e)})
|
|
69
|
+
return f"""
|
|
70
|
+
The second agent failed to create valid code. Instruct it to rerun. The error was {str(e)}. The code will be provided for fixes or retry.
|
|
71
|
+
"""
|
|
72
|
+
return evaluation
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "adhoc-api"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Agent automatically figures out how to make API requests given API docs and user query in plain text"
|
|
5
|
+
authors = ["David Samson <david.andrew.engineer@gmail.com>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
|
|
8
|
+
[tool.poetry.dependencies]
|
|
9
|
+
python = "^3.10"
|
|
10
|
+
archytas = "^1.2.1"
|
|
11
|
+
google-generativeai = "^0.8.2"
|
|
12
|
+
requests = "^2.32.3"
|
|
13
|
+
pyyaml = "^6.0.2"
|
|
14
|
+
easyrepl = "^0.1.3"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["poetry-core"]
|
|
19
|
+
build-backend = "poetry.core.masonry.api"
|