omnibioai-tool-runtime 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnibioai_tool_runtime-0.1.0/PKG-INFO +371 -0
- omnibioai_tool_runtime-0.1.0/README.md +352 -0
- omnibioai_tool_runtime-0.1.0/omnibioai_tool_runtime.egg-info/PKG-INFO +371 -0
- omnibioai_tool_runtime-0.1.0/omnibioai_tool_runtime.egg-info/SOURCES.txt +13 -0
- omnibioai_tool_runtime-0.1.0/omnibioai_tool_runtime.egg-info/dependency_links.txt +1 -0
- omnibioai_tool_runtime-0.1.0/omnibioai_tool_runtime.egg-info/requires.txt +8 -0
- omnibioai_tool_runtime-0.1.0/omnibioai_tool_runtime.egg-info/top_level.txt +1 -0
- omnibioai_tool_runtime-0.1.0/pyproject.toml +30 -0
- omnibioai_tool_runtime-0.1.0/setup.cfg +4 -0
- omnibioai_tool_runtime-0.1.0/tests/test_result_uri_parse.py +50 -0
- omnibioai_tool_runtime-0.1.0/tests/test_tools_echo_test.py +24 -0
- omnibioai_tool_runtime-0.1.0/tests/test_upload_dispatch.py +103 -0
- omnibioai_tool_runtime-0.1.0/tools/__init__.py +2 -0
- omnibioai_tool_runtime-0.1.0/tools/echo_test/__init__.py +2 -0
- omnibioai_tool_runtime-0.1.0/tools/echo_test/run.py +56 -0
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: omnibioai-tool-runtime
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Portable tool runtime for OmniBioAI TES (RESULT_URI uploader for s3:// and azureblob://)
|
|
5
|
+
Author: Manish Kumar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: omnibioai,tes,bioinformatics,batch,azure,aws
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: pydantic>=2.0
|
|
14
|
+
Provides-Extra: aws
|
|
15
|
+
Requires-Dist: boto3>=1.26; extra == "aws"
|
|
16
|
+
Provides-Extra: azure
|
|
17
|
+
Requires-Dist: azure-identity>=1.15; extra == "azure"
|
|
18
|
+
Requires-Dist: azure-storage-blob>=12.19; extra == "azure"
|
|
19
|
+
|
|
20
|
+
# Omni Tool Runtime
|
|
21
|
+
|
|
22
|
+
**Portable, cloud-agnostic execution runtime for OmniBioAI tools**
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Overview
|
|
27
|
+
|
|
28
|
+
`omnibioai-tool-runtime` is a **minimal, deterministic execution runtime** used by
|
|
29
|
+
OmniBioAI’s Tool Execution Service (TES) to run individual tools across **multiple execution backends**, including:
|
|
30
|
+
|
|
31
|
+
* Local Docker execution
|
|
32
|
+
* AWS Batch
|
|
33
|
+
* Azure Batch
|
|
34
|
+
* (future) Kubernetes Jobs
|
|
35
|
+
* (future) TES-compatible HPC schedulers
|
|
36
|
+
|
|
37
|
+
The runtime provides a **strict execution contract** so that:
|
|
38
|
+
|
|
39
|
+
* TES adapters stay thin and backend-specific
|
|
40
|
+
* Tool containers remain portable and backend-agnostic
|
|
41
|
+
* Results are uploaded consistently (S3 / Azure Blob / future backends)
|
|
42
|
+
|
|
43
|
+
This mirrors the design philosophy used throughout OmniBioAI:
|
|
44
|
+
**separate orchestration from execution, and execution from logic.**
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## What This Runtime Is (and Is Not)
|
|
49
|
+
|
|
50
|
+
### ✅ This runtime **is**
|
|
51
|
+
|
|
52
|
+
* A **containerized tool launcher**
|
|
53
|
+
* Responsible for:
|
|
54
|
+
|
|
55
|
+
* Reading tool inputs from environment variables
|
|
56
|
+
* Executing tool logic
|
|
57
|
+
* Writing `results.json`
|
|
58
|
+
* Uploading results to cloud storage
|
|
59
|
+
* Cloud-agnostic (AWS / Azure supported today)
|
|
60
|
+
|
|
61
|
+
### ❌ This runtime is **not**
|
|
62
|
+
|
|
63
|
+
* A workflow engine
|
|
64
|
+
* A scheduler
|
|
65
|
+
* An LLM executor
|
|
66
|
+
* A UI layer
|
|
67
|
+
|
|
68
|
+
Those responsibilities live elsewhere in OmniBioAI.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Execution Contract (Critical)
|
|
73
|
+
|
|
74
|
+
All tools executed via `omnibioai-tool-runtime` **must** follow this contract.
|
|
75
|
+
|
|
76
|
+
### Environment Variables (Injected by TES Adapter)
|
|
77
|
+
|
|
78
|
+
| Variable | Description |
|
|
79
|
+
| ---------------- | -------------------------------------------------- |
|
|
80
|
+
| `TOOL_ID` | Tool identifier (`echo_test`, `blastn`, etc.) |
|
|
81
|
+
| `RUN_ID` | Unique run ID (generated by adapter) |
|
|
82
|
+
| `INPUTS_JSON` | JSON-encoded tool inputs |
|
|
83
|
+
| `RESOURCES_JSON` | JSON-encoded resource request |
|
|
84
|
+
| `S3_RESULT_URI` | (AWS Batch) S3 URI to upload results |
|
|
85
|
+
| `RESULT_URI` | (Azure Batch) `azureblob://` URI to upload results |
|
|
86
|
+
|
|
87
|
+
Only **one** of `S3_RESULT_URI` or `RESULT_URI` is expected per run.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Repository Structure
|
|
92
|
+
|
|
93
|
+
```text
|
|
94
|
+
omnibioai-tool-runtime/
|
|
95
|
+
├── Dockerfile
|
|
96
|
+
├── README.md
|
|
97
|
+
├── pyproject.toml
|
|
98
|
+
├── omni_tool_runtime/
|
|
99
|
+
│ ├── __init__.py
|
|
100
|
+
│ ├── result_uri.py # URI parsing & dispatch
|
|
101
|
+
│ ├── upload_result.py # Unified upload logic
|
|
102
|
+
│ └── uploaders/
|
|
103
|
+
│ ├── s3_uploader.py
|
|
104
|
+
│ └── azureblob_uploader.py
|
|
105
|
+
├── tools/
|
|
106
|
+
│ └── echo_test/
|
|
107
|
+
│ ├── __init__.py
|
|
108
|
+
│ └── run.py
|
|
109
|
+
└── tests/
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Example Tool: `echo_test`
|
|
115
|
+
|
|
116
|
+
This is the **reference implementation** for all future tools.
|
|
117
|
+
|
|
118
|
+
### Behavior
|
|
119
|
+
|
|
120
|
+
* Reads `INPUTS_JSON`
|
|
121
|
+
* Echoes a value
|
|
122
|
+
* Writes `results.json`
|
|
123
|
+
* Uploads results to configured storage backend
|
|
124
|
+
|
|
125
|
+
### Minimal tool implementation
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# tools/echo_test/run.py
|
|
129
|
+
import json
|
|
130
|
+
import os
|
|
131
|
+
from omni_tool_runtime.upload_result import upload_result
|
|
132
|
+
|
|
133
|
+
def main():
|
|
134
|
+
tool_id = os.environ["TOOL_ID"]
|
|
135
|
+
run_id = os.environ["RUN_ID"]
|
|
136
|
+
inputs = json.loads(os.environ.get("INPUTS_JSON", "{}"))
|
|
137
|
+
|
|
138
|
+
text = inputs.get("text", "")
|
|
139
|
+
|
|
140
|
+
result = {
|
|
141
|
+
"ok": True,
|
|
142
|
+
"tool_id": tool_id,
|
|
143
|
+
"run_id": run_id,
|
|
144
|
+
"results": {"echo": text},
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
upload_result(result)
|
|
148
|
+
|
|
149
|
+
if __name__ == "__main__":
|
|
150
|
+
main()
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## How Results Upload Works
|
|
156
|
+
|
|
157
|
+
`upload_result()` automatically detects the backend:
|
|
158
|
+
|
|
159
|
+
| Backend | URI Example |
|
|
160
|
+
| ------- | ------------------------------------------------- |
|
|
161
|
+
| AWS | `s3://bucket/prefix/run_id/results.json` |
|
|
162
|
+
| Azure | `azureblob://account/container/path/results.json` |
|
|
163
|
+
|
|
164
|
+
The runtime:
|
|
165
|
+
|
|
166
|
+
1. Serializes result as JSON
|
|
167
|
+
2. Uploads to correct backend
|
|
168
|
+
3. Prints result to stdout (for debugging)
|
|
169
|
+
|
|
170
|
+
Adapters **never upload results themselves**.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Building the Docker Image
|
|
175
|
+
|
|
176
|
+
From repository root:
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
docker build -t man4ish/omnibioai-tool-runtime:latest .
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Verify:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
docker images | grep omnibioai-tool-runtime
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Running a Tool Locally (No Cloud)
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
docker run --rm \
|
|
194
|
+
-e TOOL_ID=echo_test \
|
|
195
|
+
-e RUN_ID=local-test-1 \
|
|
196
|
+
-e INPUTS_JSON='{"text":"hello world"}' \
|
|
197
|
+
-e RESOURCES_JSON='{}' \
|
|
198
|
+
man4ish/omnibioai-tool-runtime:latest
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Expected:
|
|
202
|
+
|
|
203
|
+
* JSON output printed to stdout
|
|
204
|
+
* No upload attempted if no result URI is provided
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## AWS Batch Usage
|
|
209
|
+
|
|
210
|
+
### Job Definition
|
|
211
|
+
|
|
212
|
+
* Image: `man4ish/omnibioai-tool-runtime:latest`
|
|
213
|
+
* Command override:
|
|
214
|
+
|
|
215
|
+
```json
|
|
216
|
+
["python", "-m", "tools.echo_test.run"]
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Injected Environment
|
|
220
|
+
|
|
221
|
+
* `S3_RESULT_URI` provided by `AwsBatchAdapter`
|
|
222
|
+
* IAM Role handles S3 auth
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Azure Batch Usage
|
|
227
|
+
|
|
228
|
+
### Task Settings
|
|
229
|
+
|
|
230
|
+
* Image: `man4ish/omnibioai-tool-runtime:latest`
|
|
231
|
+
* Command:
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
python -m tools.echo_test.run
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### Injected Environment
|
|
238
|
+
|
|
239
|
+
* `RESULT_URI=azureblob://...`
|
|
240
|
+
* Managed Identity handles Blob auth
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Pushing the Image
|
|
245
|
+
|
|
246
|
+
### Docker Hub
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
docker push man4ish/omnibioai-tool-runtime:latest
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Azure Container Registry
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
az acr login --name YOUR_ACR
|
|
256
|
+
docker tag man4ish/omnibioai-tool-runtime:latest YOUR_ACR.azurecr.io/omnibioai-tool-runtime:latest
|
|
257
|
+
docker push YOUR_ACR.azurecr.io/omnibioai-tool-runtime:latest
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## Adding a New Tool
|
|
263
|
+
|
|
264
|
+
### Step 1: Create tool folder
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
mkdir tools/my_new_tool
|
|
268
|
+
touch tools/my_new_tool/__init__.py
|
|
269
|
+
touch tools/my_new_tool/run.py
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Step 2: Implement `run.py`
|
|
273
|
+
|
|
274
|
+
Rules:
|
|
275
|
+
|
|
276
|
+
* Must read env vars
|
|
277
|
+
* Must write result via `upload_result()`
|
|
278
|
+
* Must be deterministic
|
|
279
|
+
|
|
280
|
+
### Step 3: Register tool in adapter config
|
|
281
|
+
|
|
282
|
+
**AWS Batch**
|
|
283
|
+
|
|
284
|
+
```yaml
|
|
285
|
+
job_definition_map:
|
|
286
|
+
my_new_tool: "omnibioai-my-new-tool:1"
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**Azure Batch**
|
|
290
|
+
|
|
291
|
+
```yaml
|
|
292
|
+
tools:
|
|
293
|
+
my_new_tool:
|
|
294
|
+
image: "man4ish/omnibioai-tool-runtime:latest"
|
|
295
|
+
command: ["python", "-m", "tools.my_new_tool.run"]
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## Current State
|
|
301
|
+
|
|
302
|
+
### Implemented
|
|
303
|
+
|
|
304
|
+
* Unified runtime image
|
|
305
|
+
* AWS Batch support
|
|
306
|
+
* Azure Batch support
|
|
307
|
+
* S3 + Azure Blob uploads
|
|
308
|
+
* Deterministic execution contract
|
|
309
|
+
* Reference `echo_test` tool
|
|
310
|
+
|
|
311
|
+
### Intentionally Missing (by design)
|
|
312
|
+
|
|
313
|
+
* No workflow orchestration
|
|
314
|
+
* No retry logic
|
|
315
|
+
* No state machine
|
|
316
|
+
* No scheduling policy
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## Planned Future Enhancements
|
|
321
|
+
|
|
322
|
+
### Short-term
|
|
323
|
+
|
|
324
|
+
* Tool generator CLI (`omnibioai tool new`)
|
|
325
|
+
* Structured logging
|
|
326
|
+
* Result size validation
|
|
327
|
+
* Runtime version pinning
|
|
328
|
+
|
|
329
|
+
### Medium-term
|
|
330
|
+
|
|
331
|
+
* Kubernetes Job adapter support
|
|
332
|
+
* Streaming stdout to object storage
|
|
333
|
+
* Tool-level resource enforcement
|
|
334
|
+
* Tool metadata introspection
|
|
335
|
+
|
|
336
|
+
### Long-term
|
|
337
|
+
|
|
338
|
+
* Signed result manifests
|
|
339
|
+
* Provenance hashing
|
|
340
|
+
* Deterministic replay support
|
|
341
|
+
* Cross-cloud artifact mirroring
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Design Philosophy (Important)
|
|
346
|
+
|
|
347
|
+
This runtime is intentionally **boring**.
|
|
348
|
+
|
|
349
|
+
That’s a feature.
|
|
350
|
+
|
|
351
|
+
* No magic
|
|
352
|
+
* No backend assumptions
|
|
353
|
+
* No hidden orchestration
|
|
354
|
+
* One job → one tool → one result
|
|
355
|
+
|
|
356
|
+
Everything complex belongs **above** this layer.
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## Final Note
|
|
361
|
+
|
|
362
|
+
If this runtime feels similar to:
|
|
363
|
+
|
|
364
|
+
* CWL CommandLineTool
|
|
365
|
+
* TES task containers
|
|
366
|
+
* AWS Batch single-purpose images
|
|
367
|
+
|
|
368
|
+
That’s intentional.
|
|
369
|
+
|
|
370
|
+
You’re building the **correct abstraction boundary**.
|
|
371
|
+
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
# Omni Tool Runtime
|
|
2
|
+
|
|
3
|
+
**Portable, cloud-agnostic execution runtime for OmniBioAI tools**
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
`omnibioai-tool-runtime` is a **minimal, deterministic execution runtime** used by
|
|
10
|
+
OmniBioAI’s Tool Execution Service (TES) to run individual tools across **multiple execution backends**, including:
|
|
11
|
+
|
|
12
|
+
* Local Docker execution
|
|
13
|
+
* AWS Batch
|
|
14
|
+
* Azure Batch
|
|
15
|
+
* (future) Kubernetes Jobs
|
|
16
|
+
* (future) TES-compatible HPC schedulers
|
|
17
|
+
|
|
18
|
+
The runtime provides a **strict execution contract** so that:
|
|
19
|
+
|
|
20
|
+
* TES adapters stay thin and backend-specific
|
|
21
|
+
* Tool containers remain portable and backend-agnostic
|
|
22
|
+
* Results are uploaded consistently (S3 / Azure Blob / future backends)
|
|
23
|
+
|
|
24
|
+
This mirrors the design philosophy used throughout OmniBioAI:
|
|
25
|
+
**separate orchestration from execution, and execution from logic.**
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## What This Runtime Is (and Is Not)
|
|
30
|
+
|
|
31
|
+
### ✅ This runtime **is**
|
|
32
|
+
|
|
33
|
+
* A **containerized tool launcher**
|
|
34
|
+
* Responsible for:
|
|
35
|
+
|
|
36
|
+
* Reading tool inputs from environment variables
|
|
37
|
+
* Executing tool logic
|
|
38
|
+
* Writing `results.json`
|
|
39
|
+
* Uploading results to cloud storage
|
|
40
|
+
* Cloud-agnostic (AWS / Azure supported today)
|
|
41
|
+
|
|
42
|
+
### ❌ This runtime is **not**
|
|
43
|
+
|
|
44
|
+
* A workflow engine
|
|
45
|
+
* A scheduler
|
|
46
|
+
* An LLM executor
|
|
47
|
+
* A UI layer
|
|
48
|
+
|
|
49
|
+
Those responsibilities live elsewhere in OmniBioAI.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Execution Contract (Critical)
|
|
54
|
+
|
|
55
|
+
All tools executed via `omnibioai-tool-runtime` **must** follow this contract.
|
|
56
|
+
|
|
57
|
+
### Environment Variables (Injected by TES Adapter)
|
|
58
|
+
|
|
59
|
+
| Variable | Description |
|
|
60
|
+
| ---------------- | -------------------------------------------------- |
|
|
61
|
+
| `TOOL_ID` | Tool identifier (`echo_test`, `blastn`, etc.) |
|
|
62
|
+
| `RUN_ID` | Unique run ID (generated by adapter) |
|
|
63
|
+
| `INPUTS_JSON` | JSON-encoded tool inputs |
|
|
64
|
+
| `RESOURCES_JSON` | JSON-encoded resource request |
|
|
65
|
+
| `S3_RESULT_URI` | (AWS Batch) S3 URI to upload results |
|
|
66
|
+
| `RESULT_URI` | (Azure Batch) `azureblob://` URI to upload results |
|
|
67
|
+
|
|
68
|
+
Only **one** of `S3_RESULT_URI` or `RESULT_URI` is expected per run.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Repository Structure
|
|
73
|
+
|
|
74
|
+
```text
|
|
75
|
+
omnibioai-tool-runtime/
|
|
76
|
+
├── Dockerfile
|
|
77
|
+
├── README.md
|
|
78
|
+
├── pyproject.toml
|
|
79
|
+
├── omni_tool_runtime/
|
|
80
|
+
│ ├── __init__.py
|
|
81
|
+
│ ├── result_uri.py # URI parsing & dispatch
|
|
82
|
+
│ ├── upload_result.py # Unified upload logic
|
|
83
|
+
│ └── uploaders/
|
|
84
|
+
│ ├── s3_uploader.py
|
|
85
|
+
│ └── azureblob_uploader.py
|
|
86
|
+
├── tools/
|
|
87
|
+
│ └── echo_test/
|
|
88
|
+
│ ├── __init__.py
|
|
89
|
+
│ └── run.py
|
|
90
|
+
└── tests/
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Example Tool: `echo_test`
|
|
96
|
+
|
|
97
|
+
This is the **reference implementation** for all future tools.
|
|
98
|
+
|
|
99
|
+
### Behavior
|
|
100
|
+
|
|
101
|
+
* Reads `INPUTS_JSON`
|
|
102
|
+
* Echoes a value
|
|
103
|
+
* Writes `results.json`
|
|
104
|
+
* Uploads results to configured storage backend
|
|
105
|
+
|
|
106
|
+
### Minimal tool implementation
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
# tools/echo_test/run.py
|
|
110
|
+
import json
|
|
111
|
+
import os
|
|
112
|
+
from omni_tool_runtime.upload_result import upload_result
|
|
113
|
+
|
|
114
|
+
def main():
|
|
115
|
+
tool_id = os.environ["TOOL_ID"]
|
|
116
|
+
run_id = os.environ["RUN_ID"]
|
|
117
|
+
inputs = json.loads(os.environ.get("INPUTS_JSON", "{}"))
|
|
118
|
+
|
|
119
|
+
text = inputs.get("text", "")
|
|
120
|
+
|
|
121
|
+
result = {
|
|
122
|
+
"ok": True,
|
|
123
|
+
"tool_id": tool_id,
|
|
124
|
+
"run_id": run_id,
|
|
125
|
+
"results": {"echo": text},
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
upload_result(result)
|
|
129
|
+
|
|
130
|
+
if __name__ == "__main__":
|
|
131
|
+
main()
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## How Results Upload Works
|
|
137
|
+
|
|
138
|
+
`upload_result()` automatically detects the backend:
|
|
139
|
+
|
|
140
|
+
| Backend | URI Example |
|
|
141
|
+
| ------- | ------------------------------------------------- |
|
|
142
|
+
| AWS | `s3://bucket/prefix/run_id/results.json` |
|
|
143
|
+
| Azure | `azureblob://account/container/path/results.json` |
|
|
144
|
+
|
|
145
|
+
The runtime:
|
|
146
|
+
|
|
147
|
+
1. Serializes result as JSON
|
|
148
|
+
2. Uploads to correct backend
|
|
149
|
+
3. Prints result to stdout (for debugging)
|
|
150
|
+
|
|
151
|
+
Adapters **never upload results themselves**.
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Building the Docker Image
|
|
156
|
+
|
|
157
|
+
From repository root:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
docker build -t man4ish/omnibioai-tool-runtime:latest .
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Verify:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
docker images | grep omnibioai-tool-runtime
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Running a Tool Locally (No Cloud)
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
docker run --rm \
|
|
175
|
+
-e TOOL_ID=echo_test \
|
|
176
|
+
-e RUN_ID=local-test-1 \
|
|
177
|
+
-e INPUTS_JSON='{"text":"hello world"}' \
|
|
178
|
+
-e RESOURCES_JSON='{}' \
|
|
179
|
+
man4ish/omnibioai-tool-runtime:latest
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Expected:
|
|
183
|
+
|
|
184
|
+
* JSON output printed to stdout
|
|
185
|
+
* No upload attempted if no result URI is provided
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## AWS Batch Usage
|
|
190
|
+
|
|
191
|
+
### Job Definition
|
|
192
|
+
|
|
193
|
+
* Image: `man4ish/omnibioai-tool-runtime:latest`
|
|
194
|
+
* Command override:
|
|
195
|
+
|
|
196
|
+
```json
|
|
197
|
+
["python", "-m", "tools.echo_test.run"]
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Injected Environment
|
|
201
|
+
|
|
202
|
+
* `S3_RESULT_URI` provided by `AwsBatchAdapter`
|
|
203
|
+
* IAM Role handles S3 auth
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Azure Batch Usage
|
|
208
|
+
|
|
209
|
+
### Task Settings
|
|
210
|
+
|
|
211
|
+
* Image: `man4ish/omnibioai-tool-runtime:latest`
|
|
212
|
+
* Command:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
python -m tools.echo_test.run
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Injected Environment
|
|
219
|
+
|
|
220
|
+
* `RESULT_URI=azureblob://...`
|
|
221
|
+
* Managed Identity handles Blob auth
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## Pushing the Image
|
|
226
|
+
|
|
227
|
+
### Docker Hub
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
docker push man4ish/omnibioai-tool-runtime:latest
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Azure Container Registry
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
az acr login --name YOUR_ACR
|
|
237
|
+
docker tag man4ish/omnibioai-tool-runtime:latest YOUR_ACR.azurecr.io/omnibioai-tool-runtime:latest
|
|
238
|
+
docker push YOUR_ACR.azurecr.io/omnibioai-tool-runtime:latest
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Adding a New Tool
|
|
244
|
+
|
|
245
|
+
### Step 1: Create tool folder
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
mkdir tools/my_new_tool
|
|
249
|
+
touch tools/my_new_tool/__init__.py
|
|
250
|
+
touch tools/my_new_tool/run.py
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Step 2: Implement `run.py`
|
|
254
|
+
|
|
255
|
+
Rules:
|
|
256
|
+
|
|
257
|
+
* Must read env vars
|
|
258
|
+
* Must write result via `upload_result()`
|
|
259
|
+
* Must be deterministic
|
|
260
|
+
|
|
261
|
+
### Step 3: Register tool in adapter config
|
|
262
|
+
|
|
263
|
+
**AWS Batch**
|
|
264
|
+
|
|
265
|
+
```yaml
|
|
266
|
+
job_definition_map:
|
|
267
|
+
my_new_tool: "omnibioai-my-new-tool:1"
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
**Azure Batch**
|
|
271
|
+
|
|
272
|
+
```yaml
|
|
273
|
+
tools:
|
|
274
|
+
my_new_tool:
|
|
275
|
+
image: "man4ish/omnibioai-tool-runtime:latest"
|
|
276
|
+
command: ["python", "-m", "tools.my_new_tool.run"]
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Current State
|
|
282
|
+
|
|
283
|
+
### Implemented
|
|
284
|
+
|
|
285
|
+
* Unified runtime image
|
|
286
|
+
* AWS Batch support
|
|
287
|
+
* Azure Batch support
|
|
288
|
+
* S3 + Azure Blob uploads
|
|
289
|
+
* Deterministic execution contract
|
|
290
|
+
* Reference `echo_test` tool
|
|
291
|
+
|
|
292
|
+
### Intentionally Missing (by design)
|
|
293
|
+
|
|
294
|
+
* No workflow orchestration
|
|
295
|
+
* No retry logic
|
|
296
|
+
* No state machine
|
|
297
|
+
* No scheduling policy
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
## Planned Future Enhancements
|
|
302
|
+
|
|
303
|
+
### Short-term
|
|
304
|
+
|
|
305
|
+
* Tool generator CLI (`omnibioai tool new`)
|
|
306
|
+
* Structured logging
|
|
307
|
+
* Result size validation
|
|
308
|
+
* Runtime version pinning
|
|
309
|
+
|
|
310
|
+
### Medium-term
|
|
311
|
+
|
|
312
|
+
* Kubernetes Job adapter support
|
|
313
|
+
* Streaming stdout to object storage
|
|
314
|
+
* Tool-level resource enforcement
|
|
315
|
+
* Tool metadata introspection
|
|
316
|
+
|
|
317
|
+
### Long-term
|
|
318
|
+
|
|
319
|
+
* Signed result manifests
|
|
320
|
+
* Provenance hashing
|
|
321
|
+
* Deterministic replay support
|
|
322
|
+
* Cross-cloud artifact mirroring
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## Design Philosophy (Important)
|
|
327
|
+
|
|
328
|
+
This runtime is intentionally **boring**.
|
|
329
|
+
|
|
330
|
+
That’s a feature.
|
|
331
|
+
|
|
332
|
+
* No magic
|
|
333
|
+
* No backend assumptions
|
|
334
|
+
* No hidden orchestration
|
|
335
|
+
* One job → one tool → one result
|
|
336
|
+
|
|
337
|
+
Everything complex belongs **above** this layer.
|
|
338
|
+
|
|
339
|
+
---
|
|
340
|
+
|
|
341
|
+
## Final Note
|
|
342
|
+
|
|
343
|
+
If this runtime feels similar to:
|
|
344
|
+
|
|
345
|
+
* CWL CommandLineTool
|
|
346
|
+
* TES task containers
|
|
347
|
+
* AWS Batch single-purpose images
|
|
348
|
+
|
|
349
|
+
That’s intentional.
|
|
350
|
+
|
|
351
|
+
You’re building the **correct abstraction boundary**.
|
|
352
|
+
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: omnibioai-tool-runtime
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Portable tool runtime for OmniBioAI TES (RESULT_URI uploader for s3:// and azureblob://)
|
|
5
|
+
Author: Manish Kumar
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: omnibioai,tes,bioinformatics,batch,azure,aws
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: pydantic>=2.0
|
|
14
|
+
Provides-Extra: aws
|
|
15
|
+
Requires-Dist: boto3>=1.26; extra == "aws"
|
|
16
|
+
Provides-Extra: azure
|
|
17
|
+
Requires-Dist: azure-identity>=1.15; extra == "azure"
|
|
18
|
+
Requires-Dist: azure-storage-blob>=12.19; extra == "azure"
|
|
19
|
+
|
|
20
|
+
# Omni Tool Runtime
|
|
21
|
+
|
|
22
|
+
**Portable, cloud-agnostic execution runtime for OmniBioAI tools**
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Overview
|
|
27
|
+
|
|
28
|
+
`omnibioai-tool-runtime` is a **minimal, deterministic execution runtime** used by
|
|
29
|
+
OmniBioAI’s Tool Execution Service (TES) to run individual tools across **multiple execution backends**, including:
|
|
30
|
+
|
|
31
|
+
* Local Docker execution
|
|
32
|
+
* AWS Batch
|
|
33
|
+
* Azure Batch
|
|
34
|
+
* (future) Kubernetes Jobs
|
|
35
|
+
* (future) TES-compatible HPC schedulers
|
|
36
|
+
|
|
37
|
+
The runtime provides a **strict execution contract** so that:
|
|
38
|
+
|
|
39
|
+
* TES adapters stay thin and backend-specific
|
|
40
|
+
* Tool containers remain portable and backend-agnostic
|
|
41
|
+
* Results are uploaded consistently (S3 / Azure Blob / future backends)
|
|
42
|
+
|
|
43
|
+
This mirrors the design philosophy used throughout OmniBioAI:
|
|
44
|
+
**separate orchestration from execution, and execution from logic.**
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## What This Runtime Is (and Is Not)
|
|
49
|
+
|
|
50
|
+
### ✅ This runtime **is**
|
|
51
|
+
|
|
52
|
+
* A **containerized tool launcher**
|
|
53
|
+
* Responsible for:
|
|
54
|
+
|
|
55
|
+
* Reading tool inputs from environment variables
|
|
56
|
+
* Executing tool logic
|
|
57
|
+
* Writing `results.json`
|
|
58
|
+
* Uploading results to cloud storage
|
|
59
|
+
* Cloud-agnostic (AWS / Azure supported today)
|
|
60
|
+
|
|
61
|
+
### ❌ This runtime is **not**
|
|
62
|
+
|
|
63
|
+
* A workflow engine
|
|
64
|
+
* A scheduler
|
|
65
|
+
* An LLM executor
|
|
66
|
+
* A UI layer
|
|
67
|
+
|
|
68
|
+
Those responsibilities live elsewhere in OmniBioAI.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Execution Contract (Critical)
|
|
73
|
+
|
|
74
|
+
All tools executed via `omnibioai-tool-runtime` **must** follow this contract.
|
|
75
|
+
|
|
76
|
+
### Environment Variables (Injected by TES Adapter)
|
|
77
|
+
|
|
78
|
+
| Variable | Description |
|
|
79
|
+
| ---------------- | -------------------------------------------------- |
|
|
80
|
+
| `TOOL_ID` | Tool identifier (`echo_test`, `blastn`, etc.) |
|
|
81
|
+
| `RUN_ID` | Unique run ID (generated by adapter) |
|
|
82
|
+
| `INPUTS_JSON` | JSON-encoded tool inputs |
|
|
83
|
+
| `RESOURCES_JSON` | JSON-encoded resource request |
|
|
84
|
+
| `S3_RESULT_URI` | (AWS Batch) S3 URI to upload results |
|
|
85
|
+
| `RESULT_URI` | (Azure Batch) `azureblob://` URI to upload results |
|
|
86
|
+
|
|
87
|
+
Only **one** of `S3_RESULT_URI` or `RESULT_URI` is expected per run.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Repository Structure
|
|
92
|
+
|
|
93
|
+
```text
|
|
94
|
+
omnibioai-tool-runtime/
|
|
95
|
+
├── Dockerfile
|
|
96
|
+
├── README.md
|
|
97
|
+
├── pyproject.toml
|
|
98
|
+
├── omni_tool_runtime/
|
|
99
|
+
│ ├── __init__.py
|
|
100
|
+
│ ├── result_uri.py # URI parsing & dispatch
|
|
101
|
+
│ ├── upload_result.py # Unified upload logic
|
|
102
|
+
│ └── uploaders/
|
|
103
|
+
│ ├── s3_uploader.py
|
|
104
|
+
│ └── azureblob_uploader.py
|
|
105
|
+
├── tools/
|
|
106
|
+
│ └── echo_test/
|
|
107
|
+
│ ├── __init__.py
|
|
108
|
+
│ └── run.py
|
|
109
|
+
└── tests/
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Example Tool: `echo_test`
|
|
115
|
+
|
|
116
|
+
This is the **reference implementation** for all future tools.
|
|
117
|
+
|
|
118
|
+
### Behavior
|
|
119
|
+
|
|
120
|
+
* Reads `INPUTS_JSON`
|
|
121
|
+
* Echoes a value
|
|
122
|
+
* Writes `results.json`
|
|
123
|
+
* Uploads results to configured storage backend
|
|
124
|
+
|
|
125
|
+
### Minimal tool implementation
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# tools/echo_test/run.py
|
|
129
|
+
import json
|
|
130
|
+
import os
|
|
131
|
+
from omni_tool_runtime.upload_result import upload_result
|
|
132
|
+
|
|
133
|
+
def main():
|
|
134
|
+
tool_id = os.environ["TOOL_ID"]
|
|
135
|
+
run_id = os.environ["RUN_ID"]
|
|
136
|
+
inputs = json.loads(os.environ.get("INPUTS_JSON", "{}"))
|
|
137
|
+
|
|
138
|
+
text = inputs.get("text", "")
|
|
139
|
+
|
|
140
|
+
result = {
|
|
141
|
+
"ok": True,
|
|
142
|
+
"tool_id": tool_id,
|
|
143
|
+
"run_id": run_id,
|
|
144
|
+
"results": {"echo": text},
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
upload_result(result)
|
|
148
|
+
|
|
149
|
+
if __name__ == "__main__":
|
|
150
|
+
main()
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## How Results Upload Works
|
|
156
|
+
|
|
157
|
+
`upload_result()` automatically detects the backend:
|
|
158
|
+
|
|
159
|
+
| Backend | URI Example |
|
|
160
|
+
| ------- | ------------------------------------------------- |
|
|
161
|
+
| AWS | `s3://bucket/prefix/run_id/results.json` |
|
|
162
|
+
| Azure | `azureblob://account/container/path/results.json` |
|
|
163
|
+
|
|
164
|
+
The runtime:
|
|
165
|
+
|
|
166
|
+
1. Serializes result as JSON
|
|
167
|
+
2. Uploads to correct backend
|
|
168
|
+
3. Prints result to stdout (for debugging)
|
|
169
|
+
|
|
170
|
+
Adapters **never upload results themselves**.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Building the Docker Image
|
|
175
|
+
|
|
176
|
+
From repository root:
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
docker build -t man4ish/omnibioai-tool-runtime:latest .
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Verify:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
docker images | grep omnibioai-tool-runtime
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Running a Tool Locally (No Cloud)
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
docker run --rm \
|
|
194
|
+
-e TOOL_ID=echo_test \
|
|
195
|
+
-e RUN_ID=local-test-1 \
|
|
196
|
+
-e INPUTS_JSON='{"text":"hello world"}' \
|
|
197
|
+
-e RESOURCES_JSON='{}' \
|
|
198
|
+
man4ish/omnibioai-tool-runtime:latest
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Expected:
|
|
202
|
+
|
|
203
|
+
* JSON output printed to stdout
|
|
204
|
+
* No upload attempted if no result URI is provided
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## AWS Batch Usage
|
|
209
|
+
|
|
210
|
+
### Job Definition
|
|
211
|
+
|
|
212
|
+
* Image: `man4ish/omnibioai-tool-runtime:latest`
|
|
213
|
+
* Command override:
|
|
214
|
+
|
|
215
|
+
```json
|
|
216
|
+
["python", "-m", "tools.echo_test.run"]
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Injected Environment
|
|
220
|
+
|
|
221
|
+
* `S3_RESULT_URI` provided by `AwsBatchAdapter`
|
|
222
|
+
* IAM Role handles S3 auth
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Azure Batch Usage
|
|
227
|
+
|
|
228
|
+
### Task Settings
|
|
229
|
+
|
|
230
|
+
* Image: `man4ish/omnibioai-tool-runtime:latest`
|
|
231
|
+
* Command:
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
python -m tools.echo_test.run
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### Injected Environment
|
|
238
|
+
|
|
239
|
+
* `RESULT_URI=azureblob://...`
|
|
240
|
+
* Managed Identity handles Blob auth
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Pushing the Image
|
|
245
|
+
|
|
246
|
+
### Docker Hub
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
docker push man4ish/omnibioai-tool-runtime:latest
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Azure Container Registry
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
az acr login --name YOUR_ACR
|
|
256
|
+
docker tag man4ish/omnibioai-tool-runtime:latest YOUR_ACR.azurecr.io/omnibioai-tool-runtime:latest
|
|
257
|
+
docker push YOUR_ACR.azurecr.io/omnibioai-tool-runtime:latest
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## Adding a New Tool
|
|
263
|
+
|
|
264
|
+
### Step 1: Create tool folder
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
mkdir tools/my_new_tool
|
|
268
|
+
touch tools/my_new_tool/__init__.py
|
|
269
|
+
touch tools/my_new_tool/run.py
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Step 2: Implement `run.py`
|
|
273
|
+
|
|
274
|
+
Rules:
|
|
275
|
+
|
|
276
|
+
* Must read env vars
|
|
277
|
+
* Must write result via `upload_result()`
|
|
278
|
+
* Must be deterministic
|
|
279
|
+
|
|
280
|
+
### Step 3: Register tool in adapter config
|
|
281
|
+
|
|
282
|
+
**AWS Batch**
|
|
283
|
+
|
|
284
|
+
```yaml
|
|
285
|
+
job_definition_map:
|
|
286
|
+
my_new_tool: "omnibioai-my-new-tool:1"
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**Azure Batch**
|
|
290
|
+
|
|
291
|
+
```yaml
|
|
292
|
+
tools:
|
|
293
|
+
my_new_tool:
|
|
294
|
+
image: "man4ish/omnibioai-tool-runtime:latest"
|
|
295
|
+
command: ["python", "-m", "tools.my_new_tool.run"]
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## Current State
|
|
301
|
+
|
|
302
|
+
### Implemented
|
|
303
|
+
|
|
304
|
+
* Unified runtime image
|
|
305
|
+
* AWS Batch support
|
|
306
|
+
* Azure Batch support
|
|
307
|
+
* S3 + Azure Blob uploads
|
|
308
|
+
* Deterministic execution contract
|
|
309
|
+
* Reference `echo_test` tool
|
|
310
|
+
|
|
311
|
+
### Intentionally Missing (by design)
|
|
312
|
+
|
|
313
|
+
* No workflow orchestration
|
|
314
|
+
* No retry logic
|
|
315
|
+
* No state machine
|
|
316
|
+
* No scheduling policy
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## Planned Future Enhancements
|
|
321
|
+
|
|
322
|
+
### Short-term
|
|
323
|
+
|
|
324
|
+
* Tool generator CLI (`omnibioai tool new`)
|
|
325
|
+
* Structured logging
|
|
326
|
+
* Result size validation
|
|
327
|
+
* Runtime version pinning
|
|
328
|
+
|
|
329
|
+
### Medium-term
|
|
330
|
+
|
|
331
|
+
* Kubernetes Job adapter support
|
|
332
|
+
* Streaming stdout to object storage
|
|
333
|
+
* Tool-level resource enforcement
|
|
334
|
+
* Tool metadata introspection
|
|
335
|
+
|
|
336
|
+
### Long-term
|
|
337
|
+
|
|
338
|
+
* Signed result manifests
|
|
339
|
+
* Provenance hashing
|
|
340
|
+
* Deterministic replay support
|
|
341
|
+
* Cross-cloud artifact mirroring
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Design Philosophy (Important)
|
|
346
|
+
|
|
347
|
+
This runtime is intentionally **boring**.
|
|
348
|
+
|
|
349
|
+
That’s a feature.
|
|
350
|
+
|
|
351
|
+
* No magic
|
|
352
|
+
* No backend assumptions
|
|
353
|
+
* No hidden orchestration
|
|
354
|
+
* One job → one tool → one result
|
|
355
|
+
|
|
356
|
+
Everything complex belongs **above** this layer.
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## Final Note
|
|
361
|
+
|
|
362
|
+
If this runtime feels similar to:
|
|
363
|
+
|
|
364
|
+
* CWL CommandLineTool
|
|
365
|
+
* TES task containers
|
|
366
|
+
* AWS Batch single-purpose images
|
|
367
|
+
|
|
368
|
+
That’s intentional.
|
|
369
|
+
|
|
370
|
+
You’re building the **correct abstraction boundary**.
|
|
371
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
omnibioai_tool_runtime.egg-info/PKG-INFO
|
|
4
|
+
omnibioai_tool_runtime.egg-info/SOURCES.txt
|
|
5
|
+
omnibioai_tool_runtime.egg-info/dependency_links.txt
|
|
6
|
+
omnibioai_tool_runtime.egg-info/requires.txt
|
|
7
|
+
omnibioai_tool_runtime.egg-info/top_level.txt
|
|
8
|
+
tests/test_result_uri_parse.py
|
|
9
|
+
tests/test_tools_echo_test.py
|
|
10
|
+
tests/test_upload_dispatch.py
|
|
11
|
+
tools/__init__.py
|
|
12
|
+
tools/echo_test/__init__.py
|
|
13
|
+
tools/echo_test/run.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tools
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=65", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "omnibioai-tool-runtime"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Portable tool runtime for OmniBioAI TES (RESULT_URI uploader for s3:// and azureblob://)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Manish Kumar" }]
|
|
13
|
+
keywords = ["omnibioai", "tes", "bioinformatics", "batch", "azure", "aws"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
dependencies = [
|
|
21
|
+
"pydantic>=2.0"
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
aws = ["boto3>=1.26"]
|
|
26
|
+
azure = ["azure-identity>=1.15", "azure-storage-blob>=12.19"]
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.packages.find]
|
|
29
|
+
where = ["."]
|
|
30
|
+
include = ["omnibioai_tool_runtime*", "tools*"]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# tests/test_result_uri_parse.py
"""Unit tests for RESULT_URI parsing (``s3://`` and ``azureblob://`` schemes).

NOTE(review): these tests import ``omni_tool_runtime``, while pyproject.toml
packages ``omnibioai_tool_runtime*`` and top_level.txt lists only ``tools`` —
confirm the real importable module name before release.
"""
from __future__ import annotations

import pytest

from omni_tool_runtime.result_uri import parse_result_uri


def test_parse_s3_uri_ok():
    parsed = parse_result_uri("s3://my-bucket/some/prefix/results.json")
    assert parsed.scheme == "s3"
    assert parsed.account_or_bucket == "my-bucket"
    # S3 URIs have no container component.
    assert parsed.container is None
    assert parsed.path == "some/prefix/results.json"


def test_parse_s3_uri_requires_bucket_and_key():
    # Each of these is missing the bucket, the key, or both.
    for bad_uri in ("s3://", "s3://bucket-only", "s3://bucket/"):
        with pytest.raises(ValueError):
            parse_result_uri(bad_uri)


def test_parse_azureblob_uri_ok():
    parsed = parse_result_uri("azureblob://acct/container/path/to/results.json")
    assert parsed.scheme == "azureblob"
    assert parsed.account_or_bucket == "acct"
    assert parsed.container == "container"
    assert parsed.path == "path/to/results.json"


def test_parse_azureblob_uri_requires_container_and_path():
    # Account alone, or a container without a blob path, must be rejected.
    for bad_uri in (
        "azureblob://acct/",
        "azureblob://acct/container",
        "azureblob://acct/container/",
    ):
        with pytest.raises(ValueError):
            parse_result_uri(bad_uri)


def test_parse_rejects_unknown_scheme():
    with pytest.raises(ValueError, match="Unsupported"):
        parse_result_uri("gs://bucket/key")  # gs:// is not supported yet


def test_parse_rejects_missing_scheme():
    with pytest.raises(ValueError, match="missing scheme"):
        parse_result_uri("bucket/key")
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""End-to-end check of the echo_test tool in local mode (empty RESULT_URI)."""
import json
import os
import subprocess
import sys


def test_echo_test_local_mode_no_result_uri():
    # Build the tool's environment contract explicitly; an empty RESULT_URI
    # selects local mode, so nothing is uploaded.
    env = {
        **os.environ,
        "TOOL_ID": "echo_test",
        "RUN_ID": "local123",
        "INPUTS_JSON": json.dumps({"text": "hello"}),
        "RESOURCES_JSON": json.dumps({"cpu": 1}),
        "RESULT_URI": "",  # local mode
    }

    proc = subprocess.run(
        [sys.executable, "-m", "tools.echo_test.run"],
        env=env,
        capture_output=True,
        text=True,
    )
    assert proc.returncode == 0
    # In local mode the tool prints exactly one JSON document to stdout.
    payload = json.loads(proc.stdout)
    assert payload["ok"] is True
    assert payload["results"]["echo"] == "hello"
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# tests/test_upload_dispatch.py
"""Dispatch tests: upload_to_result_uri routes to the S3 or Azure uploader.

Each test monkeypatches the uploader constructor used inside
``upload_to_result_uri`` with a spy that records ``upload_bytes`` calls.

NOTE(review): imports ``omni_tool_runtime`` while pyproject.toml packages
``omnibioai_tool_runtime*`` — confirm the real importable module name.
"""
from __future__ import annotations

import pytest

import omni_tool_runtime.upload_result as mod


class _S3Spy:
    """Stand-in for S3Uploader that records every upload_bytes call."""

    def __init__(self, aws_profile=None):
        self.aws_profile = aws_profile
        self.calls = []

    def upload_bytes(self, *, bucket: str, key: str, data: bytes, content_type: str) -> None:
        self.calls.append(
            {"bucket": bucket, "key": key, "data": data, "content_type": content_type, "aws_profile": self.aws_profile}
        )


class _AzureSpy:
    """Stand-in for AzureBlobUploader that records every upload_bytes call."""

    def __init__(self, account_name: str, auth: str = "managed_identity", connection_string=None):
        self.account_name = account_name
        self.auth = auth
        self.connection_string = connection_string
        self.calls = []

    def upload_bytes(self, *, container: str, blob_path: str, data: bytes, content_type: str) -> None:
        self.calls.append(
            {
                "account_name": self.account_name,
                "auth": self.auth,
                "connection_string": self.connection_string,
                "container": container,
                "blob_path": blob_path,
                "data": data,
                "content_type": content_type,
            }
        )


def test_upload_dispatch_to_s3(monkeypatch: pytest.MonkeyPatch):
    # Patch the constructor used inside upload_to_result_uri, capturing the
    # created instance so its recorded calls can be inspected afterwards.
    # (Previously this patched S3Uploader twice — first with a throwaway
    # lambda that was immediately overwritten — and created an unused spy.)
    created = {}

    def _ctor(aws_profile=None):
        created["inst"] = _S3Spy(aws_profile=aws_profile)
        return created["inst"]

    monkeypatch.setattr(mod, "S3Uploader", _ctor)

    uri = "s3://bkt/prefix/run1/results.json"
    payload = b'{"ok": true}'
    mod.upload_to_result_uri(result_uri=uri, data=payload, content_type="application/json", aws_profile="prof1")

    inst = created["inst"]
    assert len(inst.calls) == 1
    c = inst.calls[0]
    assert c["bucket"] == "bkt"
    assert c["key"] == "prefix/run1/results.json"
    assert c["data"] == payload
    assert c["content_type"] == "application/json"
    assert c["aws_profile"] == "prof1"


def test_upload_dispatch_to_azureblob(monkeypatch: pytest.MonkeyPatch):
    created = {}

    def _ctor(account_name: str, auth: str = "managed_identity", connection_string=None):
        created["inst"] = _AzureSpy(account_name=account_name, auth=auth, connection_string=connection_string)
        return created["inst"]

    monkeypatch.setattr(mod, "AzureBlobUploader", _ctor)

    uri = "azureblob://acct1/contA/tes-runs/r1/tools/echo_test/results.json"
    payload = b'{"ok": true}'

    mod.upload_to_result_uri(
        result_uri=uri,
        data=payload,
        content_type="application/json",
        azure_auth="connection_string",
        azure_connection_string="UseDevelopmentStorage=true",
    )

    inst = created["inst"]
    assert len(inst.calls) == 1
    c = inst.calls[0]
    assert c["account_name"] == "acct1"
    assert c["auth"] == "connection_string"
    assert c["connection_string"] == "UseDevelopmentStorage=true"
    assert c["container"] == "contA"
    assert c["blob_path"] == "tes-runs/r1/tools/echo_test/results.json"
    assert c["data"] == payload
    assert c["content_type"] == "application/json"


def test_upload_unsupported_scheme_raises():
    with pytest.raises(ValueError):
        mod.upload_to_result_uri(result_uri="gs://bucket/key", data=b"x")
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# tools/echo_test/run.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from omni_tool_runtime.upload_result import upload_to_result_uri
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main() -> int:
    """Run the echo_test tool and return a process exit code.

    Reads the runtime contract from environment variables:
    TOOL_ID, RUN_ID, INPUTS_JSON (JSON object), RESULT_URI (optional).
    Prints the result JSON to stdout; uploads it when RESULT_URI is set.

    Returns:
        0 on success (including the "missing inputs.text" result, which is
        reported inside the JSON body), 2 when INPUTS_JSON is not valid JSON.
    """
    tool_id = os.getenv("TOOL_ID", "")
    run_id = os.getenv("RUN_ID", "")
    result_uri = (os.getenv("RESULT_URI", "") or "").strip()

    inputs_json = os.getenv("INPUTS_JSON", "{}")
    try:
        inputs = json.loads(inputs_json)
    except Exception as e:
        out = {"ok": False, "error": f"bad INPUTS_JSON: {e}", "tool_id": tool_id, "run_id": run_id}
        print(json.dumps(out))
        return 2

    text = inputs.get("text")  # keep strict contract (or: inputs.get("text") or inputs.get("msg"))
    if text is None:
        result_obj = {
            "ok": False,
            "error": "missing inputs.text",
            "tool_id": tool_id,
            "run_id": run_id,
            "inputs": inputs,
        }
    else:
        result_obj = {
            "ok": True,
            "tool_id": tool_id,
            "run_id": run_id,
            "results": {"echo": text},
        }

    body = json.dumps(result_obj, indent=2)

    # Always print for logs/debug
    print(body)

    # If RESULT_URI not set -> local mode: succeed
    if not result_uri:
        return 0

    # Cloud mode: upload. BUGFIX: upload_to_result_uri takes ``data=`` (see
    # the calls in tests/test_upload_dispatch.py); the previous ``content=``
    # keyword raised TypeError whenever RESULT_URI was set.
    upload_to_result_uri(result_uri=result_uri, data=body.encode("utf-8"))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|