openrat 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openrat-0.1.0/LICENSE +21 -0
- openrat-0.1.0/PKG-INFO +295 -0
- openrat-0.1.0/README.md +275 -0
- openrat-0.1.0/pyproject.toml +33 -0
- openrat-0.1.0/setup.cfg +4 -0
- openrat-0.1.0/src/openrat/__init__.py +54 -0
- openrat-0.1.0/src/openrat/__main__.py +5 -0
- openrat-0.1.0/src/openrat/api/__init__.py +27 -0
- openrat-0.1.0/src/openrat/api/openrat.py +109 -0
- openrat-0.1.0/src/openrat/api/runner.py +318 -0
- openrat-0.1.0/src/openrat/core/__init__.py +31 -0
- openrat-0.1.0/src/openrat/core/artifact.py +117 -0
- openrat-0.1.0/src/openrat/core/errors.py +60 -0
- openrat-0.1.0/src/openrat/core/experiment_spec.py +116 -0
- openrat-0.1.0/src/openrat/core/governance/__init__.py +24 -0
- openrat-0.1.0/src/openrat/core/governance/autonomy.py +32 -0
- openrat-0.1.0/src/openrat/core/governance/patch.py +36 -0
- openrat-0.1.0/src/openrat/core/protocols.py +75 -0
- openrat-0.1.0/src/openrat/core/session/__init__.py +5 -0
- openrat-0.1.0/src/openrat/core/session/session.py +269 -0
- openrat-0.1.0/src/openrat/executors/__init__.py +40 -0
- openrat-0.1.0/src/openrat/executors/base_executor.py +13 -0
- openrat-0.1.0/src/openrat/executors/docker_executor.py +195 -0
- openrat-0.1.0/src/openrat/executors/registry.py +26 -0
- openrat-0.1.0/src/openrat/model/__init__.py +17 -0
- openrat-0.1.0/src/openrat/model/adapters/base_adapter.py +21 -0
- openrat-0.1.0/src/openrat/model/adapters/claude_adapter.py +41 -0
- openrat-0.1.0/src/openrat/model/adapters/gemini_adapter.py +47 -0
- openrat-0.1.0/src/openrat/model/adapters/oai_adapter.py +87 -0
- openrat-0.1.0/src/openrat/model/agent_loop.py +46 -0
- openrat-0.1.0/src/openrat/model/factory.py +25 -0
- openrat-0.1.0/src/openrat/model/types.py +32 -0
- openrat-0.1.0/src/openrat/sandbox/__init__.py +6 -0
- openrat-0.1.0/src/openrat/sandbox/guardrails.py +23 -0
- openrat-0.1.0/src/openrat/tasks/__init__.py +3 -0
- openrat-0.1.0/src/openrat/tasks/dag/__init__.py +3 -0
- openrat-0.1.0/src/openrat/tasks/dag/dag.py +151 -0
- openrat-0.1.0/src/openrat/tasks/dag/task.py +32 -0
- openrat-0.1.0/src/openrat/tasks/plan/__init__.py +3 -0
- openrat-0.1.0/src/openrat/tasks/plan/plan.py +83 -0
- openrat-0.1.0/src/openrat/tools/__init__.py +29 -0
- openrat-0.1.0/src/openrat/tools/base.py +79 -0
- openrat-0.1.0/src/openrat/tools/executor.py +139 -0
- openrat-0.1.0/src/openrat/tools/file_inspector.py +99 -0
- openrat-0.1.0/src/openrat/tools/log_reader.py +73 -0
- openrat-0.1.0/src/openrat/tools/patch_proposal.py +58 -0
- openrat-0.1.0/src/openrat/tools/registry.py +66 -0
- openrat-0.1.0/src/openrat.egg-info/PKG-INFO +295 -0
- openrat-0.1.0/src/openrat.egg-info/SOURCES.txt +53 -0
- openrat-0.1.0/src/openrat.egg-info/dependency_links.txt +1 -0
- openrat-0.1.0/src/openrat.egg-info/entry_points.txt +2 -0
- openrat-0.1.0/src/openrat.egg-info/requires.txt +1 -0
- openrat-0.1.0/src/openrat.egg-info/top_level.txt +2 -0
- openrat-0.1.0/src/ui/__init__.py +3 -0
- openrat-0.1.0/src/ui/cli.py +38 -0
openrat-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 OpenRat contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
openrat-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openrat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: OpenRat: experiment orchestration and sandboxed execution framework
|
|
5
|
+
Author: OpenRat contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: experiments,orchestration,automation,research,docker
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: requests>=2.0
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# openrat
|
|
22
|
+
Your personal AI lab rat
|
|
23
|
+
|
|
24
|
+
Openrat is a research‑first, privacy‑preserving experiment agent designed to run, debug, chain, schedule, and report computational experiments while you go about your day.
|
|
25
|
+
|
|
26
|
+
It is built for researchers and research institutions who want automation without losing control, reproducibility, or interpretability.
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
What Openrat does
|
|
30
|
+
* Runs experiments in Docker with sandboxed execution
|
|
31
|
+
* Diagnoses failures and reports actionable diagnostics
|
|
32
|
+
* Chains and branches experiments based on results
|
|
33
|
+
* Safely applies bounded, auditable changes (e.g. configs, hyperparameters)
|
|
34
|
+
* Generates diagnostic artifacts (summaries, diffs, metrics, plots)
|
|
35
|
+
* Cluster / remote execution (coming soon)
|
|
36
|
+
* Optional email notifications (coming soon)
|
|
37
|
+
|
|
38
|
+
Openrat is editor‑agnostic (VS Code, Neovim, any IDE) and shell‑based by design.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
Install from PyPI:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install openrat
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
For local development:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e .
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
Research‑first by design.
|
|
56
|
+
Openrat is not a generic AI coding agent.
|
|
57
|
+
|
|
58
|
+
* It does not rewrite large portions of your codebase
|
|
59
|
+
* It preserves research intent and reproducibility
|
|
60
|
+
* All actions are explicit, logged, and reversible
|
|
61
|
+
* A human remains in the loop whenever ambiguity or risk arises
|
|
62
|
+
|
|
63
|
+
For larger changes, Openrat proposes patches with explanations rather than applying them automatically.
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
Bounded autonomy (capability‑scoped)
|
|
67
|
+
Openrat uses graduated autonomy levels, enforced outside the model:
|
|
68
|
+
|
|
69
|
+
* Level 0 — Observe only: run, diagnose, report
|
|
70
|
+
* Level 1 — Parameter autonomy: modify configs and hyperparameters
|
|
71
|
+
* Level 2 — Runtime repair: apply minimal fixes for common runtime errors (with safeguards)
|
|
72
|
+
* Level 3 — Extended edits (opt‑in): larger patches within explicitly allowed scope
|
|
73
|
+
|
|
74
|
+
Openrat cannot increase its own autonomy. All permissions are user‑controlled and auditable.
|
|
75
|
+
|
|
76
|
+
Governance is configured via `Session`:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from openrat import Openrat, Session, AutonomyLevel
|
|
80
|
+
|
|
81
|
+
# Allow only observation (run, diagnose)
|
|
82
|
+
session = Session(
|
|
83
|
+
autonomy=AutonomyLevel.OBSERVE,
|
|
84
|
+
patch_policy="disabled"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Build and execute plan within governance constraints
|
|
88
|
+
app = Openrat({"executor": "docker", "docker_image": "python:3.11"})
|
|
89
|
+
plan = app.build_plan(spec, session)
|
|
90
|
+
artifact = app.execute_plan(plan)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
Experiment chaining & branching
|
|
95
|
+
Openrat can:
|
|
96
|
+
* Run experiments sequentially or in parallel
|
|
97
|
+
* Branch based on metrics, failures, or diagnostics
|
|
98
|
+
* Modify parameters or select follow‑up experiments conditionally
|
|
99
|
+
|
|
100
|
+
Example:
|
|
101
|
+
Run experiment B only if validation loss improves after experiment A.
|
|
102
|
+
Chaining logic can be defined via natural language or structured instruction files (.yaml, .json, .md) for maximum reliability.
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
Model‑agnostic
|
|
106
|
+
* Supports local open‑source models or cloud models (GPT, Gemini, Claude)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
Privacy & security
|
|
110
|
+
* Local runs
|
|
111
|
+
* No code, data, or experiments leave your machine unless you choose
|
|
112
|
+
* Remote control workflows (coming soon; see `ROADMAP.md`)
|
|
113
|
+
* Autonomy and permissions are enforced by policy, not by the model
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
## Development
|
|
117
|
+
|
|
118
|
+
Run tests:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
pytest tests/ -q
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Set executor policy to production (default is `auto`):
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
EXECUTOR_POLICY=production pytest tests/
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
## Usage Guide
|
|
132
|
+
|
|
133
|
+
### 1. Quick Execution (Simplest)
|
|
134
|
+
|
|
135
|
+
Run a script and capture output:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from openrat import Openrat
|
|
139
|
+
|
|
140
|
+
app = Openrat({"executor": "docker", "docker_image": "python:3.11"})
|
|
141
|
+
result = app.run(
|
|
142
|
+
"experiments/train.py",
|
|
143
|
+
timeout=120,
|
|
144
|
+
isolate=True, # copies script to temp dir for safety
|
|
145
|
+
memory="1g",
|
|
146
|
+
cpus="2.0",
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
print(result["stdout"])
|
|
150
|
+
print(f"Exit code: {result['return_code']}")
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
See `examples/run_experiment.py` for a complete example.
|
|
154
|
+
|
|
155
|
+
Examples require Docker to be running.
|
|
156
|
+
|
|
157
|
+
### 2. Framework Workflow (Recommended)
|
|
158
|
+
|
|
159
|
+
Build and execute an experiment plan with governance:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
from openrat import Openrat, Session, AutonomyLevel, ExperimentSpec
|
|
163
|
+
|
|
164
|
+
# Create a session (defines autonomy and governance)
|
|
165
|
+
session = Session(
|
|
166
|
+
autonomy=AutonomyLevel.OBSERVE,
|
|
167
|
+
patch_policy="disabled",
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# Define your experiment
|
|
171
|
+
spec = ExperimentSpec(
|
|
172
|
+
goals=["Train model", "Evaluate"],
|
|
173
|
+
metrics=["accuracy"],
|
|
174
|
+
tasks=[...],
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# Build and execute plan
|
|
178
|
+
app = Openrat({"executor": "docker", "docker_image": "python:3.11"})
|
|
179
|
+
plan = app.build_plan(spec, session)
|
|
180
|
+
artifact = app.execute_plan(plan, session, tools={...})
|
|
181
|
+
|
|
182
|
+
# Access results
|
|
183
|
+
print(artifact.status)
|
|
184
|
+
print(artifact.governance_report())
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### 3. LLM Agent Loop (Chat Interface)
|
|
188
|
+
|
|
189
|
+
Let a language model decide which experiments to run and interpret results:
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from openrat import Openrat, Message
|
|
193
|
+
import os
|
|
194
|
+
|
|
195
|
+
app = Openrat({
|
|
196
|
+
"executor": "docker",
|
|
197
|
+
"docker_image": "python:3.11",
|
|
198
|
+
"provider": "openai_compatible",
|
|
199
|
+
"base_url": "https://api.openai.com/v1",
|
|
200
|
+
"api_key": os.environ["OPENAI_API_KEY"],
|
|
201
|
+
"model_name": "gpt-4o",
|
|
202
|
+
})
|
|
203
|
+
|
|
204
|
+
# Chat with the model; it can call tools to run experiments
|
|
205
|
+
messages = [
|
|
206
|
+
Message(role="system", content="You are a research assistant."),
|
|
207
|
+
Message(role="user", content="Run experiments/train.py and summarize the results."),
|
|
208
|
+
]
|
|
209
|
+
|
|
210
|
+
response = app.chat(messages, max_turns=5)
|
|
211
|
+
print(response.content)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
See `examples/chat_agent.py` for a complete example.
|
|
215
|
+
|
|
216
|
+
### 4. Custom Tools (Advanced)
|
|
217
|
+
|
|
218
|
+
Register custom functions that the LLM can call:
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from openrat import Openrat
|
|
222
|
+
|
|
223
|
+
def read_metrics(arguments: dict) -> dict:
|
|
224
|
+
"""Custom tool callable by the LLM."""
|
|
225
|
+
metric = arguments.get("metric", "accuracy")
|
|
226
|
+
# Implement your metric reading logic
|
|
227
|
+
return {"metric": metric, "value": 0.95}
|
|
228
|
+
|
|
229
|
+
app = Openrat({
|
|
230
|
+
"provider": "openai_compatible",
|
|
231
|
+
"api_key": "...",
|
|
232
|
+
"model_name": "...",
|
|
233
|
+
"autonomy": 3,
|
|
234
|
+
"user_approvals": {"host.exec"},
|
|
235
|
+
})
|
|
236
|
+
|
|
237
|
+
# Untrusted callable tools require explicit host.exec opt-in
|
|
238
|
+
app.tool_registry.register("read_metrics", read_metrics, capability="host.exec")
|
|
239
|
+
|
|
240
|
+
# Now the LLM can call this tool
|
|
241
|
+
response = app.chat("Check the accuracy metric for me.")
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
See `examples/custom_tool.py` for a complete example.
|
|
245
|
+
|
|
246
|
+
## Session & Governance
|
|
247
|
+
|
|
248
|
+
Every execution runs within a `Session` that defines:
|
|
249
|
+
|
|
250
|
+
- **Autonomy level** — what the agent can do (observe, modify params, apply fixes, edit code)
|
|
251
|
+
- **Patch policy** — whether patches are proposed (disabled) or auto-applied
|
|
252
|
+
- **Approval scope** — which capabilities require explicit approval
|
|
253
|
+
|
|
254
|
+
All governance decisions are logged in an immutable audit trail, captured in the final `Artifact`:
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
artifact.governance_report()
|
|
258
|
+
# {
|
|
259
|
+
# "session_id": "...",
|
|
260
|
+
# "autonomy": 0, # Level 0 = observe only
|
|
261
|
+
# "used_capabilities": ["observe"],
|
|
262
|
+
# "blocked_capabilities": [],
|
|
263
|
+
# "patches_proposed": [],
|
|
264
|
+
# "events": [...] # Full audit trail
|
|
265
|
+
# }
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Executor policy
|
|
269
|
+
|
|
270
|
+
All execution is routed through the `DockerExecutor`, which runs scripts in an
|
|
271
|
+
ephemeral container with `--network none`, `--security-opt no-new-privileges`,
|
|
272
|
+
`--cap-drop=ALL`, `--read-only`, `--tmpfs /tmp`, `--pids-limit 100`, and
|
|
273
|
+
bounded memory/CPU limits.
|
|
274
|
+
|
|
275
|
+
Openrat is safe-by-default for execution limits:
|
|
276
|
+
|
|
277
|
+
- default timeout: `300s`
|
|
278
|
+
- max timeout: `3600s`
|
|
279
|
+
- default memory: `512m` (max `4g`)
|
|
280
|
+
- default CPU: `1.0` (max `4.0`)
|
|
281
|
+
|
|
282
|
+
Unbounded limits are blocked unless explicitly enabled by user config
|
|
283
|
+
(`allow_unbounded_limits=True`).
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
from openrat.executors import set_executor_policy
|
|
287
|
+
|
|
288
|
+
# Both modes register the DockerExecutor (the only available backend)
|
|
289
|
+
set_executor_policy("production") # explicit
|
|
290
|
+
set_executor_policy("auto") # default
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
## Roadmap
|
|
294
|
+
|
|
295
|
+
Planned items for v0.2+ and v0.3 are tracked in `ROADMAP.md`.
|
openrat-0.1.0/README.md
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# openrat
|
|
2
|
+
Your personal AI lab rat
|
|
3
|
+
|
|
4
|
+
Openrat is a research‑first, privacy‑preserving experiment agent designed to run, debug, chain, schedule, and report computational experiments while you go about your day.
|
|
5
|
+
|
|
6
|
+
It is built for researchers and research institutions who want automation without losing control, reproducibility, or interpretability.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## What Openrat does
|
|
10
|
+
* Runs experiments in Docker with sandboxed execution
|
|
11
|
+
* Diagnoses failures and reports actionable diagnostics
|
|
12
|
+
* Chains and branches experiments based on results
|
|
13
|
+
* Safely applies bounded, auditable changes (e.g. configs, hyperparameters)
|
|
14
|
+
* Generates diagnostic artifacts (summaries, diffs, metrics, plots)
|
|
15
|
+
* Cluster / remote execution (coming soon)
|
|
16
|
+
* Optional email notifications (coming soon)
|
|
17
|
+
|
|
18
|
+
Openrat is editor‑agnostic (VS Code, Neovim, any IDE) and shell‑based by design.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
Install from PyPI:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install openrat
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
For local development:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install -e .
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## Research‑first by design
|
|
36
|
+
Openrat is not a generic AI coding agent.
|
|
37
|
+
|
|
38
|
+
* It does not rewrite large portions of your codebase
|
|
39
|
+
* It preserves research intent and reproducibility
|
|
40
|
+
* All actions are explicit, logged, and reversible
|
|
41
|
+
* A human remains in the loop whenever ambiguity or risk arises
|
|
42
|
+
|
|
43
|
+
For larger changes, Openrat proposes patches with explanations rather than applying them automatically.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
## Bounded autonomy (capability‑scoped)
|
|
47
|
+
Openrat uses graduated autonomy levels, enforced outside the model:
|
|
48
|
+
|
|
49
|
+
* Level 0 — Observe only: run, diagnose, report
|
|
50
|
+
* Level 1 — Parameter autonomy: modify configs and hyperparameters
|
|
51
|
+
* Level 2 — Runtime repair: apply minimal fixes for common runtime errors (with safeguards)
|
|
52
|
+
* Level 3 — Extended edits (opt‑in): larger patches within explicitly allowed scope
|
|
53
|
+
|
|
54
|
+
Openrat cannot increase its own autonomy. All permissions are user‑controlled and auditable.
|
|
55
|
+
|
|
56
|
+
Governance is configured via `Session`:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from openrat import Openrat, Session, AutonomyLevel
|
|
60
|
+
|
|
61
|
+
# Allow only observation (run, diagnose)
|
|
62
|
+
session = Session(
|
|
63
|
+
autonomy=AutonomyLevel.OBSERVE,
|
|
64
|
+
patch_policy="disabled"
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Build and execute plan within governance constraints
|
|
68
|
+
app = Openrat({"executor": "docker", "docker_image": "python:3.11"})
|
|
69
|
+
plan = app.build_plan(spec, session)
|
|
70
|
+
artifact = app.execute_plan(plan)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
## Experiment chaining & branching
|
|
75
|
+
Openrat can:
|
|
76
|
+
* Run experiments sequentially or in parallel
|
|
77
|
+
* Branch based on metrics, failures, or diagnostics
|
|
78
|
+
* Modify parameters or select follow‑up experiments conditionally
|
|
79
|
+
|
|
80
|
+
Example:
|
|
81
|
+
Run experiment B only if validation loss improves after experiment A.
|
|
82
|
+
Chaining logic can be defined via natural language or structured instruction files (.yaml, .json, .md) for maximum reliability.
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
## Model‑agnostic
|
|
86
|
+
* Supports local open‑source models or cloud models (GPT, Gemini, Claude)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
## Privacy & security
|
|
90
|
+
* Local runs
|
|
91
|
+
* No code, data, or experiments leave your machine unless you choose
|
|
92
|
+
* Remote control workflows (coming soon; see `ROADMAP.md`)
|
|
93
|
+
* Autonomy and permissions are enforced by policy, not by the model
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
## Development
|
|
97
|
+
|
|
98
|
+
Run tests:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pytest tests/ -q
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Set executor policy to production (default is `auto`):
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
EXECUTOR_POLICY=production pytest tests/
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
## Usage Guide
|
|
112
|
+
|
|
113
|
+
### 1. Quick Execution (Simplest)
|
|
114
|
+
|
|
115
|
+
Run a script and capture output:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from openrat import Openrat
|
|
119
|
+
|
|
120
|
+
app = Openrat({"executor": "docker", "docker_image": "python:3.11"})
|
|
121
|
+
result = app.run(
|
|
122
|
+
"experiments/train.py",
|
|
123
|
+
timeout=120,
|
|
124
|
+
isolate=True, # copies script to temp dir for safety
|
|
125
|
+
memory="1g",
|
|
126
|
+
cpus="2.0",
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
print(result["stdout"])
|
|
130
|
+
print(f"Exit code: {result['return_code']}")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
See `examples/run_experiment.py` for a complete example.
|
|
134
|
+
|
|
135
|
+
Examples require Docker to be running.
|
|
136
|
+
|
|
137
|
+
### 2. Framework Workflow (Recommended)
|
|
138
|
+
|
|
139
|
+
Build and execute an experiment plan with governance:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from openrat import Openrat, Session, AutonomyLevel, ExperimentSpec
|
|
143
|
+
|
|
144
|
+
# Create a session (defines autonomy and governance)
|
|
145
|
+
session = Session(
|
|
146
|
+
autonomy=AutonomyLevel.OBSERVE,
|
|
147
|
+
patch_policy="disabled",
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# Define your experiment
|
|
151
|
+
spec = ExperimentSpec(
|
|
152
|
+
goals=["Train model", "Evaluate"],
|
|
153
|
+
metrics=["accuracy"],
|
|
154
|
+
tasks=[...],
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
# Build and execute plan
|
|
158
|
+
app = Openrat({"executor": "docker", "docker_image": "python:3.11"})
|
|
159
|
+
plan = app.build_plan(spec, session)
|
|
160
|
+
artifact = app.execute_plan(plan, session, tools={...})
|
|
161
|
+
|
|
162
|
+
# Access results
|
|
163
|
+
print(artifact.status)
|
|
164
|
+
print(artifact.governance_report())
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### 3. LLM Agent Loop (Chat Interface)
|
|
168
|
+
|
|
169
|
+
Let a language model decide which experiments to run and interpret results:
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
from openrat import Openrat, Message
|
|
173
|
+
import os
|
|
174
|
+
|
|
175
|
+
app = Openrat({
|
|
176
|
+
"executor": "docker",
|
|
177
|
+
"docker_image": "python:3.11",
|
|
178
|
+
"provider": "openai_compatible",
|
|
179
|
+
"base_url": "https://api.openai.com/v1",
|
|
180
|
+
"api_key": os.environ["OPENAI_API_KEY"],
|
|
181
|
+
"model_name": "gpt-4o",
|
|
182
|
+
})
|
|
183
|
+
|
|
184
|
+
# Chat with the model; it can call tools to run experiments
|
|
185
|
+
messages = [
|
|
186
|
+
Message(role="system", content="You are a research assistant."),
|
|
187
|
+
Message(role="user", content="Run experiments/train.py and summarize the results."),
|
|
188
|
+
]
|
|
189
|
+
|
|
190
|
+
response = app.chat(messages, max_turns=5)
|
|
191
|
+
print(response.content)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
See `examples/chat_agent.py` for a complete example.
|
|
195
|
+
|
|
196
|
+
### 4. Custom Tools (Advanced)
|
|
197
|
+
|
|
198
|
+
Register custom functions that the LLM can call:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from openrat import Openrat
|
|
202
|
+
|
|
203
|
+
def read_metrics(arguments: dict) -> dict:
|
|
204
|
+
"""Custom tool callable by the LLM."""
|
|
205
|
+
metric = arguments.get("metric", "accuracy")
|
|
206
|
+
# Implement your metric reading logic
|
|
207
|
+
return {"metric": metric, "value": 0.95}
|
|
208
|
+
|
|
209
|
+
app = Openrat({
|
|
210
|
+
"provider": "openai_compatible",
|
|
211
|
+
"api_key": "...",
|
|
212
|
+
"model_name": "...",
|
|
213
|
+
"autonomy": 3,
|
|
214
|
+
"user_approvals": {"host.exec"},
|
|
215
|
+
})
|
|
216
|
+
|
|
217
|
+
# Untrusted callable tools require explicit host.exec opt-in
|
|
218
|
+
app.tool_registry.register("read_metrics", read_metrics, capability="host.exec")
|
|
219
|
+
|
|
220
|
+
# Now the LLM can call this tool
|
|
221
|
+
response = app.chat("Check the accuracy metric for me.")
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
See `examples/custom_tool.py` for a complete example.
|
|
225
|
+
|
|
226
|
+
## Session & Governance
|
|
227
|
+
|
|
228
|
+
Every execution runs within a `Session` that defines:
|
|
229
|
+
|
|
230
|
+
- **Autonomy level** — what the agent can do (observe, modify params, apply fixes, edit code)
|
|
231
|
+
- **Patch policy** — whether patches are proposed (disabled) or auto-applied
|
|
232
|
+
- **Approval scope** — which capabilities require explicit approval
|
|
233
|
+
|
|
234
|
+
All governance decisions are logged in an immutable audit trail, captured in the final `Artifact`:
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
artifact.governance_report()
|
|
238
|
+
# {
|
|
239
|
+
# "session_id": "...",
|
|
240
|
+
# "autonomy": 0, # Level 0 = observe only
|
|
241
|
+
# "used_capabilities": ["observe"],
|
|
242
|
+
# "blocked_capabilities": [],
|
|
243
|
+
# "patches_proposed": [],
|
|
244
|
+
# "events": [...] # Full audit trail
|
|
245
|
+
# }
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Executor policy
|
|
249
|
+
|
|
250
|
+
All execution is routed through the `DockerExecutor`, which runs scripts in an
|
|
251
|
+
ephemeral container with `--network none`, `--security-opt no-new-privileges`,
|
|
252
|
+
`--cap-drop=ALL`, `--read-only`, `--tmpfs /tmp`, `--pids-limit 100`, and
|
|
253
|
+
bounded memory/CPU limits.
|
|
254
|
+
|
|
255
|
+
Openrat is safe-by-default for execution limits:
|
|
256
|
+
|
|
257
|
+
- default timeout: `300s`
|
|
258
|
+
- max timeout: `3600s`
|
|
259
|
+
- default memory: `512m` (max `4g`)
|
|
260
|
+
- default CPU: `1.0` (max `4.0`)
|
|
261
|
+
|
|
262
|
+
Unbounded limits are blocked unless explicitly enabled by user config
|
|
263
|
+
(`allow_unbounded_limits=True`).
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
from openrat.executors import set_executor_policy
|
|
267
|
+
|
|
268
|
+
# Both modes register the DockerExecutor (the only available backend)
|
|
269
|
+
set_executor_policy("production") # explicit
|
|
270
|
+
set_executor_policy("auto") # default
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Roadmap
|
|
274
|
+
|
|
275
|
+
Planned items for v0.2+ and v0.3 are tracked in `ROADMAP.md`.
|
openrat-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "openrat"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "OpenRat: experiment orchestration and sandboxed execution framework"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [{ name = "OpenRat contributors" }]
|
|
11
|
+
license = "MIT"
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
|
+
dependencies = ["requests>=2.0"]
|
|
14
|
+
keywords = ["experiments", "orchestration", "automation", "research", "docker"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Operating System :: OS Independent",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.scripts]
|
|
26
|
+
openrat = "ui.cli:main"
|
|
27
|
+
|
|
28
|
+
[tool.setuptools]
|
|
29
|
+
package-dir = {"" = "src"}
|
|
30
|
+
|
|
31
|
+
[tool.setuptools.packages.find]
|
|
32
|
+
where = ["src"]
|
|
33
|
+
include = ["openrat*", "ui*"]
|
openrat-0.1.0/setup.cfg
ADDED
|
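(setup.cfg hunk content is missing from this extract; the 54-line hunk below is src/openrat/__init__.py)
openrat-0.1.0/src/openrat/__init__.py
ADDED
|
@@ -0,0 +1,54 @@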
|
|
|
1
|
+
"""OpenRat: Experiment execution framework with planning & autonomy.

Primary Public API:
    Openrat: Framework workflow (session → spec → plan → artifact)

Data Types & Governance:
    ExperimentSpec: Experiment intent definition
    Session, AutonomyLevel: Execution authority & governance
    Artifact: Execution results

Extension Points:
    BaseTool: Tool implementation framework
    ToolRegistry: Named tool registry

For workflow documentation, see docs/AGENTS.md.
For executor configuration, see docs/EXECUTOR_POLICY.md.
"""

# The docstring must be the first statement in the file to become the
# module's ``__doc__``; placed after an import it is just a discarded string.
from importlib import import_module
|
|
20
|
+
|
|
21
|
+
# Public names of the package. Each entry has a matching key in ``_EXPORTS``
# below, which is where the name is actually loaded from.
# NOTE(review): the module docstring also lists ``ToolRegistry`` as an
# extension point, but it is not exported here — confirm whether that is
# intentional.
__all__ = [
    "Openrat",
    "BaseTool",
    "Artifact",
    "ExperimentSpec",
    "Session",
    "Message",
    "ModelResponse",
    "AutonomyLevel",
]


# Maps each public name to a ``(module path, attribute name)`` pair. The
# module-level ``__getattr__`` imports the module and reads the attribute the
# first time the name is requested.
_EXPORTS = {
    "Openrat": ("openrat.api.openrat", "Openrat"),
    "BaseTool": ("openrat.tools.base", "BaseTool"),
    "Artifact": ("openrat.core.artifact", "Artifact"),
    "ExperimentSpec": ("openrat.core.experiment_spec", "ExperimentSpec"),
    "Session": ("openrat.core.session.session", "Session"),
    "Message": ("openrat.model.types", "Message"),
    "ModelResponse": ("openrat.model.types", "ModelResponse"),
    "AutonomyLevel": ("openrat.core.governance.autonomy", "AutonomyLevel"),
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def __getattr__(name):
|
|
46
|
+
if name not in _EXPORTS:
|
|
47
|
+
raise AttributeError(name)
|
|
48
|
+
|
|
49
|
+
module_name, symbol = _EXPORTS[name]
|
|
50
|
+
module = import_module(module_name)
|
|
51
|
+
value = getattr(module, symbol)
|
|
52
|
+
globals()[name] = value
|
|
53
|
+
return value
|
|
54
|
+
|