jaros 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. jaros-0.1.0/LICENSE +21 -0
  2. jaros-0.1.0/PKG-INFO +358 -0
  3. jaros-0.1.0/README.md +333 -0
  4. jaros-0.1.0/jaros/__init__.py +6 -0
  5. jaros-0.1.0/jaros/cli.py +537 -0
  6. jaros-0.1.0/jaros/comms/__init__.py +22 -0
  7. jaros-0.1.0/jaros/comms/fs.py +137 -0
  8. jaros-0.1.0/jaros/comms/queue.py +113 -0
  9. jaros-0.1.0/jaros/core/__init__.py +24 -0
  10. jaros-0.1.0/jaros/core/decision.py +56 -0
  11. jaros-0.1.0/jaros/core/decision_gate.py +108 -0
  12. jaros-0.1.0/jaros/core/json_value.py +69 -0
  13. jaros-0.1.0/jaros/core/reasoning_boundary.py +27 -0
  14. jaros-0.1.0/jaros/daemon.py +527 -0
  15. jaros-0.1.0/jaros/eval/__init__.py +24 -0
  16. jaros-0.1.0/jaros/eval/runner.py +154 -0
  17. jaros-0.1.0/jaros/eval/suite.py +81 -0
  18. jaros-0.1.0/jaros/execution/__init__.py +16 -0
  19. jaros-0.1.0/jaros/execution/determinism.py +52 -0
  20. jaros-0.1.0/jaros/execution/executor.py +115 -0
  21. jaros-0.1.0/jaros/execution/handlers.py +88 -0
  22. jaros-0.1.0/jaros/execution/tools.py +101 -0
  23. jaros-0.1.0/jaros/harness/__init__.py +64 -0
  24. jaros-0.1.0/jaros/harness/capabilities.py +272 -0
  25. jaros-0.1.0/jaros/harness/harness.py +282 -0
  26. jaros-0.1.0/jaros/harness/rules.py +68 -0
  27. jaros-0.1.0/jaros/llm/__init__.py +20 -0
  28. jaros-0.1.0/jaros/llm/adapters/__init__.py +12 -0
  29. jaros-0.1.0/jaros/llm/adapters/default_adapter.py +29 -0
  30. jaros-0.1.0/jaros/llm/adapters/ollama_adapter.py +36 -0
  31. jaros-0.1.0/jaros/llm/adapters/uppercase_adapter.py +29 -0
  32. jaros-0.1.0/jaros/llm/client.py +83 -0
  33. jaros-0.1.0/jaros/llm/config.py +56 -0
  34. jaros-0.1.0/jaros/llm/factory.py +86 -0
  35. jaros-0.1.0/jaros/registry.py +163 -0
  36. jaros-0.1.0/jaros/runtime/__init__.py +23 -0
  37. jaros-0.1.0/jaros/runtime/agent_pool.py +173 -0
  38. jaros-0.1.0/jaros/runtime/agent_thread.py +122 -0
  39. jaros-0.1.0/jaros/runtime/lifecycle.py +51 -0
  40. jaros-0.1.0/jaros/scheduling/__init__.py +22 -0
  41. jaros-0.1.0/jaros/scheduling/cron.py +74 -0
  42. jaros-0.1.0/jaros/scheduling/scheduler.py +195 -0
  43. jaros-0.1.0/jaros/state/__init__.py +100 -0
  44. jaros-0.1.0/jaros/state/coordination.py +123 -0
  45. jaros-0.1.0/jaros/state/decision_log.py +346 -0
  46. jaros-0.1.0/jaros/state/log.py +175 -0
  47. jaros-0.1.0/jaros/state/machine.py +113 -0
  48. jaros-0.1.0/jaros/state/model.py +88 -0
  49. jaros-0.1.0/jaros/state/recover.py +103 -0
  50. jaros-0.1.0/jaros/state/swarm.py +243 -0
  51. jaros-0.1.0/jaros.egg-info/PKG-INFO +358 -0
  52. jaros-0.1.0/jaros.egg-info/SOURCES.txt +90 -0
  53. jaros-0.1.0/jaros.egg-info/dependency_links.txt +1 -0
  54. jaros-0.1.0/jaros.egg-info/entry_points.txt +2 -0
  55. jaros-0.1.0/jaros.egg-info/requires.txt +4 -0
  56. jaros-0.1.0/jaros.egg-info/top_level.txt +1 -0
  57. jaros-0.1.0/pyproject.toml +40 -0
  58. jaros-0.1.0/setup.cfg +4 -0
  59. jaros-0.1.0/tests/test_agent_pool.py +134 -0
  60. jaros-0.1.0/tests/test_agent_thread.py +89 -0
  61. jaros-0.1.0/tests/test_audit.py +63 -0
  62. jaros-0.1.0/tests/test_capabilities.py +122 -0
  63. jaros-0.1.0/tests/test_check_determinism.py +68 -0
  64. jaros-0.1.0/tests/test_check_no_server.py +68 -0
  65. jaros-0.1.0/tests/test_check_zero_infra.py +68 -0
  66. jaros-0.1.0/tests/test_cli.py +230 -0
  67. jaros-0.1.0/tests/test_cli_replay.py +108 -0
  68. jaros-0.1.0/tests/test_coordination.py +75 -0
  69. jaros-0.1.0/tests/test_cron.py +60 -0
  70. jaros-0.1.0/tests/test_daemon.py +169 -0
  71. jaros-0.1.0/tests/test_daemon_scheduling.py +56 -0
  72. jaros-0.1.0/tests/test_decision.py +52 -0
  73. jaros-0.1.0/tests/test_decision_gate.py +109 -0
  74. jaros-0.1.0/tests/test_decision_log.py +125 -0
  75. jaros-0.1.0/tests/test_determinism.py +59 -0
  76. jaros-0.1.0/tests/test_distributed.py +95 -0
  77. jaros-0.1.0/tests/test_dynamic_tools.py +114 -0
  78. jaros-0.1.0/tests/test_eval.py +126 -0
  79. jaros-0.1.0/tests/test_executor.py +113 -0
  80. jaros-0.1.0/tests/test_fs.py +76 -0
  81. jaros-0.1.0/tests/test_harness.py +159 -0
  82. jaros-0.1.0/tests/test_llm.py +103 -0
  83. jaros-0.1.0/tests/test_queue.py +64 -0
  84. jaros-0.1.0/tests/test_readonly_agents.py +103 -0
  85. jaros-0.1.0/tests/test_recover.py +70 -0
  86. jaros-0.1.0/tests/test_registry.py +122 -0
  87. jaros-0.1.0/tests/test_rules.py +41 -0
  88. jaros-0.1.0/tests/test_scheduler.py +106 -0
  89. jaros-0.1.0/tests/test_state_log.py +74 -0
  90. jaros-0.1.0/tests/test_state_machine.py +89 -0
  91. jaros-0.1.0/tests/test_state_model.py +56 -0
  92. jaros-0.1.0/tests/test_swarm.py +285 -0
jaros-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jared Pilcher
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
jaros-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,358 @@
1
+ Metadata-Version: 2.4
2
+ Name: jaros
3
+ Version: 0.1.0
4
+ Summary: A zero-infrastructure runtime that makes agent systems reproducible, testable, and capability-safe — a durable, replayable state machine orchestrating AI agents as lightweight threads.
5
+ Author: Jared Pilcher
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/jaredpilcher/Jaros
8
+ Project-URL: Repository, https://github.com/jaredpilcher/Jaros
9
+ Project-URL: Issues, https://github.com/jaredpilcher/Jaros/issues
10
+ Keywords: agents,llm,state-machine,orchestration,ai
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=7; extra == "dev"
23
+ Requires-Dist: pillow>=10; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # Jaros
27
+
28
+ > A zero-infrastructure runtime that makes agent systems **reproducible, testable, and capability-safe by construction** — a durable, replayable state machine that orchestrates AI agents as **lightweight computing threads**, not bloated microservices.
29
+
30
+ ![Jaros OS demo: boot, run a built-in agent + two agents + a custom tool, zero infra](docs/demo.gif)
31
+
32
+ Jaros is the runtime you reach for **the day your agent leaves the demo** — when non-determinism has made it impossible to reproduce, and ambient power has made it unsafe to ship. It delivers that without a server, a database, or a broker: just **files and threads**.
33
+
34
+ It works by decoupling non-deterministic AI reasoning from deterministic system execution. The LLM is an **interchangeable application** that may only *propose* inert, serializable `Decision` data; a deterministic execution plane decides whether and how each decision runs — and may reject it. This is the system's [Prime Directive](.jarify/PRIME-001/intent.md); every part of the codebase exists to serve it.
35
+
36
+ ---
37
+
38
+ ## What sets Jaros apart
39
+
40
+ Most agent frameworks let the model drive: a tool call *is* a side effect. That's fine for a demo and a liability in production. Jaros inverts it — the model writes recommendations on slips of paper; a deterministic clerk decides what actually happens. These properties fall out of that design, and they're the whole point:
41
+
42
+ ### 🐝 Reproducible & accountable swarms
43
+
44
+ The field is moving from one super-agent to **swarms of many small, specialized agents** — and at that scale two failures dominate: you can't **reproduce** what the swarm did, and you can't say **which agent caused it**. Jaros solves both by construction. Every accepted `Decision` is recorded — in one ordered, **hash-chained** log, **tagged with its source agent** — so replaying the log re-executes the *whole hive* to **byte-identical state with zero model calls**, and any failure is **attributed to the exact agent and decision** that produced it. A single agent is just the swarm of one.
45
+
46
+ ![A swarm replay: reconstruct the whole hive byte-identically with no model call, and attribute the bad handoff to the exact agent](docs/swarm-replay.png)
47
+
48
+ One command replays a hive and names the culprit; the console shows the same per-agent breakdown and attribution beside the durable decision log:
49
+
50
+ ![The console Reproducibility page replaying a swarm — per-agent provenance and the failure attributed to the exact agent/decision](console/docs/screenshots/swarm-reproducibility.png)
51
+
52
+ And the agents really **let the model decide**: the LLM's verdict drives the decision's outcome (accept → `DONE`, reject → `FAILED`), yet replay reconstructs whatever the model chose with **zero model calls** — the model decides *what*, the deterministic executor does *how*:
53
+
54
+ ![A real small model (gemma2:2b) on jaros: the model rejects spam to FAILED and accepts a real request to DONE, then jaros replay reconstructs both byte-identically with 0 model calls](docs/swarm-llm-decisions.png)
55
+
56
+ Run it yourself: [`examples/swarm/`](examples/swarm/) (a support-triage hive whose planner/worker/reviewer decisions are model-driven, with a seeded bad handoff) and `python tests/integration/run_swarm_replay_demo.py` (the same, end-to-end in Docker). Realized by [EXT-015](.jarify/EXT-015/requirements.md).
57
+
58
+ ### 🔁 Reproducible by replay
59
+
60
+ The only non-determinism in a run is the model's output, captured as inert `Decision` data and recorded to a durable log **before** any effect is observable. Replaying that log through the deterministic executor reconstructs the run to **byte-identical state — with no model call.** Crash recovery is just a special case of replay.
61
+
62
+ That means a misbehaving agent run is debuggable like any other software: **pin the decision log, replay, reproduce, fix, re-run identically.** No "it only happens sometimes."
63
+
64
+ The guarantee rests on one precondition — **executor handlers must be deterministic** functions of the decision and state — and Jaros doesn't just assume it: replay runs twice into isolated state and **flags any non-deterministic handler** (the console shows `deterministic` next to `byte-identical`; `jaros.execution.replays_agree` checks it in CI). Non-determinism that belongs in a run — a clock read, a random choice, external I/O — goes *outside* the handler or is captured as a decision, which is itself recorded and replayed.
65
+
66
+ ![Reproducibility by replay: re-execute recorded decisions to byte-identical state, no model call](docs/replay.gif)
67
+
68
+ In the [web console](console/) it's one click — browse the durable decision log and replay it, with the reconstructed state, the model-call count, and a byte-identical check shown inline:
69
+
70
+ ![The console's Reproducibility page — the decision log and a replay reconstructing DONE with 0 model calls, byte-identical](console/docs/screenshots/reproducibility.png)
71
+
72
+ ### 🔒 Capability-safe by construction
73
+
74
+ Agents hold only the scoped handles the harness grants them — no ambient access to the file system, queues, or network. A bug or a bad decision **cannot reach what it was never given**, and every mediated action leaves an auditable record. This is structural least-privilege for blast-radius control (host-level isolation against hostile code stays the host's job — process, container, VPC).
75
+
76
+ The console makes that legible — the mediation rules, the role→capability bundles, and the refusal/failure audit, all in one view:
77
+
78
+ ![The console's Harness page — mediation rules, role capability bundles, and the refusal audit](console/docs/screenshots/harness.png)
79
+
80
+ ### 📦 Zero-infrastructure
81
+
82
+ No server, no database, no broker. The whole control plane is the local/shared file system; agents are threads in one process. It runs anywhere files work, and a `check_zero_infra` guardrail fails the build if any code so much as imports a database driver or message broker.
83
+
84
+ ### 🎓 The graduation layer
85
+
86
+ Jaros sits between a prototype (LangGraph, CrewAI) and heavyweight durable-execution infrastructure (Temporal, Dapr):
87
+
88
+ | | Prototype frameworks | **Jaros** | Durable-execution infra |
89
+ | --- | --- | --- | --- |
90
+ | Stand-up cost | none | **none** — files + threads | servers, brokers, databases |
91
+ | Reproducibility | best-effort | **record-and-replay to byte-identical state** | workflow replay (heavy) |
92
+ | Safety model | ambient tool access | **capability-scoped, default-deny** | varies |
93
+ | Model coupling | often hard-wired | **one interface, config swap** | varies |
94
+ | Distribution | single process | **single-node-first, bounded multi-node over the FS** | cluster-scale |
95
+ | Reach for it… | the first ten lines | **the day you ship** | large orgs at cluster scale |
96
+
97
+ It is deliberately **not**: a hardened security sandbox, a cluster-scale distributed system, an agent-authorization/governance gateway, a hello-world prototyping framework, or "unbreakable." It claims only what the architecture delivers — durable, crash-recoverable, replayable, and capability-bounded. (See the [Prime Directive](.jarify/PRIME-001/intent.md) for the full "is / is not.")
98
+
99
+ ---
100
+
101
+ ## Why agent builders use it
102
+
103
+ - **Ship runs you can reproduce.** The decision log turns a flaky prod incident into a deterministic replay you can step through.
104
+ - **Contain the blast radius.** Least-privilege handles mean a misbehaving agent touches only what you granted it — and you can audit every action.
105
+ - **Stand up nothing.** No infra to provision; `pip install` and run, or one Docker container per node.
106
+ - **Swap models freely.** The LLM lives behind one `LlmClient` interface; change provider/model by config, with zero harness changes.
107
+ - **Extend at runtime.** Drop an agent into `agents/` or a custom tool into `tools/` and the daemon loads it on the next tick — no restart, no core edits.
108
+
109
+ ---
110
+
111
+ ## Quickstart
112
+
113
+ For the full day-one-to-production path (first agent → schedule → eval → replay →
114
+ console → distributed Docker), see **[docs/getting-started.md](docs/getting-started.md)**.
115
+
116
+ The whole loop from the CLI — submit work, check status, replay it byte-identically, and run the eval suite (real output, nothing faked):
117
+
118
+ ![A real Jaros CLI session: submit jobs, status, replay --json (0 model calls, byte-identical), and a green eval suite](docs/cli.png)
119
+
120
+ ```bash
121
+ pip install -e ".[dev]"
122
+ ```
123
+
124
+ Stand up the OS on a data directory, then drive it from another shell — work enters **only** through the shared file system:
125
+
126
+ ```bash
127
+ # stage the example agents into the shared volume (see examples/)
128
+ mkdir -p .jaros-data/agents .jaros-data/tools
129
+ cp examples/agents/*.py .jaros-data/agents/
130
+ cp examples/tools/*.py .jaros-data/tools/
131
+
132
+ # boot the long-running daemon (the OS)
133
+ jaros serve --data-dir .jaros-data
134
+ ```
135
+
136
+ ```bash
137
+ # from another terminal: submit work + watch results, all over the shared FS
138
+ jaros submit advance --input '{}' --data-dir .jaros-data
139
+ jaros submit echo --input '{"msg": "hello"}' --data-dir .jaros-data
140
+ jaros submit greeter --input '{"name": "Jaros"}' --data-dir .jaros-data
141
+ jaros watch --data-dir .jaros-data
142
+ ```
143
+
144
+ Then the payoff — reconstruct the entire run from the recorded decisions, with **no model call**:
145
+
146
+ ```bash
147
+ jaros replay --data-dir .jaros-data
148
+ # replayed 3 recorded decisions (3 applied) - model calls: 0
149
+ # reconstructed state : DONE
150
+ # byte-identical : yes
151
+ # reproducible: the recorded decisions reconstruct the run exactly, with no model call.
152
+ ```
153
+
154
+ Each accepted decision is recorded to `.jaros-data/state/decisions.log`, so the whole run is reproducible by replay. See **[`examples/`](examples/)** for the agents used above, and run the end-to-end smoke tests:
155
+
156
+ ```bash
157
+ python tests/integration/run_local_demo.py # local stand-up (no Docker)
158
+ python tests/integration/run_container_demo.py # full Docker container run
159
+ ```
160
+
161
+ ---
162
+
163
+ ## Web console
164
+
165
+ A TypeScript + React administrative and monitoring interface for a running Jaros
166
+ OS lives in **[`console/`](console/)** — submit jobs, install agents and
167
+ custom tools, watch live status, browse the durable decision log, and **replay
168
+ it to byte-identical state** from the browser. It's a host-side companion (a thin
169
+ file-system bridge + SPA); the Jaros node itself stays serverless.
170
+
171
+ The **Overview** is a glanceable NOC view — live machine state, throughput, the agent pool, and the no-server/database/broker profile, all streamed over the file system:
172
+
173
+ ![Jaros Console — Overview](console/docs/screenshots/overview.png)
174
+
175
+ It reflects the *real* runtime, not a hard-coded copy: the **State Machine** view introspects the model straight from `jaros` and renders the live durable transition log beside it.
176
+
177
+ ![Jaros Console — State Machine: the introspected model and the live transition log](console/docs/screenshots/state-machine.png)
178
+
179
+ ```bash
180
+ cd console && npm install
181
+ JAROS_DATA_DIR=/tmp/jaros-demo npm run dev # then open http://localhost:5500
182
+ ```
183
+
184
+ A brief first-run tour, a live get-started checklist, per-page intros, hover
185
+ tooltips, and an in-app **Help & Docs** page (pictures + a copy-pasteable CLI
186
+ quickstart) make it easy to know where to start and what to do next:
187
+
188
+ ![Jaros Console — the first-run tour that guides new operators through the core loop](console/docs/screenshots/tour.png)
189
+
190
+ The **Overview** greets a new operator with a live get-started checklist that lights up each step as it's done, and every screen documents itself with intros and hover tooltips:
191
+
192
+ ![The get-started checklist on the Overview — step 1 done, "submit your first job" highlighted as the next action](console/docs/screenshots/get-started.png)
193
+
194
+ The full page gallery and a walkthrough of every page (with pictures) live in
195
+ **[docs/console.md](docs/console.md)** and the [console README](console/README.md#screenshots).
196
+
197
+ ---
198
+
199
+ ## How it works
200
+
201
+ ![Jaros architecture: Reasoning Plane proposes decisions; the harness validates and the deterministic Execution Plane runs them](docs/architecture.png)
202
+
203
+ Jaros is split into two planes that never merge:
204
+
205
+ - **Reasoning Plane** (non-deterministic): agents think and propose `Decision` data. The LLM lives here as a pluggable application.
206
+ - **Execution Plane** (deterministic): the durable, replayable state machine and its harness validate and execute decisions, persist them, and route all communication.
207
+
208
+ The only channels between an agent and the rest of the system are **rigid queues** and the **shared file system**. There are no direct agent-to-agent calls.
209
+
210
+ ### The LLM decides *what*, not *how*
211
+
212
+ A frequent misreading is "the LLM can't make decisions." It can — that *is* the reasoning. The precise rule is:
213
+
214
+ > **The LLM decides WHAT to propose. The deterministic system decides HOW — and whether — it runs.**
215
+
216
+ An agent's reasoning may only emit an inert, serializable `Decision`. A deterministic validation gate stands between that data and any action; the executor — never the model — drives execution.
217
+
218
+ ```text
219
+ typical agent: LLM ── tool call ──► side effect happens (LLM drives execution)
220
+
221
+ jaros: LLM ── Decision (data) ──► [gate] ──► executor (executor drives execution)
222
+
223
+                                     └─► may REJECT; LLM has no say
224
+ ```
225
+
226
+ Because the model holds no control, recording its outputs and replaying them through the executor reproduces the run exactly — and the model itself is swappable with zero harness changes.
227
+
228
+ ---
229
+
230
+ ## Build an agent
231
+
232
+ An agent is a `ReasoningBoundary`: **data in → `Decision` data out**, no side effects, no handles. Drop the module into the shared-FS `agents/` folder and the daemon registers it at runtime.
233
+
234
+ ```python
235
+ import uuid
236
+ from jaros.core import create_decision
237
+
238
+ KIND = "greeter" # the agent kind the daemon registers
239
+
240
+ class GreeterBoundary:
241
+ def __init__(self, llm):
242
+ self._llm = llm
243
+
244
+ def decide(self, context) -> list:
245
+ name = context.get("name", "world") if isinstance(context, dict) else "world"
246
+ # Propose an inert decision; the executor (not the agent) acts on it.
247
+ return [create_decision(
248
+ id=f"greet-{uuid.uuid4().hex}",
249
+ source="greeter",
250
+ kind="advance", # built-in handler drives the state machine
251
+ payload={"events": ["start", "complete"], "note": f"hello {name}"},
252
+ )]
253
+
254
+ def build(llm): # agent factory the daemon calls
255
+ return GreeterBoundary(llm)
256
+ ```
257
+
258
+ To bound an agent, restrict its capability grant at spawn time — a *role* is just a named bundle of capabilities:
259
+
260
+ ```python
261
+ from jaros.harness.capabilities import GrantSpec
262
+
263
+ # Grant ONLY file-write inside the layout; the agent can reach nothing else.
264
+ ctx = harness.spawn("greeter", GrantSpec(role="FsWriteRole", fs=shared_fs))
265
+ ```
266
+
267
+ A *custom tool* extends what the system can *do*: drop a class exposing `NAME`, `validate()`, and `execute()` into `tools/`, and an agent proposes a decision of that `kind`. See **[`examples/tools/greet_tool.py`](examples/tools/greet_tool.py)** and the full guide in **[docs/building-agents.md](docs/building-agents.md)**.
268
+
269
+ ### Building with an AI agent
270
+
271
+ Jaros is made to be extended by coding agents. Point any AI coding agent at **[`AGENTS.md`](AGENTS.md)** → **[`agent-kit/`](agent-kit/)** and it has the whole project in one folder: the mental model, a skill for each artifact (agent, tool, eval, schedule), an accurate API reference, and runnable templates that pass `jaros eval` unmodified. It can author new Jaros agents and tools and verify them on its own.
272
+
273
+ ---
274
+
275
+ ## Run on Docker
276
+
277
+ The container is the boundary for the **whole Jaros node**; agents run as threads *inside* it — never one container per agent.
278
+
279
+ ```bash
280
+ docker build -t jaros .
281
+
282
+ # one long-running daemon = one node; work arrives over the mounted volume
283
+ docker run -d --name jaros_os -v ${PWD}/.jaros-data:/data jaros
284
+
285
+ # submit from the host, purely over the shared FS
286
+ jaros submit advance --input '{}' --data-dir .jaros-data
287
+ ```
288
+
289
+ ### Scheduling across containers (single-node-first)
290
+
291
+ Because the control plane is files only, scheduling is decoupled and needs no broker:
292
+
293
+ - **Host-side cron** — any scheduler (`cron`, Kubernetes `CronJob`, Task Scheduler) can `jaros submit`.
294
+ - **Multi-container ingest** — run several daemons on the same shared dir; they coordinate over the file system. Each job is claimed by an atomic `inbox/<id>.json → claimed/<id>.json` rename: **exactly-once in the happy path** (one node processes it, siblings skip it). The claim is a **lease** the owner heartbeats; if a node crashes, its lease expires and a live sibling reclaims the job to the inbox — so under failure the contract is **at-least-once** (agents are idempotent — the read-only ones trivially so). Bounded multi-node, no broker or consensus service.
295
+
296
+ ---
297
+
298
+ ## Architecture guardrails
299
+
300
+ Structural constraints are enforced by automated checks (run with `pytest`), so the design can't silently rot:
301
+
302
+ | Check | Enforces |
303
+ | --- | --- |
304
+ | `scripts/check_planes.py` | No Execution-Plane module imports reasoning/LLM code |
305
+ | `scripts/check_no_server.py` | No agent/runtime code opens a listening socket or HTTP server |
306
+ | `scripts/check_comms.py` | No direct agent-to-agent reference, RPC, or network call |
307
+ | `scripts/check_zero_infra.py` | No import of a database driver, message broker, or external server framework |
308
+ | `scripts/check_determinism.py` | The core replay path is deterministic — replaying the same decisions agrees every time (the precondition for byte-identical replay) |
309
+
310
+ ---
311
+
312
+ ## Subsystems
313
+
314
+ | Subsystem | Spec | What it owns |
315
+ | --- | --- | --- |
316
+ | Reasoning / Execution Boundary | [EXT-001](.jarify/EXT-001/requirements.md) | Inert `Decision` contract, reasoning boundary, validation gate, executor |
317
+ | Durable, Replayable State Machine | [EXT-002](.jarify/EXT-002/requirements.md) | Explicit transitions, durable decision log, deterministic replay, crash recovery, bounded multi-node coordination |
318
+ | Agent Thread Runtime | [EXT-003](.jarify/EXT-003/requirements.md) | Cheap agent lifecycle, bounded pool, fault containment |
319
+ | Interchangeable LLM Adapter | [EXT-004](.jarify/EXT-004/requirements.md) | Single `LlmClient` interface, pluggable adapters, config-only swap |
320
+ | Architectural Harness | [EXT-005](.jarify/EXT-005/requirements.md) | Mediated actions, default-deny rules, capability-scoped handles |
321
+ | Communication Fabric | [EXT-006](.jarify/EXT-006/requirements.md) | Rigid typed queues, shared FS layout, exclusivity enforcement |
322
+ | Runtime Daemon (OS Boot) | [EXT-007](.jarify/EXT-007/requirements.md) | Boot, file monitoring, atomic inbox ingestion, zero-infra boot |
323
+ | Host Control CLI | [EXT-008](.jarify/EXT-008/requirements.md) | Command-line management, atomic job submission, agent installer |
324
+ | Dynamic Custom Tools | [EXT-009](.jarify/EXT-009/requirements.md) | Runtime-loaded namespaced tools (`NAME`/`validate`/`execute`) |
325
+ | Admin & Monitoring Console | [EXT-010](.jarify/EXT-010/requirements.md) | Host-side TypeScript + React console: monitor, submit, install, replay |
326
+ | Native Agent Scheduling | [EXT-011](.jarify/EXT-011/requirements.md) | File-based cron + interval + one-shot scheduling, crash-safe, no external cron |
327
+ | Read-Only Agent Library | [EXT-012](.jarify/EXT-012/requirements.md) | Many drop-in read-only agents + tools (health, disk, inventory, text) — run concurrently |
328
+ | Agent Evaluation Framework | [EXT-013](.jarify/EXT-013/requirements.md) | Reproducible, declarative agent evals (`jaros eval`) — input → expected decision/result |
329
+
330
+ The full system-wide design lives in [`.jarify/PRIME-001/design.md`](.jarify/PRIME-001/design.md).
331
+
332
+ ---
333
+
334
+ ## Project layout
335
+
336
+ ```text
337
+ jaros/
338
+ core/ EXT-001 Decision, ReasoningBoundary, validation gate
339
+ execution/ EXT-001 deterministic executor + pluggable handlers; custom tools (EXT-009)
340
+ state/ EXT-002 model, machine, durable transition log, decision log + replay, recover, coordination
341
+ runtime/ EXT-003 AgentThread, AgentPool (lightweight threads)
342
+ llm/ EXT-004 LlmClient interface + pluggable adapters + factory
343
+ harness/ EXT-005 capabilities, rules, Harness (mediates all I/O)
344
+ comms/ EXT-006 Queue, SharedFileSystem
345
+ registry.py EXT-007 agent registration + agent loading
346
+ daemon.py EXT-007 runtime daemon (the OS boot engine)
347
+ cli.py EXT-008 Host Control CLI
348
+ examples/ drop-in example agents + a custom tool
349
+ scripts/ architecture checks (planes / no-server / comms / zero-infra / determinism)
350
+ tests/ unit + integration test suites
351
+ .jarify/ Jarify specifications (the source of intent)
352
+ ```
353
+
354
+ ---
355
+
356
+ ## Specification-driven with Jarify
357
+
358
+ Jaros is developed spec-first under `.jarify/`. The Prime Directive (`PRIME-001`) holds the system intent; each feature spec (`EXT-0xx`) decomposes one tenet into requirements, design, and tasks, with code traced back to requirements via `index.json`. The directive is the target: where the code lags it, the code changes — not the directive.