glassrail 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. glassrail-0.1.0/.gitignore +84 -0
  2. glassrail-0.1.0/CHANGELOG.md +337 -0
  3. glassrail-0.1.0/LICENSE +201 -0
  4. glassrail-0.1.0/PKG-INFO +355 -0
  5. glassrail-0.1.0/README.md +317 -0
  6. glassrail-0.1.0/pyproject.toml +180 -0
  7. glassrail-0.1.0/src/glassrail/__init__.py +12 -0
  8. glassrail-0.1.0/src/glassrail/channels/__init__.py +9 -0
  9. glassrail-0.1.0/src/glassrail/cli/__init__.py +350 -0
  10. glassrail-0.1.0/src/glassrail/config/__init__.py +47 -0
  11. glassrail-0.1.0/src/glassrail/config/prompts.py +345 -0
  12. glassrail-0.1.0/src/glassrail/config/settings.py +485 -0
  13. glassrail-0.1.0/src/glassrail/core/__init__.py +45 -0
  14. glassrail-0.1.0/src/glassrail/core/errors.py +27 -0
  15. glassrail-0.1.0/src/glassrail/core/execution.py +126 -0
  16. glassrail-0.1.0/src/glassrail/core/ids.py +19 -0
  17. glassrail-0.1.0/src/glassrail/core/plan.py +85 -0
  18. glassrail-0.1.0/src/glassrail/events/__init__.py +48 -0
  19. glassrail-0.1.0/src/glassrail/events/bus.py +96 -0
  20. glassrail-0.1.0/src/glassrail/events/types.py +151 -0
  21. glassrail-0.1.0/src/glassrail/executor/__init__.py +30 -0
  22. glassrail-0.1.0/src/glassrail/executor/context.py +64 -0
  23. glassrail-0.1.0/src/glassrail/executor/executor.py +942 -0
  24. glassrail-0.1.0/src/glassrail/executor/orchestrator.py +505 -0
  25. glassrail-0.1.0/src/glassrail/executor/tool_approval.py +83 -0
  26. glassrail-0.1.0/src/glassrail/gateways/__init__.py +5 -0
  27. glassrail-0.1.0/src/glassrail/gateways/acp/__init__.py +43 -0
  28. glassrail-0.1.0/src/glassrail/gateways/acp/mapping.py +108 -0
  29. glassrail-0.1.0/src/glassrail/gateways/acp/protocol.py +149 -0
  30. glassrail-0.1.0/src/glassrail/gateways/acp/server.py +560 -0
  31. glassrail-0.1.0/src/glassrail/gateways/acp/session.py +54 -0
  32. glassrail-0.1.0/src/glassrail/gateways/rest/__init__.py +16 -0
  33. glassrail-0.1.0/src/glassrail/gateways/rest/app.py +220 -0
  34. glassrail-0.1.0/src/glassrail/gateways/tui/__init__.py +14 -0
  35. glassrail-0.1.0/src/glassrail/gateways/tui/app.py +33 -0
  36. glassrail-0.1.0/src/glassrail/gateways/tui/client.py +51 -0
  37. glassrail-0.1.0/src/glassrail/gateways/tui/dag.py +463 -0
  38. glassrail-0.1.0/src/glassrail/gateways/tui/view.py +198 -0
  39. glassrail-0.1.0/src/glassrail/harness/__init__.py +22 -0
  40. glassrail-0.1.0/src/glassrail/harness/builtin.py +111 -0
  41. glassrail-0.1.0/src/glassrail/harness/integrations/__init__.py +34 -0
  42. glassrail-0.1.0/src/glassrail/harness/integrations/image.py +236 -0
  43. glassrail-0.1.0/src/glassrail/harness/integrations/web.py +308 -0
  44. glassrail-0.1.0/src/glassrail/harness/pathguard.py +41 -0
  45. glassrail-0.1.0/src/glassrail/harness/registry.py +179 -0
  46. glassrail-0.1.0/src/glassrail/planner/__init__.py +9 -0
  47. glassrail-0.1.0/src/glassrail/planner/cookbook.py +165 -0
  48. glassrail-0.1.0/src/glassrail/planner/cookbooks/__init__.py +1 -0
  49. glassrail-0.1.0/src/glassrail/planner/cookbooks/compare_aggregate.json +35 -0
  50. glassrail-0.1.0/src/glassrail/planner/cookbooks/conditional_branch.json +34 -0
  51. glassrail-0.1.0/src/glassrail/planner/cookbooks/direct_answer.json +31 -0
  52. glassrail-0.1.0/src/glassrail/planner/cookbooks/single_tool.json +18 -0
  53. glassrail-0.1.0/src/glassrail/planner/cookbooks/subplan.json +20 -0
  54. glassrail-0.1.0/src/glassrail/planner/cookbooks/web_research.json +20 -0
  55. glassrail-0.1.0/src/glassrail/planner/planner.py +546 -0
  56. glassrail-0.1.0/src/glassrail/planner/tool_digest.py +157 -0
  57. glassrail-0.1.0/src/glassrail/providers/__init__.py +40 -0
  58. glassrail-0.1.0/src/glassrail/providers/base.py +87 -0
  59. glassrail-0.1.0/src/glassrail/providers/factory.py +38 -0
  60. glassrail-0.1.0/src/glassrail/providers/openai_compat.py +290 -0
  61. glassrail-0.1.0/src/glassrail/providers/postprocess.py +34 -0
  62. glassrail-0.1.0/src/glassrail/providers/router.py +160 -0
  63. glassrail-0.1.0/src/glassrail/providers/scripted.py +56 -0
  64. glassrail-0.1.0/src/glassrail/py.typed +0 -0
  65. glassrail-0.1.0/src/glassrail/runtime.py +83 -0
  66. glassrail-0.1.0/src/glassrail/state/__init__.py +16 -0
  67. glassrail-0.1.0/src/glassrail/state/base.py +39 -0
  68. glassrail-0.1.0/src/glassrail/state/memory.py +41 -0
  69. glassrail-0.1.0/src/glassrail/state/sqlite.py +111 -0
  70. glassrail-0.1.0/src/glassrail/telemetry/__init__.py +58 -0
  71. glassrail-0.1.0/src/glassrail/telemetry/tracing.py +136 -0
  72. glassrail-0.1.0/src/glassrail/validator/__init__.py +7 -0
  73. glassrail-0.1.0/src/glassrail/validator/validator.py +222 -0
  74. glassrail-0.1.0/uv.lock +1872 -0
@@ -0,0 +1,84 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ .Python
9
+ build/
10
+ dist/
11
+ develop-eggs/
12
+ downloads/
13
+ eggs/
14
+ .eggs/
15
+ lib/
16
+ lib64/
17
+ parts/
18
+ sdist/
19
+ var/
20
+ wheels/
21
+ share/python-wheels/
22
+ MANIFEST
23
+
24
+ # Virtual environments
25
+ .venv/
26
+ venv/
27
+ ENV/
28
+ env/
29
+ .python-version
30
+
31
+ # uv
32
+ .uv-cache/
33
+
34
+ # Testing & coverage
35
+ .pytest_cache/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ htmlcov/
41
+ coverage.xml
42
+ *.cover
43
+ .hypothesis/
44
+
45
+ # Type checkers
46
+ .mypy_cache/
47
+ .pyright/
48
+ .pytype/
49
+ .ruff_cache/
50
+
51
+ # Editors
52
+ .vscode/
53
+ .idea/
54
+ *.swp
55
+ *.swo
56
+ *~
57
+ .DS_Store
58
+
59
+ # Docs
60
+ site/
61
+
62
+ # Project-local state
63
+ SCRATCH.md
64
+ *.sqlite
65
+ *.db
66
+ *.db-journal
67
+ data/
68
+ logs/
69
+ *.log
70
+
71
+ # Secrets
72
+ .env
73
+ .env.*
74
+ !.env.example
75
+
76
+ # Rust (clients/tui)
77
+ target/
78
+
79
+ # Local runtime config & state
80
+ failed_plans/
81
+ config.toml
82
+ !eval-framework/**/config.toml
83
+ !eval-framework/**/suite.toml
84
+ state.sqlite
@@ -0,0 +1,337 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ No unreleased changes.
11
+
12
+ ## [0.1.0] - 2026-06-11
13
+
14
+ ### Added
15
+ - Planner/result prompts now preserve load-bearing facts, branch labels, and
16
+ comparison coverage using structural wording only; prompt tests guard against
17
+ visible eval-task vocabulary, and the comparison eval regex accepts the full
18
+ product spelling alongside the shorthand.
19
+ - Planner/result prompts now also preserve source-of-knowledge constraints,
20
+ require deduction tasks to surface key steps in the final answer, and default
21
+ final answers to prose unless JSON is requested. Candidate coverage regexes
22
+ accept separator variants in product names.
23
+ - Result nodes retry once at the next configured tier after malformed output,
24
+ branch result prompts preserve both the branch label and answer value, and
25
+ closed-book sibling evaluation prompts repeat stable-knowledge instructions.
26
+ The arithmetic eval accepts decimal unit formatting for equivalent weights.
27
+ - First-party file tools now support optional `[tools] fs_roots = [...]` path
28
+ confinement. `file_read` and `image_generate` resolve paths through the shared
29
+ guard, deny symlink/traversal escapes when roots are configured, and preserve
30
+ the current unconfined default with a one-time warning.
31
+ - Tool approval now honors declared tool risk by default: explicit per-tool
32
+ overrides still win, while `write` and `execute` tools resolve to `ask`
33
+ unless overridden. Auto mode continues to treat `ask` as allowed for
34
+ unattended runs.
35
+ - CLI coverage now protects the release-critical command surface: `version`,
36
+ `run --json` envelope shape, `exec-plan --json` with a harness-mechanics
37
+ fixture, and `tui` / `acp` help rendering.
38
+ - Pre-release hygiene cleanup refreshed the name-availability checker defaults
39
+ for the current GitHub organization and documented the opt-in mflux-backed
40
+ `image_generate` tool in the README.
41
+ - Eval-integrity cleanup removed suite-specific vocabulary from conditional
42
+ retry detection and planner cookbook keywords, replaced it with structural
43
+ signals, and added the scripted harness-mechanics regression wall to CI.
44
+ - Added `eval-framework/suites/glassrail-heldout`, a 12-task OpenRouter-backed
45
+ held-out suite with an explicit no-iteration rule for release-gate
46
+ confirmation and overfit-gap reporting.
47
+ - Engineering specs under `docs/specs/` from the June 2026 architecture audit
48
+ — eval integrity (de-overfitting, held-out suite, CI eval gate), parallel
49
+ node execution, node resilience, configurable routing table, security
50
+ baseline, serving hardening, small fixes, and comparative baselines — wired
51
+ into the docs nav and referenced from a restructured roadmap (a release
52
+ 0.1.0 blocking workstream plus Phase 2 sliced into ordered tracks).
53
+ - Per-tool approval policy is now configurable with `allow`, `ask`, and `deny`
54
+ policies plus an `auto` execution mode that treats `ask` as `allow` while
55
+ preserving explicit denies. ACP surfaces `ask` tool calls through
56
+ `session/request_permission`, including an "always allow" promotion for the
57
+ running agent process.
58
+ - Rust TUI graph view now draws routed box-drawing edges between plan nodes.
59
+ The ACP `plan_graph` extension includes explicit data/control edges while
60
+ keeping per-node `deps` for compatibility.
61
+ - Summary nodes now support a `format` hint (`concise`, `medium`, `verbose`).
62
+ The executor selects concise or verbose summary prompts when requested while
63
+ preserving the existing configurable medium/default summary prompt.
64
+ - Added a `subplan-correct` glassrail capability eval that requires a naturally
65
+ partitioned task to include a `subplan` trajectory step.
66
+ - Streaming text events now carry node metadata: `NodeOutputChunk` includes the
67
+ node type, and ACP `agent_message_chunk` updates include glassrail extension
68
+ fields (`nodeId`, `nodeType`, `isFinal`) so clients can distinguish
69
+ intermediate think/summary/synthesis output from the final result.
70
+ - Planner prompts now include a tool capability digest that groups registered
71
+ tools by broad capability before listing the raw JSON schemas, helping the
72
+ planner choose available tool families and reject absent capabilities.
73
+ - Planner cookbook recipes now live as bundled JSON files with descriptions,
74
+ selection keywords, adaptable skeletons, and adaptation notes. The planner
75
+ injects one selected recipe as a scaffold rather than hardcoded prompt text
76
+ or a verbatim template.
77
+ - Planner cookbook selection now injects the top three ranked candidate
78
+ recipes into the planning prompt, letting the model compare nearby DAG shapes
79
+ without an extra planner/classifier call.
80
+ - Planner/eval guidance now tightens tool-name discipline for optional web
81
+ tools, vague-request handling, recommendation phrasing, non-null node
82
+ descriptions, and prose numeric answers based on the latest OpenRouter eval
83
+ failure analysis.
84
+ - OpenRouter eval prompts now identify closed-book research/comparison tasks,
85
+ require explicit comparison axes and prose recommendations, and call out
86
+ planted summary facts so the model does not hide behind missing-context
87
+ caveats or over-compress named entities.
88
+ - OpenRouter mirror eval suites now grade LLM criteria through OpenRouter
89
+ (`anthropic/claude-haiku-4.5`) using `OPENROUTER_API_KEY`, avoiding hidden
90
+ dependence on Claude Code subscription quota for judge calls.
91
+ - Eval criteria now separate trajectory checks from LLM answer-quality checks,
92
+ relax wording-sensitive regexes for cache/migration references and structured
93
+ numeric answers, and make the subplan-correct task respect the configured
94
+ two-subplan cap.
95
+ - Summary evals now capture the installed source documents for the LLM judge,
96
+ so faithfulness checks can compare the answer against the actual fixture
97
+ instead of returning UNKNOWN for lack of evidence.
98
+ - Think/result node prompts now allow well-established stable knowledge when a
99
+ task explicitly asks for it and no file, tool, or live lookup is required,
100
+ avoiding false "missing context" failures in closed-book evals.
101
+
102
+ ### Changed
103
+ - Source distributions now use an explicit release allowlist, keeping eval run
104
+ outputs, tests, docs builds, and local deployment files out of the PyPI
105
+ artifact while preserving the package source and release metadata; CI and the
106
+ publish workflow now fail if forbidden release artifacts reappear.
107
+ - Package metadata now points the Documentation URL at the published MkDocs
108
+ site.
109
+ - README links now use absolute repository and docs URLs so they work from the
110
+ PyPI project page as well as GitHub.
111
+ - Renamed the public root exception from `DagagentError` to `GlassrailError`
112
+ before the first PyPI release.
113
+ - Documentation corrections from the architecture audit: the README no longer
114
+ describes the DAG viewer's layers as "parallel" (node execution is currently
115
+ sequential; parallel execution is specced), the generation-ceiling default
116
+ reads `20000` to match settings, and a Security notes section states the
117
+ current posture plainly. Stale repository URLs in `docs/evals.md` and
118
+ `docs/deployment.md` now point at the current repo, `docs/index.md` reflects
119
+ the Phase 1 gate status, `AGENTS.md` is re-synced with `CLAUDE.md`, and
120
+ `PHASE1_REMAINING.md` is absorbed into `docs/specs/eval-integrity.md` and
121
+ removed.
122
+ - Planner validation now repairs missing or blank node descriptions before
123
+ strict schema validation, including nested subplans, so otherwise-valid plans
124
+ are not discarded for a recoverable LLM omission.
125
+ - Planner output normalization now wraps a terminal synthesis-only plan in a
126
+ result node, and the orchestrator retries conditional-looking requests when
127
+ the planner collapses them into a plan with no decision node.
128
+ - Planner JSON parsing now preserves an earlier non-null value when a model
129
+ repeats the same key later as `null`, recovering otherwise-valid decision
130
+ nodes with duplicate `condition` fields.
131
+ - Subplan execution now includes the parent task text in the nested task
132
+ request, so closed-book subplans retain stable-knowledge instructions without
133
+ seeing unrelated parent-node results.
134
+ - Planner cookbook and prompt guidance now steer obvious binary branches,
135
+ logic-puzzle deductions, and comparison/recommendation tasks toward explicit
136
+ decision, reasoning, and per-axis comparison structure.
137
+ - Planner subplan guidance now explicitly shows the correct nested tool-node
138
+ shape (`"type": "tool", "tool": "web_search"`), contrasts it with the invalid
139
+ `"type": "web_search"` schema, and reminds the model to count subplan nodes
140
+ before exceeding the configured cap.
141
+ - Executor context assembly includes direct dependent-node descriptions in the
142
+ current node's prompt, so upstream summary, synthesis, tool, decision, and
143
+ subplan nodes can shape their output for known downstream consumers without
144
+ seeing unrelated sibling results.
145
+ - Planner invalid-JSON failures now distinguish short parse errors from
146
+ generation stalls using a configurable planner-budget character multiplier.
147
+ Stall attempts preserve the raw output as `error_detail` and feed a truncated
148
+ copy into the next retry prompt so the model does not repeat it.
149
+ - Planner rejections are logged at warning level with structured
150
+ `rejection_reason` and best-effort `rejection_class` fields for operators.
151
+ - Planner subplan guidance now defines good boundaries, anti-patterns, schema
152
+ expectations, and examples so nested plans are used for self-contained
153
+ multi-step sub-tasks instead of single-node wrappers.
154
+ - TUI transcript and composer rendering now pre-wrap to the pane width before
155
+ computing scroll offsets. Long streamed results stay fully scrollable, and
156
+ long prompts wrap inside a composer that grows up to a small cap.
157
+ - TUI live `think` chunks render as dim italic quote-style transcript cells,
158
+ using the ACP node metadata added for intermediate output streams.
159
+ - Tightened the default planner, decision, think, summary, synthesis, result,
160
+ and shape-check prompts to make node roles clearer, preserve downstream
161
+ information, and avoid over-compressed or invented outputs.
162
+ - Plan validation now enforces node-type contracts before execution: tool
163
+ nodes must name a tool, non-tool nodes cannot carry tool fields, decision
164
+ nodes must declare a binary yes/no branch contract, and only subplan nodes
165
+ can carry nested plans.
166
+ - Planning retries now feed schema/validation failures back into the next
167
+ planner attempt, so the model can repair a concrete invalid DAG instead of
168
+ retrying blind.
169
+ - Plan validation now rejects `forced_tier` values outside the configured tier
170
+ range, including inside nested subplans, so planner mistakes fail before
171
+ execution.
172
+ - Planner prompt context now includes the eligible/configured tier surface and
173
+ a concise plan cookbook (direct answer, tool→result, research, aggregation,
174
+ conditional, subplan, rejection) so plans are shaped against the runtime the
175
+ executor will actually use.
176
+ - TUI DAG view (`Tab`): a collapsible panel showing the plan's nodes grouped
177
+ into dependency layers (cohorts of mutually independent nodes), recoloured live by status. The
178
+ adapter sends the graph topology as a `plan_graph` extension update, since
179
+ ACP's flat plan omits edges.
180
+ - TUI composer editing: in-place cursor movement (`←`/`→`, `Home`/`End`,
181
+ `Backspace`/`Del`) with a visible cursor, and submitted-task history recall via
182
+ `Ctrl-P`/`Ctrl-N`. (Multi-line entry is not yet supported.)
183
+ - TUI responsiveness: an animated spinner and a live elapsed-time readout while
184
+ a turn runs (the turn-ended notice reports how long it took), plus
185
+ mouse-wheel scrolling of the transcript.
186
+ - Richer TUI transcript: tool calls show their arguments and a result preview,
187
+ and each node carries a dim tier/confidence annotation (flagged when low). The
188
+ adapter sends tool `rawInput`/`rawOutput` and a `node_meta` extension update on
189
+ node completion; standard ACP clients ignore the extension.
190
+ - Cancellation: a `cancelled` task status and a `TaskCancelled` terminal event.
191
+ The orchestrator handles `asyncio.CancelledError` in run/resume/revise —
192
+ marking the task cancelled, emitting the event, and persisting state — so an
193
+ ACP `session/cancel` (Esc in the TUI) leaves consistent state. The adapter
194
+ cancels the in-flight turn at a single point so cleanup is not interrupted.
195
+ - Dovetailing ACP sessions: a follow-up `session/prompt` in the same session
196
+ carries the previous task's `final_output` forward as a context preamble, so
197
+ tasks build on one another. Threaded as task input, leaving the
198
+ fresh-context-per-node invariant intact.
199
+ - Rust terminal client (`clients/tui`, `glassrail-tui`): a ratatui app that
200
+ spawns `glassrail acp`, submits tasks, streams the plan and node execution,
201
+ and drives the plan-approval gate (approve / reject / reject-with-feedback).
202
+ Polyglot monorepo: a dedicated `rust-tui` CI job runs fmt/clippy/build/test.
203
+ - ACP adapter (`glassrail acp`): a JSON-RPC 2.0 server over stdio exposing the
204
+ agent via the Agent Client Protocol, for the Rust TUI and other
205
+ ACP clients. Implements `initialize`, `session/new`, `session/prompt`, and
206
+ `session/cancel`, bridging the EventBus into `session/update` notifications
207
+ (plan, tool calls, message chunks). `fs/*`, `terminal/*`, and `session/load`
208
+ are intentionally unsupported. The adapter drives the HITL plan gate over
209
+ `session/request_permission`: clients approve a plan or reject it with
210
+ free-text feedback to trigger a guided replan.
211
+ - Guided replan in the engine: `Planner.plan`/`plan_attempt` accept `feedback`
212
+ that is woven into the planning prompt, and `Orchestrator.revise(task_id,
213
+ feedback)` re-plans a task paused at the confirmation gate and re-enters the
214
+ gate.
215
+ - Package skeleton (src-layout) with subpackages for core, config, events,
216
+ providers, state, harness, validator, planner, executor, channels,
217
+ gateways, and cli.
218
+ - Core domain types: `Plan`, `Node`, `NodeStatus`, `NodeResult`,
219
+ `BranchLogEntry`, `TaskStatus`, `ExecutionState`, ULID-based `TaskId`.
220
+ - Configuration via `pydantic-settings` with env, `.env`, and `config.toml`
221
+ precedence; structured `TierConfig` for each tier.
222
+ - Tool harness: `@harness.tool` decorator, entry-point discovery
223
+ (`glassrail.tools` group), and built-in tool stubs.
224
+ - LLM provider abstraction: streaming `LLMProvider` Protocol,
225
+ `TierRouter` with `ProviderUnavailableError`-driven fallthrough,
226
+ OpenAI-compatible concrete provider that parses the SSE stream
227
+ token-by-token (content deltas, tool-call accumulation, usage).
228
+ - StateStore Protocol with in-memory and SQLite (aiosqlite) backends,
229
+ and a shared contract test suite every backend must pass.
230
+ - Plan validator: topological sort, cycle detection, tool name checks,
231
+ decision-nesting limit, branch-reference sanity.
232
+ - Planner with JSON-mode prompt; node terminology consistent with core.
233
+ - Executor with per-node fresh context, tool / decision / synthesis
234
+ dispatch, branch skip propagation, low-confidence flagging.
235
+ - Hypothesis property tests asserting the fresh-context invariant
236
+ (no out-of-context content leaks into assembled node prompts).
237
+ - `think` node type for explicit reasoning steps. Defaults to tier 2
238
+ (reasoning tier) and emits a structured `{reasoning, confidence}`
239
+ payload.
240
+ - `summary` node type for condensing noisy upstream context. Defaults
241
+ to tier 0 and emits a `{summary, confidence}` payload.
242
+ - `result` node type as the explicit terminal-output marker. The last
243
+ completed `result` node's output is the task's `final_output`; plans
244
+ without a `result` node fall back to the last completed `synthesis`
245
+ for backward compatibility.
246
+ - `subplan` node type: a node carries its own nested `Plan` which the
247
+ executor runs inline, bubbling the nested `final_output` up as the
248
+ subplan node's output. Validator caps: max 2 subplans per plan, max
249
+ 12 nodes per subplan (both configurable via settings).
250
+ - Orchestrator wrapping planning, optional HITL gate, execution, and
251
+ persistence handoffs.
252
+ - Typed event stream: Pydantic events for every plan, node, branch, and
253
+ task transition on an in-process `EventBus`; the executor and
254
+ orchestrator emit them, and gateways subscribe via an async iterator.
255
+ - FastAPI gateway: `/task`, `/task/{id}`, `/task/{id}/resume`,
256
+ `/task/{id}/branch-log`, `/task/{id}/events` (SSE and WebSocket — the
257
+ WebSocket streams the same typed events and closes on a terminal event),
258
+ `/tools`, `/health`.
259
+ - Typer CLI entry point with a `glassrail run` command — a headless run that
260
+ plans and executes a task in-process and prints a JSON result envelope
261
+ (final output, normalized trajectory, status, token count) for eval harnesses
262
+ to consume — and a `glassrail tui` command: a Rich terminal client that submits
263
+ a task to a running gateway and renders its live SSE event stream (plan →
264
+ per-node progress → final output), built from a thin event client and a pure,
265
+ testable view model.
266
+ - Live DAG view in the TUI: once the plan arrives, the viewer draws it as boxes
267
+ connected by routed edges — nodes grouped into topological layers (same layer
268
+ = mutually independent nodes), edges split with pass-through vertices so they never
269
+ cross a box — above the existing node table. Each box shows the node's
270
+ id/type and a short summary (its planner `description`); the border is
271
+ recoloured as the node starts, completes, or fails, and decisions show the
272
+ branch they took. Pure render over the plan plus accumulated node statuses,
273
+ onto a character grid that falls back to a compact list when the terminal is
274
+ too narrow; `glassrail tui --no-dag` shows only the table.
275
+ - Shared runtime composition root (`glassrail.runtime.build_runtime`) that wires
276
+ the harness, router, planner, validator, executor, store, and orchestrator
277
+ from settings; the REST gateway and the CLI both build from it.
278
+ - OpenTelemetry tracing (`glassrail.telemetry`): the planner, router, and
279
+ executor emit a span tree (task → plan / node → LLM call) with GenAI
280
+ semantic-convention attributes (system, model, tokens) and `glassrail.*`
281
+ attributes (tier, node type/status, task status). Tracing is a no-op until
282
+ configured via settings; the SDK and OTLP/HTTP exporter ship in the optional
283
+ `otel` extra. The REST gateway configures it at startup.
284
+ - Production `Dockerfile`: multi-stage uv build serving the REST gateway from
285
+ a slim, non-root `python:3.12-slim` image (~60 MB) with a built-in health
286
+ check. CI builds and smoke-tests the image on every change.
287
+ - Vendored `eval-framework/`: a self-contained, stdlib-only harness that runs
288
+ each task k times against a pluggable subject backend, captures output /
289
+ side-effects / trajectory, grades with a deterministic→trajectory→LLM cascade
290
+ (the judge decoupled from the subject), and reports pass@k vs pass^k. Backends:
291
+ `glassrail-cli` and `glassrail-gateway` (drive the real planner + executor over
292
+ the agent's own tier routing), `openai-compat` (baseline a raw model), and
293
+ `claude-cli`. Ships a `glassrail` suite (a decision-branch control pair, a
294
+ calibration fact, and a multistep recommendation). Self-documented (its own
295
+ README/DECISIONS/CLAUDE) and kept out of the package's ruff/pyright/pytest
296
+ scope.
297
+ - Per-node output-token budgets (`settings.budgets`, a `NodeBudgets` table):
298
+ each LLM call's `max_tokens` is configurable per role — planner, think,
299
+ summary, synthesis, result, decision, extract_args, shape_check — with
300
+ generous defaults so reasoning and summaries get room while structured
301
+ micro-calls stay small. Override under `[budgets]` in `config.toml` or
302
+ `GLASSRAIL_BUDGETS__<FIELD>`. Replaces the single `max_node_output_tokens`
303
+ setting and the previously hard-coded caps in the planner and executor.
304
+ - Configurable per-node system prompts (`settings.prompts`, a `NodePrompts`
305
+ table): the planner and executor read each role's prompt from settings
306
+ instead of hard-coding it, so prompts can be tuned without editing source.
307
+ Defaults live in `glassrail.config.prompts`; override under `[prompts]` in
308
+ `config.toml` or `GLASSRAIL_PROMPTS__<FIELD>`.
309
+ - First-party tool integrations layer (`settings.tools`): bundled, opt-in tools
310
+ configured under `[tools.*]` and registered by `build_runtime`, distinct from
311
+ third-party entry-point plugins. First integration: **web** — `web_fetch(url)`
312
+ fetches a page and extracts its main text via trafilatura (boilerplate
313
+ removed), for reading and high-fidelity summarisation of webpages. Off by
314
+ default; needs the optional `web` extra (`pip install glassrail[web]`) and
315
+ `tools.web.fetch = true`. Adds `web_search(query)` behind a pluggable
316
+ provider — `duckduckgo` (HTML scrape, no setup) or `searxng` (self-hosted
317
+ JSON API); switching is a config flip (`tools.web.search`). A non-200 from
318
+ DuckDuckGo (e.g. its HTTP 202 anti-bot challenge) is surfaced as an error
319
+ rather than a silently empty result set. The old `web_search` built-in stub
320
+ is removed in favour of this real implementation.
321
+ - Opt-in third-party tool plugins: with `load_tool_plugins = true`
322
+ (`GLASSRAIL_LOAD_TOOL_PLUGINS`), `build_runtime` discovers and registers tools
323
+ advertised through the `glassrail.tools` entry-point group. The harness has
324
+ supported entry-point discovery all along; the composition root now invokes
325
+ it. Off by default — loading whatever is installed is a deliberate choice.
326
+ - Tooling: uv, ruff, pyright strict, pytest + hypothesis, pre-commit,
327
+ MkDocs + Material. CI on Linux + macOS for Python 3.12 + 3.13.
328
+ - Apache-2.0 license.
329
+
330
+ ### Changed
331
+ - Planner now states the structural budget (max plan nodes, subplan count and
332
+ size) to the model in each request, derived from settings rather than
333
+ hard-coded in the prompt. Previously the top-level node cap was never
334
+ communicated, so the model would overshoot it and the plan would be rejected
335
+ at validation. Raised the default `max_plan_nodes` from 12 to 24 to fit
336
+ real fan-out tasks (an "N things × M aspects" research sweep needs N×M tool
337
+ nodes plus aggregation).
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for describing the origin of the Work and
141
+ reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may accept and charge a
167
+ fee for, or grant warranty, support, indemnity, or other liability
168
+ obligations and/or rights consistent with this License. However,
169
+ in accepting such obligations, You may act only on Your own behalf
170
+ and on Your sole responsibility, not on behalf of any other
171
+ Contributor, and only if You agree to indemnify, defend, and hold
172
+ each Contributor harmless for any liability incurred by, or claims
173
+ asserted against, such Contributor by reason of your accepting any
174
+ such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2026 Andrew Ellis
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.