cross-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ cross.log
2
+ data/
3
+ __pycache__/
4
+ *.pyc
5
+ *.egg-info/
6
+ .venv/
7
+ dist/
8
+ build/
9
+ .env
10
+ .tmp/
11
+ .coverage
12
+ evals/results/
cross_ai-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Cross Attention Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.4
2
+ Name: cross-ai
3
+ Version: 0.1.0
4
+ Summary: Agent monitoring proxy and session manager — PTY wrapping, network interception, Slack relay, and gating
5
+ Project-URL: Homepage, https://github.com/rfdearborn/cross
6
+ Project-URL: Repository, https://github.com/rfdearborn/cross
7
+ Project-URL: Issues, https://github.com/rfdearborn/cross/issues
8
+ Author: Cross Attention Labs
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agent,ai,gating,monitoring,oversight,proxy,safety
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Security
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: httpx>=0.28.0
22
+ Requires-Dist: pydantic-settings>=2.6.0
23
+ Requires-Dist: pyyaml>=6.0
24
+ Requires-Dist: starlette>=0.52.0
25
+ Requires-Dist: uvicorn[standard]>=0.41.0
26
+ Requires-Dist: websockets>=13.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: anyio[trio]>=4.0; extra == 'dev'
29
+ Requires-Dist: mitmproxy>=12.0.0; extra == 'dev'
30
+ Requires-Dist: pytest-anyio>=0.0.0; extra == 'dev'
31
+ Requires-Dist: pytest-cov>=6.0; extra == 'dev'
32
+ Requires-Dist: pytest>=8.0; extra == 'dev'
33
+ Provides-Extra: slack
34
+ Requires-Dist: aiohttp>=3.9.0; extra == 'slack'
35
+ Requires-Dist: slack-sdk>=3.0.0; extra == 'slack'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # cross
39
+
40
+ **Configurable Runtime Oversight and Safety Standard**
41
+
42
+ cross makes AI agents more trustworthy *and* more capable with lightweight gating and monitoring.
43
+
44
+ > **Not sure you need this?** Ask your agent to explain why you do.
45
+
46
+ ## Why cross
47
+
48
+ Your agents are overeager. They're so locked in on finishing your task that they'll `rm -rf` an errant directory or push your credentials public without a second thought.
49
+
50
+ cross watches from the outside, like a copilot keeping situational awareness while the pilot flies the plane, to guard against this. It pairs agents with spotters which, with separate context and fresh eyes, check actions and monitor sessions.
51
+
52
+ cross expands the capability-safety frontier: with a babysitter watching, you can give agents more rope. You can stop pre-screening tool calls without feeling a pit in your stomach. More throughput, fewer surprises.
53
+
54
+ ## Quick Start
55
+
56
+ ```bash
57
+ pip install cross-ai # or: pip install cross-ai[slack] for Slack integration
58
+ cross setup # interactive setup wizard (starts daemon automatically on macOS)
59
+ ```
60
+
61
+ Or install from source:
62
+
63
+ ```bash
64
+ git clone https://github.com/rfdearborn/cross && cd cross
65
+ pip install -e ".[dev]"
66
+ ```
67
+
68
+ Then point your agent's API traffic through cross. For Claude Code, add a shell wrapper:
69
+
70
+ ```bash
71
+ # In your .zshrc / .bashrc
72
+ claude() { cross wrap -- claude "$@"; }
73
+ ```
74
+
75
+ `cross wrap` does two things: wraps the agent in a PTY for I/O control, and routes API traffic through the local proxy (`ANTHROPIC_BASE_URL=http://localhost:2767`) for structured monitoring and gating.
76
+
77
+ ## How It Works
78
+
79
+ cross evaluates every tool call through a 3-layer pipeline:
80
+
81
+ ```
82
+ Tool call arrives
83
+ |
84
+ v
85
+ +-----------+ no match +----------+
86
+ | Denylist | --------------> | ALLOW |
87
+ | Gate | +----------+
88
+ +-----------+
89
+ | match
90
+ v
91
+ +-----------+ false pos +----------+
92
+ | LLM Gate | --------------> | ALLOW |
93
+ | (review) | +----------+
94
+ +-----------+
95
+ | confirmed
96
+ v
97
+ BLOCK / ESCALATE
98
+ (feedback to agent / human review)
99
+
100
+ --- meanwhile, in the background ---
101
+
102
+ +-----------+
103
+ | Sentinel | periodic review of all activity
104
+ +-----------+
105
+ ```
106
+
107
+ **Layer 1: Denylist gate** -- Fast deterministic pattern matching against YAML rules. Sub-millisecond, zero cost. Catches destructive commands (`rm -rf /`), credential exfiltration, reverse shells, system path writes, and more. Intentionally broad -- it's a triage filter, not a final judge.
108
+
109
+ **Layer 2: LLM gate** -- Only invoked when the denylist flags a call. Reviews the tool call with full context (user intent, recent tool history, why it was flagged) and renders a verdict: ALLOW (false positive), BLOCK (confirmed dangerous), or ESCALATE (needs human review). This is why the denylist can be aggressive without generating noise -- the LLM catches false positives.
110
+
111
+ **Layer 3: Sentinel** -- Async periodic reviewer that watches the full event stream over time. Detects patterns that per-call evaluation misses: tool calls that don't match user intent, suspicious sequences (read credentials then network call), escalating privilege patterns, and agents working around restrictions. Reports to the dashboard and Slack.
112
+
113
+ Blocked tool calls are suppressed from the API response stream. The proxy automatically retries with the block reason injected, so the agent self-corrects without user intervention. For critical threats (credential exfiltration, reverse shells), the session is halted entirely until a human intervenes.
114
+
115
+ ## Supported Agents
116
+
117
+ - **Claude Code** -- validated, full PTY + proxy + tool-level gating
118
+ - **OpenClaw** -- validated, PTY + tool-level gating via `beforeToolCall` hook. `cross wrap -- openclaw` automatically injects a Node.js hook that gates every tool call through the cross daemon
119
+ - **Any CLI agent** -- `cross wrap -- <agent-command>` provides PTY wrapping and API proxy for any CLI agent
120
+ - **Any agent using Anthropic APIs** -- set `ANTHROPIC_BASE_URL=http://localhost:2767`
121
+
122
+ ## Dashboard
123
+
124
+ cross ships with a built-in web dashboard at `http://localhost:2767`. No dependencies, no setup -- it's always active when the daemon is running.
125
+
126
+ The dashboard shows:
127
+ - **Pending approvals** -- escalated tool calls waiting for human review, with Approve/Deny buttons
128
+ - **Live event feed** -- real-time stream of tool calls, gate decisions, and sentinel reviews
129
+
130
+ You can also manage pending escalations from the CLI:
131
+
132
+ ```bash
133
+ cross pending # list pending escalations
134
+ cross pending approve <tool_use_id> # approve
135
+ cross pending deny <tool_use_id> # deny
136
+ ```
137
+
138
+ ## Configuration
139
+
140
+ ### LLM Providers
141
+
142
+ cross uses LLMs for the gate reviewer and sentinel. The default is `claude` (`cli/claude`), which uses your existing Claude Code subscription -- no API key needed. You can also use any other supported provider:
143
+
144
+ | Provider | Model format | API key env var | Notes |
145
+ |----------|-------------|-----------------|-------|
146
+ | Claude Code | `cli/claude` (or just `claude`) | (none needed) | Default. Uses your Claude subscription via `claude -p` |
147
+ | Google Gemini | `google/gemini-3-flash-preview` | `GOOGLE_API_KEY` | Free tier available |
148
+ | Anthropic | `anthropic/claude-haiku-4-5` | `ANTHROPIC_API_KEY` | |
149
+ | OpenAI | `openai/gpt-4o` | `OPENAI_API_KEY` | |
150
+ | Ollama | `ollama/llama3` | (none needed) | Local models |
151
+
152
+ Configure via environment variables (all prefixed `CROSS_`):
153
+
154
+ ```bash
155
+ # LLM gate (default uses Claude Code, no key needed)
156
+ CROSS_LLM_GATE_MODEL=cli/claude
157
+
158
+ # Or use an API provider
159
+ CROSS_LLM_GATE_MODEL=google/gemini-3-flash-preview
160
+ CROSS_LLM_GATE_API_KEY=... # or set GOOGLE_API_KEY
161
+
162
+ # Sentinel
163
+ CROSS_LLM_SENTINEL_MODEL=cli/claude
164
+ CROSS_LLM_SENTINEL_INTERVAL_SECONDS=60
165
+ ```
166
+
167
+ Or use `cross setup` for guided interactive configuration.
168
+
169
+ ### Denylist Rules
170
+
171
+ Default rules ship with cross and cover destructive commands, credential exfiltration, reverse shells, and system path writes. Customize with YAML files in `~/.cross/rules.d/`:
172
+
173
+ ```yaml
174
+ # ~/.cross/rules.d/my-rules.yaml
175
+ rules:
176
+ - name: no-docker-push
177
+ tools: [Bash]
178
+ field: command
179
+ action: block
180
+ description: Prevent pushing Docker images
181
+ patterns:
182
+ - 'docker\s+push\b'
183
+
184
+ # Disable a default rule by name
185
+ disable:
186
+ - destructive-rm
187
+ ```
188
+
189
+ Rules support `patterns` (regex, case-insensitive) and `contains` (substring matching), and can target specific tools and input fields.
190
+
191
+ ### All Settings
192
+
193
+ Settings can be set via environment variables (`CROSS_` prefix) or `.env` files. cross loads `~/.cross/local.env` (personal overrides, survives `cross setup`), then `~/.cross/.env` (generated by setup), then `.env` in the working directory:
194
+
195
+ | Setting | Default | Description |
196
+ |---------|---------|-------------|
197
+ | `listen_port` | 2767 | Proxy listen port |
198
+ | `gating_enabled` | true | Enable the denylist gate |
199
+ | `llm_gate_enabled` | true | Enable LLM review of flagged calls |
200
+ | `llm_gate_shadow` | false | Shadow mode: the LLM's verdict is reported as a recommendation, but every reviewed call is escalated so a human makes the final call |
201
+ | `llm_gate_threshold` | escalate | Min denylist action to trigger LLM review |
202
+ | `llm_sentinel_enabled` | true | Enable periodic LLM sentinel reviews |
203
+ | `llm_sentinel_interval_seconds` | 60 | Seconds between sentinel review cycles |
204
+ | `gate_approval_timeout` | 300 | Seconds to wait for human approval on escalation |
205
+ | `rules_dir` | ~/.cross/rules.d | Custom rules directory |
206
+
207
+ ## Architecture
208
+
209
+ cross uses two complementary interception layers:
210
+
211
+ **PTY wrapper** (`cross wrap`) -- Wraps any CLI agent in a pseudo-terminal for full I/O control. Enables bidirectional messaging relay (Slack/dashboard to agent), terminal-to-phone handoff, and session management. Agent-agnostic.
212
+
213
+ **Network proxy** -- Intercepts API traffic via `ANTHROPIC_BASE_URL` redirect. Parses streaming SSE responses, buffers tool_use blocks for gate evaluation, and suppresses blocked calls from the response stream. Provides structured monitoring with zero agent modification.
214
+
215
+ Both layers are coordinated by the daemon (`cross daemon`), which runs the proxy, gate chain, sentinel, dashboard, and optional Slack plugin as a single process.
216
+
217
+ ## Notification Channels
218
+
219
+ - **Web dashboard** (default) -- zero dependencies, always active at `/cross/dashboard`
220
+ - **Slack** (optional) -- gate decisions, sentinel reviews, and interactive approval buttons. Configure with `CROSS_SLACK_BOT_TOKEN` and `CROSS_SLACK_APP_TOKEN`. Install the `slack` extra: `pip install cross-ai[slack]`.
221
+
222
+ ## Development
223
+
224
+ ```bash
225
+ git clone https://github.com/rfdearborn/cross
226
+ cd cross
227
+ pip install -e ".[dev,slack]"
228
+ python -m pytest tests/
229
+ ```
230
+
231
+ Ruff for linting (`ruff check`).
232
+
233
+ ## License
234
+
235
+ MIT
@@ -0,0 +1,198 @@
1
+ # cross
2
+
3
+ **Configurable Runtime Oversight and Safety Standard**
4
+
5
+ cross makes AI agents more trustworthy *and* more capable with lightweight gating and monitoring.
6
+
7
+ > **Not sure you need this?** Ask your agent to explain why you do.
8
+
9
+ ## Why cross
10
+
11
+ Your agents are overeager. They're so locked in on finishing your task that they'll `rm -rf` an errant directory or push your credentials public without a second thought.
12
+
13
+ cross watches from the outside, like a copilot keeping situational awareness while the pilot flies the plane, to guard against this. It pairs agents with spotters which, with separate context and fresh eyes, check actions and monitor sessions.
14
+
15
+ cross expands the capability-safety frontier: with a babysitter watching, you can give agents more rope. You can stop pre-screening tool calls without feeling a pit in your stomach. More throughput, fewer surprises.
16
+
17
+ ## Quick Start
18
+
19
+ ```bash
20
+ pip install cross-ai # or: pip install cross-ai[slack] for Slack integration
21
+ cross setup # interactive setup wizard (starts daemon automatically on macOS)
22
+ ```
23
+
24
+ Or install from source:
25
+
26
+ ```bash
27
+ git clone https://github.com/rfdearborn/cross && cd cross
28
+ pip install -e ".[dev]"
29
+ ```
30
+
31
+ Then point your agent's API traffic through cross. For Claude Code, add a shell wrapper:
32
+
33
+ ```bash
34
+ # In your .zshrc / .bashrc
35
+ claude() { cross wrap -- claude "$@"; }
36
+ ```
37
+
38
+ `cross wrap` does two things: wraps the agent in a PTY for I/O control, and routes API traffic through the local proxy (`ANTHROPIC_BASE_URL=http://localhost:2767`) for structured monitoring and gating.
39
+
40
+ ## How It Works
41
+
42
+ cross evaluates every tool call through a 3-layer pipeline:
43
+
44
+ ```
45
+ Tool call arrives
46
+ |
47
+ v
48
+ +-----------+ no match +----------+
49
+ | Denylist | --------------> | ALLOW |
50
+ | Gate | +----------+
51
+ +-----------+
52
+ | match
53
+ v
54
+ +-----------+ false pos +----------+
55
+ | LLM Gate | --------------> | ALLOW |
56
+ | (review) | +----------+
57
+ +-----------+
58
+ | confirmed
59
+ v
60
+ BLOCK / ESCALATE
61
+ (feedback to agent / human review)
62
+
63
+ --- meanwhile, in the background ---
64
+
65
+ +-----------+
66
+ | Sentinel | periodic review of all activity
67
+ +-----------+
68
+ ```
69
+
70
+ **Layer 1: Denylist gate** -- Fast deterministic pattern matching against YAML rules. Sub-millisecond, zero cost. Catches destructive commands (`rm -rf /`), credential exfiltration, reverse shells, system path writes, and more. Intentionally broad -- it's a triage filter, not a final judge.
71
+
72
+ **Layer 2: LLM gate** -- Only invoked when the denylist flags a call. Reviews the tool call with full context (user intent, recent tool history, why it was flagged) and renders a verdict: ALLOW (false positive), BLOCK (confirmed dangerous), or ESCALATE (needs human review). This is why the denylist can be aggressive without generating noise -- the LLM catches false positives.
73
+
74
+ **Layer 3: Sentinel** -- Async periodic reviewer that watches the full event stream over time. Detects patterns that per-call evaluation misses: tool calls that don't match user intent, suspicious sequences (read credentials then network call), escalating privilege patterns, and agents working around restrictions. Reports to the dashboard and Slack.
75
+
76
+ Blocked tool calls are suppressed from the API response stream. The proxy automatically retries with the block reason injected, so the agent self-corrects without user intervention. For critical threats (credential exfiltration, reverse shells), the session is halted entirely until a human intervenes.
77
+
78
+ ## Supported Agents
79
+
80
+ - **Claude Code** -- validated, full PTY + proxy + tool-level gating
81
+ - **OpenClaw** -- validated, PTY + tool-level gating via `beforeToolCall` hook. `cross wrap -- openclaw` automatically injects a Node.js hook that gates every tool call through the cross daemon
82
+ - **Any CLI agent** -- `cross wrap -- <agent-command>` provides PTY wrapping and API proxy for any CLI agent
83
+ - **Any agent using Anthropic APIs** -- set `ANTHROPIC_BASE_URL=http://localhost:2767`
84
+
85
+ ## Dashboard
86
+
87
+ cross ships with a built-in web dashboard at `http://localhost:2767`. No dependencies, no setup -- it's always active when the daemon is running.
88
+
89
+ The dashboard shows:
90
+ - **Pending approvals** -- escalated tool calls waiting for human review, with Approve/Deny buttons
91
+ - **Live event feed** -- real-time stream of tool calls, gate decisions, and sentinel reviews
92
+
93
+ You can also manage pending escalations from the CLI:
94
+
95
+ ```bash
96
+ cross pending # list pending escalations
97
+ cross pending approve <tool_use_id> # approve
98
+ cross pending deny <tool_use_id> # deny
99
+ ```
100
+
101
+ ## Configuration
102
+
103
+ ### LLM Providers
104
+
105
+ cross uses LLMs for the gate reviewer and sentinel. The default is `claude` (`cli/claude`), which uses your existing Claude Code subscription -- no API key needed. You can also use any other supported provider:
106
+
107
+ | Provider | Model format | API key env var | Notes |
108
+ |----------|-------------|-----------------|-------|
109
+ | Claude Code | `cli/claude` (or just `claude`) | (none needed) | Default. Uses your Claude subscription via `claude -p` |
110
+ | Google Gemini | `google/gemini-3-flash-preview` | `GOOGLE_API_KEY` | Free tier available |
111
+ | Anthropic | `anthropic/claude-haiku-4-5` | `ANTHROPIC_API_KEY` | |
112
+ | OpenAI | `openai/gpt-4o` | `OPENAI_API_KEY` | |
113
+ | Ollama | `ollama/llama3` | (none needed) | Local models |
114
+
115
+ Configure via environment variables (all prefixed `CROSS_`):
116
+
117
+ ```bash
118
+ # LLM gate (default uses Claude Code, no key needed)
119
+ CROSS_LLM_GATE_MODEL=cli/claude
120
+
121
+ # Or use an API provider
122
+ CROSS_LLM_GATE_MODEL=google/gemini-3-flash-preview
123
+ CROSS_LLM_GATE_API_KEY=... # or set GOOGLE_API_KEY
124
+
125
+ # Sentinel
126
+ CROSS_LLM_SENTINEL_MODEL=cli/claude
127
+ CROSS_LLM_SENTINEL_INTERVAL_SECONDS=60
128
+ ```
129
+
130
+ Or use `cross setup` for guided interactive configuration.
131
+
132
+ ### Denylist Rules
133
+
134
+ Default rules ship with cross and cover destructive commands, credential exfiltration, reverse shells, and system path writes. Customize with YAML files in `~/.cross/rules.d/`:
135
+
136
+ ```yaml
137
+ # ~/.cross/rules.d/my-rules.yaml
138
+ rules:
139
+ - name: no-docker-push
140
+ tools: [Bash]
141
+ field: command
142
+ action: block
143
+ description: Prevent pushing Docker images
144
+ patterns:
145
+ - 'docker\s+push\b'
146
+
147
+ # Disable a default rule by name
148
+ disable:
149
+ - destructive-rm
150
+ ```
151
+
152
+ Rules support `patterns` (regex, case-insensitive) and `contains` (substring matching), and can target specific tools and input fields.
153
+
154
+ ### All Settings
155
+
156
+ Settings can be set via environment variables (`CROSS_` prefix) or `.env` files. cross loads `~/.cross/local.env` (personal overrides, survives `cross setup`), then `~/.cross/.env` (generated by setup), then `.env` in the working directory:
157
+
158
+ | Setting | Default | Description |
159
+ |---------|---------|-------------|
160
+ | `listen_port` | 2767 | Proxy listen port |
161
+ | `gating_enabled` | true | Enable the denylist gate |
162
+ | `llm_gate_enabled` | true | Enable LLM review of flagged calls |
163
+ | `llm_gate_shadow` | false | Shadow mode: the LLM's verdict is reported as a recommendation, but every reviewed call is escalated so a human makes the final call |
164
+ | `llm_gate_threshold` | escalate | Min denylist action to trigger LLM review |
165
+ | `llm_sentinel_enabled` | true | Enable periodic LLM sentinel reviews |
166
+ | `llm_sentinel_interval_seconds` | 60 | Seconds between sentinel review cycles |
167
+ | `gate_approval_timeout` | 300 | Seconds to wait for human approval on escalation |
168
+ | `rules_dir` | ~/.cross/rules.d | Custom rules directory |
169
+
170
+ ## Architecture
171
+
172
+ cross uses two complementary interception layers:
173
+
174
+ **PTY wrapper** (`cross wrap`) -- Wraps any CLI agent in a pseudo-terminal for full I/O control. Enables bidirectional messaging relay (Slack/dashboard to agent), terminal-to-phone handoff, and session management. Agent-agnostic.
175
+
176
+ **Network proxy** -- Intercepts API traffic via `ANTHROPIC_BASE_URL` redirect. Parses streaming SSE responses, buffers tool_use blocks for gate evaluation, and suppresses blocked calls from the response stream. Provides structured monitoring with zero agent modification.
177
+
178
+ Both layers are coordinated by the daemon (`cross daemon`), which runs the proxy, gate chain, sentinel, dashboard, and optional Slack plugin as a single process.
179
+
180
+ ## Notification Channels
181
+
182
+ - **Web dashboard** (default) -- zero dependencies, always active at `/cross/dashboard`
183
+ - **Slack** (optional) -- gate decisions, sentinel reviews, and interactive approval buttons. Configure with `CROSS_SLACK_BOT_TOKEN` and `CROSS_SLACK_APP_TOKEN`. Install the `slack` extra: `pip install cross-ai[slack]`.
184
+
185
+ ## Development
186
+
187
+ ```bash
188
+ git clone https://github.com/rfdearborn/cross
189
+ cd cross
190
+ pip install -e ".[dev,slack]"
191
+ python -m pytest tests/
192
+ ```
193
+
194
+ Ruff for linting (`ruff check`).
195
+
196
+ ## License
197
+
198
+ MIT
@@ -0,0 +1,3 @@
1
+ """cross — Configurable Runtime Oversight and Safety Standard."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ from cross.cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
@@ -0,0 +1,40 @@
1
+ """ANSI escape code stripping and terminal output cleaning."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
# Matches ANSI escape sequences comprehensively.
# The CSI branch follows the ECMA-48 layout: any number of parameter bytes
# (0x30-0x3F, i.e. digits and ":;<=>?"), any number of intermediate bytes
# (0x20-0x2F), then one final byte (0x40-0x7E). This covers sequences such as
# "ESC[>1u", "ESC[2 q" and "ESC[200~" in addition to plain "ESC[31m".
_ANSI_RE = re.compile(
    r"\x1b"  # ESC character
    r"(?:"
    r"\[[0-?]*[ -/]*[@-~]"  # CSI sequences: ESC [ params intermediates final
    r"|\][^\x07\x1b]*(?:\x07|\x1b\\)"  # OSC sequences (terminated by BEL or ESC \)
    r"|[()][AB012]"  # Character set selection
    r"|[=>]"  # Keypad modes
    r"|."  # Other single-char escapes
    r")"
)

# Leftover partial CSI params (e.g. "38;2;248;242m" without the ESC[)
# Require 2+ semicolon-separated numbers to avoid false positives on normal text
_PARTIAL_CSI_RE = re.compile(r"\d+(?:;\d+)+[mGKHJABCDfsu]")

# Control characters to strip (except newline, tab, carriage return).
# ESC (\x1b) is included: by the time this runs, every ESC that starts a
# sequence has been consumed by _ANSI_RE, so any ESC left over is a stray one
# (e.g. at the very end of the data or directly before a newline).
_CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]")

# Box drawing and decorative characters that clutter output
_DECORATION_RE = re.compile(r"[╌─━┌┐└┘├┤┬┴┼╭╮╯╰│║═]+")

# Whitespace normalisation applied after everything else has been removed
_SPACES_RE = re.compile(r"[ \t]+")
_BLANK_LINES_RE = re.compile(r"\n{3,}")


def strip_ansi(data: bytes) -> str:
    """Strip ANSI escape codes, control characters, and decoration from terminal output.

    Args:
        data: Raw terminal bytes. Decoded as UTF-8; undecodable bytes are
            replaced rather than raising.

    Returns:
        The cleaned text, with runs of spaces/tabs collapsed to a single space
        and runs of three or more newlines collapsed to two.
    """
    text = data.decode("utf-8", errors="replace")
    # Order matters: full escape sequences first, then parameter fragments
    # that lost their "ESC[" prefix, then any remaining control characters.
    text = _ANSI_RE.sub("", text)
    text = _PARTIAL_CSI_RE.sub("", text)
    text = _CTRL_RE.sub("", text)
    text = _DECORATION_RE.sub("", text)
    # Collapse multiple spaces/blank lines
    text = _SPACES_RE.sub(" ", text)
    text = _BLANK_LINES_RE.sub("\n\n", text)
    return text
@@ -0,0 +1,147 @@
1
+ """Evaluator chain — runs gate evaluators and aggregates results.
2
+
3
+ Two-stage evaluation:
4
+ Stage 1: Run all gates (denylist, etc.), max action wins.
5
+ Stage 2: If result >= threshold AND review gate exists, run LLM review.
6
+ Review verdict overrides stage 1 (catches false positives).
7
+ On ABSTAIN/error, stage 1 result stands.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import copy
14
+ import logging
15
+ import time
16
+
17
+ from cross.config import settings
18
+ from cross.evaluator import Action, EvaluationResponse, Gate, GateRequest
19
+
20
+ logger = logging.getLogger("cross.chain")
21
+
22
+
23
class GateChain:
    """Runs gate evaluators against a tool call, with optional LLM review stage.

    Stage 1 runs every gate in ``gates`` in order and keeps the response whose
    ``action.value`` is highest. Stage 2 runs ``review_gate`` when the stage-1
    action is at or above ``review_threshold``; a non-abstaining review verdict
    replaces the stage-1 result, otherwise the stage-1 result stands.
    """

    def __init__(
        self,
        gates: list[Gate] | None = None,
        review_gate: Gate | None = None,
        review_threshold: Action = Action.BLOCK,
    ):
        """Create a chain.

        Args:
            gates: Stage-1 gates. A non-empty list is stored by reference, so
                later ``add`` calls also append to the caller's list.
            review_gate: Optional stage-2 reviewer.
            review_threshold: Minimum stage-1 action (compared by ``.value``)
                that triggers the reviewer.
        """
        self.gates: list[Gate] = gates or []
        self.review_gate: Gate | None = review_gate
        self.review_threshold: Action = review_threshold

    def add(self, gate: Gate) -> None:
        """Append a gate to the stage-1 list."""
        self.gates.append(gate)

    async def evaluate(self, request: GateRequest) -> EvaluationResponse:
        """Run all gates, optionally escalate to LLM review.

        Returns the review verdict when the reviewer ran and produced one,
        otherwise the stage-1 result.
        """
        # Stage 1: run all gates
        stage1_result = await self._run_gates(request)

        # Stage 2: LLM review if result >= threshold
        if (
            self.review_gate
            and stage1_result.action.value >= self.review_threshold.value
            and stage1_result.action != Action.ABSTAIN
        ):
            review_result = await self._run_review(request, stage1_result)
            if review_result is not None:
                return review_result
            # ABSTAIN/error → stage 1 stands

        return stage1_result

    async def _run_gates(self, request: GateRequest) -> EvaluationResponse:
        """Stage 1: run all gates, max action wins.

        A gate that times out or raises contributes a response carrying its
        own ``on_error`` action. With no gates, or when the winning action is
        ABSTAIN, the result is ALLOW.
        """
        if not self.gates:
            return EvaluationResponse(action=Action.ALLOW, evaluator="chain:empty")

        responses: list[EvaluationResponse] = []

        for gate in self.gates:
            start = time.monotonic()
            try:
                timeout_s = gate.timeout_ms / 1000.0
                resp = await asyncio.wait_for(gate.evaluate(request), timeout=timeout_s)
            except asyncio.TimeoutError:
                elapsed_ms = (time.monotonic() - start) * 1000
                logger.warning(
                    "Gate '%s' timed out after %.1fms (limit: %sms), using on_error=%s",
                    gate.name,
                    elapsed_ms,
                    gate.timeout_ms,
                    gate.on_error.name,
                )
                resp = EvaluationResponse(
                    action=gate.on_error,
                    reason=f"Gate timed out after {elapsed_ms:.1f}ms",
                    evaluator=gate.name,
                )
            except Exception as e:
                logger.exception("Gate '%s' raised: %s", gate.name, e)
                resp = EvaluationResponse(
                    action=gate.on_error,
                    reason=f"Gate error: {e}",
                    evaluator=gate.name,
                )
            elapsed_ms = (time.monotonic() - start) * 1000
            resp.duration_ms = elapsed_ms

            # Attribute the response to its gate when the gate did not name itself.
            if not resp.evaluator:
                resp.evaluator = gate.name

            responses.append(resp)

        # Max action wins
        max_response = max(responses, key=lambda r: r.action.value)

        # If everything abstained, treat as allow
        if max_response.action == Action.ABSTAIN:
            return EvaluationResponse(action=Action.ALLOW, evaluator="chain:all_abstained")

        return max_response

    async def _run_review(self, request: GateRequest, stage1_result: EvaluationResponse) -> EvaluationResponse | None:
        """Stage 2: run LLM review gate. Returns None if review abstains/errors.

        The reviewer receives a shallow copy of ``request`` with
        ``prior_result`` set to the stage-1 result. When
        ``settings.llm_gate_shadow`` is set, the reviewer's verdict is wrapped
        in an ESCALATE response instead of being returned directly.
        """
        review_gate = self.review_gate
        if review_gate is None:
            # No reviewer configured: nothing can override stage 1.
            return None

        # Copy request to avoid mutating the caller's object
        request = copy.copy(request)
        request.prior_result = stage1_result

        start = time.monotonic()
        try:
            timeout_s = review_gate.timeout_ms / 1000.0
            resp = await asyncio.wait_for(review_gate.evaluate(request), timeout=timeout_s)
        except asyncio.TimeoutError:
            elapsed_ms = (time.monotonic() - start) * 1000
            logger.warning("Review gate timed out after %.1fms, keeping stage-1 result", elapsed_ms)
            return None
        except Exception as e:
            # Keep the traceback so reviewer failures can be diagnosed.
            logger.warning("Review gate error: %s, keeping stage-1 result", e, exc_info=True)
            return None

        elapsed_ms = (time.monotonic() - start) * 1000
        resp.duration_ms = elapsed_ms

        if resp.action == Action.ABSTAIN:
            logger.info("Review gate abstained, keeping stage-1 result")
            return None

        # Shadow mode: LLM decides, but escalate to human with LLM's reasoning
        if settings.llm_gate_shadow:
            shadow_reason = f"[Shadow] LLM gate would {resp.action.name}: {resp.reason}"
            logger.info("Shadow mode — escalating to human. %s", shadow_reason[:150])
            return EvaluationResponse(
                action=Action.ESCALATE,
                reason=shadow_reason,
                evaluator=f"{resp.evaluator}:shadow",
                confidence=resp.confidence,
                duration_ms=resp.duration_ms,
                metadata={"shadow_verdict": resp.action.name, "shadow_reason": resp.reason},
            )

        # Guard the slice: a reviewer that supplies no reason must not turn a
        # valid verdict into a crash in the logging call.
        logger.info(
            "Review gate overrides stage-1 (%s → %s): %s",
            stage1_result.action.name,
            resp.action.name,
            (resp.reason or "")[:100],
        )
        return resp