aleph_rlm-0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aleph/types.py ADDED
@@ -0,0 +1,216 @@
+ """Shared type definitions for Aleph.
+
+ The library is intentionally type-rich so it works well with pyright/mypy.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from datetime import datetime
+ from enum import Enum
+ from typing import Awaitable, Callable, Literal, TypeAlias
+
+
+ # -----------------------------------------------------------------------------
+ # Context Types
+ # -----------------------------------------------------------------------------
+
+ class ContentFormat(Enum):
+     """Detected or specified format of context data."""
+
+     TEXT = "text"
+     JSON = "json"
+     JSONL = "jsonl"
+     CSV = "csv"
+     CODE = "code"
+     BINARY = "binary"
+     MIXED = "mixed"
+
+
+ @dataclass(slots=True)
+ class ContextMetadata:
+     """Metadata about the loaded context (shown to the root LLM)."""
+
+     format: ContentFormat
+     size_bytes: int
+     size_chars: int
+     size_lines: int
+     size_tokens_estimate: int
+     structure_hint: str | None
+     sample_preview: str
+
+
+ @dataclass(slots=True)
+ class ContextCollection:
+     """A multi-document context (e.g., a corpus of files)."""
+
+     items: list[tuple[str, ContextType]]
+     total_size_bytes: int = 0
+     total_size_tokens_estimate: int = 0
+
+
+ # A single context payload can be text, bytes, JSON-like, or a collection.
+ JsonScalar: TypeAlias = str | int | float | bool | None
+ JSONValue: TypeAlias = JsonScalar | list["JSONValue"] | dict[str, "JSONValue"]
+ ContextType: TypeAlias = str | bytes | JSONValue | ContextCollection
+
+
+ # -----------------------------------------------------------------------------
+ # Execution Types
+ # -----------------------------------------------------------------------------
+
+ @dataclass(slots=True)
+ class ExecutionResult:
+     """Result of executing code in the sandbox REPL."""
+
+     stdout: str
+     stderr: str
+     return_value: object | None
+     variables_updated: list[str]
+     truncated: bool
+     execution_time_ms: float
+     error: str | None
+
+
+ @dataclass(slots=True)
+ class SubQueryResult:
+     """Result of a recursive LLM call (sub_query or sub_aleph)."""
+
+     answer: str
+     tokens_input: int
+     tokens_output: int
+     cost_usd: float
+     model_used: str
+     depth: int
+
+
+ # -----------------------------------------------------------------------------
+ # Action Types (parsed from LLM output)
+ # -----------------------------------------------------------------------------
+
+ class ActionType(Enum):
+     CODE_BLOCK = "code"  # execute python
+     TOOL_CALL = "tool"  # not used by v1 core, but reserved
+     FINAL_ANSWER = "final"
+     FINAL_VAR = "final_var"
+     CONTINUE = "continue"
+
+
+ @dataclass(slots=True)
+ class ParsedAction:
+     """Parsed instruction from the LLM response."""
+
+     action_type: ActionType
+     content: str
+     raw_response: str
+
+
+ # -----------------------------------------------------------------------------
+ # Trajectory / Observability Types
+ # -----------------------------------------------------------------------------
+
+ @dataclass(slots=True)
+ class TrajectoryStep:
+     """Single step in the Aleph execution trace."""
+
+     step_number: int
+     depth: int
+     timestamp: datetime
+
+     prompt_tokens: int
+     prompt_summary: str
+
+     action: ParsedAction
+
+     result: ExecutionResult | SubQueryResult | str
+     result_tokens: int
+
+     cumulative_tokens: int
+     cumulative_cost: float
+
+
+ @dataclass(slots=True)
+ class AlephResponse:
+     """Final response from an Aleph call."""
+
+     answer: str
+     success: bool
+
+     total_iterations: int
+     max_depth_reached: int
+     total_tokens: int
+     total_cost_usd: float
+     wall_time_seconds: float
+
+     trajectory: list[TrajectoryStep]
+
+     error: str | None = None
+     error_type: (
+         Literal[
+             "budget_exceeded",
+             "max_iterations",
+             "execution_error",
+             "provider_error",
+             "no_final",
+         ]
+         | None
+     ) = None
+
+
+ # -----------------------------------------------------------------------------
+ # Budget Types
+ # -----------------------------------------------------------------------------
+
+ @dataclass(slots=True)
+ class Budget:
+     """Resource limits for an Aleph call."""
+
+     max_tokens: int | None = None
+     max_iterations: int | None = 100
+     max_depth: int | None = 2
+     max_wall_time_seconds: float | None = 300.0
+     max_sub_queries: int | None = 100
+
+
+ @dataclass(slots=True)
+ class BudgetStatus:
+     """Current budget consumption."""
+
+     tokens_used: int = 0
+     cost_used: float = 0.0
+     iterations_used: int = 0
+     depth_current: int = 0
+     wall_time_used: float = 0.0
+     sub_queries_used: int = 0
+
+     def exceeds(self, budget: Budget) -> tuple[bool, str | None]:
+         """Return (exceeded, reason)."""
+
+         if budget.max_tokens is not None and self.tokens_used > budget.max_tokens:
+             return True, f"Token budget exceeded: used {self.tokens_used} > max {budget.max_tokens}"
+
+         if budget.max_iterations is not None and self.iterations_used > budget.max_iterations:
+             return True, f"Iteration budget exceeded: used {self.iterations_used} > max {budget.max_iterations}"
+
+         if budget.max_depth is not None and self.depth_current > budget.max_depth:
+             return True, f"Depth budget exceeded: current {self.depth_current} > max {budget.max_depth}"
+
+         if budget.max_wall_time_seconds is not None and self.wall_time_used > budget.max_wall_time_seconds:
+             return (
+                 True,
+                 f"Wall-time budget exceeded: used {self.wall_time_used:.2f}s > max {budget.max_wall_time_seconds:.2f}s",
+             )
+
+         if budget.max_sub_queries is not None and self.sub_queries_used > budget.max_sub_queries:
+             return True, f"Sub-query budget exceeded: used {self.sub_queries_used} > max {budget.max_sub_queries}"
+
+         return False, None
+
+
+ # -----------------------------------------------------------------------------
+ # Convenience types
+ # -----------------------------------------------------------------------------
+
+ Message = dict[str, str]
+ SubQueryFn: TypeAlias = Callable[[str, str | None], str | Awaitable[str]]
+ SubAlephFn: TypeAlias = Callable[[str, ContextType | None], AlephResponse | Awaitable[AlephResponse]]
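A minimal usage sketch for the budget types above (editorial, not part of the wheel). It assumes `aleph.types` imports as laid out in this diff:

```python
from aleph.types import Budget, BudgetStatus

budget = Budget(max_iterations=3, max_wall_time_seconds=60.0)
status = BudgetStatus()

for _ in range(5):
    status.iterations_used += 1
    exceeded, reason = status.exceeds(budget)  # strict ">", so it trips at 4
    if exceeded:
        print(reason)  # Iteration budget exceeded: used 4 > max 3
        break
```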
aleph/utils/__init__.py ADDED
@@ -0,0 +1,6 @@
+ """Misc utilities."""
+
+ from .tokens import estimate_tokens
+ from .logging import TrajectoryLogger
+
+ __all__ = ["estimate_tokens", "TrajectoryLogger"]
aleph/utils/logging.py ADDED
@@ -0,0 +1,82 @@
+ """Observability utilities.
+
+ The core Aleph API always returns a full trajectory (unless disabled). This
+ module provides small helpers for pretty-printing and exporting it.
+
+ Optional: install `rich` to get nicer console output.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from dataclasses import asdict
+ from pathlib import Path
+ from typing import Iterable, cast
+
+ from ..types import TrajectoryStep
+
+
+ class TrajectoryLogger:
+     """Logs Aleph trajectory steps to the standard logging system and/or a file."""
+
+     def __init__(
+         self,
+         name: str = "aleph",
+         level: str | int = "INFO",
+         jsonl_path: str | Path | None = None,
+         use_rich: bool = True,
+     ) -> None:
+         self._logger = logging.getLogger(name)
+         self._logger.setLevel(level)
+         if not self._logger.handlers:
+             handler = logging.StreamHandler()
+             fmt = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
+             handler.setFormatter(fmt)
+             self._logger.addHandler(handler)
+
+         self._jsonl_path = Path(jsonl_path) if jsonl_path else None
+         self._use_rich = use_rich
+
+         self._rich_console = None
+         if use_rich:
+             try:
+                 from rich.console import Console
+
+                 self._rich_console = Console()
+             except Exception:
+                 self._rich_console = None
+
+     def log_step(self, step: TrajectoryStep) -> None:
+         msg = self._format_step(step)
+         if self._rich_console is not None:
+             self._rich_console.print(msg)
+         else:
+             self._logger.info(msg)
+
+         if self._jsonl_path is not None:
+             self._jsonl_path.parent.mkdir(parents=True, exist_ok=True)
+             with self._jsonl_path.open("a", encoding="utf-8") as f:
+                 # default=str guards against non-serializable values (e.g. return_value)
+                 f.write(json.dumps(_step_to_json(step), ensure_ascii=False, default=str) + "\n")
+
+     def _format_step(self, step: TrajectoryStep) -> str:
+         act = step.action.action_type.value
+         return (
+             f"[{step.step_number}] depth={step.depth} act={act} "
+             f"prompt_toks={step.prompt_tokens} result_toks={step.result_tokens} "
+             f"cum_toks={step.cumulative_tokens} cost=${step.cumulative_cost:.4f}"
+         )
+
+
+ def _step_to_json(step: TrajectoryStep) -> dict[str, object]:
+     d = cast(dict[str, object], asdict(step))
+     # datetime and Enum values aren't JSON serializable by default
+     d["timestamp"] = step.timestamp.isoformat()
+     action = cast(dict[str, object], d["action"])
+     action["action_type"] = step.action.action_type.value
+     return d
+
+
+ def trajectory_to_json(trajectory: Iterable[TrajectoryStep]) -> list[dict[str, object]]:
+     return [_step_to_json(s) for s in trajectory]
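A short sketch (not from the package) that builds one `TrajectoryStep` from the dataclasses in `aleph/types.py` above and logs it; the field values are invented for illustration:

```python
from datetime import datetime, timezone

from aleph.types import ActionType, ExecutionResult, ParsedAction, TrajectoryStep
from aleph.utils.logging import TrajectoryLogger

logger = TrajectoryLogger(jsonl_path="traces/run.jsonl", use_rich=False)

step = TrajectoryStep(
    step_number=1,
    depth=0,
    timestamp=datetime.now(timezone.utc),
    prompt_tokens=120,
    prompt_summary="count ERROR lines in logs",
    action=ParsedAction(ActionType.CODE_BLOCK, "print(line_count())", "..."),
    result=ExecutionResult("7\n", "", None, [], False, 1.2, None),
    result_tokens=3,
    cumulative_tokens=123,
    cumulative_cost=0.0005,
)
logger.log_step(step)  # one console line plus one JSONL record
```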
aleph/utils/tokens.py ADDED
@@ -0,0 +1,43 @@
+ """Token counting utilities.
+
+ Aleph aims to work with minimal dependencies, so by default it uses a rough
+ character-based estimate: ~4 chars per token.
+
+ If optional libraries are installed, providers may use more accurate counters.
+ """
+
+ from __future__ import annotations
+
+ from typing import Optional
+
+
+ def estimate_tokens(text: str) -> int:
+     """Rough token estimate (works reasonably well for English text)."""
+
+     if not text:
+         return 0
+     # heuristic: 1 token ~ 4 characters
+     return max(1, len(text) // 4)
+
+
+ def try_count_tokens_tiktoken(text: str, model: str) -> Optional[int]:
+     """Best-effort token counting using tiktoken (if installed)."""
+
+     try:
+         import tiktoken
+     except Exception:
+         return None
+
+     try:
+         enc = tiktoken.encoding_for_model(model)
+     except Exception:
+         # Fall back to a common encoding used by OpenAI chat models.
+         try:
+             enc = tiktoken.get_encoding("cl100k_base")
+         except Exception:
+             return None
+
+     try:
+         return len(enc.encode(text))
+     except Exception:
+         return None
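A quick editorial sketch contrasting the built-in heuristic with the optional tiktoken path; the model name is illustrative:

```python
from aleph.utils.tokens import estimate_tokens, try_count_tokens_tiktoken

text = "Aleph keeps raw context out of the model's window."
print(estimate_tokens(text))                      # heuristic: len(text) // 4
print(try_count_tokens_tiktoken(text, "gpt-4o"))  # None unless tiktoken is installed
```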
aleph_rlm-0.6.0.dist-info/METADATA ADDED
@@ -0,0 +1,358 @@
+ Metadata-Version: 2.4
+ Name: aleph-rlm
+ Version: 0.6.0
+ Summary: MCP server for recursive LLM reasoning—load context, iterate with search/code/think tools, converge on answers
+ Project-URL: Homepage, https://github.com/Hmbown/aleph
+ Author: Aleph Contributors
+ License: MIT License
+
+ Copyright (c) 2025 Aleph Contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ License-File: LICENSE
+ Requires-Python: >=3.10
+ Requires-Dist: httpx>=0.27.0
+ Provides-Extra: dev
+ Requires-Dist: mypy>=1.8.0; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
+ Provides-Extra: mcp
+ Requires-Dist: mcp>=1.0.0; extra == 'mcp'
+ Provides-Extra: openai-tokens
+ Requires-Dist: tiktoken>=0.7.0; extra == 'openai-tokens'
+ Provides-Extra: rich
+ Requires-Dist: rich>=13.0.0; extra == 'rich'
+ Provides-Extra: yaml
+ Requires-Dist: pyyaml>=6.0; extra == 'yaml'
+ Description-Content-Type: text/markdown
+
+ # Aleph
+
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+ [![PyPI version](https://img.shields.io/pypi/v/aleph-rlm.svg)](https://pypi.org/project/aleph-rlm/)
+
+ **Your RAM is the new context window.**
+
+ Aleph is an [MCP server](https://modelcontextprotocol.io/) that gives any LLM access to gigabytes of local data without consuming context. Load massive files into a Python process—the model explores them via search, slicing, and sandboxed code execution. Only results enter the context window, never the raw content.
+
+ Based on the [Recursive Language Model](https://arxiv.org/abs/2512.24601) (RLM) architecture.
+
+ ## Use Cases
+
+ | Scenario | What Aleph Does |
+ |----------|-----------------|
+ | **Large log analysis** | Load 500MB of logs, search for patterns, correlate across time ranges |
+ | **Codebase navigation** | Load entire repos, find definitions, trace call chains, extract architecture |
+ | **Data exploration** | JSON exports, CSV files, API responses—explore interactively with Python |
+ | **Mixed document ingestion** | Load PDFs, Word docs, HTML, and logs like plain text |
+ | **Semantic search** | Find relevant sections by meaning, then zoom in with peek |
+ | **Research sessions** | Save/resume sessions, track evidence with citations, spawn sub-queries |
+
+ ## Requirements
+
+ - Python 3.10+
+ - An MCP-compatible client: [Claude Code](https://claude.ai/code), [Cursor](https://cursor.sh), [VS Code](https://code.visualstudio.com/), [Windsurf](https://codeium.com/windsurf), [Codex CLI](https://github.com/openai/codex), or [Claude Desktop](https://claude.ai/download)
+
+ ## Quickstart
+
+ ### 1. Install
+
+ ```bash
+ pip install "aleph-rlm[mcp]"
+ ```
+
+ ### 2. Configure your MCP client
+
+ **Automatic** (recommended):
+ ```bash
+ aleph-rlm install
+ ```
+
+ This auto-detects your installed clients and configures them.
+
+ **Manual** (any MCP client):
+ ```json
+ {
+   "mcpServers": {
+     "aleph": {
+       "command": "aleph",
+       "args": ["--enable-actions", "--workspace-mode", "any"]
+     }
+   }
+ }
+ ```
+
+ <details>
+ <summary><strong>Config file locations</strong></summary>
+
+ | Client | macOS/Linux | Windows |
+ |--------|-------------|---------|
+ | Claude Code | `~/.claude/settings.json` | `%USERPROFILE%\.claude\settings.json` |
+ | Claude Desktop | `~/Library/Application Support/Claude/claude_desktop_config.json` | `%APPDATA%\Claude\claude_desktop_config.json` |
+ | Cursor | `~/.cursor/mcp.json` | `%USERPROFILE%\.cursor\mcp.json` |
+ | VS Code | `~/.vscode/mcp.json` | `%USERPROFILE%\.vscode\mcp.json` |
+ | Codex CLI | `~/.codex/config.toml` | `%USERPROFILE%\.codex\config.toml` |
+
+ </details>
+
+ See [MCP_SETUP.md](MCP_SETUP.md) for detailed instructions.
+
+ ### 3. Verify
+
+ In your assistant, run:
+ ```
+ get_status()
+ ```
+
+ If using Claude Code, tools are prefixed: `mcp__aleph__get_status`.
+
+ ## AI Assistant Setup (MCP + `/aleph` Skill) — Copy/Paste
+
+ Paste this into any AI coding assistant or agentic coder to add Aleph (MCP server + `/aleph` skill):
+
+ ```
+ You are an AI coding assistant. Please set up Aleph (Model Context Protocol / MCP).
+
+ 1) Add the Aleph MCP server config:
+    {
+      "mcpServers": {
+        "aleph": {
+          "command": "aleph",
+          "args": ["--enable-actions", "--workspace-mode", "any"]
+        }
+      }
+    }
+
+ 2) Install the /aleph skill prompt:
+    - Claude Code: copy docs/prompts/aleph.md -> ~/.claude/commands/aleph.md
+    - Codex CLI: copy docs/prompts/aleph.md -> ~/.codex/skills/aleph/SKILL.md
+    - Gemini CLI: copy docs/prompts/aleph.md -> ~/.gemini/skills/aleph/SKILL.md
+      Ensure ~/.gemini/settings.json has "experimental": { "skills": true } and restart.
+    If this client uses a different skill/command folder, ask me where to place it.
+
+ 3) Verify: run get_status() or list_contexts().
+    If tools are namespaced, use mcp__aleph__get_status or mcp__aleph__list_contexts.
+
+ 4) (Optional) Enable sub_query (recursive sub-agent):
+    - CLI backend (no API key): set ALEPH_SUB_QUERY_BACKEND=claude|codex|gemini
+    - API backend: set ALEPH_SUB_QUERY_API_KEY + ALEPH_SUB_QUERY_MODEL (+ optional ALEPH_SUB_QUERY_URL)
+    If env vars can't be set in the MCP config, add them to your shell profile and restart.
+
+ 5) Use the skill: /aleph (Claude Code) or $aleph (Codex CLI).
+    Gemini CLI: /skills list (use /skills enable aleph if disabled).
+ ```
+
+ ## The `/aleph` Skill
+
+ The `/aleph` skill is a prompt that teaches your LLM how to use Aleph effectively. It provides workflow patterns, tool guidance, and troubleshooting tips.
+
+ **Note:** Aleph works best when the skill prompt and the MCP server are used together.
+
+ ### What it does
+
+ - Loads files into searchable in-memory contexts
+ - Tracks evidence with citations as you reason
+ - Supports semantic search and fast rg-based codebase search
+ - Enables recursive sub-queries for deep analysis
+ - Persists sessions for later resumption (memory packs)
+
+ ### How to invoke
+
+ | Client | Command |
+ |--------|---------|
+ | Claude Code | `/aleph` |
+ | Codex CLI | `$aleph` |
+
+ For other clients, copy [`docs/prompts/aleph.md`](docs/prompts/aleph.md) and paste it at session start.
+
+ ### Installing the skill
+
+ **Option 1: Direct download** (simplest)
+
+ Download [`docs/prompts/aleph.md`](docs/prompts/aleph.md) and save it to:
+ - **Claude Code:** `~/.claude/commands/aleph.md` (macOS/Linux) or `%USERPROFILE%\.claude\commands\aleph.md` (Windows)
+ - **Codex CLI:** `~/.codex/skills/aleph/SKILL.md` (macOS/Linux) or `%USERPROFILE%\.codex\skills\aleph\SKILL.md` (Windows)
+
+ **Option 2: From installed package**
+
+ <details>
+ <summary>macOS/Linux</summary>
+
+ ```bash
+ # Claude Code
+ mkdir -p ~/.claude/commands
+ cp "$(python -c "import aleph; print(aleph.__path__[0])")/../docs/prompts/aleph.md" ~/.claude/commands/aleph.md
+
+ # Codex CLI
+ mkdir -p ~/.codex/skills/aleph
+ cp "$(python -c "import aleph; print(aleph.__path__[0])")/../docs/prompts/aleph.md" ~/.codex/skills/aleph/SKILL.md
+ ```
+ </details>
+
+ <details>
+ <summary>Windows (PowerShell)</summary>
+
+ ```powershell
+ # Claude Code
+ New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.claude\commands"
+ $alephPath = python -c "import aleph; print(aleph.__path__[0])"
+ Copy-Item "$alephPath\..\docs\prompts\aleph.md" "$env:USERPROFILE\.claude\commands\aleph.md"
+
+ # Codex CLI
+ New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.codex\skills\aleph"
+ Copy-Item "$alephPath\..\docs\prompts\aleph.md" "$env:USERPROFILE\.codex\skills\aleph\SKILL.md"
+ ```
+ </details>
+
+ ## How It Works
+
+ ```
+ ┌───────────────┐    tool calls     ┌────────────────────────┐
+ │  LLM client   │ ────────────────► │  Aleph (Python, RAM)   │
+ │ (limited ctx) │ ◄──────────────── │   search/peek/exec     │
+ └───────────────┘   small results   └────────────────────────┘
+ ```
+
+ 1. **Load** — `load_context` (paste text) or `load_file` (from disk)
+ 2. **Explore** — `search_context`, `semantic_search`, `peek_context`
+ 3. **Compute** — `exec_python` with 100+ built-in helpers
+ 4. **Reason** — `think`, `evaluate_progress`, `get_evidence`
+ 5. **Persist** — `save_session` to resume later
+
+ ### Quick Example
+
+ ```python
+ # Load log data
+ load_context(content=logs, context_id="logs")
+ # → "Context loaded 'logs': 445 chars, 7 lines, ~111 tokens"
+
+ # Search for errors
+ search_context(pattern="ERROR", context_id="logs")
+ # → Found 2 match(es):
+ #   Line 1: 2026-01-15 10:23:45 ERROR [auth] Failed login...
+ #   Line 4: 2026-01-15 10:24:15 ERROR [db] Connection timeout...
+
+ # Extract structured data
+ exec_python(code="emails = extract_emails(); print(emails)", context_id="logs")
+ # → [{'value': 'user@example.com', 'line_num': 0, 'start': 50, 'end': 66}, ...]
+ ```
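A hypothetical follow-up in the same pseudo-call style, pairing `semantic_search` with `peek_context` (both tools appear in the README's tool list, but the parameter names here are illustrative, not confirmed by the package):

```python
# Find sections by meaning, then zoom in on one hit (parameter names assumed).
semantic_search(query="authentication failures", context_id="logs")
peek_context(context_id="logs", start_line=1, num_lines=5)
```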
+
+ ### Advanced Workflows
+
+ **Multi-Context Workflow (code + docs + diffs)**
+
+ Load multiple sources, then compare or reconcile them:
+
+ ```python
+ # Load a design doc and a repo snapshot (or any two sources)
+ load_context(content=design_doc_text, context_id="spec")
+ rg_search(pattern="AuthService|JWT|token", paths=["."], load_context_id="repo_hits", confirm=True)
+
+ # Compare or reconcile
+ diff_contexts(a="spec", b="repo_hits")
+ search_context(pattern="missing|TODO|mismatch", context_id="repo_hits")
+ ```
+
+ **Advanced Querying with `exec_python`**
+
+ Treat `exec_python` as a reasoning tool, not just code execution:
+
+ ```python
+ # Example: extract class names or key sections programmatically
+ exec_python(code="print(extract_classes())", context_id="repo_hits")
+ ```
+
+ ## Tools
+
+ **Core** (always available):
+ - `load_context`, `list_contexts`, `diff_contexts` — manage in-memory data
+ - `search_context`, `semantic_search`, `peek_context`, `chunk_context` — explore data; use `semantic_search` for concepts/fuzzy queries, `search_context` for precise regex
+ - `exec_python`, `get_variable` — compute in sandbox (100+ built-in helpers)
+ - `think`, `evaluate_progress`, `summarize_so_far`, `get_evidence`, `finalize` — structured reasoning
+ - `tasks` — lightweight task tracking per context
+ - `get_status` — session state
+ - `sub_query` — spawn recursive sub-agents (CLI or API backend)
+
+ <details>
+ <summary><strong>exec_python helpers</strong></summary>
+
+ The sandbox includes 100+ helpers that operate on the loaded context:
+
+ | Category | Examples |
+ |----------|----------|
+ | **Extractors** (25) | `extract_emails()`, `extract_urls()`, `extract_dates()`, `extract_ips()`, `extract_functions()` |
+ | **Statistics** (8) | `word_count()`, `line_count()`, `word_frequency()`, `ngrams()` |
+ | **Line operations** (12) | `head()`, `tail()`, `grep()`, `sort_lines()`, `columns()` |
+ | **Text manipulation** (15) | `replace_all()`, `between()`, `truncate()`, `slugify()` |
+ | **Validation** (7) | `is_email()`, `is_url()`, `is_json()`, `is_numeric()` |
+ | **Core** | `peek()`, `lines()`, `search()`, `chunk()`, `cite()` |
+
+ Extractors return `list[dict]` with keys: `value`, `line_num`, `start`, `end`.
+
+ </details>
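Because every extractor returns `list[dict]` with those four keys, results can be post-processed inside the sandbox. A hypothetical call in the README's own style (`extract_ips` comes from the table above; the dedup logic is illustrative):

```python
# One line number per unique IP, computed inside the sandbox.
exec_python(
    code="ips = extract_ips(); print({d['value']: d['line_num'] for d in ips})",
    context_id="logs",
)
```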
+
+ **Action tools** (requires `--enable-actions`):
+ - `load_file`, `read_file`, `write_file` — filesystem (PDFs, Word, HTML, .gz supported)
+ - `run_command`, `run_tests`, `rg_search` — shell + fast repo search
+ - `save_session`, `load_session` — persist state (memory packs)
+ - `add_remote_server`, `list_remote_tools`, `call_remote_tool` — MCP orchestration
+
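A sketch of how the action tools above might chain in a session (same pseudo-call style; argument names beyond those documented in this README are assumptions):

```python
# Load a file from disk, explore it, then persist the session (requires --enable-actions).
load_file(path="logs/app.log.gz", context_id="logs", confirm=True)
search_context(pattern="Connection timeout", context_id="logs")
save_session(session_id="incident-42", confirm=True)
```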
+ ## Configuration
+
+ **Workspace controls:**
+ - `--workspace-root <path>` — root for relative paths (default: git root from invocation cwd)
+ - `--workspace-mode <fixed|git|any>` — path restrictions
+ - `--require-confirmation` — require `confirm=true` on action calls
+ - `ALEPH_WORKSPACE_ROOT` — override workspace root via environment
+
+ **Limits:**
+ - `--max-file-size` — max file read (default: 1GB)
+ - `--max-write-bytes` — max file write (default: 100MB)
+ - `--timeout` — sandbox/command timeout (default: 60s)
+ - `--max-output` — max command output (default: 50,000 chars)
+
+ See [docs/CONFIGURATION.md](docs/CONFIGURATION.md) for all options.
+
+ ## Documentation
+
+ - [MCP_SETUP.md](MCP_SETUP.md) — client configuration
+ - [docs/CONFIGURATION.md](docs/CONFIGURATION.md) — CLI flags and environment variables
+ - [docs/prompts/aleph.md](docs/prompts/aleph.md) — skill prompt and tool reference
+ - [CHANGELOG.md](CHANGELOG.md) — release history
+ - [DEVELOPMENT.md](DEVELOPMENT.md) — contributing guide
+
+ ## Development
+
+ ```bash
+ git clone https://github.com/Hmbown/aleph.git
+ cd aleph
+ pip install -e ".[dev,mcp]"
+ pytest
+ ```
+
+ ## References
+
+ > **Recursive Language Models**
+ > Zhang, A. L., Kraska, T., & Khattab, O. (2025)
+ > [arXiv:2512.24601](https://arxiv.org/abs/2512.24601)
+
+ ## License
+
+ MIT