aleph_rlm-0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aleph/types.py ADDED
@@ -0,0 +1,216 @@
+ """Shared type definitions for Aleph.
+
+ The library is intentionally type-rich so it works well with pyright/mypy.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from datetime import datetime
+ from enum import Enum
+ from typing import Awaitable, Callable, Literal, TypeAlias
+
+
+ # -----------------------------------------------------------------------------
+ # Context Types
+ # -----------------------------------------------------------------------------
+
+ class ContentFormat(Enum):
+     """Detected or specified format of context data."""
+
+     TEXT = "text"
+     JSON = "json"
+     JSONL = "jsonl"
+     CSV = "csv"
+     CODE = "code"
+     BINARY = "binary"
+     MIXED = "mixed"
+
+
+ @dataclass(slots=True)
+ class ContextMetadata:
+     """Metadata about the loaded context (shown to the root LLM)."""
+
+     format: ContentFormat
+     size_bytes: int
+     size_chars: int
+     size_lines: int
+     size_tokens_estimate: int
+     structure_hint: str | None
+     sample_preview: str
+
+
+ @dataclass(slots=True)
+ class ContextCollection:
+     """A multi-document context (e.g., a corpus of files)."""
+
+     items: list[tuple[str, ContextType]]
+     total_size_bytes: int = 0
+     total_size_tokens_estimate: int = 0
+
+
+ # A single context payload can be text, bytes, JSON-like, or a collection.
+ JsonScalar: TypeAlias = str | int | float | bool | None
+ JSONValue: TypeAlias = JsonScalar | list["JSONValue"] | dict[str, "JSONValue"]
+ ContextType: TypeAlias = str | bytes | JSONValue | ContextCollection
+
+
+ # -----------------------------------------------------------------------------
+ # Execution Types
+ # -----------------------------------------------------------------------------
+
+ @dataclass(slots=True)
+ class ExecutionResult:
+     """Result of executing code in the sandbox REPL."""
+
+     stdout: str
+     stderr: str
+     return_value: object | None
+     variables_updated: list[str]
+     truncated: bool
+     execution_time_ms: float
+     error: str | None
+
+
+ @dataclass(slots=True)
+ class SubQueryResult:
+     """Result of a recursive LLM call (sub_query or sub_aleph)."""
+
+     answer: str
+     tokens_input: int
+     tokens_output: int
+     cost_usd: float
+     model_used: str
+     depth: int
+
+
+ # -----------------------------------------------------------------------------
+ # Action Types (parsed from LLM output)
+ # -----------------------------------------------------------------------------
+
+ class ActionType(Enum):
+     CODE_BLOCK = "code"  # execute python
+     TOOL_CALL = "tool"  # not used by v1 core, but reserved
+     FINAL_ANSWER = "final"
+     FINAL_VAR = "final_var"
+     CONTINUE = "continue"
+
+
+ @dataclass(slots=True)
+ class ParsedAction:
+     """Parsed instruction from the LLM response."""
+
+     action_type: ActionType
+     content: str
+     raw_response: str
+
+
+ # -----------------------------------------------------------------------------
+ # Trajectory / Observability Types
+ # -----------------------------------------------------------------------------
+
+ @dataclass(slots=True)
+ class TrajectoryStep:
+     """Single step in the Aleph execution trace."""
+
+     step_number: int
+     depth: int
+     timestamp: datetime
+
+     prompt_tokens: int
+     prompt_summary: str
+
+     action: ParsedAction
+
+     result: ExecutionResult | SubQueryResult | str
+     result_tokens: int
+
+     cumulative_tokens: int
+     cumulative_cost: float
+
+
+ @dataclass(slots=True)
+ class AlephResponse:
+     """Final response from an Aleph call."""
+
+     answer: str
+     success: bool
+
+     total_iterations: int
+     max_depth_reached: int
+     total_tokens: int
+     total_cost_usd: float
+     wall_time_seconds: float
+
+     trajectory: list[TrajectoryStep]
+
+     error: str | None = None
+     error_type: (
+         Literal[
+             "budget_exceeded",
+             "max_iterations",
+             "execution_error",
+             "provider_error",
+             "no_final",
+         ]
+         | None
+     ) = None
+
+
+ # -----------------------------------------------------------------------------
+ # Budget Types
+ # -----------------------------------------------------------------------------
+
+ @dataclass(slots=True)
+ class Budget:
+     """Resource limits for an Aleph call."""
+
+     max_tokens: int | None = None
+     max_iterations: int | None = 100
+     max_depth: int | None = 2
+     max_wall_time_seconds: float | None = 300.0
+     max_sub_queries: int | None = 100
+
+
+ @dataclass(slots=True)
+ class BudgetStatus:
+     """Current budget consumption."""
+
+     tokens_used: int = 0
+     cost_used: float = 0.0
+     iterations_used: int = 0
+     depth_current: int = 0
+     wall_time_used: float = 0.0
+     sub_queries_used: int = 0
+
+     def exceeds(self, budget: Budget) -> tuple[bool, str | None]:
+         """Return (exceeded, reason)."""
+
+         if budget.max_tokens is not None and self.tokens_used > budget.max_tokens:
+             return True, f"Token budget exceeded: used {self.tokens_used} > max {budget.max_tokens}"
+
+         if budget.max_iterations is not None and self.iterations_used > budget.max_iterations:
+             return True, f"Iteration budget exceeded: used {self.iterations_used} > max {budget.max_iterations}"
+
+         if budget.max_depth is not None and self.depth_current > budget.max_depth:
+             return True, f"Depth budget exceeded: current {self.depth_current} > max {budget.max_depth}"
+
+         if budget.max_wall_time_seconds is not None and self.wall_time_used > budget.max_wall_time_seconds:
+             return (
+                 True,
+                 f"Wall-time budget exceeded: used {self.wall_time_used:.2f}s > max {budget.max_wall_time_seconds:.2f}s",
+             )
+
+         if budget.max_sub_queries is not None and self.sub_queries_used > budget.max_sub_queries:
+             return True, f"Sub-query budget exceeded: used {self.sub_queries_used} > max {budget.max_sub_queries}"
+
+         return False, None
+
+
+ # -----------------------------------------------------------------------------
+ # Convenience types
+ # -----------------------------------------------------------------------------
+
+ Message = dict[str, str]
+ SubQueryFn: TypeAlias = Callable[[str, str | None], str | Awaitable[str]]
+ SubAlephFn: TypeAlias = Callable[[str, ContextType | None], AlephResponse | Awaitable[AlephResponse]]
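A minimal usage sketch for the budget types above (editorial, not part of the wheel). It assumes `aleph.types` imports as laid out in this diff:

```python
from aleph.types import Budget, BudgetStatus

budget = Budget(max_iterations=3, max_wall_time_seconds=60.0)
status = BudgetStatus()

for _ in range(5):
    status.iterations_used += 1
    exceeded, reason = status.exceeds(budget)  # strict ">", so it trips at 4
    if exceeded:
        print(reason)  # Iteration budget exceeded: used 4 > max 3
        break
```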
aleph/utils/__init__.py ADDED
@@ -0,0 +1,6 @@
+ """Misc utilities."""
+
+ from .tokens import estimate_tokens
+ from .logging import TrajectoryLogger
+
+ __all__ = ["estimate_tokens", "TrajectoryLogger"]
aleph/utils/logging.py ADDED
@@ -0,0 +1,82 @@
+ """Observability utilities.
+
+ The core Aleph API always returns a full trajectory (unless disabled). This
+ module provides small helpers for pretty-printing and exporting it.
+
+ Optional: install `rich` to get nicer console output.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from dataclasses import asdict
+ from pathlib import Path
+ from typing import Iterable, cast
+
+ from ..types import TrajectoryStep
+
+
+ class TrajectoryLogger:
+     """Logs Aleph trajectory steps to the standard logging system and/or a file."""
+
+     def __init__(
+         self,
+         name: str = "aleph",
+         level: str | int = "INFO",
+         jsonl_path: str | Path | None = None,
+         use_rich: bool = True,
+     ) -> None:
+         self._logger = logging.getLogger(name)
+         self._logger.setLevel(level)
+         if not self._logger.handlers:
+             handler = logging.StreamHandler()
+             fmt = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
+             handler.setFormatter(fmt)
+             self._logger.addHandler(handler)
+
+         self._jsonl_path = Path(jsonl_path) if jsonl_path else None
+         self._use_rich = use_rich
+
+         self._rich_console = None
+         if use_rich:
+             try:
+                 from rich.console import Console
+
+                 self._rich_console = Console()
+             except Exception:
+                 self._rich_console = None
+
+     def log_step(self, step: TrajectoryStep) -> None:
+         msg = self._format_step(step)
+         if self._rich_console is not None:
+             self._rich_console.print(msg)
+         else:
+             self._logger.info(msg)
+
+         if self._jsonl_path is not None:
+             self._jsonl_path.parent.mkdir(parents=True, exist_ok=True)
+             with self._jsonl_path.open("a", encoding="utf-8") as f:
+                 # default=str guards against non-serializable values (e.g. return_value)
+                 f.write(json.dumps(_step_to_json(step), ensure_ascii=False, default=str) + "\n")
+
+     def _format_step(self, step: TrajectoryStep) -> str:
+         act = step.action.action_type.value
+         return (
+             f"[{step.step_number}] depth={step.depth} act={act} "
+             f"prompt_toks={step.prompt_tokens} result_toks={step.result_tokens} "
+             f"cum_toks={step.cumulative_tokens} cost=${step.cumulative_cost:.4f}"
+         )
+
+
+ def _step_to_json(step: TrajectoryStep) -> dict[str, object]:
+     d = cast(dict[str, object], asdict(step))
+     # datetime and Enum values aren't JSON serializable by default
+     d["timestamp"] = step.timestamp.isoformat()
+     action = cast(dict[str, object], d["action"])
+     action["action_type"] = step.action.action_type.value
+     return d
+
+
+ def trajectory_to_json(trajectory: Iterable[TrajectoryStep]) -> list[dict[str, object]]:
+     return [_step_to_json(s) for s in trajectory]
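A short sketch (not from the package) that builds one `TrajectoryStep` from the dataclasses in `aleph/types.py` above and logs it; the field values are invented for illustration:

```python
from datetime import datetime, timezone

from aleph.types import ActionType, ExecutionResult, ParsedAction, TrajectoryStep
from aleph.utils.logging import TrajectoryLogger

logger = TrajectoryLogger(jsonl_path="traces/run.jsonl", use_rich=False)

step = TrajectoryStep(
    step_number=1,
    depth=0,
    timestamp=datetime.now(timezone.utc),
    prompt_tokens=120,
    prompt_summary="count ERROR lines in logs",
    action=ParsedAction(ActionType.CODE_BLOCK, "print(line_count())", "..."),
    result=ExecutionResult("7\n", "", None, [], False, 1.2, None),
    result_tokens=3,
    cumulative_tokens=123,
    cumulative_cost=0.0005,
)
logger.log_step(step)  # one console line plus one JSONL record
```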
aleph/utils/tokens.py ADDED
@@ -0,0 +1,43 @@
+ """Token counting utilities.
+
+ Aleph aims to work with minimal dependencies, so by default it uses a rough
+ character-based estimate: ~4 chars per token.
+
+ If optional libraries are installed, providers may use more accurate counters.
+ """
+
+ from __future__ import annotations
+
+ from typing import Optional
+
+
+ def estimate_tokens(text: str) -> int:
+     """Rough token estimate (works reasonably well for English text)."""
+
+     if not text:
+         return 0
+     # heuristic: 1 token ~ 4 characters
+     return max(1, len(text) // 4)
+
+
+ def try_count_tokens_tiktoken(text: str, model: str) -> Optional[int]:
+     """Best-effort token counting using tiktoken (if installed)."""
+
+     try:
+         import tiktoken
+     except Exception:
+         return None
+
+     try:
+         enc = tiktoken.encoding_for_model(model)
+     except Exception:
+         # Fall back to a common encoding used by OpenAI chat models.
+         try:
+             enc = tiktoken.get_encoding("cl100k_base")
+         except Exception:
+             return None
+
+     try:
+         return len(enc.encode(text))
+     except Exception:
+         return None
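A quick editorial sketch contrasting the built-in heuristic with the optional tiktoken path; the model name is illustrative:

```python
from aleph.utils.tokens import estimate_tokens, try_count_tokens_tiktoken

text = "Aleph keeps raw context out of the model's window."
print(estimate_tokens(text))                      # heuristic: len(text) // 4
print(try_count_tokens_tiktoken(text, "gpt-4o"))  # None unless tiktoken is installed
```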
aleph_rlm-0.6.0.dist-info/METADATA ADDED
@@ -0,0 +1,358 @@
+ Metadata-Version: 2.4
+ Name: aleph-rlm
+ Version: 0.6.0
+ Summary: MCP server for recursive LLM reasoning—load context, iterate with search/code/think tools, converge on answers
+ Project-URL: Homepage, https://github.com/Hmbown/aleph
+ Author: Aleph Contributors
+ License: MIT License
+
+ Copyright (c) 2025 Aleph Contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ License-File: LICENSE
+ Requires-Python: >=3.10
+ Requires-Dist: httpx>=0.27.0
+ Provides-Extra: dev
+ Requires-Dist: mypy>=1.8.0; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
+ Provides-Extra: mcp
+ Requires-Dist: mcp>=1.0.0; extra == 'mcp'
+ Provides-Extra: openai-tokens
+ Requires-Dist: tiktoken>=0.7.0; extra == 'openai-tokens'
+ Provides-Extra: rich
+ Requires-Dist: rich>=13.0.0; extra == 'rich'
+ Provides-Extra: yaml
+ Requires-Dist: pyyaml>=6.0; extra == 'yaml'
+ Description-Content-Type: text/markdown
+
+ # Aleph
+
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+ [![PyPI version](https://img.shields.io/pypi/v/aleph-rlm.svg)](https://pypi.org/project/aleph-rlm/)
+
+ **Your RAM is the new context window.**
+
+ Aleph is an [MCP server](https://modelcontextprotocol.io/) that gives any LLM access to gigabytes of local data without consuming context. Load massive files into a Python process—the model explores them via search, slicing, and sandboxed code execution. Only results enter the context window, never the raw content.
+
+ Based on the [Recursive Language Model](https://arxiv.org/abs/2512.24601) (RLM) architecture.
+
+ ## Use Cases
+
+ | Scenario | What Aleph Does |
+ |----------|-----------------|
+ | **Large log analysis** | Load 500MB of logs, search for patterns, correlate across time ranges |
+ | **Codebase navigation** | Load entire repos, find definitions, trace call chains, extract architecture |
+ | **Data exploration** | JSON exports, CSV files, API responses—explore interactively with Python |
+ | **Mixed document ingestion** | Load PDFs, Word docs, HTML, and logs like plain text |
+ | **Semantic search** | Find relevant sections by meaning, then zoom in with peek |
+ | **Research sessions** | Save/resume sessions, track evidence with citations, spawn sub-queries |
+
+ ## Requirements
+
+ - Python 3.10+
+ - An MCP-compatible client: [Claude Code](https://claude.ai/code), [Cursor](https://cursor.sh), [VS Code](https://code.visualstudio.com/), [Windsurf](https://codeium.com/windsurf), [Codex CLI](https://github.com/openai/codex), or [Claude Desktop](https://claude.ai/download)
+
+ ## Quickstart
+
+ ### 1. Install
+
+ ```bash
+ pip install "aleph-rlm[mcp]"
+ ```
+
+ ### 2. Configure your MCP client
+
+ **Automatic** (recommended):
+ ```bash
+ aleph-rlm install
+ ```
+
+ This auto-detects your installed clients and configures them.
+
+ **Manual** (any MCP client):
+ ```json
+ {
+   "mcpServers": {
+     "aleph": {
+       "command": "aleph",
+       "args": ["--enable-actions", "--workspace-mode", "any"]
+     }
+   }
+ }
+ ```
+
+ <details>
+ <summary><strong>Config file locations</strong></summary>
+
+ | Client | macOS/Linux | Windows |
+ |--------|-------------|---------|
+ | Claude Code | `~/.claude/settings.json` | `%USERPROFILE%\.claude\settings.json` |
+ | Claude Desktop | `~/Library/Application Support/Claude/claude_desktop_config.json` | `%APPDATA%\Claude\claude_desktop_config.json` |
+ | Cursor | `~/.cursor/mcp.json` | `%USERPROFILE%\.cursor\mcp.json` |
+ | VS Code | `~/.vscode/mcp.json` | `%USERPROFILE%\.vscode\mcp.json` |
+ | Codex CLI | `~/.codex/config.toml` | `%USERPROFILE%\.codex\config.toml` |
+
+ </details>
+
+ See [MCP_SETUP.md](MCP_SETUP.md) for detailed instructions.
+
+ ### 3. Verify
+
+ In your assistant, run:
+ ```
+ get_status()
+ ```
+
+ If using Claude Code, tools are prefixed: `mcp__aleph__get_status`.
+
+ ## AI Assistant Setup (MCP + `/aleph` Skill) — Copy/Paste
+
+ Paste this into any AI coding assistant or agentic coder to add Aleph (MCP server + `/aleph` skill):
+
+ ```
+ You are an AI coding assistant. Please set up Aleph (Model Context Protocol / MCP).
+
+ 1) Add the Aleph MCP server config:
+    {
+      "mcpServers": {
+        "aleph": {
+          "command": "aleph",
+          "args": ["--enable-actions", "--workspace-mode", "any"]
+        }
+      }
+    }
+
+ 2) Install the /aleph skill prompt:
+    - Claude Code: copy docs/prompts/aleph.md -> ~/.claude/commands/aleph.md
+    - Codex CLI: copy docs/prompts/aleph.md -> ~/.codex/skills/aleph/SKILL.md
+    - Gemini CLI: copy docs/prompts/aleph.md -> ~/.gemini/skills/aleph/SKILL.md
+      Ensure ~/.gemini/settings.json has "experimental": { "skills": true } and restart.
+    If this client uses a different skill/command folder, ask me where to place it.
+
+ 3) Verify: run get_status() or list_contexts().
+    If tools are namespaced, use mcp__aleph__get_status or mcp__aleph__list_contexts.
+
+ 4) (Optional) Enable sub_query (recursive sub-agent):
+    - CLI backend (no API key): set ALEPH_SUB_QUERY_BACKEND=claude|codex|gemini
+    - API backend: set ALEPH_SUB_QUERY_API_KEY + ALEPH_SUB_QUERY_MODEL (+ optional ALEPH_SUB_QUERY_URL)
+    If env vars can't be set in the MCP config, add them to your shell profile and restart.
+
+ 5) Use the skill: /aleph (Claude Code) or $aleph (Codex CLI).
+    Gemini CLI: /skills list (use /skills enable aleph if disabled).
+ ```
+
+ ## The `/aleph` Skill
+
+ The `/aleph` skill is a prompt that teaches your LLM how to use Aleph effectively. It provides workflow patterns, tool guidance, and troubleshooting tips.
+
+ **Note:** Aleph works best when the skill prompt and the MCP server are used together.
+
+ ### What it does
+
+ - Loads files into searchable in-memory contexts
+ - Tracks evidence with citations as you reason
+ - Supports semantic search and fast rg-based codebase search
+ - Enables recursive sub-queries for deep analysis
+ - Persists sessions for later resumption (memory packs)
+
+ ### How to invoke
+
+ | Client | Command |
+ |--------|---------|
+ | Claude Code | `/aleph` |
+ | Codex CLI | `$aleph` |
+
+ For other clients, copy [`docs/prompts/aleph.md`](docs/prompts/aleph.md) and paste it at session start.
+
+ ### Installing the skill
+
+ **Option 1: Direct download** (simplest)
+
+ Download [`docs/prompts/aleph.md`](docs/prompts/aleph.md) and save it to:
+ - **Claude Code:** `~/.claude/commands/aleph.md` (macOS/Linux) or `%USERPROFILE%\.claude\commands\aleph.md` (Windows)
+ - **Codex CLI:** `~/.codex/skills/aleph/SKILL.md` (macOS/Linux) or `%USERPROFILE%\.codex\skills\aleph\SKILL.md` (Windows)
+
+ **Option 2: From installed package**
+
+ <details>
+ <summary>macOS/Linux</summary>
+
+ ```bash
+ # Claude Code
+ mkdir -p ~/.claude/commands
+ cp "$(python -c "import aleph; print(aleph.__path__[0])")/../docs/prompts/aleph.md" ~/.claude/commands/aleph.md
+
+ # Codex CLI
+ mkdir -p ~/.codex/skills/aleph
+ cp "$(python -c "import aleph; print(aleph.__path__[0])")/../docs/prompts/aleph.md" ~/.codex/skills/aleph/SKILL.md
+ ```
+ </details>
+
+ <details>
+ <summary>Windows (PowerShell)</summary>
+
+ ```powershell
+ # Claude Code
+ New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.claude\commands"
+ $alephPath = python -c "import aleph; print(aleph.__path__[0])"
+ Copy-Item "$alephPath\..\docs\prompts\aleph.md" "$env:USERPROFILE\.claude\commands\aleph.md"
+
+ # Codex CLI
+ New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.codex\skills\aleph"
+ Copy-Item "$alephPath\..\docs\prompts\aleph.md" "$env:USERPROFILE\.codex\skills\aleph\SKILL.md"
+ ```
+ </details>
+
+ ## How It Works
+
+ ```
+ ┌───────────────┐    tool calls     ┌────────────────────────┐
+ │  LLM client   │ ────────────────► │  Aleph (Python, RAM)   │
+ │ (limited ctx) │ ◄──────────────── │   search/peek/exec     │
+ └───────────────┘   small results   └────────────────────────┘
+ ```
+
+ 1. **Load** — `load_context` (paste text) or `load_file` (from disk)
+ 2. **Explore** — `search_context`, `semantic_search`, `peek_context`
+ 3. **Compute** — `exec_python` with 100+ built-in helpers
+ 4. **Reason** — `think`, `evaluate_progress`, `get_evidence`
+ 5. **Persist** — `save_session` to resume later
+
+ ### Quick Example
+
+ ```python
+ # Load log data
+ load_context(content=logs, context_id="logs")
+ # → "Context loaded 'logs': 445 chars, 7 lines, ~111 tokens"
+
+ # Search for errors
+ search_context(pattern="ERROR", context_id="logs")
+ # → Found 2 match(es):
+ #   Line 1: 2026-01-15 10:23:45 ERROR [auth] Failed login...
+ #   Line 4: 2026-01-15 10:24:15 ERROR [db] Connection timeout...
+
+ # Extract structured data
+ exec_python(code="emails = extract_emails(); print(emails)", context_id="logs")
+ # → [{'value': 'user@example.com', 'line_num': 0, 'start': 50, 'end': 66}, ...]
+ ```
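A hypothetical follow-up in the same pseudo-call style, pairing `semantic_search` with `peek_context` (both tools appear in the README's tool list, but the parameter names here are illustrative, not confirmed by the package):

```python
# Find sections by meaning, then zoom in on one hit (parameter names assumed).
semantic_search(query="authentication failures", context_id="logs")
peek_context(context_id="logs", start_line=1, num_lines=5)
```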
+
+ ### Advanced Workflows
+
+ **Multi-Context Workflow (code + docs + diffs)**
+
+ Load multiple sources, then compare or reconcile them:
+
+ ```python
+ # Load a design doc and a repo snapshot (or any two sources)
+ load_context(content=design_doc_text, context_id="spec")
+ rg_search(pattern="AuthService|JWT|token", paths=["."], load_context_id="repo_hits", confirm=True)
+
+ # Compare or reconcile
+ diff_contexts(a="spec", b="repo_hits")
+ search_context(pattern="missing|TODO|mismatch", context_id="repo_hits")
+ ```
+
+ **Advanced Querying with `exec_python`**
+
+ Treat `exec_python` as a reasoning tool, not just code execution:
+
+ ```python
+ # Example: extract class names or key sections programmatically
+ exec_python(code="print(extract_classes())", context_id="repo_hits")
+ ```
+
+ ## Tools
+
+ **Core** (always available):
+ - `load_context`, `list_contexts`, `diff_contexts` — manage in-memory data
+ - `search_context`, `semantic_search`, `peek_context`, `chunk_context` — explore data; use `semantic_search` for concepts/fuzzy queries, `search_context` for precise regex
+ - `exec_python`, `get_variable` — compute in sandbox (100+ built-in helpers)
+ - `think`, `evaluate_progress`, `summarize_so_far`, `get_evidence`, `finalize` — structured reasoning
+ - `tasks` — lightweight task tracking per context
+ - `get_status` — session state
+ - `sub_query` — spawn recursive sub-agents (CLI or API backend)
+
+ <details>
+ <summary><strong>exec_python helpers</strong></summary>
+
+ The sandbox includes 100+ helpers that operate on the loaded context:
+
+ | Category | Examples |
+ |----------|----------|
+ | **Extractors** (25) | `extract_emails()`, `extract_urls()`, `extract_dates()`, `extract_ips()`, `extract_functions()` |
+ | **Statistics** (8) | `word_count()`, `line_count()`, `word_frequency()`, `ngrams()` |
+ | **Line operations** (12) | `head()`, `tail()`, `grep()`, `sort_lines()`, `columns()` |
+ | **Text manipulation** (15) | `replace_all()`, `between()`, `truncate()`, `slugify()` |
+ | **Validation** (7) | `is_email()`, `is_url()`, `is_json()`, `is_numeric()` |
+ | **Core** | `peek()`, `lines()`, `search()`, `chunk()`, `cite()` |
+
+ Extractors return `list[dict]` with keys: `value`, `line_num`, `start`, `end`.
+
+ </details>
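Because every extractor returns `list[dict]` with those four keys, results can be post-processed inside the sandbox. A hypothetical call in the README's own style (`extract_ips` comes from the table above; the dedup logic is illustrative):

```python
# One line number per unique IP, computed inside the sandbox.
exec_python(
    code="ips = extract_ips(); print({d['value']: d['line_num'] for d in ips})",
    context_id="logs",
)
```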
+
+ **Action tools** (requires `--enable-actions`):
+ - `load_file`, `read_file`, `write_file` — filesystem (PDFs, Word, HTML, .gz supported)
+ - `run_command`, `run_tests`, `rg_search` — shell + fast repo search
+ - `save_session`, `load_session` — persist state (memory packs)
+ - `add_remote_server`, `list_remote_tools`, `call_remote_tool` — MCP orchestration
+
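A sketch of how the action tools above might chain in a session (same pseudo-call style; argument names beyond those documented in this README are assumptions):

```python
# Load a file from disk, explore it, then persist the session (requires --enable-actions).
load_file(path="logs/app.log.gz", context_id="logs", confirm=True)
search_context(pattern="Connection timeout", context_id="logs")
save_session(session_id="incident-42", confirm=True)
```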
+ ## Configuration
+
+ **Workspace controls:**
+ - `--workspace-root <path>` — root for relative paths (default: git root from invocation cwd)
+ - `--workspace-mode <fixed|git|any>` — path restrictions
+ - `--require-confirmation` — require `confirm=true` on action calls
+ - `ALEPH_WORKSPACE_ROOT` — override workspace root via environment
+
+ **Limits:**
+ - `--max-file-size` — max file read (default: 1GB)
+ - `--max-write-bytes` — max file write (default: 100MB)
+ - `--timeout` — sandbox/command timeout (default: 60s)
+ - `--max-output` — max command output (default: 50,000 chars)
+
+ See [docs/CONFIGURATION.md](docs/CONFIGURATION.md) for all options.
+
+ ## Documentation
+
+ - [MCP_SETUP.md](MCP_SETUP.md) — client configuration
+ - [docs/CONFIGURATION.md](docs/CONFIGURATION.md) — CLI flags and environment variables
+ - [docs/prompts/aleph.md](docs/prompts/aleph.md) — skill prompt and tool reference
+ - [CHANGELOG.md](CHANGELOG.md) — release history
+ - [DEVELOPMENT.md](DEVELOPMENT.md) — contributing guide
+
+ ## Development
+
+ ```bash
+ git clone https://github.com/Hmbown/aleph.git
+ cd aleph
+ pip install -e ".[dev,mcp]"
+ pytest
+ ```
+
+ ## References
+
+ > **Recursive Language Models**
+ > Zhang, A. L., Kraska, T., & Khattab, O. (2025)
+ > [arXiv:2512.24601](https://arxiv.org/abs/2512.24601)
+
+ ## License
+
+ MIT