reasonix 0.4.13 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -762,6 +762,99 @@ interface FilesystemToolsOptions {
762
762
  }
763
763
  declare function registerFilesystemTools(registry: ToolRegistry, opts: FilesystemToolsOptions): ToolRegistry;
764
764
 
765
+ /**
766
+ * Built-in web search + fetch tools.
767
+ *
768
+ * - `web_search(query, topK?)` — Mojeek's public search page. No API
769
+ * key, no signup. We originally shipped this backed by DuckDuckGo's
770
+ * HTML endpoint, but DDG started serving anti-bot interstitials
771
+ * (HTTP 202 with a challenge page) for every unauthenticated POST.
772
+ * Mojeek runs its own independent index, is bot-friendly, and
773
+ * returns parseable HTML.
774
+ * - `web_fetch(url)` — HTTP GET + naïve HTML-to-text extraction.
775
+ *
776
+ * Both are registered by default on `reasonix chat` / `reasonix code`;
777
+ * set `search: false` in config (or `REASONIX_SEARCH=off`) to turn
778
+ * them off. The model decides when to call them based on the query —
779
+ * no slash command required.
780
+ */
781
+
782
+ interface SearchResult {
783
+ title: string;
784
+ url: string;
785
+ snippet: string;
786
+ }
787
+ interface PageContent {
788
+ url: string;
789
+ title?: string;
790
+ text: string;
791
+ /** True when the extracted text was clipped to fit the cap. */
792
+ truncated: boolean;
793
+ }
794
+ interface WebFetchOptions {
795
+ /** Max characters of extracted text. Defaults to 32_000 to match tool-result cap. */
796
+ maxChars?: number;
797
+ /** Timeout in ms. Defaults to 15_000. */
798
+ timeoutMs?: number;
799
+ signal?: AbortSignal;
800
+ }
801
+ interface WebSearchOptions {
802
+ topK?: number;
803
+ signal?: AbortSignal;
804
+ }
805
+ /**
806
+ * Search the public web via Mojeek. Returns up to `topK` ranked
807
+ * results with title, url, snippet.
808
+ *
809
+ * Mojeek is an independent index (not a Google/Bing front-end) which
810
+ * means coverage on niche or very recent topics can be thinner, but
811
+ * it's reliable from scripts and doesn't gate on cookies or sessions.
812
+ * If the response has 0 results we distinguish "truly empty" from
813
+ * "layout changed or blocked" so the caller isn't left guessing.
814
+ */
815
+ declare function webSearch(query: string, opts?: WebSearchOptions): Promise<SearchResult[]>;
816
+ /**
817
+ * Extract results from a Mojeek search page.
818
+ *
819
+ * Mojeek's stable shape (as of April 2026):
820
+ * <a … class="ob" href="URL"> … breadcrumb … </a>
821
+ * <h2><a class="title" href="URL">Title</a></h2>
822
+ * <p class="s">snippet text …</p>
823
+ *
824
+ * We do two tolerant passes — title anchors, then snippet paragraphs —
825
+ * and pair them positionally. Attribute order inside a tag varies
826
+ * between versions, so each pass captures the whole element and we
827
+ * re-extract href / inner text with a second regex. Exported for
828
+ * unit testing against a fixture.
829
+ */
830
+ declare function parseMojeekResults(html: string): SearchResult[];
831
+ /**
832
+ * Download a URL, strip HTML down to readable text, return it. Times
833
+ * out at 15s, caps extracted text at 32k chars to fit the tool-result
834
+ * budget.
835
+ */
836
+ declare function webFetch(url: string, opts?: WebFetchOptions): Promise<PageContent>;
837
+ /**
838
+ * Strip HTML to readable text. Removes scripts/styles/nav/footer/aside
839
+ * blocks first, then tags, then collapses whitespace. Not a Readability
840
+ * clone — purpose-built to keep the extracted text small enough for the
841
+ * tool-result budget while preserving paragraph breaks.
842
+ */
843
+ declare function htmlToText(html: string): string;
844
+ interface WebToolsOptions {
845
+ /** Default top-K for `web_search` when the model doesn't specify. */
846
+ defaultTopK?: number;
847
+ /** Character cap for `web_fetch` extracted text. */
848
+ maxFetchChars?: number;
849
+ }
850
+ /**
851
+ * Register `web_search` + `web_fetch` on a ToolRegistry. The model
852
+ * invokes them automatically when a question needs current info —
853
+ * no slash command from the user is required.
854
+ */
855
+ declare function registerWebTools(registry: ToolRegistry, opts?: WebToolsOptions): ToolRegistry;
856
+ declare function formatSearchResults(query: string, results: SearchResult[]): string;
857
+
765
858
  /**
766
859
  * Session persistence.
767
860
  *
@@ -1745,6 +1838,12 @@ interface ReasonixConfig {
1745
1838
  session?: string | null;
1746
1839
  /** Marks that `reasonix setup` has completed at least once. */
1747
1840
  setupCompleted?: boolean;
1841
+ /**
1842
+ * Whether `web_search` + `web_fetch` tools are registered. Default:
1843
+ * enabled (no key required — backed by Mojeek's public search
1844
+ * page). Set to `false` to keep the session offline.
1845
+ */
1846
+ search?: boolean;
1748
1847
  }
1749
1848
  declare function defaultConfigPath(): string;
1750
1849
  declare function readConfig(path?: string): ReasonixConfig;
@@ -1758,6 +1857,6 @@ declare function redactKey(key: string): string;
1758
1857
 
1759
1858
  /** Reasonix — DeepSeek-native agent framework. Library entry point. */
1760
1859
 
1761
- declare const VERSION = "0.4.3";
1860
+ declare const VERSION = "0.4.15";
1762
1861
 
1763
- export { AppendOnlyLog, type ApplyResult, type ApplyStatus, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, type BridgeOptions, type BridgeResult, CODE_SYSTEM_PROMPT, CacheFirstLoop, type CacheFirstLoopOptions, type CallToolResult, type ChatMessage, type ChatResponse, DEFAULT_MAX_RESULT_CHARS, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EditBlock, type EditSnapshot, type EventRole, type FilesystemToolsOptions, type FlattenDecision, type FlattenOptions, type GetPromptResult, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type InitializeResult, type InspectionReport, type JSONSchema, type JsonRpcMessage, type JsonRpcRequest, type JsonRpcResponse, type ListPromptsResult, type ListResourcesResult, type ListToolsResult, type LoopEvent, MCP_PROTOCOL_VERSION, McpClient, type McpClientOptions, type McpContentBlock, type McpProgressHandler, type McpProgressInfo, type McpPrompt, type McpPromptArgument, type McpPromptMessage, type McpPromptResourceBlock, type McpResource, type McpResourceContents, type McpResourceContentsBlob, type McpResourceContentsText, type McpSpec, type McpTool, type McpToolSchema, type McpTransport, type ProgressNotificationParams, type ReadResourceResult, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SectionResult, type SessionInfo, SessionStats, type SessionSummary, type SseMcpSpec, SseTransport, type SseTransportOptions, type StdioMcpSpec, StdioTransport, type StdioTransportOptions, StormBreaker, type StreamChunk, type ToolCall, type ToolCallContext, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, type TranscriptRecord, type 
TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, aggregateBranchUsage, analyzeSchema, appendSessionMessage, applyEditBlock, applyEditBlocks, bridgeMcpTools, claudeEquivalentCost, codeSystemPrompt, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenMcpResult, flattenSchema, formatLoopError, harvest, healLoadedMessages, inputCostUsd, inspectMcpServer, isJsonRpcError, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, outputCostUsd, parseEditBlocks, parseMcpSpec, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, registerFilesystemTools, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, restoreSnapshots, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, snapshotBeforeEdits, stripHallucinatedToolMarkup, truncateForModel, writeConfig, writeMeta, writeRecord };
1862
+ export { AppendOnlyLog, type ApplyResult, type ApplyStatus, type BranchOptions, type BranchProgress, type BranchResult, type BranchSample, type BranchSelector, type BranchSummary, type BridgeOptions, type BridgeResult, CODE_SYSTEM_PROMPT, CacheFirstLoop, type CacheFirstLoopOptions, type CallToolResult, type ChatMessage, type ChatResponse, DEFAULT_MAX_RESULT_CHARS, DeepSeekClient, type DeepSeekClientOptions, type RenderOptions as DiffRenderOptions, type DiffReport, type DiffSide, type EditBlock, type EditSnapshot, type EventRole, type FilesystemToolsOptions, type FlattenDecision, type FlattenOptions, type GetPromptResult, type HarvestOptions, ImmutablePrefix, type ImmutablePrefixOptions, type InitializeResult, type InspectionReport, type JSONSchema, type JsonRpcMessage, type JsonRpcRequest, type JsonRpcResponse, type ListPromptsResult, type ListResourcesResult, type ListToolsResult, type LoopEvent, MCP_PROTOCOL_VERSION, McpClient, type McpClientOptions, type McpContentBlock, type McpProgressHandler, type McpProgressInfo, type McpPrompt, type McpPromptArgument, type McpPromptMessage, type McpPromptResourceBlock, type McpResource, type McpResourceContents, type McpResourceContentsBlob, type McpResourceContentsText, type McpSpec, type McpTool, type McpToolSchema, type McpTransport, type PageContent, type ProgressNotificationParams, type ReadResourceResult, type ReadTranscriptResult, type ReasonixConfig, type ReconfigurableOptions, type RepairReport, type ReplayStats, type RetryInfo, type RetryOptions, type Role, type ScavengeOptions, type ScavengeResult, type SearchResult, type SectionResult, type SessionInfo, SessionStats, type SessionSummary, type SseMcpSpec, SseTransport, type SseTransportOptions, type StdioMcpSpec, StdioTransport, type StdioTransportOptions, StormBreaker, type StreamChunk, type ToolCall, type ToolCallContext, ToolCallRepair, type ToolCallRepairOptions, type ToolDefinition, type ToolFunctionSpec, ToolRegistry, type ToolSpec, type TranscriptMeta, 
type TranscriptRecord, type TruncationRepairResult, type TurnPair, type TurnStats, type TypedPlanState, Usage, VERSION, VolatileScratch, type WebFetchOptions, type WebSearchOptions, type WebToolsOptions, aggregateBranchUsage, analyzeSchema, appendSessionMessage, applyEditBlock, applyEditBlocks, bridgeMcpTools, claudeEquivalentCost, codeSystemPrompt, computeReplayStats, costUsd, defaultConfigPath, defaultSelector, deleteSession, diffTranscripts, emptyPlanState, fetchWithRetry, flattenMcpResult, flattenSchema, formatLoopError, formatSearchResults, harvest, healLoadedMessages, htmlToText, inputCostUsd, inspectMcpServer, isJsonRpcError, isPlanStateEmpty, isPlausibleKey, listSessions, loadApiKey, loadDotenv, loadSessionMessages, nestArguments, openTranscriptFile, outputCostUsd, parseEditBlocks, parseMcpSpec, parseMojeekResults, parseTranscript, readConfig, readTranscript, recordFromLoopEvent, redactKey, registerFilesystemTools, registerWebTools, renderMarkdown as renderDiffMarkdown, renderSummaryTable as renderDiffSummary, repairTruncatedJson, replayFromFile, restoreSnapshots, runBranches, sanitizeName as sanitizeSessionName, saveApiKey, scavengeToolCalls, sessionPath, sessionsDir, similarity, snapshotBeforeEdits, stripHallucinatedToolMarkup, truncateForModel, webFetch, webSearch, writeConfig, writeMeta, writeRecord };
package/dist/index.js CHANGED
@@ -2183,6 +2183,187 @@ function lineDiff(a, b) {
2183
2183
  return out;
2184
2184
  }
2185
2185
 
2186
+ // src/tools/web.ts
2187
/** Default cap, in characters, on the text returned by `webFetch`. */
var DEFAULT_FETCH_MAX_CHARS = 32000;
/** Default `webFetch` timeout, in milliseconds. */
var DEFAULT_FETCH_TIMEOUT_MS = 15000;
/** Number of search results returned when the caller gives no `topK`. */
var DEFAULT_TOPK = 5;
/** Browser-like User-Agent header sent with both search and fetch requests. */
var USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
/** Mojeek search page that `webSearch` queries with a `q` parameter. */
var MOJEEK_ENDPOINT = "https://www.mojeek.com/search";
2192
/**
 * Search the public web through Mojeek's HTML results page.
 *
 * @param query Search text; it is URL-encoded into the `q` parameter.
 * @param opts Optional settings. `topK` is clamped to 1..10, and a value
 *   that cannot be read as a number falls back to `DEFAULT_TOPK`. `signal`
 *   is forwarded to `fetch` so the caller can cancel the request.
 * @returns Up to `topK` parsed results. An empty array is returned only
 *   when the page itself states that nothing matched.
 * @throws Error on a non-2xx response, when the page looks like an
 *   anti-bot challenge, or when nothing could be parsed from a page that
 *   does not look like a genuine empty-result page.
 */
async function webSearch(query, opts = {}) {
  // A non-numeric topK would become NaN, make slice(0, NaN) return [] and be
  // misreported below as a blocked or changed page, so coerce it up front.
  const requested = Number(opts.topK ?? DEFAULT_TOPK);
  const topK = Number.isNaN(requested)
    ? DEFAULT_TOPK
    : Math.trunc(Math.max(1, Math.min(10, requested)));

  const resp = await fetch(`${MOJEEK_ENDPOINT}?q=${encodeURIComponent(query)}`, {
    headers: {
      "User-Agent": USER_AGENT,
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9",
      "Accept-Language": "en-US,en;q=0.9"
    },
    signal: opts.signal,
    redirect: "follow"
  });
  if (!resp.ok) throw new Error(`web_search ${resp.status}`);

  const html = await resp.text();
  const parsed = parseMojeekResults(html);
  if (parsed.length > 0) return parsed.slice(0, topK);

  // Nothing parsed: tell a genuine empty result apart from a block page or
  // a layout change so the caller is not left guessing.
  if (/no results found|did not match any documents/i.test(html)) return [];
  if (/captcha|verify you are human|access denied|forbidden/i.test(html)) {
    throw new Error("web_search: Mojeek anti-bot page \u2014 rate-limited or blocked");
  }
  throw new Error(
    `web_search: 0 results but response doesn't look like a real empty page (${html.length} chars, first 120: ${html.slice(0, 120).replace(/\s+/g, " ")})`
  );
}
2217
/**
 * Extract search results from the HTML of a Mojeek results page.
 *
 * Two passes collect every `<a class="title">` element and every
 * `<p class="s">` snippet body; the n-th title is then paired with the
 * n-th snippet. Each title anchor is captured whole and its `href` and
 * inner text are pulled out by separate regexes, so the attribute order
 * inside the tag does not matter.
 *
 * @param html Raw HTML of the results page.
 * @returns One `{ title, url, snippet }` per title anchor that has a
 *   double-quoted `href`. Anchors without one are skipped. `snippet` is
 *   an empty string when there is no snippet at the same position.
 */
function parseMojeekResults(html) {
  const titleAnchors = Array.from(
    html.matchAll(/<a\b[^>]*\bclass="title"[^>]*>[\s\S]*?<\/a>/g),
    (m) => m[0]
  );
  const snippetBodies = Array.from(
    html.matchAll(/<p\b[^>]*\bclass="s"[^>]*>([\s\S]*?)<\/p>/g),
    (m) => m[1] ?? ""
  );
  const hrefRe = /href="([^"]+)"/;
  const innerRe = /<a\b[^>]*>([\s\S]*?)<\/a>/;
  const toPlainText = (fragment) =>
    decodeHtmlEntities(stripHtml(fragment)).replace(/\s+/g, " ").trim();

  const results = [];
  titleAnchors.forEach((anchor, i) => {
    const href = anchor.match(hrefRe)?.[1];
    if (!href) return;
    results.push({
      // Collapse whitespace in the title too, so it stays on one line.
      title: toPlainText(anchor.match(innerRe)?.[1] ?? ""),
      // An href is an HTML attribute value: `&amp;` and friends must be
      // decoded to recover the URL that a browser would actually request.
      url: decodeHtmlEntities(href),
      snippet: toPlainText(snippetBodies[i] ?? "")
    });
  });
  return results;
}
2249
/**
 * Download `url` and return its readable text.
 *
 * @param url Address to GET; redirects are followed.
 * @param opts Optional settings. `maxChars` caps the returned text
 *   (default `DEFAULT_FETCH_MAX_CHARS`), `timeoutMs` bounds the request
 *   including the body download (default `DEFAULT_FETCH_TIMEOUT_MS`), and
 *   `signal` lets the caller cancel.
 * @returns `{ url, title, text, truncated }`. `text` goes through
 *   `htmlToText` only when the response content type contains
 *   `text/html`. When the text exceeds `maxChars` it is cut and a
 *   truncation marker is appended.
 * @throws Error `web_fetch <status> for <url>` on a non-2xx response.
 *   Timeouts and caller cancellation surface as the abort error raised
 *   by `fetch` or by the body read.
 */
async function webFetch(url, opts = {}) {
  const maxChars = opts.maxChars ?? DEFAULT_FETCH_MAX_CHARS;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
  const ctl = new AbortController();
  const timer = setTimeout(() => ctl.abort(), timeoutMs);
  const cancel = () => ctl.abort();
  // An already-aborted signal never fires "abort" again, so honour it now.
  if (opts.signal?.aborted) cancel();
  else opts.signal?.addEventListener("abort", cancel, { once: true });

  let contentType;
  let raw;
  try {
    const resp = await fetch(url, {
      headers: { "User-Agent": USER_AGENT, Accept: "text/html,text/plain,*/*" },
      signal: ctl.signal,
      redirect: "follow"
    });
    if (!resp.ok) throw new Error(`web_fetch ${resp.status} for ${url}`);
    contentType = resp.headers.get("content-type") ?? "";
    // Read the body while the timer and abort listener are still armed;
    // otherwise a slow or endless body would escape both.
    raw = await resp.text();
  } finally {
    clearTimeout(timer);
    opts.signal?.removeEventListener("abort", cancel);
  }

  const title = extractTitle(raw);
  const text = contentType.includes("text/html") ? htmlToText(raw) : raw;
  const truncated = text.length > maxChars;
  const finalText = truncated
    ? `${text.slice(0, maxChars)}\n\n[\u2026 truncated ${text.length - maxChars} chars \u2026]`
    : text;
  return { url, title, text: finalText, truncated };
}
2278
/**
 * Reduce an HTML document to readable plain text.
 *
 * Steps, in order: normalise line endings; drop whole
 * script/style/noscript/nav/footer/aside/svg elements; drop HTML
 * comments; turn block-level tags into newlines; strip the remaining
 * tags; decode entities; collapse runs of spaces and blank lines.
 *
 * @param html Raw HTML.
 * @returns Trimmed text with at most one blank line between paragraphs.
 */
function htmlToText(html) {
  // CRLF / lone CR would otherwise hide consecutive newlines from the
  // blank-line collapsing rules at the end.
  let s = html.replace(/\r\n?/g, "\n");

  const droppedElements = [
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<noscript[\s\S]*?<\/noscript>/gi,
    /<nav[\s\S]*?<\/nav>/gi,
    /<footer[\s\S]*?<\/footer>/gi,
    /<aside[\s\S]*?<\/aside>/gi,
    /<svg[\s\S]*?<\/svg>/gi
  ];
  for (const element of droppedElements) s = s.replace(element, "");

  // Remove comments as a unit. The generic tag stripper below stops at the
  // first ">", which would leak commented-out markup into the text. This
  // runs after script/style removal so a "<!--" inside a script string
  // cannot swallow real content.
  s = s.replace(/<!--[\s\S]*?-->/g, "");

  s = s.replace(/<\/?(p|div|br|h[1-6]|li|tr|section|article)\b[^>]*>/gi, "\n");
  s = s.replace(/<[^>]+>/g, "");
  s = decodeHtmlEntities(s);
  s = s.replace(/[ \t]+/g, " ");
  s = s.replace(/\n[ \t]+/g, "\n");
  s = s.replace(/\n{3,}/g, "\n\n");
  return s.trim();
}
2295
/**
 * Remove every `<...>` tag from a fragment, leaving the text between
 * tags untouched. Entities are not decoded here.
 */
function stripHtml(s) {
  const anyTag = /<[^>]+>/g;
  const withoutTags = s.replace(anyTag, "");
  return withoutTags;
}
2298
/**
 * Decode HTML character references in a single pass.
 *
 * Handles the named references `&nbsp;`, `&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;` and any decimal (`&#39;`) or hexadecimal
 * (`&#x27;`) numeric reference. Unknown names and out-of-range code
 * points are left exactly as written.
 *
 * Decoding in one pass matters: replacing `&amp;` before the other
 * entities would turn the escaped text `&amp;lt;` into `<` instead of
 * the literal `&lt;`.
 *
 * @param s Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
function decodeHtmlEntities(s) {
  const named = new Map([
    ["nbsp", " "],
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"]
  ]);
  return s.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-z]+);/g, (entity, body) => {
    if (body[0] !== "#") return named.get(body) ?? entity;
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = isHex
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (!(codePoint > 0 && codePoint <= 0x10ffff) || isSurrogate) return entity;
    // Keep numeric no-break spaces consistent with `&nbsp;`, which maps to
    // a plain space.
    return codePoint === 0xa0 ? " " : String.fromCodePoint(codePoint);
  });
}
2301
/**
 * Pull the document title out of raw HTML.
 *
 * @param html Raw page source.
 * @returns The text of the first `<title>` element with entities decoded
 *   and whitespace collapsed, or `undefined` when there is no title or it
 *   is blank.
 */
function extractTitle(html) {
  const m = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!m?.[1]) return void 0;
  // Decode before collapsing so that `&nbsp;` and similar are treated as
  // whitespace, and so titles read the same way search-result titles do.
  return decodeHtmlEntities(m[1]).replace(/\s+/g, " ").trim() || void 0;
}
2306
/**
 * Register the `web_search` and `web_fetch` tools on a registry.
 *
 * @param registry Object with a `register(tool)` method; it is returned
 *   unchanged so calls can be chained.
 * @param opts Optional settings. `defaultTopK` is used when a
 *   `web_search` call gives no `topK` (default `DEFAULT_TOPK`).
 *   `maxFetchChars` caps `web_fetch` text (default
 *   `DEFAULT_FETCH_MAX_CHARS`).
 * @returns The same `registry`.
 *
 * Both tool functions reject with an `Error` when their required string
 * argument is missing or unusable, before any network request is made.
 */
function registerWebTools(registry, opts = {}) {
  const defaultTopK = opts.defaultTopK ?? DEFAULT_TOPK;
  const maxFetchChars = opts.maxFetchChars ?? DEFAULT_FETCH_MAX_CHARS;

  registry.register({
    name: "web_search",
    description: "Search the public web. Returns ranked results with title, url, and snippet. Use this when the question needs information more current than your training data, when you're unsure of a factual detail, or when the user asks about a specific webpage/library/release you haven't seen.",
    parameters: {
      type: "object",
      properties: {
        query: { type: "string", description: "Natural-language search query." },
        topK: {
          type: "integer",
          description: `Number of results to return (1..10). Default ${defaultTopK}.`
        }
      },
      required: ["query"]
    },
    fn: async (args, ctx) => {
      // Without this guard a missing query would be sent to the search
      // engine as the literal text "undefined".
      const query = typeof args?.query === "string" ? args.query.trim() : "";
      if (!query) throw new Error("web_search: query must be a non-empty string");
      const results = await webSearch(query, {
        topK: args.topK ?? defaultTopK,
        signal: ctx?.signal
      });
      return formatSearchResults(query, results);
    }
  });

  registry.register({
    name: "web_fetch",
    description: "Download a URL and return its visible text content (HTML pages get scripts/styles/nav stripped). Truncated at the tool-result cap. Use after web_search when a snippet isn't enough.",
    parameters: {
      type: "object",
      properties: {
        url: { type: "string", description: "Absolute http:// or https:// URL." }
      },
      required: ["url"]
    },
    fn: async (args, ctx) => {
      const url = typeof args?.url === "string" ? args.url.trim() : "";
      if (!/^https?:\/\//i.test(url)) {
        throw new Error("web_fetch: url must start with http:// or https://");
      }
      const page = await webFetch(url, { maxChars: maxFetchChars, signal: ctx?.signal });
      const header = page.title ? `${page.title}\n${page.url}` : page.url;
      return `${header}\n\n${page.text}`;
    }
  });

  return registry;
}
2355
/**
 * Render search results as the plain-text block handed back to the model:
 * a `query:` line, a `results (N):` line, then one numbered entry per
 * result with its URL and, when non-empty, its snippet on following lines.
 */
function formatSearchResults(query, results) {
  const heading = [`query: ${query}`, `\nresults (${results.length}):`];
  const entries = results.flatMap((hit, position) => {
    const entry = [`\n${position + 1}. ${hit.title}`, ` ${hit.url}`];
    if (hit.snippet) entry.push(` ${hit.snippet}`);
    return entry;
  });
  return heading.concat(entries).join("\n");
}
2366
+
2186
2367
  // src/env.ts
2187
2368
  import { readFileSync as readFileSync2 } from "fs";
2188
2369
  import { resolve as resolve2 } from "path";
@@ -3556,7 +3737,7 @@ function redactKey(key) {
3556
3737
  }
3557
3738
 
3558
3739
  // src/index.ts
3559
- var VERSION = "0.4.3";
3740
+ var VERSION = "0.4.15";
3560
3741
  export {
3561
3742
  AppendOnlyLog,
3562
3743
  CODE_SYSTEM_PROMPT,
@@ -3594,8 +3775,10 @@ export {
3594
3775
  flattenMcpResult,
3595
3776
  flattenSchema,
3596
3777
  formatLoopError,
3778
+ formatSearchResults,
3597
3779
  harvest,
3598
3780
  healLoadedMessages,
3781
+ htmlToText,
3599
3782
  inputCostUsd,
3600
3783
  inspectMcpServer,
3601
3784
  isJsonRpcError,
@@ -3610,12 +3793,14 @@ export {
3610
3793
  outputCostUsd,
3611
3794
  parseEditBlocks,
3612
3795
  parseMcpSpec,
3796
+ parseMojeekResults,
3613
3797
  parseTranscript,
3614
3798
  readConfig,
3615
3799
  readTranscript,
3616
3800
  recordFromLoopEvent,
3617
3801
  redactKey,
3618
3802
  registerFilesystemTools,
3803
+ registerWebTools,
3619
3804
  renderMarkdown as renderDiffMarkdown,
3620
3805
  renderSummaryTable as renderDiffSummary,
3621
3806
  repairTruncatedJson,
@@ -3631,6 +3816,8 @@ export {
3631
3816
  snapshotBeforeEdits,
3632
3817
  stripHallucinatedToolMarkup,
3633
3818
  truncateForModel,
3819
+ webFetch,
3820
+ webSearch,
3634
3821
  writeConfig,
3635
3822
  writeMeta,
3636
3823
  writeRecord