npm - @lloyal-labs/rig - Versions diffs - 1.2.0 - Mend

@lloyal-labs/rig 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/README.md +188 -0
package/dist/index.d.ts +19 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +36 -0
package/dist/index.js.map +1 -0
package/dist/reranker.d.ts +22 -0
package/dist/reranker.d.ts.map +1 -0
package/dist/reranker.js +76 -0
package/dist/reranker.js.map +1 -0
package/dist/resources/files.d.ts +28 -0
package/dist/resources/files.d.ts.map +1 -0
package/dist/resources/files.js +98 -0
package/dist/resources/files.js.map +1 -0
package/dist/resources/index.d.ts +9 -0
package/dist/resources/index.d.ts.map +1 -0
package/dist/resources/index.js +13 -0
package/dist/resources/index.js.map +1 -0
package/dist/resources/types.d.ts +39 -0
package/dist/resources/types.d.ts.map +1 -0
package/dist/resources/types.js +3 -0
package/dist/resources/types.js.map +1 -0
package/dist/sources/corpus-research.md +14 -0
package/dist/sources/corpus.d.ts +48 -0
package/dist/sources/corpus.d.ts.map +1 -0
package/dist/sources/corpus.js +91 -0
package/dist/sources/corpus.js.map +1 -0
package/dist/sources/extract.md +5 -0
package/dist/sources/index.d.ts +10 -0
package/dist/sources/index.d.ts.map +1 -0
package/dist/sources/index.js +14 -0
package/dist/sources/index.js.map +1 -0
package/dist/sources/search-extract.md +6 -0
package/dist/sources/types.d.ts +28 -0
package/dist/sources/types.d.ts.map +1 -0
package/dist/sources/types.js +3 -0
package/dist/sources/types.js.map +1 -0
package/dist/sources/web-research.md +12 -0
package/dist/sources/web.d.ts +78 -0
package/dist/sources/web.d.ts.map +1 -0
package/dist/sources/web.js +319 -0
package/dist/sources/web.js.map +1 -0
package/dist/tools/fetch-page.d.ts +26 -0
package/dist/tools/fetch-page.d.ts.map +1 -0
package/dist/tools/fetch-page.js +72 -0
package/dist/tools/fetch-page.js.map +1 -0
package/dist/tools/grep.d.ts +30 -0
package/dist/tools/grep.d.ts.map +1 -0
package/dist/tools/grep.js +79 -0
package/dist/tools/grep.js.map +1 -0
package/dist/tools/index.d.ts +39 -0
package/dist/tools/index.d.ts.map +1 -0
package/dist/tools/index.js +49 -0
package/dist/tools/index.js.map +1 -0
package/dist/tools/plan.d.ts +76 -0
package/dist/tools/plan.d.ts.map +1 -0
package/dist/tools/plan.js +98 -0
package/dist/tools/plan.js.map +1 -0
package/dist/tools/read-file.d.ts +62 -0
package/dist/tools/read-file.d.ts.map +1 -0
package/dist/tools/read-file.js +123 -0
package/dist/tools/read-file.js.map +1 -0
package/dist/tools/report.d.ts +22 -0
package/dist/tools/report.d.ts.map +1 -0
package/dist/tools/report.js +26 -0
package/dist/tools/report.js.map +1 -0
package/dist/tools/research.d.ts +57 -0
package/dist/tools/research.d.ts.map +1 -0
package/dist/tools/research.js +117 -0
package/dist/tools/research.js.map +1 -0
package/dist/tools/search.d.ts +34 -0
package/dist/tools/search.d.ts.map +1 -0
package/dist/tools/search.js +69 -0
package/dist/tools/search.js.map +1 -0
package/dist/tools/types.d.ts +84 -0
package/dist/tools/types.d.ts.map +1 -0
package/dist/tools/types.js +3 -0
package/dist/tools/types.js.map +1 -0
package/dist/tools/web-research.d.ts +60 -0
package/dist/tools/web-research.d.ts.map +1 -0
package/dist/tools/web-research.js +136 -0
package/dist/tools/web-research.js.map +1 -0
package/dist/tools/web-search.d.ts +42 -0
package/dist/tools/web-search.d.ts.map +1 -0
package/dist/tools/web-search.js +83 -0
package/dist/tools/web-search.js.map +1 -0
package/package.json +45 -0

package/dist/sources/corpus.d.ts ADDED Viewed

@@ -0,0 +1,48 @@
+import type { Operation } from "effection";
+import { Source } from "@lloyal-labs/lloyal-agents";
+import type { Tool } from "@lloyal-labs/lloyal-agents";
+import type { Resource, Chunk } from "../resources/types";
+import type { SourceContext } from "./types";
+/**
+ * Corpus-backed research source using local file search, read, and grep
+ *
+ * Provides grounding tools (`search`, `read_file`, `grep`) over a set of
+ * loaded {@link Resource} / {@link Chunk} pairs. On {@link bind}, tokenizes
+ * chunks via the reranker and prepends a reranker-backed `search` tool to
+ * the tool list. The `search` tool is ordered first so the model prefers
+ * semantic search before falling back to `read_file` or `grep`.
+ * Until {@link bind} has run, only `read_file` and `grep` are available.
+ *
+ * The research tool is a self-referential {@link ResearchTool} that spawns
+ * sub-agents with corpus-specific prompts and the full grounding toolkit.
+ *
+ * @category Rig
+ */
+export declare class CorpusSource extends Source<SourceContext, Chunk> {
+    private _chunks; // chunks given to the constructor; tokenized and searched after bind
+    private _tools; // grounding tools; `search` is prepended in place by bind
+    private _researchTool; // null until bind has built the research tool
+    private _bound; // true once bind has completed
+    /** @inheritDoc */
+    readonly name = "corpus";
+    /**
+     * @param resources - Loaded file resources for read_file and grep tools
+     * @param chunks - Pre-split chunks for reranker-backed search
+     */
+    constructor(resources: Resource[], chunks: Chunk[]);
+    /**
+     * @inheritDoc
+     * @throws Error when accessed before {@link bind} has built the tool
+     */
+    get researchTool(): Tool;
+    /**
+     * Returns the source's own tool array (not a copy), so the `search`
+     * tool prepended by {@link bind} becomes visible through references
+     * obtained earlier.
+     *
+     * @inheritDoc
+     */
+    get groundingTools(): Tool[];
+    /**
+     * Late-bind reranker and build the research toolkit
+     *
+     * Tokenizes all chunks through the reranker, prepends a {@link SearchTool}
+     * to the tool list, then constructs the self-referential
+     * {@link ResearchTool} with corpus-specific prompts. Idempotent — skips
+     * if already bound.
+     *
+     * @param ctx - Supplies the reranker, reporter prompt, report tool,
+     *   turn limit and trace flag used to build the research tool
+     * @inheritDoc
+     */
+    bind(ctx: SourceContext): Operation<void>;
+}
+//# sourceMappingURL=corpus.d.ts.map

package/dist/sources/corpus.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"corpus.d.ts","sourceRoot":"","sources":["../../src/sources/corpus.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAwB,MAAM,4BAA4B,CAAC;AAC1E,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAe7C;;;;;;;;;;;;;GAaG;AACH,qBAAa,YAAa,SAAQ,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC;IAC5D,OAAO,CAAC,OAAO,CAAU;IACzB,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,aAAa,CAA6B;IAClD,OAAO,CAAC,MAAM,CAAS;IAEvB,kBAAkB;IAClB,QAAQ,CAAC,IAAI,YAAY;IAEzB;;;OAGG;gBACS,SAAS,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE;IAMlD,kBAAkB;IAClB,IAAI,YAAY,IAAI,IAAI,CAIvB;IAED,kBAAkB;IAClB,IAAI,cAAc,IAAI,IAAI,EAAE,CAAwB;IAEpD;;;;;;;;;OASG;IACF,IAAI,CAAC,GAAG,EAAE,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC;CAoB3C"}

package/dist/sources/corpus.js ADDED Viewed

@@ -0,0 +1,91 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CorpusSource = void 0;
+const fs = require("node:fs");
+const path = require("node:path");
+const effection_1 = require("effection");
+const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
+const search_1 = require("../tools/search");
+const read_file_1 = require("../tools/read-file");
+const grep_1 = require("../tools/grep");
+const research_1 = require("../tools/research");
+function readTask(name) {
+    const raw = fs
+        .readFileSync(path.resolve(__dirname, `${name}.md`), "utf8")
+        .trim();
+    const sep = raw.indexOf("\n---\n");
+    if (sep === -1)
+        return { system: raw, user: "" };
+    return { system: raw.slice(0, sep).trim(), user: raw.slice(sep + 5).trim() };
+}
+/**
+ * Corpus-backed research source using local file search, read, and grep
+ *
+ * Provides grounding tools (`search`, `read_file`, `grep`) over a set of
+ * loaded {@link Resource} / {@link Chunk} pairs. On {@link bind}, tokenizes
+ * chunks via the reranker and prepends a reranker-backed `search` tool to
+ * the tool list. The `search` tool is ordered first so the model prefers
+ * semantic search before falling back to `read_file` or `grep`.
+ *
+ * The research tool is a self-referential {@link ResearchTool} that spawns
+ * sub-agents with corpus-specific prompts and the full grounding toolkit.
+ *
+ * @category Rig
+ */
+class CorpusSource extends lloyal_agents_1.Source {
+    _chunks;
+    _tools = [];
+    _researchTool = null;
+    _bound = false;
+    /** @inheritDoc */
+    name = "corpus";
+    /**
+     * @param resources - Loaded file resources for read_file and grep tools
+     * @param chunks - Pre-split chunks for reranker-backed search
+     */
+    constructor(resources, chunks) {
+        super();
+        this._chunks = chunks;
+        this._tools = [new read_file_1.ReadFileTool(resources), new grep_1.GrepTool(resources)];
+    }
+    /** @inheritDoc */
+    get researchTool() {
+        if (!this._researchTool)
+            throw new Error("CorpusSource: bind() must be called first");
+        return this._researchTool;
+    }
+    /** @inheritDoc */
+    get groundingTools() { return this._tools; }
+    /**
+     * Late-bind reranker and build the research toolkit
+     *
+     * Tokenizes all chunks through the reranker, prepends a {@link SearchTool}
+     * to the tool list, then constructs the self-referential
+     * {@link ResearchTool} with corpus-specific prompts. Idempotent — skips
+     * if already bound.
+     *
+     * @inheritDoc
+     */
+    *bind(ctx) {
+        if (this._bound)
+            return;
+        const tw = yield* lloyal_agents_1.Trace.expect();
+        tw.write({ traceId: tw.nextId(), parentTraceId: null, ts: performance.now(),
+            type: 'source:bind', sourceName: this.name });
+        yield* (0, effection_1.call)(() => ctx.reranker.tokenizeChunks(this._chunks));
+        this._tools.unshift(new search_1.SearchTool(this._chunks, ctx.reranker));
+        const researchPrompt = readTask("corpus-research");
+        const research = new research_1.ResearchTool({
+            systemPrompt: researchPrompt.system,
+            reporterPrompt: ctx.reporterPrompt,
+            maxTurns: ctx.maxTurns,
+            trace: ctx.trace,
+        });
+        const toolkit = (0, lloyal_agents_1.createToolkit)([...this._tools, ctx.reportTool, research]);
+        research.setToolkit(toolkit);
+        this._researchTool = research;
+        this._bound = true;
+    }
+}
+exports.CorpusSource = CorpusSource;
+//# sourceMappingURL=corpus.js.map

package/dist/sources/corpus.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"corpus.js","sourceRoot":"","sources":["../../src/sources/corpus.ts"],"names":[],"mappings":";;;AAAA,8BAA8B;AAC9B,kCAAkC;AAClC,yCAAiC;AAEjC,8DAA0E;AAI1E,4CAA6C;AAC7C,kDAAkD;AAClD,wCAAyC;AACzC,gDAAiD;AAEjD,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,GAAG,GAAG,EAAE;SACX,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,MAAM,CAAC;SAC3D,IAAI,EAAE,CAAC;IACV,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IACnC,IAAI,GAAG,KAAK,CAAC,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IACjD,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;AAC/E,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAa,YAAa,SAAQ,sBAA4B;IACpD,OAAO,CAAU;IACjB,MAAM,GAAW,EAAE,CAAC;IACpB,aAAa,GAAwB,IAAI,CAAC;IAC1C,MAAM,GAAG,KAAK,CAAC;IAEvB,kBAAkB;IACT,IAAI,GAAG,QAAQ,CAAC;IAEzB;;;OAGG;IACH,YAAY,SAAqB,EAAE,MAAe;QAChD,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,wBAAY,CAAC,SAAS,CAAC,EAAE,IAAI,eAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IACvE,CAAC;IAED,kBAAkB;IAClB,IAAI,YAAY;QACd,IAAI,CAAC,IAAI,CAAC,aAAa;YACrB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,kBAAkB;IAClB,IAAI,cAAc,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAEpD;;;;;;;;;OASG;IACH,CAAC,IAAI,CAAC,GAAkB;QACtB,IAAI,IAAI,CAAC,MAAM;YAAE,OAAO;QACxB,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,qBAAK,CAAC,MAAM,EAAE,CAAC;QACjC,EAAE,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,EAAE,EAAE,WAAW,CAAC,GAAG,EAAE;YACzE,IAAI,EAAE,aAAa,EAAE,UAAU,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAChD,KAAK,CAAC,CAAC,IAAA,gBAAI,EAAC,GAAG,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QAC7D,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,mBAAU,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;QAEhE,MAAM,cAAc,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACnD,MAAM,QAAQ,GAAG,IAAI,uBAAY,CAAC;YAChC,YAAY,EAAE,cAAc,CAAC,MAAM;YACnC,cAAc,EAAE,GAAG,CAAC,cAAc;YAClC,QAAQ,EAAE,GAAG,CAAC,
QAAQ;YACtB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,IAAA,6BAAa,EAAC,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC1E,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC7B,IAAI,CAAC,aAAa,GAAG,QAAQ,CAAC;QAC9B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;CACF;AA3DD,oCA2DC"}

package/dist/sources/extract.md ADDED Viewed

@@ -0,0 +1,5 @@
+You extract key information from web page content. Produce a concise summary and list any URLs/links found in the text that are worth following. Output JSON only.
+---
+Page: {{title}} ({{url}})
+{{content}}

package/dist/sources/index.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * Source implementations for the research pipeline
+ *
+ * @packageDocumentation
+ * @category Rig
+ */
+export { WebSource } from './web';
+export { CorpusSource } from './corpus';
+export type { SourceContext } from './types';
+//# sourceMappingURL=index.d.ts.map

package/dist/sources/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sources/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC"}

package/dist/sources/index.js ADDED Viewed

@@ -0,0 +1,14 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CorpusSource = exports.WebSource = void 0;
+/**
+ * Source implementations for the research pipeline
+ *
+ * @packageDocumentation
+ * @category Rig
+ */
+var web_1 = require("./web");
+Object.defineProperty(exports, "WebSource", { enumerable: true, get: function () { return web_1.WebSource; } });
+var corpus_1 = require("./corpus");
+Object.defineProperty(exports, "CorpusSource", { enumerable: true, get: function () { return corpus_1.CorpusSource; } });
+//# sourceMappingURL=index.js.map

package/dist/sources/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/sources/index.ts"],"names":[],"mappings":";;;AAAA;;;;;GAKG;AACH,6BAAkC;AAAzB,gGAAA,SAAS,OAAA;AAClB,mCAAwC;AAA/B,sGAAA,YAAY,OAAA"}

package/dist/sources/search-extract.md ADDED Viewed

@@ -0,0 +1,6 @@
+You select the most relevant search results for a research query. Pick URLs most likely to contain substantive information and summarize the key findings. Output JSON only.
+---
+Query: {{query}}
+Search results:
+{{results}}

package/dist/sources/types.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import type { Tool } from '@lloyal-labs/lloyal-agents';
+import type { Reranker } from '../tools/types';
+/**
+ * Runtime context passed to {@link Source.bind} during pipeline setup
+ *
+ * Carries shared dependencies that are not available at source construction
+ * time — the reranker instance, reporter prompt/tool, and pipeline-level
+ * configuration. Each source receives the same context so research and
+ * grounding tools share a consistent environment.
+ *
+ * @category Rig
+ */
+export interface SourceContext {
+    /** Reranker instance used by corpus sources to tokenize chunks and back the `search` tool */
+    reranker: Reranker;
+    /** System/user prompt pair for the report-writing pass; passed unchanged to the research tool options */
+    reporterPrompt: {
+        system: string;
+        user: string;
+    };
+    /** Shared report tool instance injected into every source's research toolkit */
+    reportTool: Tool;
+    /** Maximum tool-use turns for research sub-agents; passed unchanged to the research tool options */
+    maxTurns: number;
+    /**
+     * Whether to emit structured trace events during research execution.
+     * Passed to the research tool options only; the `source:bind` event
+     * written by each source's bind is not gated by this flag.
+     */
+    trace: boolean;
+}
+//# sourceMappingURL=types.d.ts.map

package/dist/sources/types.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/sources/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE/C;;;;;;;;;GASG;AACH,MAAM,WAAW,aAAa;IAC5B,oFAAoF;IACpF,QAAQ,EAAE,QAAQ,CAAC;IACnB,gFAAgF;IAChF,cAAc,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACjD,gFAAgF;IAChF,UAAU,EAAE,IAAI,CAAC;IACjB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,wEAAwE;IACxE,KAAK,EAAE,OAAO,CAAC;CAChB"}

package/dist/sources/types.js ADDED Viewed

@@ -0,0 +1,3 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+//# sourceMappingURL=types.js.map

package/dist/sources/types.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/sources/types.ts"],"names":[],"mappings":""}

package/dist/sources/web-research.md ADDED Viewed

@@ -0,0 +1,12 @@
+You are a research assistant investigating questions using the web. Your tools:
+- **web_search**: search the web — returns results with titles, snippets, and URLs
+- **fetch_page**: fetch a URL and extract its article content — use to read promising search results or follow links
+- **web_research**: spawn parallel sub-agents that each run their own web_search/fetch_page cycle — call with `{"questions": ["q1", "q2", ...]}`
+- **report**: submit your final findings with evidence and source URLs
+Process — follow every step in order:
+1. Search the web with focused queries targeting specific aspects of the question.
+2. Read the most promising results with fetch_page. Follow links within pages when they lead to more authoritative content.
+3. Search again with refined queries based on what you learned. Target gaps in your findings.
+4. Call web_research with sub-questions if you judge there are areas that need deeper investigation.
+5. Report with source URLs and direct quotes as evidence. State what you found and what you checked.

package/dist/sources/web.d.ts ADDED Viewed

@@ -0,0 +1,78 @@
+import type { Operation } from "effection";
+import { Source } from "@lloyal-labs/lloyal-agents";
+import { Tool } from "@lloyal-labs/lloyal-agents";
+import type { Chunk } from "../resources/types";
+import type { SourceContext } from "./types";
+import type { SearchProvider } from "../tools/types";
+/**
+ * Raw page content buffered during web research for post-research reranking
+ *
+ * Populated by {@link BufferingFetchPage} as agents fetch pages. After
+ * the research phase ends, buffered pages are converted to {@link Chunk}
+ * instances via {@link chunkFetchedPages} for reranker scoring.
+ *
+ * @category Rig
+ */
+export interface FetchedPage {
+    /** URL reported by the fetch result, or the requested URL when the result has none */
+    url: string;
+    /** Page title reported by the fetch result; empty string when it has none */
+    title: string;
+    /** Article text exactly as returned by the fetch tool (NOTE(review): possibly capped by its `maxChars` — confirm) */
+    text: string;
+}
+/**
+ * Convert buffered web pages into {@link Chunk} instances for reranking
+ *
+ * Splits each page's text on blank-line paragraph boundaries and keeps
+ * only trimmed paragraphs longer than 40 characters (a paragraph of exactly
+ * 40 characters is dropped). If no paragraphs survive the filter, the full
+ * trimmed text is emitted as a single chunk, again only when it is longer
+ * than 40 characters; otherwise the page contributes nothing.
+ *
+ * Each chunk uses the page URL as `resource` and the page title (or the
+ * URL when the title is empty) as `heading`. `startLine` and `endLine`
+ * both hold the 1-based position of the paragraph among the kept
+ * paragraphs of its page — they are not line numbers in the page text.
+ *
+ * @param pages - Buffered pages from web research
+ * @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization
+ *
+ * @category Rig
+ */
+export declare function chunkFetchedPages(pages: FetchedPage[]): Chunk[];
+/**
+ * Web-backed research source using search + fetch with scratchpad extraction
+ *
+ * Wires up {@link BufferingWebSearch} and {@link BufferingFetchPage} for
+ * grounding, and a self-referential {@link WebResearchTool} for spawning
+ * parallel research sub-agents. Fetched page content is buffered in memory;
+ * after research completes, {@link getChunks} converts the buffer into
+ * {@link Chunk} instances via {@link chunkFetchedPages} for reranker scoring.
+ *
+ * @category Rig
+ */
+export declare class WebSource extends Source<SourceContext, Chunk> {
+    private _buffer; // pages recorded by the fetch_page tool; emptied on every bind
+    private _fetchPage; // BufferingFetchPage sharing _buffer
+    private _webSearch; // BufferingWebSearch around the search provider
+    private _researchPrompt; // prompt pair loaded from web-research.md
+    private _researchTool; // null until the first bind
+    /** @inheritDoc */
+    readonly name = "web";
+    /**
+     * Loads the `extract`, `search-extract` and `web-research` prompt files
+     * synchronously from the module directory.
+     *
+     * @param provider - Search backend (e.g. {@link TavilyProvider}) for web_search calls
+     */
+    constructor(provider: SearchProvider);
+    /**
+     * @inheritDoc
+     * @throws Error when accessed before {@link bind} has built the tool
+     */
+    get researchTool(): Tool;
+    /**
+     * Returns a new two-element array on each access: `web_search`
+     * followed by `fetch_page`.
+     *
+     * @inheritDoc
+     */
+    get groundingTools(): Tool[];
+    /**
+     * Clear the page buffer and build the self-referential research toolkit
+     *
+     * Resets the internal {@link FetchedPage} buffer on every call so
+     * prior-run content does not leak into a new research pass. Constructs
+     * the {@link WebResearchTool} on first bind only (toolkit is stateless
+     * once built). Because of that, the reporter prompt, report tool, turn
+     * limit and trace flag of the first context stay in effect; values
+     * passed to later calls are not applied.
+     *
+     * @param ctx - Context whose reporter prompt, report tool, turn limit
+     *   and trace flag configure the research tool on first bind
+     * @inheritDoc
+     */
+    bind(ctx: SourceContext): Operation<void>;
+    /**
+     * Converts the pages buffered so far with {@link chunkFetchedPages}.
+     *
+     * @inheritDoc
+     */
+    getChunks(): Chunk[];
+}
+//# sourceMappingURL=web.d.ts.map

package/dist/sources/web.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"web.d.ts","sourceRoot":"","sources":["../../src/sources/web.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AACpD,OAAO,EAAE,IAAI,EAAyD,MAAM,4BAA4B,CAAC;AAIzG,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC7C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAkBrD;;;;;;;;GAQG;AACH,MAAM,WAAW,WAAW;IAC1B,uCAAuC;IACvC,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,WAAW,EAAE,GAAG,KAAK,EAAE,CAkC/D;AAoMD;;;;;;;;;;GAUG;AACH,qBAAa,SAAU,SAAQ,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC;IACzD,OAAO,CAAC,OAAO,CAAqB;IACpC,OAAO,CAAC,UAAU,CAAqB;IACvC,OAAO,CAAC,UAAU,CAAqB;IACvC,OAAO,CAAC,eAAe,CAAmC;IAC1D,OAAO,CAAC,aAAa,CAAgC;IAErD,kBAAkB;IAClB,QAAQ,CAAC,IAAI,SAAS;IAEtB;;OAEG;gBACS,QAAQ,EAAE,cAAc;IASpC,kBAAkB;IAClB,IAAI,YAAY,IAAI,IAAI,CAIvB;IAED,kBAAkB;IAClB,IAAI,cAAc,IAAI,IAAI,EAAE,CAA+C;IAE3E;;;;;;;;;OASG;IACF,IAAI,CAAC,GAAG,EAAE,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC;IA2B1C,kBAAkB;IAClB,SAAS,IAAI,KAAK,EAAE;CAGrB"}

package/dist/sources/web.js ADDED Viewed

@@ -0,0 +1,319 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.WebSource = void 0;
+exports.chunkFetchedPages = chunkFetchedPages;
+const fs = require("node:fs");
+const path = require("node:path");
+const effection_1 = require("effection");
+const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
+const lloyal_agents_2 = require("@lloyal-labs/lloyal-agents");
+const web_search_1 = require("../tools/web-search");
+const fetch_page_1 = require("../tools/fetch-page");
+const web_research_1 = require("../tools/web-research");
+// ── Task loader ──────────────────────────────────────────────────
+function readTask(name) {
+    const raw = fs
+        .readFileSync(path.resolve(__dirname, `${name}.md`), "utf8")
+        .trim();
+    const sep = raw.indexOf("\n---\n");
+    if (sep === -1)
+        return { system: raw, user: "" };
+    return { system: raw.slice(0, sep).trim(), user: raw.slice(sep + 5).trim() };
+}
+/**
+ * Convert buffered web pages into {@link Chunk} instances for reranking
+ *
+ * Splits each page's text on blank-line paragraph boundaries, filtering
+ * paragraphs shorter than 40 characters. If no paragraphs survive the
+ * filter, the full text is emitted as a single chunk (if long enough).
+ *
+ * @param pages - Buffered pages from web research
+ * @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization
+ *
+ * @category Rig
+ */
+function chunkFetchedPages(pages) {
+    const chunks = [];
+    for (const page of pages) {
+        const paragraphs = page.text
+            .split(/\n\s*\n/)
+            .map((p) => p.trim())
+            .filter((p) => p.length > 40);
+        if (paragraphs.length === 0) {
+            if (page.text.trim().length > 40) {
+                chunks.push({
+                    resource: page.url,
+                    heading: page.title || page.url,
+                    text: page.text.trim(),
+                    tokens: [],
+                    startLine: 1,
+                    endLine: 1,
+                });
+            }
+            continue;
+        }
+        for (let i = 0; i < paragraphs.length; i++) {
+            chunks.push({
+                resource: page.url,
+                heading: page.title || page.url,
+                text: paragraphs[i],
+                tokens: [],
+                startLine: i + 1,
+                endLine: i + 1,
+            });
+        }
+    }
+    return chunks;
+}
+// ── BufferingFetchPage ───────────────────────────────────────────
+/**
+ * Fetch-page wrapper that buffers full content and extracts a compact summary
+ *
+ * Wraps {@link FetchPageTool} to intercept successful fetches. Full page
+ * content is pushed into a shared {@link FetchedPage} buffer for
+ * post-research reranking. An attention scratchpad (forked from
+ * {@link ScratchpadParent}) then grammar-constrains a summary + links
+ * extraction, returning the compact result to the calling agent instead
+ * of the full page text. Falls back to the full result if extraction
+ * fails or no scratchpad parent is available.
+ *
+ * @category Rig
+ */
+class BufferingFetchPage extends lloyal_agents_2.Tool {
+    name = "fetch_page";
+    description = "Fetch a web page and extract its article content. Returns a summary and any links worth following. Use to read search results or follow links discovered in pages.";
+    parameters = {
+        type: "object",
+        properties: { url: { type: "string", description: "URL to fetch" } },
+        required: ["url"],
+    };
+    _inner;
+    _buffer;
+    _extractTask;
+    constructor(buffer, extractTask, maxChars) {
+        super();
+        this._inner = new fetch_page_1.FetchPageTool(maxChars);
+        this._buffer = buffer;
+        this._extractTask = extractTask;
+    }
+    *execute(args) {
+        const result = yield* this._inner.execute(args);
+        const r = result;
+        if (typeof r?.content === "string" &&
+            r.content !== "[Could not extract article content]") {
+            const content = r.content;
+            // Buffer full content for reranking
+            this._buffer.push({
+                url: r.url || args.url,
+                title: r.title || "",
+                text: content,
+            });
+            // Attention scratchpad: fork from innermost active root, extract summary + links, prune
+            let parent;
+            try {
+                parent = yield* lloyal_agents_2.ScratchpadParent.expect();
+            }
+            catch { /* no parent — skip extraction */ }
+            if (!parent || parent.disposed)
+                return result;
+            const ctx = yield* lloyal_agents_2.Ctx.expect();
+            const schema = {
+                type: "object",
+                properties: {
+                    summary: { type: "string" },
+                    links: { type: "array", items: { type: "string" } },
+                },
+                required: ["summary", "links"],
+            };
+            const grammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(schema)));
+            const extractPrompt = this._extractTask.user
+                .replace("{{url}}", args.url)
+                .replace("{{title}}", r.title || "")
+                .replace("{{content}}", content);
+            const messages = [
+                { role: "system", content: this._extractTask.system },
+                { role: "user", content: extractPrompt },
+            ];
+            const { prompt } = ctx.formatChatSync(JSON.stringify(messages), { enableThinking: false });
+            try {
+                const extracted = yield* (0, lloyal_agents_2.generate)({
+                    prompt,
+                    grammar,
+                    params: { temperature: 0.3 },
+                    parse: (o) => JSON.parse(o),
+                    parent,
+                });
+                return {
+                    url: r.url || args.url,
+                    title: r.title || "",
+                    summary: extracted.parsed?.summary || "",
+                    links: extracted.parsed?.links || [],
+                };
+            }
+            catch {
+                return result; // fallback to full result on extraction failure
+            }
+        }
+        return result;
+    }
+}
+// ── BufferingWebSearch ────────────────────────────────────────────
+/**
+ * Web-search wrapper that extracts a compact summary via attention scratchpad
+ *
+ * Wraps {@link WebSearchTool} and, when a {@link ScratchpadParent} is
+ * available, forks a grammar-constrained generation to distill raw search
+ * results into a list of promising URLs plus a brief summary. The compact
+ * output reduces KV pressure on the calling agent. Falls back to raw
+ * results if extraction fails or no scratchpad parent is available.
+ *
+ * @category Rig
+ */
+class BufferingWebSearch extends lloyal_agents_2.Tool {
+    name = "web_search";
+    description = "Search the web. Returns the most relevant URLs and a summary. Use fetch_page to read full content of promising results.";
+    parameters = {
+        type: "object",
+        properties: { query: { type: "string", description: "Search query" } },
+        required: ["query"],
+    };
+    _inner;
+    _extractTask;
+    constructor(provider, extractTask) {
+        super();
+        this._inner = new web_search_1.WebSearchTool(provider);
+        this._extractTask = extractTask;
+    }
+    *execute(args) {
+        const results = yield* this._inner.execute(args);
+        // If error or not an array, return as-is (no scratchpad needed)
+        if (!Array.isArray(results) || results.length === 0)
+            return results;
+        // Scratchpad: fork from innermost active root, extract URLs + summary
+        let parent;
+        try {
+            parent = yield* lloyal_agents_2.ScratchpadParent.expect();
+        }
+        catch { /* no parent — return raw */ }
+        if (!parent || parent.disposed)
+            return results;
+        const ctx = yield* lloyal_agents_2.Ctx.expect();
+        const schema = {
+            type: "object",
+            properties: {
+                urls: { type: "array", items: { type: "string" }, description: "URLs worth fetching" },
+                summary: { type: "string", description: "Brief summary of what the search found" },
+            },
+            required: ["urls", "summary"],
+        };
+        const grammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(schema)));
+        const resultsText = results
+            .map((r, i) => `${i + 1}. ${r.title}\n   ${r.url}\n   ${r.snippet}`)
+            .join("\n\n");
+        const extractPrompt = this._extractTask.user
+            .replace("{{query}}", args.query)
+            .replace("{{results}}", resultsText);
+        const messages = [
+            { role: "system", content: this._extractTask.system },
+            { role: "user", content: extractPrompt },
+        ];
+        const { prompt } = ctx.formatChatSync(JSON.stringify(messages), { enableThinking: false });
+        try {
+            const extracted = yield* (0, lloyal_agents_2.generate)({
+                prompt,
+                grammar,
+                params: { temperature: 0.3 },
+                parse: (o) => JSON.parse(o),
+                parent,
+            });
+            return {
+                urls: extracted.parsed?.urls || [],
+                summary: extracted.parsed?.summary || "",
+                resultCount: results.length,
+            };
+        }
+        catch {
+            return results; // fallback to raw results on extraction failure
+        }
+    }
+}
+// ── WebSource ────────────────────────────────────────────────────
+/**
+ * Web-backed research source using search + fetch with scratchpad extraction
+ *
+ * Wires up {@link BufferingWebSearch} and {@link BufferingFetchPage} for
+ * grounding, and a self-referential {@link WebResearchTool} for spawning
+ * parallel research sub-agents. Fetched page content is buffered in memory;
+ * after research completes, {@link getChunks} converts the buffer into
+ * {@link Chunk} instances via {@link chunkFetchedPages} for reranker scoring.
+ *
+ * @category Rig
+ */
+class WebSource extends lloyal_agents_1.Source {
+    _buffer = [];
+    _fetchPage;
+    _webSearch;
+    _researchPrompt;
+    _researchTool = null;
+    /** @inheritDoc */
+    name = "web";
+    /**
+     * @param provider - Search backend (e.g. {@link TavilyProvider}) for web_search calls
+     */
+    constructor(provider) {
+        super();
+        const extractTask = readTask("extract");
+        const searchExtractTask = readTask("search-extract");
+        this._researchPrompt = readTask("web-research");
+        this._fetchPage = new BufferingFetchPage(this._buffer, extractTask);
+        this._webSearch = new BufferingWebSearch(provider, searchExtractTask);
+    }
+    /** @inheritDoc */
+    get researchTool() {
+        if (!this._researchTool)
+            throw new Error("WebSource: bind() must be called first");
+        return this._researchTool;
+    }
+    /** @inheritDoc */
+    get groundingTools() { return [this._webSearch, this._fetchPage]; }
+    /**
+     * Clear the page buffer and build the self-referential research toolkit
+     *
+     * Resets the internal {@link FetchedPage} buffer on every call so
+     * prior-run content does not leak into a new research pass. Constructs
+     * the {@link WebResearchTool} on first bind only (toolkit is stateless
+     * once built).
+     *
+     * @inheritDoc
+     */
+    *bind(ctx) {
+        this._buffer.length = 0;
+        const tw = yield* lloyal_agents_2.Trace.expect();
+        tw.write({ traceId: tw.nextId(), parentTraceId: null, ts: performance.now(),
+            type: 'source:bind', sourceName: this.name });
+        if (!this._researchTool) {
+            const webResearch = new web_research_1.WebResearchTool({
+                name: "web_research",
+                description: "Spawn parallel web research agents that search the web, fetch pages, and report findings.",
+                systemPrompt: this._researchPrompt.system,
+                reporterPrompt: ctx.reporterPrompt,
+                maxTurns: ctx.maxTurns,
+                trace: ctx.trace,
+            });
+            const toolkit = (0, lloyal_agents_2.createToolkit)([
+                this._webSearch,
+                this._fetchPage,
+                ctx.reportTool,
+                webResearch,
+            ]);
+            webResearch.setToolkit(toolkit);
+            this._researchTool = webResearch;
+        }
+    }
+    /** @inheritDoc */
+    getChunks() {
+        return chunkFetchedPages(this._buffer);
+    }
+}
+exports.WebSource = WebSource;
+//# sourceMappingURL=web.js.map