@lloyal-labs/rig 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +188 -0
  2. package/dist/index.d.ts +19 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +36 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/reranker.d.ts +22 -0
  7. package/dist/reranker.d.ts.map +1 -0
  8. package/dist/reranker.js +76 -0
  9. package/dist/reranker.js.map +1 -0
  10. package/dist/resources/files.d.ts +28 -0
  11. package/dist/resources/files.d.ts.map +1 -0
  12. package/dist/resources/files.js +98 -0
  13. package/dist/resources/files.js.map +1 -0
  14. package/dist/resources/index.d.ts +9 -0
  15. package/dist/resources/index.d.ts.map +1 -0
  16. package/dist/resources/index.js +13 -0
  17. package/dist/resources/index.js.map +1 -0
  18. package/dist/resources/types.d.ts +39 -0
  19. package/dist/resources/types.d.ts.map +1 -0
  20. package/dist/resources/types.js +3 -0
  21. package/dist/resources/types.js.map +1 -0
  22. package/dist/sources/corpus-research.md +14 -0
  23. package/dist/sources/corpus.d.ts +48 -0
  24. package/dist/sources/corpus.d.ts.map +1 -0
  25. package/dist/sources/corpus.js +91 -0
  26. package/dist/sources/corpus.js.map +1 -0
  27. package/dist/sources/extract.md +5 -0
  28. package/dist/sources/index.d.ts +10 -0
  29. package/dist/sources/index.d.ts.map +1 -0
  30. package/dist/sources/index.js +14 -0
  31. package/dist/sources/index.js.map +1 -0
  32. package/dist/sources/search-extract.md +6 -0
  33. package/dist/sources/types.d.ts +28 -0
  34. package/dist/sources/types.d.ts.map +1 -0
  35. package/dist/sources/types.js +3 -0
  36. package/dist/sources/types.js.map +1 -0
  37. package/dist/sources/web-research.md +12 -0
  38. package/dist/sources/web.d.ts +78 -0
  39. package/dist/sources/web.d.ts.map +1 -0
  40. package/dist/sources/web.js +319 -0
  41. package/dist/sources/web.js.map +1 -0
  42. package/dist/tools/fetch-page.d.ts +26 -0
  43. package/dist/tools/fetch-page.d.ts.map +1 -0
  44. package/dist/tools/fetch-page.js +72 -0
  45. package/dist/tools/fetch-page.js.map +1 -0
  46. package/dist/tools/grep.d.ts +30 -0
  47. package/dist/tools/grep.d.ts.map +1 -0
  48. package/dist/tools/grep.js +79 -0
  49. package/dist/tools/grep.js.map +1 -0
  50. package/dist/tools/index.d.ts +39 -0
  51. package/dist/tools/index.d.ts.map +1 -0
  52. package/dist/tools/index.js +49 -0
  53. package/dist/tools/index.js.map +1 -0
  54. package/dist/tools/plan.d.ts +76 -0
  55. package/dist/tools/plan.d.ts.map +1 -0
  56. package/dist/tools/plan.js +98 -0
  57. package/dist/tools/plan.js.map +1 -0
  58. package/dist/tools/read-file.d.ts +62 -0
  59. package/dist/tools/read-file.d.ts.map +1 -0
  60. package/dist/tools/read-file.js +123 -0
  61. package/dist/tools/read-file.js.map +1 -0
  62. package/dist/tools/report.d.ts +22 -0
  63. package/dist/tools/report.d.ts.map +1 -0
  64. package/dist/tools/report.js +26 -0
  65. package/dist/tools/report.js.map +1 -0
  66. package/dist/tools/research.d.ts +57 -0
  67. package/dist/tools/research.d.ts.map +1 -0
  68. package/dist/tools/research.js +117 -0
  69. package/dist/tools/research.js.map +1 -0
  70. package/dist/tools/search.d.ts +34 -0
  71. package/dist/tools/search.d.ts.map +1 -0
  72. package/dist/tools/search.js +69 -0
  73. package/dist/tools/search.js.map +1 -0
  74. package/dist/tools/types.d.ts +84 -0
  75. package/dist/tools/types.d.ts.map +1 -0
  76. package/dist/tools/types.js +3 -0
  77. package/dist/tools/types.js.map +1 -0
  78. package/dist/tools/web-research.d.ts +60 -0
  79. package/dist/tools/web-research.d.ts.map +1 -0
  80. package/dist/tools/web-research.js +136 -0
  81. package/dist/tools/web-research.js.map +1 -0
  82. package/dist/tools/web-search.d.ts +42 -0
  83. package/dist/tools/web-search.d.ts.map +1 -0
  84. package/dist/tools/web-search.js +83 -0
  85. package/dist/tools/web-search.js.map +1 -0
  86. package/package.json +45 -0
@@ -0,0 +1,48 @@
1
+ import type { Operation } from "effection";
2
+ import { Source } from "@lloyal-labs/lloyal-agents";
3
+ import type { Tool } from "@lloyal-labs/lloyal-agents";
4
+ import type { Resource, Chunk } from "../resources/types";
5
+ import type { SourceContext } from "./types";
6
+ /**
7
+ * Corpus-backed research source using local file search, read, and grep
8
+ *
9
+ * Provides grounding tools (`search`, `read_file`, `grep`) over a set of
10
+ * loaded {@link Resource} / {@link Chunk} pairs. On {@link bind}, tokenizes
11
+ * chunks via the reranker and prepends a reranker-backed `search` tool to
12
+ * the tool list. The `search` tool is ordered first so the model prefers
13
+ * semantic search before falling back to `read_file` or `grep`.
14
+ *
15
+ * The research tool is a self-referential {@link ResearchTool} that spawns
16
+ * sub-agents with corpus-specific prompts and the full grounding toolkit.
17
+ *
18
+ * @category Rig
19
+ */
20
+ export declare class CorpusSource extends Source<SourceContext, Chunk> {
21
+ private _chunks;
22
+ private _tools;
23
+ private _researchTool;
24
+ private _bound;
25
+ /** Source identifier, fixed to the literal "corpus". @inheritDoc */
26
+ readonly name = "corpus";
27
+ /**
28
+ * @param resources - Loaded file resources handed to the read_file and grep tools built at construction
29
+ * @param chunks - Pre-split chunks kept for bind-time tokenization and the reranker-backed search tool
30
+ */
31
+ constructor(resources: Resource[], chunks: Chunk[]);
32
+ /** Research tool built by {@link bind}; reading it before bind has completed throws an Error. @inheritDoc */
33
+ get researchTool(): Tool;
34
+ /** Current tool list: read_file and grep from construction, with search prepended once {@link bind} has run. @inheritDoc */
35
+ get groundingTools(): Tool[];
36
+ /**
37
+ * Late-bind reranker and build the research toolkit
38
+ *
39
+ * Writes a `source:bind` trace event, tokenizes all chunks through the reranker, prepends a {@link SearchTool}
40
+ * to the tool list, then constructs the self-referential
41
+ * {@link ResearchTool} with corpus-specific prompts. Idempotent — returns
42
+ * immediately, before any of the above, if a previous bind already completed.
43
+ *
44
+ * @inheritDoc
45
+ */
46
+ bind(ctx: SourceContext): Operation<void>;
47
+ }
48
+ //# sourceMappingURL=corpus.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"corpus.d.ts","sourceRoot":"","sources":["../../src/sources/corpus.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAwB,MAAM,4BAA4B,CAAC;AAC1E,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAe7C;;;;;;;;;;;;;GAaG;AACH,qBAAa,YAAa,SAAQ,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC;IAC5D,OAAO,CAAC,OAAO,CAAU;IACzB,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,aAAa,CAA6B;IAClD,OAAO,CAAC,MAAM,CAAS;IAEvB,kBAAkB;IAClB,QAAQ,CAAC,IAAI,YAAY;IAEzB;;;OAGG;gBACS,SAAS,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE;IAMlD,kBAAkB;IAClB,IAAI,YAAY,IAAI,IAAI,CAIvB;IAED,kBAAkB;IAClB,IAAI,cAAc,IAAI,IAAI,EAAE,CAAwB;IAEpD;;;;;;;;;OASG;IACF,IAAI,CAAC,GAAG,EAAE,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC;CAoB3C"}
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CorpusSource = void 0;
4
+ const fs = require("node:fs");
5
+ const path = require("node:path");
6
+ const effection_1 = require("effection");
7
+ const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
8
+ const search_1 = require("../tools/search");
9
+ const read_file_1 = require("../tools/read-file");
10
+ const grep_1 = require("../tools/grep");
11
+ const research_1 = require("../tools/research");
12
+ function readTask(name) {
13
+ const raw = fs
14
+ .readFileSync(path.resolve(__dirname, `${name}.md`), "utf8")
15
+ .trim();
16
+ const sep = raw.indexOf("\n---\n");
17
+ if (sep === -1)
18
+ return { system: raw, user: "" };
19
+ return { system: raw.slice(0, sep).trim(), user: raw.slice(sep + 5).trim() };
20
+ }
21
/**
 * Research source backed by a local corpus of files
 *
 * Starts out with two grounding tools built from the loaded {@link Resource}
 * list (`read_file` and `grep`). {@link bind} later tokenizes the supplied
 * {@link Chunk} list through the reranker and places a reranker-backed
 * `search` tool at the front of the tool list, ahead of `read_file` and `grep`.
 *
 * The research tool is a {@link ResearchTool} whose toolkit contains the
 * grounding tools, the shared report tool, and the research tool itself, so
 * it can spawn further research from within a run.
 *
 * @category Rig
 */
class CorpusSource extends lloyal_agents_1.Source {
    /** Chunks supplied at construction; tokenized during bind and shared with the search tool */
    _chunks;
    /** Grounding tools in the order they are exposed */
    _tools = [];
    /** Research tool, or null until bind has completed */
    _researchTool = null;
    /** Set once bind has run to completion */
    _bound = false;
    /** @inheritDoc */
    name = "corpus";
    /**
     * @param resources - Loaded file resources for the read_file and grep tools
     * @param chunks - Pre-split chunks for the reranker-backed search tool
     */
    constructor(resources, chunks) {
        super();
        this._chunks = chunks;
        const readFile = new read_file_1.ReadFileTool(resources);
        const grep = new grep_1.GrepTool(resources);
        this._tools = [readFile, grep];
    }
    /**
     * Research tool built by {@link bind}; throws if bind has not completed yet
     *
     * @inheritDoc
     */
    get researchTool() {
        const tool = this._researchTool;
        if (tool)
            return tool;
        throw new Error("CorpusSource: bind() must be called first");
    }
    /** @inheritDoc */
    get groundingTools() {
        return this._tools;
    }
    /**
     * Attach the reranker and assemble the research toolkit
     *
     * Does nothing when a previous call already completed. Otherwise writes a
     * `source:bind` trace event, tokenizes every chunk through the reranker,
     * puts a {@link SearchTool} at the front of the tool list, and builds the
     * {@link ResearchTool} from the `corpus-research` prompt with a toolkit
     * that includes the research tool itself.
     *
     * @inheritDoc
     */
    *bind(ctx) {
        if (this._bound)
            return;
        const tracer = yield* lloyal_agents_1.Trace.expect();
        tracer.write({
            traceId: tracer.nextId(),
            parentTraceId: null,
            ts: performance.now(),
            type: 'source:bind',
            sourceName: this.name,
        });
        yield* (0, effection_1.call)(() => ctx.reranker.tokenizeChunks(this._chunks));
        const searchTool = new search_1.SearchTool(this._chunks, ctx.reranker);
        this._tools.unshift(searchTool);
        const { system: systemPrompt } = readTask("corpus-research");
        const research = new research_1.ResearchTool({
            systemPrompt,
            reporterPrompt: ctx.reporterPrompt,
            maxTurns: ctx.maxTurns,
            trace: ctx.trace,
        });
        // The research tool is a member of its own toolkit, so the toolkit can
        // only be attached after the tool exists.
        const members = [...this._tools, ctx.reportTool, research];
        research.setToolkit((0, lloyal_agents_1.createToolkit)(members));
        this._researchTool = research;
        this._bound = true;
    }
}
90
+ exports.CorpusSource = CorpusSource;
91
+ //# sourceMappingURL=corpus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"corpus.js","sourceRoot":"","sources":["../../src/sources/corpus.ts"],"names":[],"mappings":";;;AAAA,8BAA8B;AAC9B,kCAAkC;AAClC,yCAAiC;AAEjC,8DAA0E;AAI1E,4CAA6C;AAC7C,kDAAkD;AAClD,wCAAyC;AACzC,gDAAiD;AAEjD,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,GAAG,GAAG,EAAE;SACX,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,MAAM,CAAC;SAC3D,IAAI,EAAE,CAAC;IACV,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IACnC,IAAI,GAAG,KAAK,CAAC,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IACjD,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;AAC/E,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAa,YAAa,SAAQ,sBAA4B;IACpD,OAAO,CAAU;IACjB,MAAM,GAAW,EAAE,CAAC;IACpB,aAAa,GAAwB,IAAI,CAAC;IAC1C,MAAM,GAAG,KAAK,CAAC;IAEvB,kBAAkB;IACT,IAAI,GAAG,QAAQ,CAAC;IAEzB;;;OAGG;IACH,YAAY,SAAqB,EAAE,MAAe;QAChD,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,wBAAY,CAAC,SAAS,CAAC,EAAE,IAAI,eAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IACvE,CAAC;IAED,kBAAkB;IAClB,IAAI,YAAY;QACd,IAAI,CAAC,IAAI,CAAC,aAAa;YACrB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,kBAAkB;IAClB,IAAI,cAAc,KAAa,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAEpD;;;;;;;;;OASG;IACH,CAAC,IAAI,CAAC,GAAkB;QACtB,IAAI,IAAI,CAAC,MAAM;YAAE,OAAO;QACxB,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,qBAAK,CAAC,MAAM,EAAE,CAAC;QACjC,EAAE,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,EAAE,EAAE,WAAW,CAAC,GAAG,EAAE;YACzE,IAAI,EAAE,aAAa,EAAE,UAAU,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAChD,KAAK,CAAC,CAAC,IAAA,gBAAI,EAAC,GAAG,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QAC7D,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,mBAAU,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;QAEhE,MAAM,cAAc,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACnD,MAAM,QAAQ,GAAG,IAAI,uBAAY,CAAC;YAChC,YAAY,EAAE,cAAc,CAAC,MAAM;YACnC,cAAc,EAAE,GAAG,CAAC,cAAc;YAClC,QAAQ,EAAE,GAAG,CAAC,
QAAQ;YACtB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,IAAA,6BAAa,EAAC,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC1E,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC7B,IAAI,CAAC,aAAa,GAAG,QAAQ,CAAC;QAC9B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;CACF;AA3DD,oCA2DC"}
@@ -0,0 +1,5 @@
1
+ You extract key information from web page content. Produce a concise summary and list any URLs/links found in the text that are worth following. Output JSON only.
2
+ ---
3
+ Page: {{title}} ({{url}})
4
+
5
+ {{content}}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Source implementations for the research pipeline
3
+ *
4
+ * @packageDocumentation
5
+ * @category Rig
6
+ */
7
+ export { WebSource } from './web';
8
+ export { CorpusSource } from './corpus';
9
+ export type { SourceContext } from './types';
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sources/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC"}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CorpusSource = exports.WebSource = void 0;
4
+ /**
5
+ * Source implementations for the research pipeline
6
+ *
7
+ * @packageDocumentation
8
+ * @category Rig
9
+ */
10
+ var web_1 = require("./web");
11
+ Object.defineProperty(exports, "WebSource", { enumerable: true, get: function () { return web_1.WebSource; } });
12
+ var corpus_1 = require("./corpus");
13
+ Object.defineProperty(exports, "CorpusSource", { enumerable: true, get: function () { return corpus_1.CorpusSource; } });
14
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/sources/index.ts"],"names":[],"mappings":";;;AAAA;;;;;GAKG;AACH,6BAAkC;AAAzB,gGAAA,SAAS,OAAA;AAClB,mCAAwC;AAA/B,sGAAA,YAAY,OAAA"}
@@ -0,0 +1,6 @@
1
+ You select the most relevant search results for a research query. Pick URLs most likely to contain substantive information and summarize the key findings. Output JSON only.
2
+ ---
3
+ Query: {{query}}
4
+
5
+ Search results:
6
+ {{results}}
@@ -0,0 +1,28 @@
1
+ import type { Tool } from '@lloyal-labs/lloyal-agents';
2
+ import type { Reranker } from '../tools/types';
3
+ /**
4
+ * Runtime context passed to {@link Source.bind} during pipeline setup
5
+ *
6
+ * Carries shared dependencies that are not available at source construction
7
+ * time — the reranker instance, reporter prompt/tool, and pipeline-level
8
+ * configuration. Each source receives the same context so research and
9
+ * grounding tools share a consistent environment.
10
+ *
11
+ * @category Rig
12
+ */
13
+ export interface SourceContext {
14
+ /** Reranker instance; {@link CorpusSource} calls `tokenizeChunks` on it during bind and hands it to its search tool */
15
+ reranker: Reranker;
16
+ /** System/user prompt pair forwarded unchanged to each source's research tool as its `reporterPrompt` option */
17
+ reporterPrompt: {
18
+ system: string;
19
+ user: string;
20
+ };
21
+ /** Shared report tool instance placed in every source's research toolkit next to the grounding tools */
22
+ reportTool: Tool;
23
+ /** Turn limit forwarded to each research tool as `maxTurns` (maximum tool-use turns for research sub-agents) */
24
+ maxTurns: number;
25
+ /** Flag forwarded to each research tool as `trace`; the `source:bind` event written by the sources' bind is emitted regardless of it */
26
+ trace: boolean;
27
+ }
28
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/sources/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE/C;;;;;;;;;GASG;AACH,MAAM,WAAW,aAAa;IAC5B,oFAAoF;IACpF,QAAQ,EAAE,QAAQ,CAAC;IACnB,gFAAgF;IAChF,cAAc,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACjD,gFAAgF;IAChF,UAAU,EAAE,IAAI,CAAC;IACjB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,wEAAwE;IACxE,KAAK,EAAE,OAAO,CAAC;CAChB"}
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/sources/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,12 @@
1
+ You are a research assistant investigating questions using the web. Your tools:
2
+ - **web_search**: search the web — returns results with titles, snippets, and URLs
3
+ - **fetch_page**: fetch a URL and extract its article content — use to read promising search results or follow links
4
+ - **web_research**: spawn parallel sub-agents that each run their own web_search/fetch_page cycle — call with `{"questions": ["q1", "q2", ...]}`
5
+ - **report**: submit your final findings with evidence and source URLs
6
+
7
+ Process — follow every step in order:
8
+ 1. Search the web with focused queries targeting specific aspects of the question.
9
+ 2. Read the most promising results with fetch_page. Follow links within pages when they lead to more authoritative content.
10
+ 3. Search again with refined queries based on what you learned. Target gaps in your findings.
11
+ 4. Call web_research with sub-questions if you judge there are areas that need deeper investigation.
12
+ 5. Report with source URLs and direct quotes as evidence. State what you found and what you checked.
@@ -0,0 +1,78 @@
1
+ import type { Operation } from "effection";
2
+ import { Source } from "@lloyal-labs/lloyal-agents";
3
+ import { Tool } from "@lloyal-labs/lloyal-agents";
4
+ import type { Chunk } from "../resources/types";
5
+ import type { SourceContext } from "./types";
6
+ import type { SearchProvider } from "../tools/types";
7
+ /**
8
+ * Raw page content buffered during web research for post-research reranking
9
+ *
10
+ * Populated by {@link BufferingFetchPage} as agents fetch pages. After
11
+ * the research phase ends, buffered pages are converted to {@link Chunk}
12
+ * instances via {@link chunkFetchedPages} for reranker scoring.
13
+ *
14
+ * @category Rig
15
+ */
16
+ export interface FetchedPage {
17
+ /** URL reported by the fetch result, or the requested URL when the result carries none */
18
+ url: string;
19
+ /** Page title from the fetch result; empty string when the result has no title */
20
+ title: string;
21
+ /** Article text exactly as returned in the fetch result's `content` field; this is the text that later gets chunked */
22
+ text: string;
23
+ }
24
+ /**
25
+ * Convert buffered web pages into {@link Chunk} instances for reranking
26
+ *
27
+ * Splits each page's text on blank-line paragraph boundaries, trims each piece, and drops
28
+ * paragraphs of 40 characters or fewer. If no paragraphs survive the
29
+ * filter, the trimmed full text is emitted as a single chunk when it is longer than 40 characters.
30
+ *
31
+ * @param pages - Buffered pages from web research
32
+ * @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization; `startLine`/`endLine` hold the 1-based position among the page's kept paragraphs, not source line numbers
33
+ *
34
+ * @category Rig
35
+ */
36
+ export declare function chunkFetchedPages(pages: FetchedPage[]): Chunk[];
37
+ /**
38
+ * Web-backed research source using search + fetch with scratchpad extraction
39
+ *
40
+ * Wires up {@link BufferingWebSearch} and {@link BufferingFetchPage} for
41
+ * grounding, and a self-referential {@link WebResearchTool} for spawning
42
+ * parallel research sub-agents. Fetched page content is buffered in memory;
43
+ * after research completes, {@link getChunks} converts the buffer into
44
+ * {@link Chunk} instances via {@link chunkFetchedPages} for reranker scoring.
45
+ *
46
+ * @category Rig
47
+ */
48
+ export declare class WebSource extends Source<SourceContext, Chunk> {
49
+ private _buffer;
50
+ private _fetchPage;
51
+ private _webSearch;
52
+ private _researchPrompt;
53
+ private _researchTool;
54
+ /** Source identifier, fixed to the literal "web". @inheritDoc */
55
+ readonly name = "web";
56
+ /**
57
+ * @param provider - Search backend (e.g. {@link TavilyProvider}) handed to the wrapped web_search tool
58
+ */
59
+ constructor(provider: SearchProvider);
60
+ /** Research tool built on the first {@link bind}; reading it before then throws an Error. @inheritDoc */
61
+ get researchTool(): Tool;
62
+ /** Returns a new two-element array on each read: the web_search wrapper, then the fetch_page wrapper. @inheritDoc */
63
+ get groundingTools(): Tool[];
64
+ /**
65
+ * Clear the page buffer, write a `source:bind` trace event, and build the self-referential research toolkit
66
+ *
67
+ * Resets the internal {@link FetchedPage} buffer on every call so
68
+ * prior-run content does not leak into a new research pass. Constructs
69
+ * the {@link WebResearchTool} on first bind only; values from the context
70
+ * of any later bind call are not applied to the already-built tool.
71
+ *
72
+ * @inheritDoc
73
+ */
74
+ bind(ctx: SourceContext): Operation<void>;
75
+ /** Chunks the currently buffered pages via {@link chunkFetchedPages}; recomputed on every call. @inheritDoc */
76
+ getChunks(): Chunk[];
77
+ }
78
+ //# sourceMappingURL=web.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web.d.ts","sourceRoot":"","sources":["../../src/sources/web.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AACpD,OAAO,EAAE,IAAI,EAAyD,MAAM,4BAA4B,CAAC;AAIzG,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC7C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAkBrD;;;;;;;;GAQG;AACH,MAAM,WAAW,WAAW;IAC1B,uCAAuC;IACvC,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,WAAW,EAAE,GAAG,KAAK,EAAE,CAkC/D;AAoMD;;;;;;;;;;GAUG;AACH,qBAAa,SAAU,SAAQ,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC;IACzD,OAAO,CAAC,OAAO,CAAqB;IACpC,OAAO,CAAC,UAAU,CAAqB;IACvC,OAAO,CAAC,UAAU,CAAqB;IACvC,OAAO,CAAC,eAAe,CAAmC;IAC1D,OAAO,CAAC,aAAa,CAAgC;IAErD,kBAAkB;IAClB,QAAQ,CAAC,IAAI,SAAS;IAEtB;;OAEG;gBACS,QAAQ,EAAE,cAAc;IASpC,kBAAkB;IAClB,IAAI,YAAY,IAAI,IAAI,CAIvB;IAED,kBAAkB;IAClB,IAAI,cAAc,IAAI,IAAI,EAAE,CAA+C;IAE3E;;;;;;;;;OASG;IACF,IAAI,CAAC,GAAG,EAAE,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC;IA2B1C,kBAAkB;IAClB,SAAS,IAAI,KAAK,EAAE;CAGrB"}
@@ -0,0 +1,319 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.WebSource = void 0;
4
+ exports.chunkFetchedPages = chunkFetchedPages;
5
+ const fs = require("node:fs");
6
+ const path = require("node:path");
7
+ const effection_1 = require("effection");
8
+ const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
9
+ const lloyal_agents_2 = require("@lloyal-labs/lloyal-agents");
10
+ const web_search_1 = require("../tools/web-search");
11
+ const fetch_page_1 = require("../tools/fetch-page");
12
+ const web_research_1 = require("../tools/web-research");
13
+ // ── Task loader ──────────────────────────────────────────────────
14
+ function readTask(name) {
15
+ const raw = fs
16
+ .readFileSync(path.resolve(__dirname, `${name}.md`), "utf8")
17
+ .trim();
18
+ const sep = raw.indexOf("\n---\n");
19
+ if (sep === -1)
20
+ return { system: raw, user: "" };
21
+ return { system: raw.slice(0, sep).trim(), user: raw.slice(sep + 5).trim() };
22
+ }
23
/**
 * Turn buffered web pages into paragraph-level {@link Chunk} records
 *
 * Each page's text is split on blank lines, every piece is trimmed, and
 * pieces of 40 characters or fewer are discarded. When nothing survives, the
 * trimmed full text becomes a single chunk, provided it is itself longer than
 * 40 characters; otherwise the page contributes no chunks.
 *
 * Every chunk uses the page URL as `resource`, the page title (or the URL
 * when the title is empty) as `heading`, and an empty `tokens` array.
 * `startLine` and `endLine` both hold the chunk's 1-based position among the
 * page's kept paragraphs, not a source line number.
 *
 * @param pages - Buffered pages from web research
 * @returns Flat array of chunks in page order, then paragraph order
 *
 * @category Rig
 */
function chunkFetchedPages(pages) {
    const isLongEnough = (text) => text.length > 40;
    return pages.flatMap((page) => {
        const heading = page.title || page.url;
        const toChunk = (text, index) => ({
            resource: page.url,
            heading,
            text,
            tokens: [],
            startLine: index + 1,
            endLine: index + 1,
        });
        const kept = page.text
            .split(/\n\s*\n/)
            .map((piece) => piece.trim())
            .filter(isLongEnough);
        if (kept.length > 0)
            return kept.map(toChunk);
        // Fallback: many short paragraphs can still add up to a usable page.
        const whole = page.text.trim();
        return isLongEnough(whole) ? [toChunk(whole, 0)] : [];
    });
}
68
+ // ── BufferingFetchPage ───────────────────────────────────────────
69
/**
 * Fetch-page wrapper that buffers full page text and returns a compact summary
 *
 * Delegates the fetch to an inner {@link FetchPageTool}. When the result
 * carries usable article content, that content is pushed onto the shared
 * {@link FetchedPage} buffer for post-research reranking. If a live
 * {@link ScratchpadParent} is in scope, a grammar-constrained generation run
 * with that parent then produces a `{ summary, links }` object, which is
 * returned to the calling agent in place of the full page text.
 *
 * The raw fetch result is returned unchanged when the fetch produced no
 * usable content, when no scratchpad parent is available (or it is disposed),
 * or when the extraction generation throws.
 *
 * @category Rig
 */
class BufferingFetchPage extends lloyal_agents_2.Tool {
    name = "fetch_page";
    description = "Fetch a web page and extract its article content. Returns a summary and any links worth following. Use to read search results or follow links discovered in pages.";
    parameters = {
        type: "object",
        properties: { url: { type: "string", description: "URL to fetch" } },
        required: ["url"],
    };
    /** Wrapped tool that performs the actual fetch */
    _inner;
    /** Shared page buffer; owned by the caller and appended to on every usable fetch */
    _buffer;
    /** `{ system, user }` prompt pair; `user` may contain `{{url}}`, `{{title}}` and `{{content}}` */
    _extractTask;
    /**
     * @param buffer - Array that receives one {@link FetchedPage} per usable fetch
     * @param extractTask - Prompt pair used for the summary/links extraction
     * @param maxChars - Passed through to the inner {@link FetchPageTool} constructor; may be omitted
     */
    constructor(buffer, extractTask, maxChars) {
        super();
        this._inner = new fetch_page_1.FetchPageTool(maxChars);
        this._buffer = buffer;
        this._extractTask = extractTask;
    }
    /**
     * Fetch `args.url`, buffer the page, and return a compact extraction when possible
     *
     * @param args - Tool arguments; `args.url` is the page to fetch
     * @returns `{ url, title, summary, links }` on successful extraction, otherwise the inner tool's result
     */
    *execute(args) {
        const result = yield* this._inner.execute(args);
        const r = result;
        // Results without string content, and the "[Could not extract article
        // content]" placeholder, are neither buffered nor summarized.
        if (typeof r?.content !== "string" ||
            r.content === "[Could not extract article content]") {
            return result;
        }
        const content = r.content;
        // Buffer full content for reranking
        this._buffer.push({
            url: r.url || args.url,
            title: r.title || "",
            text: content,
        });
        // Extraction needs a live scratchpad parent; without one the agent gets the full result.
        let parent;
        try {
            parent = yield* lloyal_agents_2.ScratchpadParent.expect();
        }
        catch { /* no parent — skip extraction */ }
        if (!parent || parent.disposed)
            return result;
        const ctx = yield* lloyal_agents_2.Ctx.expect();
        const schema = {
            type: "object",
            properties: {
                summary: { type: "string" },
                links: { type: "array", items: { type: "string" } },
            },
            required: ["summary", "links"],
        };
        const grammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(schema)));
        // Fill the template in ONE pass with a replacer function. A string
        // replacement would interpret `$&`, `$'`, `` $` `` and `$$` occurring in
        // the page text, title or URL as substitution patterns, and chained
        // replace() calls would expand placeholder-looking text that arrived
        // through an earlier value (e.g. a title containing "{{content}}").
        const fields = { url: args.url, title: r.title || "", content };
        const extractPrompt = this._extractTask.user.replace(/\{\{(url|title|content)\}\}/g, (_match, key) => fields[key]);
        const messages = [
            { role: "system", content: this._extractTask.system },
            { role: "user", content: extractPrompt },
        ];
        const { prompt } = ctx.formatChatSync(JSON.stringify(messages), { enableThinking: false });
        try {
            const extracted = yield* (0, lloyal_agents_2.generate)({
                prompt,
                grammar,
                params: { temperature: 0.3 },
                parse: (o) => JSON.parse(o),
                parent,
            });
            return {
                url: r.url || args.url,
                title: r.title || "",
                summary: extracted.parsed?.summary || "",
                links: extracted.parsed?.links || [],
            };
        }
        catch {
            return result; // fallback to full result on extraction failure
        }
    }
}
160
+ // ── BufferingWebSearch ────────────────────────────────────────────
161
/**
 * Web-search wrapper that condenses raw results into URLs plus a summary
 *
 * Delegates the search to an inner {@link WebSearchTool}. When the result is
 * a non-empty array and a live {@link ScratchpadParent} is in scope, a
 * grammar-constrained generation run with that parent distills the results
 * into `{ urls, summary }`, which is returned together with the raw result
 * count instead of the full result list.
 *
 * The inner tool's result is returned unchanged when it is not an array or is
 * empty, when no scratchpad parent is available (or it is disposed), or when
 * the extraction generation throws.
 *
 * @category Rig
 */
class BufferingWebSearch extends lloyal_agents_2.Tool {
    name = "web_search";
    description = "Search the web. Returns the most relevant URLs and a summary. Use fetch_page to read full content of promising results.";
    parameters = {
        type: "object",
        properties: { query: { type: "string", description: "Search query" } },
        required: ["query"],
    };
    /** Wrapped tool that performs the actual search */
    _inner;
    /** `{ system, user }` prompt pair; `user` may contain `{{query}}` and `{{results}}` */
    _extractTask;
    /**
     * @param provider - Search backend handed to the inner {@link WebSearchTool}
     * @param extractTask - Prompt pair used for the URL/summary extraction
     */
    constructor(provider, extractTask) {
        super();
        this._inner = new web_search_1.WebSearchTool(provider);
        this._extractTask = extractTask;
    }
    /**
     * Run the search for `args.query` and return a compact extraction when possible
     *
     * @param args - Tool arguments; `args.query` is the search query
     * @returns `{ urls, summary, resultCount }` on successful extraction, otherwise the inner tool's result
     */
    *execute(args) {
        const results = yield* this._inner.execute(args);
        // If error or not an array, return as-is (no scratchpad needed)
        if (!Array.isArray(results) || results.length === 0)
            return results;
        // Extraction needs a live scratchpad parent; without one the agent gets the raw results.
        let parent;
        try {
            parent = yield* lloyal_agents_2.ScratchpadParent.expect();
        }
        catch { /* no parent — return raw */ }
        if (!parent || parent.disposed)
            return results;
        const ctx = yield* lloyal_agents_2.Ctx.expect();
        const schema = {
            type: "object",
            properties: {
                urls: { type: "array", items: { type: "string" }, description: "URLs worth fetching" },
                summary: { type: "string", description: "Brief summary of what the search found" },
            },
            required: ["urls", "summary"],
        };
        const grammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(schema)));
        const resultsText = results
            .map((r, i) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.snippet}`)
            .join("\n\n");
        // Fill the template in ONE pass with a replacer function. A string
        // replacement would interpret `$&`, `$'`, `` $` `` and `$$` occurring in
        // the query or in result snippets as substitution patterns, and chained
        // replace() calls would expand a literal "{{results}}" typed in the query.
        const fields = { query: args.query, results: resultsText };
        const extractPrompt = this._extractTask.user.replace(/\{\{(query|results)\}\}/g, (_match, key) => fields[key]);
        const messages = [
            { role: "system", content: this._extractTask.system },
            { role: "user", content: extractPrompt },
        ];
        const { prompt } = ctx.formatChatSync(JSON.stringify(messages), { enableThinking: false });
        try {
            const extracted = yield* (0, lloyal_agents_2.generate)({
                prompt,
                grammar,
                params: { temperature: 0.3 },
                parse: (o) => JSON.parse(o),
                parent,
            });
            return {
                urls: extracted.parsed?.urls || [],
                summary: extracted.parsed?.summary || "",
                resultCount: results.length,
            };
        }
        catch {
            return results; // fallback to raw results on extraction failure
        }
    }
}
240
+ // ── WebSource ────────────────────────────────────────────────────
241
/**
 * Research source backed by web search and page fetching
 *
 * Exposes a {@link BufferingWebSearch} and a {@link BufferingFetchPage} as
 * grounding tools, plus a {@link WebResearchTool} whose toolkit contains those
 * two tools, the shared report tool, and the research tool itself. Pages
 * fetched through the fetch tool accumulate in an in-memory buffer;
 * {@link getChunks} converts that buffer into {@link Chunk} instances via
 * {@link chunkFetchedPages}.
 *
 * @category Rig
 */
class WebSource extends lloyal_agents_1.Source {
    /** Pages collected by the fetch tool; the same array instance is shared with it */
    _buffer = [];
    _fetchPage;
    _webSearch;
    /** Prompt pair loaded from `web-research.md` at construction */
    _researchPrompt;
    /** Research tool, or null until the first bind */
    _researchTool = null;
    /** @inheritDoc */
    name = "web";
    /**
     * @param provider - Search backend (e.g. {@link TavilyProvider}) for web_search calls
     */
    constructor(provider) {
        super();
        const pageExtractPrompt = readTask("extract");
        const searchExtractPrompt = readTask("search-extract");
        const researchPrompt = readTask("web-research");
        this._researchPrompt = researchPrompt;
        this._fetchPage = new BufferingFetchPage(this._buffer, pageExtractPrompt);
        this._webSearch = new BufferingWebSearch(provider, searchExtractPrompt);
    }
    /**
     * Research tool built by {@link bind}; throws if bind has not run yet
     *
     * @inheritDoc
     */
    get researchTool() {
        const tool = this._researchTool;
        if (tool)
            return tool;
        throw new Error("WebSource: bind() must be called first");
    }
    /** @inheritDoc */
    get groundingTools() {
        return [this._webSearch, this._fetchPage];
    }
    /**
     * Empty the page buffer and, on first use, build the research toolkit
     *
     * Every call truncates the {@link FetchedPage} buffer in place and writes
     * a `source:bind` trace event. The {@link WebResearchTool} is constructed
     * only when none exists yet, so values from the context of a later call
     * are not applied to the already-built tool.
     *
     * @inheritDoc
     */
    *bind(ctx) {
        // Truncate in place: the fetch tool holds a reference to this same array.
        this._buffer.length = 0;
        const tracer = yield* lloyal_agents_2.Trace.expect();
        tracer.write({
            traceId: tracer.nextId(),
            parentTraceId: null,
            ts: performance.now(),
            type: 'source:bind',
            sourceName: this.name,
        });
        if (this._researchTool)
            return;
        const { system: systemPrompt } = this._researchPrompt;
        const webResearch = new web_research_1.WebResearchTool({
            name: "web_research",
            description: "Spawn parallel web research agents that search the web, fetch pages, and report findings.",
            systemPrompt,
            reporterPrompt: ctx.reporterPrompt,
            maxTurns: ctx.maxTurns,
            trace: ctx.trace,
        });
        // The research tool is a member of its own toolkit, so the toolkit can
        // only be attached after the tool exists.
        const members = [this._webSearch, this._fetchPage, ctx.reportTool, webResearch];
        webResearch.setToolkit((0, lloyal_agents_2.createToolkit)(members));
        this._researchTool = webResearch;
    }
    /** @inheritDoc */
    getChunks() {
        return chunkFetchedPages(this._buffer);
    }
}
318
+ exports.WebSource = WebSource;
319
+ //# sourceMappingURL=web.js.map