@lloyal-labs/rig 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +10 -9
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -17
- package/dist/index.js.map +1 -1
- package/dist/node.d.ts +15 -0
- package/dist/node.d.ts.map +1 -0
- package/dist/node.js +41 -0
- package/dist/node.js.map +1 -0
- package/dist/sources/chunking.d.ts +61 -0
- package/dist/sources/chunking.d.ts.map +1 -0
- package/dist/sources/chunking.js +127 -0
- package/dist/sources/chunking.js.map +1 -0
- package/dist/sources/web.d.ts +8 -43
- package/dist/sources/web.d.ts.map +1 -1
- package/dist/sources/web.js +37 -222
- package/dist/sources/web.js.map +1 -1
- package/dist/tools/fetch-page.d.ts +22 -7
- package/dist/tools/fetch-page.d.ts.map +1 -1
- package/dist/tools/fetch-page.js +128 -13
- package/dist/tools/fetch-page.js.map +1 -1
- package/dist/tools/index.d.ts +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/types.d.ts +6 -0
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/tools/web-search.d.ts +5 -1
- package/dist/tools/web-search.d.ts.map +1 -1
- package/dist/tools/web-search.js +8 -2
- package/dist/tools/web-search.js.map +1 -1
- package/package.json +11 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Rig — research infrastructure for the lloyal agent pipeline
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* The default export is platform-agnostic. linkedom + @mozilla/readability
|
|
5
|
+
* are pure JS and work in both Node.js and React Native (Hermes).
|
|
6
|
+
*
|
|
7
|
+
* Node-specific exports (createReranker, WebSource, CorpusSource,
|
|
8
|
+
* loadResources, chunkResources) require node:fs and are available
|
|
9
|
+
* via `@lloyal-labs/rig/node`.
|
|
8
10
|
*
|
|
9
11
|
* @packageDocumentation
|
|
10
12
|
* @category Rig
|
|
11
13
|
*/
|
|
12
14
|
export { createTools, reportTool, ResearchTool, WebSearchTool, TavilyProvider, FetchPageTool, WebResearchTool, PlanTool, } from './tools';
|
|
13
15
|
export type { ResearchToolOpts, WebResearchToolOpts, PlanToolOpts, PlanResult, PlanQuestion, SearchProvider, SearchResult, Reranker, ScoredChunk, ScoredResult, } from './tools';
|
|
14
|
-
export {
|
|
15
|
-
export type {
|
|
16
|
-
export {
|
|
17
|
-
export type { Resource, Chunk } from './resources';
|
|
18
|
-
export { createReranker } from './reranker';
|
|
16
|
+
export { chunkFetchedPages, chunkHtml } from './sources/chunking';
|
|
17
|
+
export type { FetchedPage } from './sources/chunking';
|
|
18
|
+
export type { SourceContext } from './sources/types';
|
|
19
|
+
export type { Resource, Chunk } from './resources/types';
|
|
19
20
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EACL,WAAW,EAAE,UAAU,EACvB,YAAY,EAAE,aAAa,EAAE,cAAc,EAAE,aAAa,EAC1D,eAAe,EAAE,QAAQ,GAC1B,MAAM,SAAS,CAAC;AACjB,YAAY,EACV,gBAAgB,EAAE,mBAAmB,EAAE,YAAY,EACnD,UAAU,EAAE,YAAY,EACxB,cAAc,EAAE,YAAY,EAC5B,QAAQ,EAAE,WAAW,EAAE,YAAY,GACpC,MAAM,SAAS,CAAC;AAGjB,OAAO,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAClE,YAAY,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGtD,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGrD,YAAY,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -2,17 +2,19 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* Rig — research infrastructure for the lloyal agent pipeline
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
5
|
+
* The default export is platform-agnostic. linkedom + @mozilla/readability
|
|
6
|
+
* are pure JS and work in both Node.js and React Native (Hermes).
|
|
7
|
+
*
|
|
8
|
+
* Node-specific exports (createReranker, WebSource, CorpusSource,
|
|
9
|
+
* loadResources, chunkResources) require node:fs and are available
|
|
10
|
+
* via `@lloyal-labs/rig/node`.
|
|
9
11
|
*
|
|
10
12
|
* @packageDocumentation
|
|
11
13
|
* @category Rig
|
|
12
14
|
*/
|
|
13
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
-
exports.
|
|
15
|
-
// Tools
|
|
16
|
+
exports.chunkHtml = exports.chunkFetchedPages = exports.PlanTool = exports.WebResearchTool = exports.FetchPageTool = exports.TavilyProvider = exports.WebSearchTool = exports.ResearchTool = exports.reportTool = exports.createTools = void 0;
|
|
17
|
+
// Tools (pure TS + Effection + linkedom — platform-agnostic)
|
|
16
18
|
var tools_1 = require("./tools");
|
|
17
19
|
Object.defineProperty(exports, "createTools", { enumerable: true, get: function () { return tools_1.createTools; } });
|
|
18
20
|
Object.defineProperty(exports, "reportTool", { enumerable: true, get: function () { return tools_1.reportTool; } });
|
|
@@ -22,15 +24,8 @@ Object.defineProperty(exports, "TavilyProvider", { enumerable: true, get: functi
|
|
|
22
24
|
Object.defineProperty(exports, "FetchPageTool", { enumerable: true, get: function () { return tools_1.FetchPageTool; } });
|
|
23
25
|
Object.defineProperty(exports, "WebResearchTool", { enumerable: true, get: function () { return tools_1.WebResearchTool; } });
|
|
24
26
|
Object.defineProperty(exports, "PlanTool", { enumerable: true, get: function () { return tools_1.PlanTool; } });
|
|
25
|
-
//
|
|
26
|
-
var
|
|
27
|
-
Object.defineProperty(exports, "
|
|
28
|
-
Object.defineProperty(exports, "
|
|
29
|
-
// Resources
|
|
30
|
-
var resources_1 = require("./resources");
|
|
31
|
-
Object.defineProperty(exports, "loadResources", { enumerable: true, get: function () { return resources_1.loadResources; } });
|
|
32
|
-
Object.defineProperty(exports, "chunkResources", { enumerable: true, get: function () { return resources_1.chunkResources; } });
|
|
33
|
-
// Reranker
|
|
34
|
-
var reranker_1 = require("./reranker");
|
|
35
|
-
Object.defineProperty(exports, "createReranker", { enumerable: true, get: function () { return reranker_1.createReranker; } });
|
|
27
|
+
// Chunking (platform-agnostic — linkedom is pure JS)
|
|
28
|
+
var chunking_1 = require("./sources/chunking");
|
|
29
|
+
Object.defineProperty(exports, "chunkFetchedPages", { enumerable: true, get: function () { return chunking_1.chunkFetchedPages; } });
|
|
30
|
+
Object.defineProperty(exports, "chunkHtml", { enumerable: true, get: function () { return chunking_1.chunkHtml; } });
|
|
36
31
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;GAYG;;;AAEH,6DAA6D;AAC7D,iCAIiB;AAHf,oGAAA,WAAW,OAAA;AAAE,mGAAA,UAAU,OAAA;AACvB,qGAAA,YAAY,OAAA;AAAE,sGAAA,aAAa,OAAA;AAAE,uGAAA,cAAc,OAAA;AAAE,sGAAA,aAAa,OAAA;AAC1D,wGAAA,eAAe,OAAA;AAAE,iGAAA,QAAQ,OAAA;AAS3B,qDAAqD;AACrD,+CAAkE;AAAzD,6GAAA,iBAAiB,OAAA;AAAE,qGAAA,SAAS,OAAA"}
|
package/dist/node.d.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node.js-specific exports for @lloyal-labs/rig
|
|
3
|
+
*
|
|
4
|
+
* These require node:fs and/or @lloyal-labs/lloyal.node.
|
|
5
|
+
* Import from `@lloyal-labs/rig/node` only in Node.js environments.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
* @category Rig
|
|
9
|
+
*/
|
|
10
|
+
export * from './index';
|
|
11
|
+
export { createReranker } from './reranker';
|
|
12
|
+
export { WebSource } from './sources/web';
|
|
13
|
+
export { CorpusSource } from './sources/corpus';
|
|
14
|
+
export { loadResources, chunkResources } from './resources';
|
|
15
|
+
//# sourceMappingURL=node.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"node.d.ts","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,cAAc,SAAS,CAAC;AAGxB,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAG5C,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC1C,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAGhD,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
|
package/dist/node.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Node.js-specific exports for @lloyal-labs/rig
|
|
4
|
+
*
|
|
5
|
+
* These require node:fs and/or @lloyal-labs/lloyal.node.
|
|
6
|
+
* Import from `@lloyal-labs/rig/node` only in Node.js environments.
|
|
7
|
+
*
|
|
8
|
+
* @packageDocumentation
|
|
9
|
+
* @category Rig
|
|
10
|
+
*/
|
|
11
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
12
|
+
if (k2 === undefined) k2 = k;
|
|
13
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
14
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
15
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
16
|
+
}
|
|
17
|
+
Object.defineProperty(o, k2, desc);
|
|
18
|
+
}) : (function(o, m, k, k2) {
|
|
19
|
+
if (k2 === undefined) k2 = k;
|
|
20
|
+
o[k2] = m[k];
|
|
21
|
+
}));
|
|
22
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
23
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
24
|
+
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.chunkResources = exports.loadResources = exports.CorpusSource = exports.WebSource = exports.createReranker = void 0;
|
|
27
|
+
// Re-export everything from the platform-agnostic barrel
|
|
28
|
+
__exportStar(require("./index"), exports);
|
|
29
|
+
// Node-only: Reranker factory (requires @lloyal-labs/lloyal.node)
|
|
30
|
+
var reranker_1 = require("./reranker");
|
|
31
|
+
Object.defineProperty(exports, "createReranker", { enumerable: true, get: function () { return reranker_1.createReranker; } });
|
|
32
|
+
// Node-only: Sources (require node:fs)
|
|
33
|
+
var web_1 = require("./sources/web");
|
|
34
|
+
Object.defineProperty(exports, "WebSource", { enumerable: true, get: function () { return web_1.WebSource; } });
|
|
35
|
+
var corpus_1 = require("./sources/corpus");
|
|
36
|
+
Object.defineProperty(exports, "CorpusSource", { enumerable: true, get: function () { return corpus_1.CorpusSource; } });
|
|
37
|
+
// Node-only: Resource loading (requires node:fs)
|
|
38
|
+
var resources_1 = require("./resources");
|
|
39
|
+
Object.defineProperty(exports, "loadResources", { enumerable: true, get: function () { return resources_1.loadResources; } });
|
|
40
|
+
Object.defineProperty(exports, "chunkResources", { enumerable: true, get: function () { return resources_1.chunkResources; } });
|
|
41
|
+
//# sourceMappingURL=node.js.map
|
package/dist/node.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"node.js","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;;;;;;;;;;;;;;;;AAEH,yDAAyD;AACzD,0CAAwB;AAExB,kEAAkE;AAClE,uCAA4C;AAAnC,0GAAA,cAAc,OAAA;AAEvB,uCAAuC;AACvC,qCAA0C;AAAjC,gGAAA,SAAS,OAAA;AAClB,2CAAgD;AAAvC,sGAAA,YAAY,OAAA;AAErB,iDAAiD;AACjD,yCAA4D;AAAnD,0GAAA,aAAa,OAAA;AAAE,2GAAA,cAAc,OAAA"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunking utilities for web page content
|
|
3
|
+
*
|
|
4
|
+
* Two strategies:
|
|
5
|
+
* - {@link chunkHtml} — structural splitting on HTML headings/paragraphs
|
|
6
|
+
* via linkedom. Used by FetchPageTool for per-tool reranking.
|
|
7
|
+
* - {@link chunkFetchedPages} — plain-text `\n\n` splitting for buffered
|
|
8
|
+
* content. Used for post-research passage reranking.
|
|
9
|
+
*
|
|
10
|
+
* @packageDocumentation
|
|
11
|
+
* @category Rig
|
|
12
|
+
*/
|
|
13
|
+
import type { Chunk } from '../resources/types';
|
|
14
|
+
/**
|
|
15
|
+
* Raw page content buffered during web research for post-research reranking
|
|
16
|
+
*
|
|
17
|
+
* Populated by {@link BufferingFetchPage} as agents fetch pages. After
|
|
18
|
+
* the research phase ends, buffered pages are converted to {@link Chunk}
|
|
19
|
+
* instances via {@link chunkFetchedPages} for reranker scoring.
|
|
20
|
+
*
|
|
21
|
+
* @category Rig
|
|
22
|
+
*/
|
|
23
|
+
export interface FetchedPage {
|
|
24
|
+
/** Resolved URL of the fetched page */
|
|
25
|
+
url: string;
|
|
26
|
+
/** Page title extracted during fetch (may be empty) */
|
|
27
|
+
title: string;
|
|
28
|
+
/** Full extracted article text */
|
|
29
|
+
text: string;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Convert buffered web pages into {@link Chunk} instances for reranking
|
|
33
|
+
*
|
|
34
|
+
* Splits each page's text on blank-line paragraph boundaries, filtering
|
|
35
|
+
* paragraphs shorter than 40 characters. If no paragraphs survive the
|
|
36
|
+
* filter, the full text is emitted as a single chunk (if long enough).
|
|
37
|
+
*
|
|
38
|
+
* @param pages - Buffered pages from web research
|
|
39
|
+
* @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization
|
|
40
|
+
*
|
|
41
|
+
* @category Rig
|
|
42
|
+
*/
|
|
43
|
+
export declare function chunkFetchedPages(pages: FetchedPage[]): Chunk[];
|
|
44
|
+
/**
|
|
45
|
+
* Split article HTML into heading-delimited section chunks via linkedom
|
|
46
|
+
*
|
|
47
|
+
* Same structural strategy as `parseMarkdown` (md4c) uses for corpus files:
|
|
48
|
+
* headings are section boundaries, content between headings is accumulated
|
|
49
|
+
* into a single chunk with the heading as metadata.
|
|
50
|
+
*
|
|
51
|
+
* Falls back to `<p>`-level chunks for pages without headings.
|
|
52
|
+
*
|
|
53
|
+
* @param html - Article HTML from Readability's `article.content`
|
|
54
|
+
* @param url - Page URL (used as chunk `resource`)
|
|
55
|
+
* @param title - Page title (used as default heading)
|
|
56
|
+
* @returns Array of section-level chunks
|
|
57
|
+
*
|
|
58
|
+
* @category Rig
|
|
59
|
+
*/
|
|
60
|
+
export declare function chunkHtml(html: string, url: string, title: string): Promise<Chunk[]>;
|
|
61
|
+
//# sourceMappingURL=chunking.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunking.d.ts","sourceRoot":"","sources":["../../src/sources/chunking.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEhD;;;;;;;;GAQG;AACH,MAAM,WAAW,WAAW;IAC1B,uCAAuC;IACvC,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,WAAW,EAAE,GAAG,KAAK,EAAE,CAkC/D;AAKD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CAmD1F"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Chunking utilities for web page content
|
|
4
|
+
*
|
|
5
|
+
* Two strategies:
|
|
6
|
+
* - {@link chunkHtml} — structural splitting on HTML headings/paragraphs
|
|
7
|
+
* via linkedom. Used by FetchPageTool for per-tool reranking.
|
|
8
|
+
* - {@link chunkFetchedPages} — plain-text `\n\n` splitting for buffered
|
|
9
|
+
* content. Used for post-research passage reranking.
|
|
10
|
+
*
|
|
11
|
+
* @packageDocumentation
|
|
12
|
+
* @category Rig
|
|
13
|
+
*/
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.chunkFetchedPages = chunkFetchedPages;
|
|
16
|
+
exports.chunkHtml = chunkHtml;
|
|
17
|
+
/**
|
|
18
|
+
* Convert buffered web pages into {@link Chunk} instances for reranking
|
|
19
|
+
*
|
|
20
|
+
* Splits each page's text on blank-line paragraph boundaries, filtering
|
|
21
|
+
* paragraphs shorter than 40 characters. If no paragraphs survive the
|
|
22
|
+
* filter, the full text is emitted as a single chunk (if long enough).
|
|
23
|
+
*
|
|
24
|
+
* @param pages - Buffered pages from web research
|
|
25
|
+
* @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization
|
|
26
|
+
*
|
|
27
|
+
* @category Rig
|
|
28
|
+
*/
|
|
29
|
+
function chunkFetchedPages(pages) {
|
|
30
|
+
const chunks = [];
|
|
31
|
+
for (const page of pages) {
|
|
32
|
+
const paragraphs = page.text
|
|
33
|
+
.split(/\n\s*\n/)
|
|
34
|
+
.map((p) => p.trim())
|
|
35
|
+
.filter((p) => p.length > 40);
|
|
36
|
+
if (paragraphs.length === 0) {
|
|
37
|
+
if (page.text.trim().length > 40) {
|
|
38
|
+
chunks.push({
|
|
39
|
+
resource: page.url,
|
|
40
|
+
heading: page.title || page.url,
|
|
41
|
+
text: page.text.trim(),
|
|
42
|
+
tokens: [],
|
|
43
|
+
startLine: 1,
|
|
44
|
+
endLine: 1,
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
for (let i = 0; i < paragraphs.length; i++) {
|
|
50
|
+
chunks.push({
|
|
51
|
+
resource: page.url,
|
|
52
|
+
heading: page.title || page.url,
|
|
53
|
+
text: paragraphs[i],
|
|
54
|
+
tokens: [],
|
|
55
|
+
startLine: i + 1,
|
|
56
|
+
endLine: i + 1,
|
|
57
|
+
});
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return chunks;
|
|
61
|
+
}
|
|
62
|
+
const HEADING_TAGS = new Set(['H1', 'H2', 'H3', 'H4', 'H5', 'H6']);
|
|
63
|
+
const TEXT_TAGS = new Set(['P', 'LI', 'BLOCKQUOTE', 'PRE', 'TD', 'TH', 'FIGCAPTION', 'DT', 'DD']);
|
|
64
|
+
/**
|
|
65
|
+
* Split article HTML into heading-delimited section chunks via linkedom
|
|
66
|
+
*
|
|
67
|
+
* Same structural strategy as `parseMarkdown` (md4c) uses for corpus files:
|
|
68
|
+
* headings are section boundaries, content between headings is accumulated
|
|
69
|
+
* into a single chunk with the heading as metadata.
|
|
70
|
+
*
|
|
71
|
+
* Falls back to `<p>`-level chunks for pages without headings.
|
|
72
|
+
*
|
|
73
|
+
* @param html - Article HTML from Readability's `article.content`
|
|
74
|
+
* @param url - Page URL (used as chunk `resource`)
|
|
75
|
+
* @param title - Page title (used as default heading)
|
|
76
|
+
* @returns Array of section-level chunks
|
|
77
|
+
*
|
|
78
|
+
* @category Rig
|
|
79
|
+
*/
|
|
80
|
+
async function chunkHtml(html, url, title) {
|
|
81
|
+
const { parseHTML } = await Promise.resolve().then(() => require('linkedom'));
|
|
82
|
+
const { document } = parseHTML(html);
|
|
83
|
+
const chunks = [];
|
|
84
|
+
let currentHeading = title;
|
|
85
|
+
let currentText = '';
|
|
86
|
+
let chunkIndex = 0;
|
|
87
|
+
function flushSection() {
|
|
88
|
+
const text = currentText.trim();
|
|
89
|
+
if (text.length > 40) {
|
|
90
|
+
chunks.push({
|
|
91
|
+
resource: url,
|
|
92
|
+
heading: currentHeading || title || url,
|
|
93
|
+
text,
|
|
94
|
+
tokens: [],
|
|
95
|
+
startLine: chunkIndex + 1,
|
|
96
|
+
endLine: chunkIndex + 1,
|
|
97
|
+
});
|
|
98
|
+
chunkIndex++;
|
|
99
|
+
}
|
|
100
|
+
currentText = '';
|
|
101
|
+
}
|
|
102
|
+
// Walk all elements in the article DOM
|
|
103
|
+
const elements = document.querySelectorAll('*');
|
|
104
|
+
for (const el of elements) {
|
|
105
|
+
const tag = el.tagName;
|
|
106
|
+
if (HEADING_TAGS.has(tag)) {
|
|
107
|
+
// Close current section, start new one with this heading
|
|
108
|
+
flushSection();
|
|
109
|
+
currentHeading = el.textContent?.trim() || title;
|
|
110
|
+
}
|
|
111
|
+
else if (TEXT_TAGS.has(tag)) {
|
|
112
|
+
// Accumulate text content — skip if this element is nested inside
|
|
113
|
+
// another TEXT_TAG (avoid double-counting nested <li> etc.)
|
|
114
|
+
const parentTag = el.parentElement?.tagName;
|
|
115
|
+
if (parentTag && TEXT_TAGS.has(parentTag))
|
|
116
|
+
continue;
|
|
117
|
+
const text = el.textContent?.trim();
|
|
118
|
+
if (text) {
|
|
119
|
+
currentText += (currentText ? '\n\n' : '') + text;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
// Flush final section
|
|
124
|
+
flushSection();
|
|
125
|
+
return chunks;
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=chunking.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/sources/chunking.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;GAWG;;AAkCH,8CAkCC;AAqBD,8BAmDC;AAtHD;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,KAAoB;IACpD,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI;aACzB,KAAK,CAAC,SAAS,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;QAEhC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC;oBACV,QAAQ,EAAE,IAAI,CAAC,GAAG;oBAClB,OAAO,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG;oBAC/B,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBACtB,MAAM,EAAE,EAAE;oBACV,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,CAAC;iBACX,CAAC,CAAC;YACL,CAAC;YACD,SAAS;QACX,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,IAAI,CAAC,GAAG;gBAClB,OAAO,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG;gBAC/B,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;gBACnB,MAAM,EAAE,EAAE;gBACV,SAAS,EAAE,CAAC,GAAG,CAAC;gBAChB,OAAO,EAAE,CAAC,GAAG,CAAC;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;AACnE,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;AAElG;;;;;;;;;;;;;;;GAeG;AACI,KAAK,UAAU,SAAS,CAAC,IAAY,EAAE,GAAW,EAAE,KAAa;IACtE,MAAM,EAAE,SAAS,EAAE,GAAG,2CAAa,UAAU,EAAC,CAAC;IAC/C,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAErC,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,cAAc,GAAG,KAAK,CAAC;IAC3B,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,SAAS,YAAY;QACnB,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC;QAChC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,GAAG;gBACb,OAAO,EAAE,cAAc,IAAI,KAAK
,IAAI,GAAG;gBACvC,IAAI;gBACJ,MAAM,EAAE,EAAE;gBACV,SAAS,EAAE,UAAU,GAAG,CAAC;gBACzB,OAAO,EAAE,UAAU,GAAG,CAAC;aACxB,CAAC,CAAC;YACH,UAAU,EAAE,CAAC;QACf,CAAC;QACD,WAAW,GAAG,EAAE,CAAC;IACnB,CAAC;IAED,uCAAuC;IACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;IAChD,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC;QAEvB,IAAI,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,yDAAyD;YACzD,YAAY,EAAE,CAAC;YACf,cAAc,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,KAAK,CAAC;QACnD,CAAC;aAAM,IAAI,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9B,kEAAkE;YAClE,4DAA4D;YAC5D,MAAM,SAAS,GAAG,EAAE,CAAC,aAAa,EAAE,OAAO,CAAC;YAC5C,IAAI,SAAS,IAAI,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC;gBAAE,SAAS;YAEpD,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACpC,IAAI,IAAI,EAAE,CAAC;gBACT,WAAW,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;YACpD,CAAC;QACH,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,YAAY,EAAE,CAAC;IAEf,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/sources/web.d.ts
CHANGED
|
@@ -1,47 +1,17 @@
|
|
|
1
1
|
import type { Operation } from "effection";
|
|
2
2
|
import { Source } from "@lloyal-labs/lloyal-agents";
|
|
3
|
-
import { Tool } from "@lloyal-labs/lloyal-agents";
|
|
3
|
+
import type { Tool } from "@lloyal-labs/lloyal-agents";
|
|
4
4
|
import type { Chunk } from "../resources/types";
|
|
5
5
|
import type { SourceContext } from "./types";
|
|
6
6
|
import type { SearchProvider } from "../tools/types";
|
|
7
|
+
export { chunkFetchedPages, type FetchedPage } from "./chunking";
|
|
7
8
|
/**
|
|
8
|
-
*
|
|
9
|
+
* Web-backed research source
|
|
9
10
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* @category Rig
|
|
15
|
-
*/
|
|
16
|
-
export interface FetchedPage {
|
|
17
|
-
/** Resolved URL of the fetched page */
|
|
18
|
-
url: string;
|
|
19
|
-
/** Page title extracted during fetch (may be empty) */
|
|
20
|
-
title: string;
|
|
21
|
-
/** Full extracted article text */
|
|
22
|
-
text: string;
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* Convert buffered web pages into {@link Chunk} instances for reranking
|
|
26
|
-
*
|
|
27
|
-
* Splits each page's text on blank-line paragraph boundaries, filtering
|
|
28
|
-
* paragraphs shorter than 40 characters. If no paragraphs survive the
|
|
29
|
-
* filter, the full text is emitted as a single chunk (if long enough).
|
|
30
|
-
*
|
|
31
|
-
* @param pages - Buffered pages from web research
|
|
32
|
-
* @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization
|
|
33
|
-
*
|
|
34
|
-
* @category Rig
|
|
35
|
-
*/
|
|
36
|
-
export declare function chunkFetchedPages(pages: FetchedPage[]): Chunk[];
|
|
37
|
-
/**
|
|
38
|
-
* Web-backed research source using search + fetch with scratchpad extraction
|
|
39
|
-
*
|
|
40
|
-
* Wires up {@link BufferingWebSearch} and {@link BufferingFetchPage} for
|
|
41
|
-
* grounding, and a self-referential {@link WebResearchTool} for spawning
|
|
42
|
-
* parallel research sub-agents. Fetched page content is buffered in memory;
|
|
43
|
-
* after research completes, {@link getChunks} converts the buffer into
|
|
44
|
-
* {@link Chunk} instances via {@link chunkFetchedPages} for reranker scoring.
|
|
11
|
+
* Agents search the web via {@link WebSearchTool} (returns titles, snippets,
|
|
12
|
+
* URLs), then fetch promising pages via {@link FetchPageTool} (returns
|
|
13
|
+
* reranked relevant chunks). Fetched content is buffered for post-research
|
|
14
|
+
* passage reranking via {@link getChunks}.
|
|
45
15
|
*
|
|
46
16
|
* @category Rig
|
|
47
17
|
*/
|
|
@@ -62,12 +32,7 @@ export declare class WebSource extends Source<SourceContext, Chunk> {
|
|
|
62
32
|
/** @inheritDoc */
|
|
63
33
|
get groundingTools(): Tool[];
|
|
64
34
|
/**
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
* Resets the internal {@link FetchedPage} buffer on every call so
|
|
68
|
-
* prior-run content does not leak into a new research pass. Constructs
|
|
69
|
-
* the {@link WebResearchTool} on first bind only (toolkit is stateless
|
|
70
|
-
* once built).
|
|
35
|
+
* Wire reranker to FetchPageTool and build the research toolkit
|
|
71
36
|
*
|
|
72
37
|
* @inheritDoc
|
|
73
38
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web.d.ts","sourceRoot":"","sources":["../../src/sources/web.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"web.d.ts","sourceRoot":"","sources":["../../src/sources/web.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAwB,MAAM,4BAA4B,CAAC;AAC1E,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AACvD,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC7C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAQrD,OAAO,EAAE,iBAAiB,EAAE,KAAK,WAAW,EAAE,MAAM,YAAY,CAAC;AAmDjE;;;;;;;;;GASG;AACH,qBAAa,SAAU,SAAQ,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC;IACzD,OAAO,CAAC,OAAO,CAAqB;IACpC,OAAO,CAAC,UAAU,CAAqB;IACvC,OAAO,CAAC,UAAU,CAAgB;IAClC,OAAO,CAAC,eAAe,CAAmC;IAC1D,OAAO,CAAC,aAAa,CAAgC;IAErD,kBAAkB;IAClB,QAAQ,CAAC,IAAI,SAAS;IAEtB;;OAEG;gBACS,QAAQ,EAAE,cAAc;IAOpC,kBAAkB;IAClB,IAAI,YAAY,IAAI,IAAI,CAIvB;IAED,kBAAkB;IAClB,IAAI,cAAc,IAAI,IAAI,EAAE,CAE3B;IAED;;;;OAIG;IACF,IAAI,CAAC,GAAG,EAAE,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC;IAoC1C,kBAAkB;IAClB,SAAS,IAAI,KAAK,EAAE;CAGrB"}
|
package/dist/sources/web.js
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.WebSource = void 0;
|
|
4
|
-
exports.chunkFetchedPages = chunkFetchedPages;
|
|
3
|
+
exports.WebSource = exports.chunkFetchedPages = void 0;
|
|
5
4
|
const fs = require("node:fs");
|
|
6
5
|
const path = require("node:path");
|
|
7
|
-
const effection_1 = require("effection");
|
|
8
6
|
const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
|
|
9
|
-
const lloyal_agents_2 = require("@lloyal-labs/lloyal-agents");
|
|
10
7
|
const web_search_1 = require("../tools/web-search");
|
|
11
8
|
const fetch_page_1 = require("../tools/fetch-page");
|
|
12
9
|
const web_research_1 = require("../tools/web-research");
|
|
10
|
+
const chunking_1 = require("./chunking");
|
|
11
|
+
// Re-export for backwards compatibility
|
|
12
|
+
var chunking_2 = require("./chunking");
|
|
13
|
+
Object.defineProperty(exports, "chunkFetchedPages", { enumerable: true, get: function () { return chunking_2.chunkFetchedPages; } });
|
|
13
14
|
// ── Task loader ──────────────────────────────────────────────────
|
|
14
15
|
function readTask(name) {
|
|
15
16
|
const raw = fs
|
|
@@ -20,232 +21,44 @@ function readTask(name) {
|
|
|
20
21
|
return { system: raw, user: "" };
|
|
21
22
|
return { system: raw.slice(0, sep).trim(), user: raw.slice(sep + 5).trim() };
|
|
22
23
|
}
|
|
23
|
-
/**
|
|
24
|
-
* Convert buffered web pages into {@link Chunk} instances for reranking
|
|
25
|
-
*
|
|
26
|
-
* Splits each page's text on blank-line paragraph boundaries, filtering
|
|
27
|
-
* paragraphs shorter than 40 characters. If no paragraphs survive the
|
|
28
|
-
* filter, the full text is emitted as a single chunk (if long enough).
|
|
29
|
-
*
|
|
30
|
-
* @param pages - Buffered pages from web research
|
|
31
|
-
* @returns Flat array of paragraph-level chunks with `tokens` arrays left empty for later tokenization
|
|
32
|
-
*
|
|
33
|
-
* @category Rig
|
|
34
|
-
*/
|
|
35
|
-
function chunkFetchedPages(pages) {
|
|
36
|
-
const chunks = [];
|
|
37
|
-
for (const page of pages) {
|
|
38
|
-
const paragraphs = page.text
|
|
39
|
-
.split(/\n\s*\n/)
|
|
40
|
-
.map((p) => p.trim())
|
|
41
|
-
.filter((p) => p.length > 40);
|
|
42
|
-
if (paragraphs.length === 0) {
|
|
43
|
-
if (page.text.trim().length > 40) {
|
|
44
|
-
chunks.push({
|
|
45
|
-
resource: page.url,
|
|
46
|
-
heading: page.title || page.url,
|
|
47
|
-
text: page.text.trim(),
|
|
48
|
-
tokens: [],
|
|
49
|
-
startLine: 1,
|
|
50
|
-
endLine: 1,
|
|
51
|
-
});
|
|
52
|
-
}
|
|
53
|
-
continue;
|
|
54
|
-
}
|
|
55
|
-
for (let i = 0; i < paragraphs.length; i++) {
|
|
56
|
-
chunks.push({
|
|
57
|
-
resource: page.url,
|
|
58
|
-
heading: page.title || page.url,
|
|
59
|
-
text: paragraphs[i],
|
|
60
|
-
tokens: [],
|
|
61
|
-
startLine: i + 1,
|
|
62
|
-
endLine: i + 1,
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
return chunks;
|
|
67
|
-
}
|
|
68
24
|
// ── BufferingFetchPage ───────────────────────────────────────────
|
|
69
25
|
/**
|
|
70
|
-
*
|
|
26
|
+
* Thin wrapper over {@link FetchPageTool} that buffers fetched content
|
|
27
|
+
* for post-research reranking via {@link WebSource.getChunks}.
|
|
71
28
|
*
|
|
72
|
-
*
|
|
73
|
-
*
|
|
74
|
-
* post-research reranking. An attention scratchpad (forked from
|
|
75
|
-
* {@link ScratchpadParent}) then grammar-constrains a summary + links
|
|
76
|
-
* extraction, returning the compact result to the calling agent instead
|
|
77
|
-
* of the full page text. Falls back to the full result if extraction
|
|
78
|
-
* fails or no scratchpad parent is available.
|
|
29
|
+
* No scratchpad extraction. No content transformation. Just buffers
|
|
30
|
+
* the raw text alongside returning the reranked result to the agent.
|
|
79
31
|
*
|
|
80
32
|
* @category Rig
|
|
81
33
|
*/
|
|
82
|
-
class BufferingFetchPage extends
|
|
83
|
-
name = "fetch_page";
|
|
84
|
-
description = "Fetch a web page and extract its article content. Returns a summary and any links worth following. Use to read search results or follow links discovered in pages.";
|
|
85
|
-
parameters = {
|
|
86
|
-
type: "object",
|
|
87
|
-
properties: { url: { type: "string", description: "URL to fetch" } },
|
|
88
|
-
required: ["url"],
|
|
89
|
-
};
|
|
90
|
-
_inner;
|
|
34
|
+
class BufferingFetchPage extends fetch_page_1.FetchPageTool {
|
|
91
35
|
_buffer;
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
super();
|
|
95
|
-
this._inner = new fetch_page_1.FetchPageTool(maxChars);
|
|
36
|
+
constructor(buffer, maxChars) {
|
|
37
|
+
super(maxChars);
|
|
96
38
|
this._buffer = buffer;
|
|
97
|
-
this._extractTask = extractTask;
|
|
98
39
|
}
|
|
99
40
|
*execute(args) {
|
|
100
|
-
const result = yield*
|
|
41
|
+
const result = yield* super.execute(args);
|
|
101
42
|
const r = result;
|
|
102
43
|
if (typeof r?.content === "string" &&
|
|
103
44
|
r.content !== "[Could not extract article content]") {
|
|
104
|
-
const content = r.content;
|
|
105
|
-
// Buffer full content for reranking
|
|
106
45
|
this._buffer.push({
|
|
107
46
|
url: r.url || args.url,
|
|
108
47
|
title: r.title || "",
|
|
109
|
-
text: content,
|
|
48
|
+
text: r.content,
|
|
110
49
|
});
|
|
111
|
-
// Attention scratchpad: fork from innermost active root, extract summary + links, prune
|
|
112
|
-
let parent;
|
|
113
|
-
try {
|
|
114
|
-
parent = yield* lloyal_agents_2.ScratchpadParent.expect();
|
|
115
|
-
}
|
|
116
|
-
catch { /* no parent — skip extraction */ }
|
|
117
|
-
if (!parent || parent.disposed)
|
|
118
|
-
return result;
|
|
119
|
-
const ctx = yield* lloyal_agents_2.Ctx.expect();
|
|
120
|
-
const schema = {
|
|
121
|
-
type: "object",
|
|
122
|
-
properties: {
|
|
123
|
-
summary: { type: "string" },
|
|
124
|
-
links: { type: "array", items: { type: "string" } },
|
|
125
|
-
},
|
|
126
|
-
required: ["summary", "links"],
|
|
127
|
-
};
|
|
128
|
-
const grammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(schema)));
|
|
129
|
-
const extractPrompt = this._extractTask.user
|
|
130
|
-
.replace("{{url}}", args.url)
|
|
131
|
-
.replace("{{title}}", r.title || "")
|
|
132
|
-
.replace("{{content}}", content);
|
|
133
|
-
const messages = [
|
|
134
|
-
{ role: "system", content: this._extractTask.system },
|
|
135
|
-
{ role: "user", content: extractPrompt },
|
|
136
|
-
];
|
|
137
|
-
const { prompt } = ctx.formatChatSync(JSON.stringify(messages), { enableThinking: false });
|
|
138
|
-
try {
|
|
139
|
-
const extracted = yield* (0, lloyal_agents_2.generate)({
|
|
140
|
-
prompt,
|
|
141
|
-
grammar,
|
|
142
|
-
params: { temperature: 0.3 },
|
|
143
|
-
parse: (o) => JSON.parse(o),
|
|
144
|
-
parent,
|
|
145
|
-
});
|
|
146
|
-
return {
|
|
147
|
-
url: r.url || args.url,
|
|
148
|
-
title: r.title || "",
|
|
149
|
-
summary: extracted.parsed?.summary || "",
|
|
150
|
-
links: extracted.parsed?.links || [],
|
|
151
|
-
};
|
|
152
|
-
}
|
|
153
|
-
catch {
|
|
154
|
-
return result; // fallback to full result on extraction failure
|
|
155
|
-
}
|
|
156
50
|
}
|
|
157
51
|
return result;
|
|
158
52
|
}
|
|
159
53
|
}
|
|
160
|
-
// ── BufferingWebSearch ────────────────────────────────────────────
|
|
161
|
-
/**
|
|
162
|
-
* Web-search wrapper that extracts a compact summary via attention scratchpad
|
|
163
|
-
*
|
|
164
|
-
* Wraps {@link WebSearchTool} and, when a {@link ScratchpadParent} is
|
|
165
|
-
* available, forks a grammar-constrained generation to distill raw search
|
|
166
|
-
* results into a list of promising URLs plus a brief summary. The compact
|
|
167
|
-
* output reduces KV pressure on the calling agent. Falls back to raw
|
|
168
|
-
* results if extraction fails or no scratchpad parent is available.
|
|
169
|
-
*
|
|
170
|
-
* @category Rig
|
|
171
|
-
*/
|
|
172
|
-
class BufferingWebSearch extends lloyal_agents_2.Tool {
|
|
173
|
-
name = "web_search";
|
|
174
|
-
description = "Search the web. Returns the most relevant URLs and a summary. Use fetch_page to read full content of promising results.";
|
|
175
|
-
parameters = {
|
|
176
|
-
type: "object",
|
|
177
|
-
properties: { query: { type: "string", description: "Search query" } },
|
|
178
|
-
required: ["query"],
|
|
179
|
-
};
|
|
180
|
-
_inner;
|
|
181
|
-
_extractTask;
|
|
182
|
-
constructor(provider, extractTask) {
|
|
183
|
-
super();
|
|
184
|
-
this._inner = new web_search_1.WebSearchTool(provider);
|
|
185
|
-
this._extractTask = extractTask;
|
|
186
|
-
}
|
|
187
|
-
*execute(args) {
|
|
188
|
-
const results = yield* this._inner.execute(args);
|
|
189
|
-
// If error or not an array, return as-is (no scratchpad needed)
|
|
190
|
-
if (!Array.isArray(results) || results.length === 0)
|
|
191
|
-
return results;
|
|
192
|
-
// Scratchpad: fork from innermost active root, extract URLs + summary
|
|
193
|
-
let parent;
|
|
194
|
-
try {
|
|
195
|
-
parent = yield* lloyal_agents_2.ScratchpadParent.expect();
|
|
196
|
-
}
|
|
197
|
-
catch { /* no parent — return raw */ }
|
|
198
|
-
if (!parent || parent.disposed)
|
|
199
|
-
return results;
|
|
200
|
-
const ctx = yield* lloyal_agents_2.Ctx.expect();
|
|
201
|
-
const schema = {
|
|
202
|
-
type: "object",
|
|
203
|
-
properties: {
|
|
204
|
-
urls: { type: "array", items: { type: "string" }, description: "URLs worth fetching" },
|
|
205
|
-
summary: { type: "string", description: "Brief summary of what the search found" },
|
|
206
|
-
},
|
|
207
|
-
required: ["urls", "summary"],
|
|
208
|
-
};
|
|
209
|
-
const grammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(schema)));
|
|
210
|
-
const resultsText = results
|
|
211
|
-
.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}\n ${r.snippet}`)
|
|
212
|
-
.join("\n\n");
|
|
213
|
-
const extractPrompt = this._extractTask.user
|
|
214
|
-
.replace("{{query}}", args.query)
|
|
215
|
-
.replace("{{results}}", resultsText);
|
|
216
|
-
const messages = [
|
|
217
|
-
{ role: "system", content: this._extractTask.system },
|
|
218
|
-
{ role: "user", content: extractPrompt },
|
|
219
|
-
];
|
|
220
|
-
const { prompt } = ctx.formatChatSync(JSON.stringify(messages), { enableThinking: false });
|
|
221
|
-
try {
|
|
222
|
-
const extracted = yield* (0, lloyal_agents_2.generate)({
|
|
223
|
-
prompt,
|
|
224
|
-
grammar,
|
|
225
|
-
params: { temperature: 0.3 },
|
|
226
|
-
parse: (o) => JSON.parse(o),
|
|
227
|
-
parent,
|
|
228
|
-
});
|
|
229
|
-
return {
|
|
230
|
-
urls: extracted.parsed?.urls || [],
|
|
231
|
-
summary: extracted.parsed?.summary || "",
|
|
232
|
-
resultCount: results.length,
|
|
233
|
-
};
|
|
234
|
-
}
|
|
235
|
-
catch {
|
|
236
|
-
return results; // fallback to raw results on extraction failure
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
54
|
// ── WebSource ────────────────────────────────────────────────────
|
|
241
55
|
/**
|
|
242
|
-
* Web-backed research source
|
|
56
|
+
* Web-backed research source
|
|
243
57
|
*
|
|
244
|
-
*
|
|
245
|
-
*
|
|
246
|
-
*
|
|
247
|
-
*
|
|
248
|
-
* {@link Chunk} instances via {@link chunkFetchedPages} for reranker scoring.
|
|
58
|
+
* Agents search the web via {@link WebSearchTool} (returns titles, snippets,
|
|
59
|
+
* URLs), then fetch promising pages via {@link FetchPageTool} (returns
|
|
60
|
+
* reranked relevant chunks). Fetched content is buffered for post-research
|
|
61
|
+
* passage reranking via {@link getChunks}.
|
|
249
62
|
*
|
|
250
63
|
* @category Rig
|
|
251
64
|
*/
|
|
@@ -262,11 +75,9 @@ class WebSource extends lloyal_agents_1.Source {
|
|
|
262
75
|
*/
|
|
263
76
|
constructor(provider) {
|
|
264
77
|
super();
|
|
265
|
-
const extractTask = readTask("extract");
|
|
266
|
-
const searchExtractTask = readTask("search-extract");
|
|
267
78
|
this._researchPrompt = readTask("web-research");
|
|
268
|
-
this._fetchPage = new BufferingFetchPage(this._buffer
|
|
269
|
-
this._webSearch = new
|
|
79
|
+
this._fetchPage = new BufferingFetchPage(this._buffer);
|
|
80
|
+
this._webSearch = new web_search_1.WebSearchTool(provider);
|
|
270
81
|
}
|
|
271
82
|
/** @inheritDoc */
|
|
272
83
|
get researchTool() {
|
|
@@ -275,22 +86,26 @@ class WebSource extends lloyal_agents_1.Source {
|
|
|
275
86
|
return this._researchTool;
|
|
276
87
|
}
|
|
277
88
|
/** @inheritDoc */
|
|
278
|
-
get groundingTools() {
|
|
89
|
+
get groundingTools() {
|
|
90
|
+
return [this._webSearch, this._fetchPage];
|
|
91
|
+
}
|
|
279
92
|
/**
|
|
280
|
-
*
|
|
281
|
-
*
|
|
282
|
-
* Resets the internal {@link FetchedPage} buffer on every call so
|
|
283
|
-
* prior-run content does not leak into a new research pass. Constructs
|
|
284
|
-
* the {@link WebResearchTool} on first bind only (toolkit is stateless
|
|
285
|
-
* once built).
|
|
93
|
+
* Wire reranker to FetchPageTool and build the research toolkit
|
|
286
94
|
*
|
|
287
95
|
* @inheritDoc
|
|
288
96
|
*/
|
|
289
97
|
*bind(ctx) {
|
|
290
98
|
this._buffer.length = 0;
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
99
|
+
// Wire reranker to FetchPageTool for chunk scoring
|
|
100
|
+
this._fetchPage.setReranker(ctx.reranker);
|
|
101
|
+
const tw = yield* lloyal_agents_1.Trace.expect();
|
|
102
|
+
tw.write({
|
|
103
|
+
traceId: tw.nextId(),
|
|
104
|
+
parentTraceId: null,
|
|
105
|
+
ts: performance.now(),
|
|
106
|
+
type: "source:bind",
|
|
107
|
+
sourceName: this.name,
|
|
108
|
+
});
|
|
294
109
|
if (!this._researchTool) {
|
|
295
110
|
const webResearch = new web_research_1.WebResearchTool({
|
|
296
111
|
name: "web_research",
|
|
@@ -300,7 +115,7 @@ class WebSource extends lloyal_agents_1.Source {
|
|
|
300
115
|
maxTurns: ctx.maxTurns,
|
|
301
116
|
trace: ctx.trace,
|
|
302
117
|
});
|
|
303
|
-
const toolkit = (0,
|
|
118
|
+
const toolkit = (0, lloyal_agents_1.createToolkit)([
|
|
304
119
|
this._webSearch,
|
|
305
120
|
this._fetchPage,
|
|
306
121
|
ctx.reportTool,
|
|
@@ -312,7 +127,7 @@ class WebSource extends lloyal_agents_1.Source {
|
|
|
312
127
|
}
|
|
313
128
|
/** @inheritDoc */
|
|
314
129
|
getChunks() {
|
|
315
|
-
return chunkFetchedPages(this._buffer);
|
|
130
|
+
return (0, chunking_1.chunkFetchedPages)(this._buffer);
|
|
316
131
|
}
|
|
317
132
|
}
|
|
318
133
|
exports.WebSource = WebSource;
|
package/dist/sources/web.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web.js","sourceRoot":"","sources":["../../src/sources/web.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"web.js","sourceRoot":"","sources":["../../src/sources/web.ts"],"names":[],"mappings":";;;AAAA,8BAA8B;AAC9B,kCAAkC;AAElC,8DAA0E;AAK1E,oDAAoD;AACpD,oDAAoD;AACpD,wDAAwD;AACxD,yCAA+C;AAG/C,wCAAwC;AACxC,uCAAiE;AAAxD,6GAAA,iBAAiB,OAAA;AAE1B,oEAAoE;AAEpE,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,GAAG,GAAG,EAAE;SACX,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,MAAM,CAAC;SAC3D,IAAI,EAAE,CAAC;IACV,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IACnC,IAAI,GAAG,KAAK,CAAC,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IACjD,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;AAC/E,CAAC;AAED,oEAAoE;AAEpE;;;;;;;;GAQG;AACH,MAAM,kBAAmB,SAAQ,0BAAa;IACpC,OAAO,CAAgB;IAE/B,YAAY,MAAqB,EAAE,QAAiB;QAClD,KAAK,CAAC,QAAQ,CAAC,CAAC;QAChB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IACxB,CAAC;IAED,CAAC,OAAO,CAAC,IAAqC;QAC5C,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,CAAC,GAAG,MAAiC,CAAC;QAC5C,IACE,OAAO,CAAC,EAAE,OAAO,KAAK,QAAQ;YAC9B,CAAC,CAAC,OAAO,KAAK,qCAAqC,EACnD,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;gBAChB,GAAG,EAAG,CAAC,CAAC,GAAc,IAAI,IAAI,CAAC,GAAG;gBAClC,KAAK,EAAG,CAAC,CAAC,KAAgB,IAAI,EAAE;gBAChC,IAAI,EAAE,CAAC,CAAC,OAAiB;aAC1B,CAAC,CAAC;QACL,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAED,oEAAoE;AAEpE;;;;;;;;;GASG;AACH,MAAa,SAAU,SAAQ,sBAA4B;IACjD,OAAO,GAAkB,EAAE,CAAC;IAC5B,UAAU,CAAqB;IAC/B,UAAU,CAAgB;IAC1B,eAAe,CAAmC;IAClD,aAAa,GAA2B,IAAI,CAAC;IAErD,kBAAkB;IACT,IAAI,GAAG,KAAK,CAAC;IAEtB;;OAEG;IACH,YAAY,QAAwB;QAClC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC,cAAc,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvD,IAAI,CAAC,UAAU,GAAG,IAAI,0BAAa,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC;IAED,kBAAkB;IAClB,IAAI,YAAY;QACd,IAAI,CAAC,IAAI,CAAC,aAAa;YACrB,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,kBAAkB;IAClB,IAAI,cAAc;QAChB,OAAO,CAAC,IAAI,CAAC,UAAU,EAAE,IAA
I,CAAC,UAAU,CAAC,CAAC;IAC5C,CAAC;IAED;;;;OAIG;IACH,CAAC,IAAI,CAAC,GAAkB;QACtB,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QAExB,mDAAmD;QACnD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,qBAAK,CAAC,MAAM,EAAE,CAAC;QACjC,EAAE,CAAC,KAAK,CAAC;YACP,OAAO,EAAE,EAAE,CAAC,MAAM,EAAE;YACpB,aAAa,EAAE,IAAI;YACnB,EAAE,EAAE,WAAW,CAAC,GAAG,EAAE;YACrB,IAAI,EAAE,aAAa;YACnB,UAAU,EAAE,IAAI,CAAC,IAAI;SACtB,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;YACxB,MAAM,WAAW,GAAG,IAAI,8BAAe,CAAC;gBACtC,IAAI,EAAE,cAAc;gBACpB,WAAW,EACT,2FAA2F;gBAC7F,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,MAAM;gBACzC,cAAc,EAAE,GAAG,CAAC,cAAc;gBAClC,QAAQ,EAAE,GAAG,CAAC,QAAQ;gBACtB,KAAK,EAAE,GAAG,CAAC,KAAK;aACjB,CAAC,CAAC;YACH,MAAM,OAAO,GAAG,IAAA,6BAAa,EAAC;gBAC5B,IAAI,CAAC,UAAU;gBACf,IAAI,CAAC,UAAU;gBACf,GAAG,CAAC,UAAU;gBACd,WAAW;aACZ,CAAC,CAAC;YACH,WAAW,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YAChC,IAAI,CAAC,aAAa,GAAG,WAAW,CAAC;QACnC,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,SAAS;QACP,OAAO,IAAA,4BAAiB,EAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;CACF;AA7ED,8BA6EC"}
|
|
@@ -1,26 +1,41 @@
|
|
|
1
1
|
import type { Operation } from 'effection';
|
|
2
2
|
import { Tool } from '@lloyal-labs/lloyal-agents';
|
|
3
|
-
import type { JsonSchema } from '@lloyal-labs/lloyal-agents';
|
|
3
|
+
import type { JsonSchema, ToolContext } from '@lloyal-labs/lloyal-agents';
|
|
4
|
+
import type { Reranker } from './types';
|
|
4
5
|
/**
|
|
5
6
|
* Fetch a web page and extract readable article content.
|
|
6
7
|
*
|
|
7
8
|
* Uses the Fetch API with a 10-second timeout, then extracts the
|
|
8
|
-
* article body via linkedom + Readability.
|
|
9
|
-
*
|
|
10
|
-
*
|
|
9
|
+
* article body via linkedom + Readability.
|
|
10
|
+
*
|
|
11
|
+
* When a reranker is set (via {@link setReranker}) and the agent provides
|
|
12
|
+
* a `query` argument, the article HTML is structurally chunked on heading
|
|
13
|
+
* boundaries (same pattern as corpus `parseMarkdown`) and scored against
|
|
14
|
+
* the query. Only the top-K most relevant verbatim chunks are returned —
|
|
15
|
+
* reducing KV pressure without lossy summarization. The reranker runs on
|
|
16
|
+
* its own `llama_context`, consuming zero inference KV.
|
|
17
|
+
*
|
|
18
|
+
* Without a reranker or query, returns the full content truncated to
|
|
19
|
+
* `maxChars` (default 6000). Fully backward compatible.
|
|
11
20
|
*
|
|
12
21
|
* @category Rig
|
|
13
22
|
*/
|
|
14
23
|
export declare class FetchPageTool extends Tool<{
|
|
15
24
|
url: string;
|
|
25
|
+
query?: string;
|
|
16
26
|
}> {
|
|
17
27
|
readonly name = "fetch_page";
|
|
18
|
-
readonly description = "Fetch a web page and extract its article content. Returns readable text with title and excerpt. Use to read search results or follow links discovered in pages.";
|
|
28
|
+
readonly description = "Fetch a web page and extract its article content. Returns readable text with title and excerpt. Use to read search results or follow links discovered in pages. Pass a query to get only the most relevant sections.";
|
|
19
29
|
readonly parameters: JsonSchema;
|
|
20
30
|
private _maxChars;
|
|
21
|
-
|
|
31
|
+
private _reranker;
|
|
32
|
+
private _topK;
|
|
33
|
+
constructor(maxChars?: number, topK?: number);
|
|
34
|
+
/** Inject reranker for chunk scoring. Call from Source.bind(). */
|
|
35
|
+
setReranker(reranker: Reranker): void;
|
|
22
36
|
execute(args: {
|
|
23
37
|
url: string;
|
|
24
|
-
|
|
38
|
+
query?: string;
|
|
39
|
+
}, context?: ToolContext): Operation<unknown>;
|
|
25
40
|
}
|
|
26
41
|
//# sourceMappingURL=fetch-page.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-page.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-page.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,IAAI,
|
|
1
|
+
{"version":3,"file":"fetch-page.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-page.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,IAAI,EAAS,MAAM,4BAA4B,CAAC;AACzD,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAE1E,OAAO,KAAK,EAAE,QAAQ,EAAe,MAAM,SAAS,CAAC;AAErD;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,aAAc,SAAQ,IAAI,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;IACtE,QAAQ,CAAC,IAAI,gBAAgB;IAC7B,QAAQ,CAAC,WAAW,0NAA0N;IAC9O,QAAQ,CAAC,UAAU,EAAE,UAAU,CAO7B;IAEF,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAyB;IAC1C,OAAO,CAAC,KAAK,CAAS;gBAEV,QAAQ,SAAO,EAAE,IAAI,SAAI;IAMrC,kEAAkE;IAClE,WAAW,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAIpC,OAAO,CAAC,IAAI,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC;CAgJ3F"}
|
package/dist/tools/fetch-page.js
CHANGED
|
@@ -3,40 +3,62 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.FetchPageTool = void 0;
|
|
4
4
|
const effection_1 = require("effection");
|
|
5
5
|
const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
|
|
6
|
+
const chunking_1 = require("../sources/chunking");
|
|
6
7
|
/**
|
|
7
8
|
* Fetch a web page and extract readable article content.
|
|
8
9
|
*
|
|
9
10
|
* Uses the Fetch API with a 10-second timeout, then extracts the
|
|
10
|
-
* article body via linkedom + Readability.
|
|
11
|
-
*
|
|
12
|
-
*
|
|
11
|
+
* article body via linkedom + Readability.
|
|
12
|
+
*
|
|
13
|
+
* When a reranker is set (via {@link setReranker}) and the agent provides
|
|
14
|
+
* a `query` argument, the article HTML is structurally chunked on heading
|
|
15
|
+
* boundaries (same pattern as corpus `parseMarkdown`) and scored against
|
|
16
|
+
* the query. Only the top-K most relevant verbatim chunks are returned —
|
|
17
|
+
* reducing KV pressure without lossy summarization. The reranker runs on
|
|
18
|
+
* its own `llama_context`, consuming zero inference KV.
|
|
19
|
+
*
|
|
20
|
+
* Without a reranker or query, returns the full content truncated to
|
|
21
|
+
* `maxChars` (default 6000). Fully backward compatible.
|
|
13
22
|
*
|
|
14
23
|
* @category Rig
|
|
15
24
|
*/
|
|
16
25
|
class FetchPageTool extends lloyal_agents_1.Tool {
|
|
17
26
|
name = 'fetch_page';
|
|
18
|
-
description = 'Fetch a web page and extract its article content. Returns readable text with title and excerpt. Use to read search results or follow links discovered in pages.';
|
|
27
|
+
description = 'Fetch a web page and extract its article content. Returns readable text with title and excerpt. Use to read search results or follow links discovered in pages. Pass a query to get only the most relevant sections.';
|
|
19
28
|
parameters = {
|
|
20
29
|
type: 'object',
|
|
21
|
-
properties: {
|
|
30
|
+
properties: {
|
|
31
|
+
url: { type: 'string', description: 'URL to fetch' },
|
|
32
|
+
query: { type: 'string', description: 'What to look for in this page (optional — improves relevance of returned content)' },
|
|
33
|
+
},
|
|
22
34
|
required: ['url'],
|
|
23
35
|
};
|
|
24
36
|
_maxChars;
|
|
25
|
-
|
|
37
|
+
_reranker = null;
|
|
38
|
+
_topK;
|
|
39
|
+
constructor(maxChars = 6000, topK = 5) {
|
|
26
40
|
super();
|
|
27
41
|
this._maxChars = maxChars;
|
|
42
|
+
this._topK = topK;
|
|
28
43
|
}
|
|
29
|
-
|
|
44
|
+
/** Inject reranker for chunk scoring. Call from Source.bind(). */
|
|
45
|
+
setReranker(reranker) {
|
|
46
|
+
this._reranker = reranker;
|
|
47
|
+
}
|
|
48
|
+
*execute(args, context) {
|
|
30
49
|
const url = args.url?.trim();
|
|
31
50
|
if (!url)
|
|
32
51
|
return { error: 'url must not be empty' };
|
|
33
|
-
// Early reject PDF URLs
|
|
52
|
+
// Early reject PDF URLs
|
|
34
53
|
const lowerUrl = url.toLowerCase();
|
|
35
54
|
if (lowerUrl.endsWith('.pdf') || lowerUrl.includes('.pdf?') || lowerUrl.includes('.pdf#')) {
|
|
36
55
|
return { error: 'PDF documents cannot be extracted. Try searching for an HTML version of this content.', url };
|
|
37
56
|
}
|
|
38
57
|
const maxChars = this._maxChars;
|
|
39
|
-
|
|
58
|
+
const reranker = this._reranker;
|
|
59
|
+
const topK = this._topK;
|
|
60
|
+
// Step 1: Fetch + readability (async)
|
|
61
|
+
const fetched = yield* (0, effection_1.call)(async () => {
|
|
40
62
|
let res;
|
|
41
63
|
try {
|
|
42
64
|
res = await fetch(url, {
|
|
@@ -60,12 +82,105 @@ class FetchPageTool extends lloyal_agents_1.Tool {
|
|
|
60
82
|
const article = new Readability(document).parse();
|
|
61
83
|
if (!article)
|
|
62
84
|
return { url, content: '[Could not extract article content]' };
|
|
63
|
-
|
|
64
|
-
|
|
85
|
+
return {
|
|
86
|
+
url,
|
|
87
|
+
title: article.title ?? '',
|
|
88
|
+
content: article.textContent ?? '',
|
|
89
|
+
articleHtml: article.content ?? '',
|
|
90
|
+
excerpt: article.excerpt ?? '',
|
|
91
|
+
};
|
|
92
|
+
});
|
|
93
|
+
// Early return on error or no article
|
|
94
|
+
if ('error' in fetched)
|
|
95
|
+
return fetched;
|
|
96
|
+
if (!fetched.articleHtml) {
|
|
97
|
+
let content = fetched.content;
|
|
98
|
+
if (content.length > maxChars)
|
|
65
99
|
content = content.slice(0, maxChars) + '\n\n[truncated]';
|
|
100
|
+
return { url: fetched.url, title: fetched.title, content, excerpt: fetched.excerpt };
|
|
101
|
+
}
|
|
102
|
+
// Step 2: Reranker path — chunk HTML structurally, score, return top-K
|
|
103
|
+
if (reranker && args.query) {
|
|
104
|
+
const chunks = yield* (0, effection_1.call)(() => (0, chunking_1.chunkHtml)(fetched.articleHtml, url, fetched.title));
|
|
105
|
+
// Write chunks to trace for replay sufficiency
|
|
106
|
+
let tw;
|
|
107
|
+
try {
|
|
108
|
+
tw = yield* lloyal_agents_1.Trace.expect();
|
|
66
109
|
}
|
|
67
|
-
|
|
68
|
-
|
|
110
|
+
catch { /* no trace context */ }
|
|
111
|
+
const rerankT0 = performance.now();
|
|
112
|
+
if (tw) {
|
|
113
|
+
tw.write({
|
|
114
|
+
traceId: tw.nextId(),
|
|
115
|
+
parentTraceId: null,
|
|
116
|
+
ts: rerankT0,
|
|
117
|
+
type: 'rerank:start',
|
|
118
|
+
query: args.query,
|
|
119
|
+
chunkCount: chunks.length,
|
|
120
|
+
tool: 'fetch_page',
|
|
121
|
+
url,
|
|
122
|
+
chunks: chunks.map(c => ({ heading: c.heading, textLength: c.text.length, startLine: c.startLine })),
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
if (chunks.length > 0) {
|
|
126
|
+
yield* (0, effection_1.call)(() => reranker.tokenizeChunks(chunks));
|
|
127
|
+
let scored = [];
|
|
128
|
+
yield* (0, effection_1.call)(async () => {
|
|
129
|
+
for await (const batch of reranker.score(args.query, chunks)) {
|
|
130
|
+
if (context?.onProgress)
|
|
131
|
+
context.onProgress({ filled: batch.filled, total: batch.total });
|
|
132
|
+
scored = batch.results;
|
|
133
|
+
}
|
|
134
|
+
});
|
|
135
|
+
// Select top-K within token budget (tokens populated by tokenizeChunks)
|
|
136
|
+
const TOKEN_BUDGET = 2048;
|
|
137
|
+
const topChunks = [];
|
|
138
|
+
let tokenTotal = 0;
|
|
139
|
+
for (const sc of scored.slice(0, topK)) {
|
|
140
|
+
const chunk = chunks.find(c => c.resource === sc.file && c.startLine === sc.startLine);
|
|
141
|
+
if (!chunk?.text)
|
|
142
|
+
continue;
|
|
143
|
+
const chunkTokens = chunk.tokens.length || Math.ceil(chunk.text.length / 4);
|
|
144
|
+
if (topChunks.length > 0 && tokenTotal + chunkTokens > TOKEN_BUDGET)
|
|
145
|
+
break;
|
|
146
|
+
topChunks.push({ text: chunk.text, heading: sc.heading, score: sc.score });
|
|
147
|
+
tokenTotal += chunkTokens;
|
|
148
|
+
}
|
|
149
|
+
if (tw) {
|
|
150
|
+
tw.write({
|
|
151
|
+
traceId: tw.nextId(),
|
|
152
|
+
parentTraceId: null,
|
|
153
|
+
ts: performance.now(),
|
|
154
|
+
type: 'rerank:end',
|
|
155
|
+
topResults: topChunks.map(c => ({
|
|
156
|
+
file: url,
|
|
157
|
+
heading: c.heading,
|
|
158
|
+
score: c.score,
|
|
159
|
+
textPreview: c.text.slice(0, 200),
|
|
160
|
+
})),
|
|
161
|
+
selectedPassageCount: topChunks.length,
|
|
162
|
+
totalChars: topChunks.reduce((sum, c) => sum + c.text.length, 0),
|
|
163
|
+
durationMs: performance.now() - rerankT0,
|
|
164
|
+
tool: 'fetch_page',
|
|
165
|
+
url,
|
|
166
|
+
});
|
|
167
|
+
}
|
|
168
|
+
if (topChunks.length > 0) {
|
|
169
|
+
return {
|
|
170
|
+
url,
|
|
171
|
+
title: fetched.title,
|
|
172
|
+
content: topChunks.map(c => c.text).join('\n\n---\n\n'),
|
|
173
|
+
chunks: topChunks.length,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
// Fallback: return full content, truncated
|
|
179
|
+
let content = fetched.content;
|
|
180
|
+
if (content.length > maxChars) {
|
|
181
|
+
content = content.slice(0, maxChars) + '\n\n[truncated]';
|
|
182
|
+
}
|
|
183
|
+
return { url, title: fetched.title, content, excerpt: fetched.excerpt };
|
|
69
184
|
}
|
|
70
185
|
}
|
|
71
186
|
exports.FetchPageTool = FetchPageTool;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-page.js","sourceRoot":"","sources":["../../src/tools/fetch-page.ts"],"names":[],"mappings":";;;AAAA,yCAAiC;AAEjC,
|
|
1
|
+
{"version":3,"file":"fetch-page.js","sourceRoot":"","sources":["../../src/tools/fetch-page.ts"],"names":[],"mappings":";;;AAAA,yCAAiC;AAEjC,8DAAyD;AAEzD,kDAAgD;AAGhD;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAa,aAAc,SAAQ,oBAAqC;IAC7D,IAAI,GAAG,YAAY,CAAC;IACpB,WAAW,GAAG,sNAAsN,CAAC;IACrO,UAAU,GAAe;QAChC,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE;YACV,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,cAAc,EAAE;YACpD,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,mFAAmF,EAAE;SAC5H;QACD,QAAQ,EAAE,CAAC,KAAK,CAAC;KAClB,CAAC;IAEM,SAAS,CAAS;IAClB,SAAS,GAAoB,IAAI,CAAC;IAClC,KAAK,CAAS;IAEtB,YAAY,QAAQ,GAAG,IAAI,EAAE,IAAI,GAAG,CAAC;QACnC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC;QAC1B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;IACpB,CAAC;IAED,kEAAkE;IAClE,WAAW,CAAC,QAAkB;QAC5B,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC;IAC5B,CAAC;IAED,CAAC,OAAO,CAAC,IAAqC,EAAE,OAAqB;QACnE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,GAAG;YAAE,OAAO,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;QAEpD,wBAAwB;QACxB,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QACnC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1F,OAAO,EAAE,KAAK,EAAE,uFAAuF,EAAE,GAAG,EAAE,CAAC;QACjH,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC;QAExB,sCAAsC;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,IAAA,gBAAI,EAAC,KAAK,IAAI,EAAE;YACrC,IAAI,GAAa,CAAC;YAClB,IAAI,CAAC;gBACH,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;oBACrB,OAAO,EAAE,EAAE,YAAY,EAAE,6CAA6C,EAAE;oBACxE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC;iBACpC,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,EAAE,KAAK,EAAE,iBAAkB,GAAa,CAAC,OAAO,EAAE,EAAE,GAAG,EAAW,CAAC;YAC5E,CAAC;YAED,IAAI,CAAC,GAAG,CAAC,EAAE;gBAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,EAAE,GAAG,EAAW,CAAC;YAEpF,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;YAC1D,IAAI,WAAW,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;gBAC5C,OAAO,EAAE,KAAK,EAAE,uFAAuF,EAAE,GAAG,EAAW,CAAC;YAC1H,CAAC
;YAED,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;YAE9B,MAAM,EAAE,SAAS,EAAE,GAAG,2CAAa,UAAU,EAAC,CAAC;YAC/C,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YAErC,MAAM,EAAE,WAAW,EAAE,GAAG,2CAAa,sBAAsB,EAAC,CAAC;YAC7D,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;YAElD,IAAI,CAAC,OAAO;gBAAE,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,qCAAqC,EAAW,CAAC;YAEtF,OAAO;gBACL,GAAG;gBACH,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;gBAC1B,OAAO,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;gBAClC,WAAW,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;gBAClC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;aACtB,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,sCAAsC;QACtC,IAAI,OAAO,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC;QACvC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YACzB,IAAI,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;YAC9B,IAAI,OAAO,CAAC,MAAM,GAAG,QAAQ;gBAAE,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,iBAAiB,CAAC;YACxF,OAAO,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC;QACvF,CAAC;QAED,uEAAuE;QACvE,IAAI,QAAQ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,IAAA,gBAAI,EAAC,GAAG,EAAE,CAAC,IAAA,oBAAS,EAAC,OAAO,CAAC,WAAW,EAAE,GAAG,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;YAErF,+CAA+C;YAC/C,IAAI,EAAE,CAAC;YACP,IAAI,CAAC;gBAAC,EAAE,GAAG,KAAK,CAAC,CAAC,qBAAK,CAAC,MAAM,EAAE,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;YACpE,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACnC,IAAI,EAAE,EAAE,CAAC;gBACP,EAAE,CAAC,KAAK,CAAC;oBACP,OAAO,EAAE,EAAE,CAAC,MAAM,EAAE;oBACpB,aAAa,EAAE,IAAI;oBACnB,EAAE,EAAE,QAAQ;oBACZ,IAAI,EAAE,cAAc;oBACpB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,UAAU,EAAE,MAAM,CAAC,MAAM;oBACzB,IAAI,EAAE,YAAY;oBAClB,GAAG;oBACH,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;iBACrG,CAAC,CAAC;YACL,CAAC;YAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,KAAK,CAAC,CAAC,IAAA,gBAAI,EAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;gBAEnD,IAAI,MAAM,GAAkB,EAAE,CAAC;gBAC/B,KAAK,CAAC,CAAC
,IAAA,gBAAI,EAAC,KAAK,IAAI,EAAE;oBACrB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,KAAM,EAAE,MAAM,CAAC,EAAE,CAAC;wBAC9D,IAAI,OAAO,EAAE,UAAU;4BAAE,OAAO,CAAC,UAAU,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;wBAC1F,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC;oBACzB,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,wEAAwE;gBACxE,MAAM,YAAY,GAAG,IAAI,CAAC;gBAC1B,MAAM,SAAS,GAA4D,EAAE,CAAC;gBAC9E,IAAI,UAAU,GAAG,CAAC,CAAC;gBACnB,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC;oBACvC,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,EAAE,CAAC,IAAI,IAAI,CAAC,CAAC,SAAS,KAAK,EAAE,CAAC,SAAS,CAAC,CAAC;oBACvF,IAAI,CAAC,KAAK,EAAE,IAAI;wBAAE,SAAS;oBAC3B,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAC5E,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,GAAG,WAAW,GAAG,YAAY;wBAAE,MAAM;oBAC3E,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,CAAC,CAAC;oBAC3E,UAAU,IAAI,WAAW,CAAC;gBAC5B,CAAC;gBAED,IAAI,EAAE,EAAE,CAAC;oBACP,EAAE,CAAC,KAAK,CAAC;wBACP,OAAO,EAAE,EAAE,CAAC,MAAM,EAAE;wBACpB,aAAa,EAAE,IAAI;wBACnB,EAAE,EAAE,WAAW,CAAC,GAAG,EAAE;wBACrB,IAAI,EAAE,YAAY;wBAClB,UAAU,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;4BAC9B,IAAI,EAAE,GAAG;4BACT,OAAO,EAAE,CAAC,CAAC,OAAO;4BAClB,KAAK,EAAE,CAAC,CAAC,KAAK;4BACd,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;yBAClC,CAAC,CAAC;wBACH,oBAAoB,EAAE,SAAS,CAAC,MAAM;wBACtC,UAAU,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;wBAChE,UAAU,EAAE,WAAW,CAAC,GAAG,EAAE,GAAG,QAAQ;wBACxC,IAAI,EAAE,YAAY;wBAClB,GAAG;qBACJ,CAAC,CAAC;gBACL,CAAC;gBAED,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACzB,OAAO;wBACL,GAAG;wBACH,KAAK,EAAE,OAAO,CAAC,KAAK;wBACpB,OAAO,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC;wBACvD,MAAM,EAAE,SAAS,CAAC,MAAM;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;Q
ACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAC9B,IAAI,OAAO,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YAC9B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,iBAAiB,CAAC;QAC3D,CAAC;QACD,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC;IAC1E,CAAC;CACF;AA3KD,sCA2KC"}
|
package/dist/tools/index.d.ts
CHANGED
|
@@ -5,8 +5,8 @@ import { ReportTool } from './report';
|
|
|
5
5
|
export { ResearchTool } from './research';
|
|
6
6
|
export type { ResearchToolOpts } from './research';
|
|
7
7
|
export { WebSearchTool, TavilyProvider } from './web-search';
|
|
8
|
-
export type { SearchProvider, SearchResult, Reranker, ScoredChunk, ScoredResult } from './types';
|
|
9
8
|
export { FetchPageTool } from './fetch-page';
|
|
9
|
+
export type { SearchProvider, SearchResult, Reranker, ScoredChunk, ScoredResult } from './types';
|
|
10
10
|
export { WebResearchTool } from './web-research';
|
|
11
11
|
export type { WebResearchToolOpts } from './web-research';
|
|
12
12
|
export { PlanTool } from './plan';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAIxC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1C,YAAY,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC7D,YAAY,EAAE,cAAc,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACjG,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAIxC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1C,YAAY,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,YAAY,EAAE,cAAc,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACjG,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,YAAY,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAErE;;;;;;GAMG;AACH,eAAO,MAAM,UAAU,YAAmB,CAAC;AAE3C;;;;;;;;;;;GAWG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE;IAChC,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,QAAQ,EAAE,QAAQ,CAAC;CACpB,GAAG,OAAO,CAOV"}
|
package/dist/tools/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":";;;AAwCA,kCAWC;AAnDD,8DAA2D;AAI3D,qCAAsC;AACtC,2CAA2C;AAC3C,iCAAkC;AAClC,qCAAsC;AAEtC,uCAA0C;AAAjC,wGAAA,YAAY,OAAA;AAErB,2CAA6D;AAApD,2GAAA,aAAa,OAAA;AAAE,4GAAA,cAAc,OAAA;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":";;;AAwCA,kCAWC;AAnDD,8DAA2D;AAI3D,qCAAsC;AACtC,2CAA2C;AAC3C,iCAAkC;AAClC,qCAAsC;AAEtC,uCAA0C;AAAjC,wGAAA,YAAY,OAAA;AAErB,2CAA6D;AAApD,2GAAA,aAAa,OAAA;AAAE,4GAAA,cAAc,OAAA;AACtC,2CAA6C;AAApC,2GAAA,aAAa,OAAA;AAEtB,+CAAiD;AAAxC,+GAAA,eAAe,OAAA;AAExB,+BAAkC;AAAzB,gGAAA,QAAQ,OAAA;AAGjB;;;;;;GAMG;AACU,QAAA,UAAU,GAAG,IAAI,mBAAU,EAAE,CAAC;AAE3C;;;;;;;;;;;GAWG;AACH,SAAgB,WAAW,CAAC,IAI3B;IACC,OAAO,IAAA,6BAAa,EAAC;QACnB,IAAI,mBAAU,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC;QAC1C,IAAI,wBAAY,CAAC,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,eAAQ,CAAC,IAAI,CAAC,SAAS,CAAC;QAC5B,kBAAU;KACX,CAAC,CAAC;AACL,CAAC"}
|
package/dist/tools/types.d.ts
CHANGED
|
@@ -66,6 +66,10 @@ export interface SearchResult {
|
|
|
66
66
|
url: string;
|
|
67
67
|
/** Excerpt or snippet from the page content */
|
|
68
68
|
snippet: string;
|
|
69
|
+
/** Full page content — markdown when provider supports it, plain text otherwise */
|
|
70
|
+
rawContent?: string;
|
|
71
|
+
/** Provider-side relevance score (higher = more relevant) */
|
|
72
|
+
score?: number;
|
|
69
73
|
}
|
|
70
74
|
/**
|
|
71
75
|
* Adapter interface for web search backends
|
|
@@ -80,5 +84,7 @@ export interface SearchResult {
|
|
|
80
84
|
export interface SearchProvider {
|
|
81
85
|
/** Execute a web search and return ranked results */
|
|
82
86
|
search(query: string, maxResults: number): Promise<SearchResult[]>;
|
|
87
|
+
/** When true, rawContent on results is markdown with heading structure suitable for parseMarkdown chunking */
|
|
88
|
+
readonly returnsFullContentMarkdown: boolean;
|
|
83
89
|
}
|
|
84
90
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/tools/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEhD;;;;;;;;;GASG;AACH,MAAM,WAAW,WAAW;IAC1B,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,+CAA+C;IAC/C,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,YAAY;IAC3B,6DAA6D;IAC7D,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,qCAAqC;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,sCAAsC;IACtC,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,QAAQ;IACvB,kEAAkE;IAClE,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;IACnE,uDAAuD;IACvD,cAAc,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,iCAAiC;IACjC,OAAO,IAAI,IAAI,CAAC;CACjB;AAID;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,eAAe;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,+CAA+C;IAC/C,OAAO,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/tools/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEhD;;;;;;;;;GASG;AACH,MAAM,WAAW,WAAW;IAC1B,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,2CAA2C;IAC3C,OAAO,EAAE,MAAM,CAAC;IAChB,+CAA+C;IAC/C,KAAK,EAAE,MAAM,CAAC;IACd,gDAAgD;IAChD,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,YAAY;IAC3B,6DAA6D;IAC7D,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,qCAAqC;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,sCAAsC;IACtC,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,QAAQ;IACvB,kEAAkE;IAClE,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;IACnE,uDAAuD;IACvD,cAAc,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,iCAAiC;IACjC,OAAO,IAAI,IAAI,CAAC;CACjB;AAID;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,eAAe;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,+CAA+C;IAC/C,OAAO,EAAE,MAAM,CAAC;IAChB,mFAAmF;IACnF,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,cAAc;IAC7B,qDAAqD;IACrD,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IACnE,8GAA8G;IAC9G,QAAQ,CAAC,0BAA0B,EAAE,OAAO,CAAC;CAC9C"}
|
|
@@ -13,8 +13,12 @@ export type { SearchProvider, SearchResult };
|
|
|
13
13
|
* @category Rig
|
|
14
14
|
*/
|
|
15
15
|
export declare class TavilyProvider implements SearchProvider {
|
|
16
|
+
readonly returnsFullContentMarkdown = false;
|
|
16
17
|
private _apiKey;
|
|
17
|
-
|
|
18
|
+
private _snippetMaxLength;
|
|
19
|
+
constructor(apiKey?: string, opts?: {
|
|
20
|
+
snippetMaxLength?: number;
|
|
21
|
+
});
|
|
18
22
|
search(query: string, maxResults: number): Promise<SearchResult[]>;
|
|
19
23
|
}
|
|
20
24
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web-search.d.ts","sourceRoot":"","sources":["../../src/tools/web-search.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AAClD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAC7D,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAE5D,YAAY,EAAE,cAAc,EAAE,YAAY,EAAE,CAAC;AAI7C;;;;;;;;GAQG;AACH,qBAAa,cAAe,YAAW,cAAc;IACnD,OAAO,CAAC,OAAO,CAAS;
|
|
1
|
+
{"version":3,"file":"web-search.d.ts","sourceRoot":"","sources":["../../src/tools/web-search.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,IAAI,EAAE,MAAM,4BAA4B,CAAC;AAClD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAC7D,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAE5D,YAAY,EAAE,cAAc,EAAE,YAAY,EAAE,CAAC;AAI7C;;;;;;;;GAQG;AACH,qBAAa,cAAe,YAAW,cAAc;IACnD,QAAQ,CAAC,0BAA0B,SAAS;IAC5C,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,iBAAiB,CAAS;gBAEtB,MAAM,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;QAAE,gBAAgB,CAAC,EAAE,MAAM,CAAA;KAAE;IAK3D,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAqBzE;AAID;;;;;;;;GAQG;AACH,qBAAa,aAAc,SAAQ,IAAI,CAAC;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC;IACxD,QAAQ,CAAC,IAAI,gBAAgB;IAC7B,QAAQ,CAAC,WAAW,gIAAgI;IACpJ,QAAQ,CAAC,UAAU,EAAE,UAAU,CAI7B;IAEF,OAAO,CAAC,SAAS,CAAiB;IAClC,OAAO,CAAC,KAAK,CAAS;gBAEV,QAAQ,EAAE,cAAc,EAAE,IAAI,SAAI;IAM7C,OAAO,CAAC,IAAI,EAAE;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,GAAG,SAAS,CAAC,OAAO,CAAC;CAYtD"}
|
package/dist/tools/web-search.js
CHANGED
|
@@ -14,9 +14,12 @@ const lloyal_agents_1 = require("@lloyal-labs/lloyal-agents");
|
|
|
14
14
|
* @category Rig
|
|
15
15
|
*/
|
|
16
16
|
class TavilyProvider {
|
|
17
|
+
returnsFullContentMarkdown = false;
|
|
17
18
|
_apiKey;
|
|
18
|
-
constructor(apiKey) {
|
|
19
|
+
_snippetMaxLength;
|
|
20
|
+
constructor(apiKey, opts) {
|
|
19
21
|
this._apiKey = apiKey || process.env.TAVILY_API_KEY || '';
|
|
22
|
+
this._snippetMaxLength = opts?.snippetMaxLength ?? 500;
|
|
20
23
|
}
|
|
21
24
|
async search(query, maxResults) {
|
|
22
25
|
if (!this._apiKey)
|
|
@@ -32,10 +35,13 @@ class TavilyProvider {
|
|
|
32
35
|
if (!res.ok)
|
|
33
36
|
throw new Error(`Tavily ${res.status}: ${await res.text()}`);
|
|
34
37
|
const data = await res.json();
|
|
38
|
+
const max = this._snippetMaxLength;
|
|
35
39
|
return data.results.map(r => ({
|
|
36
40
|
title: r.title,
|
|
37
41
|
url: r.url,
|
|
38
|
-
snippet: r.content
|
|
42
|
+
snippet: r.content.length > max
|
|
43
|
+
? r.content.slice(0, max) + ' [\u2026]'
|
|
44
|
+
: r.content,
|
|
39
45
|
}));
|
|
40
46
|
}
|
|
41
47
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web-search.js","sourceRoot":"","sources":["../../src/tools/web-search.ts"],"names":[],"mappings":";;;AAAA,yCAAiC;AAEjC,8DAAkD;AAMlD,2DAA2D;AAE3D;;;;;;;;GAQG;AACH,MAAa,cAAc;
|
|
1
|
+
{"version":3,"file":"web-search.js","sourceRoot":"","sources":["../../src/tools/web-search.ts"],"names":[],"mappings":";;;AAAA,yCAAiC;AAEjC,8DAAkD;AAMlD,2DAA2D;AAE3D;;;;;;;;GAQG;AACH,MAAa,cAAc;IAChB,0BAA0B,GAAG,KAAK,CAAC;IACpC,OAAO,CAAS;IAChB,iBAAiB,CAAS;IAElC,YAAY,MAAe,EAAE,IAAoC;QAC/D,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC;QAC1D,IAAI,CAAC,iBAAiB,GAAG,IAAI,EAAE,gBAAgB,IAAI,GAAG,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAkB;QAC5C,IAAI,CAAC,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC7D,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,+BAA+B,EAAE;YACvD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,eAAe,EAAE,UAAU,IAAI,CAAC,OAAO,EAAE;aAC1C;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;SACzD,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,MAAM,KAAK,MAAM,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC1E,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAoE,CAAC;QAChG,MAAM,GAAG,GAAG,IAAI,CAAC,iBAAiB,CAAC;QACnC,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAC5B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG;gBAC7B,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,WAAW;gBACvC,CAAC,CAAC,CAAC,CAAC,OAAO;SACd,CAAC,CAAC,CAAC;IACN,CAAC;CACF;AA/BD,wCA+BC;AAED,2DAA2D;AAE3D;;;;;;;;GAQG;AACH,MAAa,aAAc,SAAQ,oBAAuB;IAC/C,IAAI,GAAG,YAAY,CAAC;IACpB,WAAW,GAAG,4HAA4H,CAAC;IAC3I,UAAU,GAAe;QAChC,IAAI,EAAE,QAAQ;QACd,UAAU,EAAE,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,cAAc,EAAE,EAAE;QACtE,QAAQ,EAAE,CAAC,OAAO,CAAC;KACpB,CAAC;IAEM,SAAS,CAAiB;IAC1B,KAAK,CAAS;IAEtB,YAAY,QAAwB,EAAE,IAAI,GAAG,CAAC;QAC5C,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC;QAC1B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;IACpB,CAAC;IAED,CAAC,OAAO,CAAC,IAAuB;QAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,KAAK,EAAE,yBAAyB,EAAE,CAAC;QAExD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,CAAC,KAA
K,CAAC;QACxB,IAAI,CAAC;YACH,OAAO,KAAK,CAAC,CAAC,IAAA,gBAAI,EAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;QACzD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,EAAE,KAAK,EAAE,kBAAmB,GAAa,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/D,CAAC;IACH,CAAC;CACF;AA9BD,sCA8BC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lloyal-labs/rig",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.2.2",
|
|
4
4
|
"description": "Retrieval-Interleaved Generation for lloyal-agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"default": "./dist/index.js"
|
|
11
|
+
},
|
|
12
|
+
"./node": {
|
|
13
|
+
"types": "./dist/node.d.ts",
|
|
14
|
+
"default": "./dist/node.js"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
7
17
|
"publishConfig": {
|
|
8
18
|
"access": "public"
|
|
9
19
|
},
|