@nhtio/adk 1.20260609.0 → 1.20260609.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -9
- package/batteries/tools/_shared/index.d.ts +121 -0
- package/batteries/tools/_shared.cjs +157 -0
- package/batteries/tools/_shared.cjs.map +1 -0
- package/batteries/tools/_shared.mjs +149 -0
- package/batteries/tools/_shared.mjs.map +1 -0
- package/batteries/tools/index.d.ts +2 -0
- package/batteries/tools/scrapper/exceptions.d.ts +21 -0
- package/batteries/tools/scrapper/index.d.ts +172 -0
- package/batteries/tools/scrapper/shared.d.ts +139 -0
- package/batteries/tools/scrapper.cjs +8 -0
- package/batteries/tools/scrapper.mjs +2 -0
- package/batteries/tools/searxng/index.d.ts +47 -20
- package/batteries/tools/searxng.cjs +2 -1
- package/batteries/tools/searxng.mjs +2 -2
- package/batteries/tools/web_retrieval/index.d.ts +186 -0
- package/batteries/tools/web_retrieval.cjs +206 -0
- package/batteries/tools/web_retrieval.cjs.map +1 -0
- package/batteries/tools/web_retrieval.mjs +201 -0
- package/batteries/tools/web_retrieval.mjs.map +1 -0
- package/batteries/tools.cjs +13 -1
- package/batteries/tools.mjs +4 -2
- package/batteries.cjs +13 -1
- package/batteries.mjs +4 -2
- package/common.d.ts +1 -1
- package/eslint/rules.cjs +1 -1
- package/eslint/rules.mjs +1 -1
- package/eslint.cjs +2 -2
- package/eslint.mjs +2 -2
- package/index.cjs +2 -2
- package/index.mjs +2 -2
- package/mcp/adk-docs-corpus.json +1 -1
- package/package.json +210 -195
- package/scrapper-BHM1mCde.mjs +432 -0
- package/scrapper-BHM1mCde.mjs.map +1 -0
- package/scrapper-BeweWurk.js +462 -0
- package/scrapper-BeweWurk.js.map +1 -0
- package/{searxng-CyA-nEu5.mjs → searxng-BJFulNcK.mjs} +74 -84
- package/searxng-BJFulNcK.mjs.map +1 -0
- package/{searxng-Bkrwhwhw.js → searxng-B_D--V5q.js} +80 -84
- package/searxng-B_D--V5q.js.map +1 -0
- package/skills/adk-assembly/SKILL.md +2 -2
- package/searxng-Bkrwhwhw.js.map +0 -1
- package/searxng-CyA-nEu5.mjs.map +0 -1
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
+
require("../../chunk-Ble4zEEl.js");
|
|
3
|
+
const require_tool_registry = require("../../tool_registry-CKJPze3j.js");
|
|
4
|
+
require("../../guards.cjs");
|
|
5
|
+
let js_sha256 = require("js-sha256");
|
|
6
|
+
//#region src/batteries/tools/web_retrieval/index.ts
|
|
7
|
+
/**
|
|
8
|
+
* RAG glue: turn web-search and web-scrape results into `Retrievable` records for a turn.
|
|
9
|
+
*
|
|
10
|
+
* @module @nhtio/adk/batteries/tools/web_retrieval
|
|
11
|
+
*
|
|
12
|
+
* @remarks
|
|
13
|
+
* The seam from "I searched / scraped something" to "it is in the turn as a `Retrievable`",
|
|
14
|
+
* shared by the SearXNG and Scrapper batteries. It is deliberately **decoupled** from the ADK
|
|
15
|
+
* core at runtime:
|
|
16
|
+
*
|
|
17
|
+
* - The converters are **pure** `(payload) => RawRetrievable[]` — they build plain data objects and
|
|
18
|
+
* never instantiate a core class, so the module's only core coupling is erased `import type`.
|
|
19
|
+
* - The recommended spool-artifact type travels as an **open resolver**
|
|
20
|
+
* ({@link @nhtio/adk/forge!ArtifactConstructorResolver}), never a closed string enum — a consumer's
|
|
21
|
+
* future YAML/HTML `SpooledArtifact` subclass works with no change here. The converter hands the
|
|
22
|
+
* recommendation to the caller's `spool` hook; the caller owns the actual class import.
|
|
23
|
+
* - The one helper that must construct a `Retrievable` ({@link storeRetrievables}) takes the
|
|
24
|
+
* constructor via a **resolver** (constructor / sync / async / dynamic-import), exactly like the
|
|
25
|
+
* vector battery's `createVectorStore` `client`.
|
|
26
|
+
*
|
|
27
|
+
* Web content is `'third-party-public'` by default — a definitional constant for open-web data
|
|
28
|
+
* (NOT inferred from the URL, which CONTRIBUTING Design Decision #12 forbids); override via
|
|
29
|
+
* `trustTier` when you know better.
|
|
30
|
+
*/
|
|
31
|
+
/** Current wall-clock time as an ISO-8601 string (the output of `Date.prototype.toISOString`). */
var nowIso = () => {
	const current = new Date();
	return current.toISOString();
};
|
|
32
|
+
/**
 * Build a deterministic record id from a source string (typically a URL).
 *
 * @remarks
 * The id is the `js-sha256` digest of `source`, optionally namespaced as `prefix:digest`. It is
 * stable, not secret: the same `source` always yields the same id, so anyone who knows the source
 * string can recompute it.
 *
 * @param prefix - Optional namespace; any falsy value (including `''`) yields the bare digest.
 * @param source - The string to hash.
 * @returns `prefix:digest` when `prefix` is truthy, otherwise `digest`.
 */
var stableId = (prefix, source) => {
	const digest = (0, js_sha256.sha256)(source);
	if (!prefix) return digest;
	return `${prefix}:${digest}`;
};
|
|
37
|
+
/**
 * Normalise a relevance score into the closed range `[0, 1]`.
 *
 * @param score - Any value; only finite numbers are accepted.
 * @returns `undefined` for non-numbers, `NaN` and `±Infinity`; otherwise `0` for negatives, `1` for
 * values above one, and the value itself when already in range.
 */
var clampScore = (score) => {
	const isUsable = typeof score === "number" && Number.isFinite(score);
	if (!isUsable) return void 0;
	return score < 0 ? 0 : score > 1 ? 1 : score;
};
|
|
44
|
+
/**
 * Decide whether a record's content is stored inline or as a caller-built artifact.
 *
 * @remarks
 * When `opts.spool` is set it is called as `opts.spool(id, text, recommended)`. A truthy return
 * value is used as the content; any falsy return (e.g. `undefined`) falls back to the inline text.
 * Without a hook the text is returned untouched.
 *
 * @param id - The record id handed to the hook.
 * @param text - The raw text content.
 * @param opts - Converter options; only `spool` is read here.
 * @param recommended - The artifact resolver the converter recommends for this content.
 * @returns The hook's artifact, or `text`.
 */
var resolveContent = (id, text, opts, recommended) => {
	const spooled = opts.spool ? opts.spool(id, text, recommended) : void 0;
	return spooled ? spooled : text;
};
|
|
56
|
+
/**
 * Convert a SearXNG normalised payload into one {@link @nhtio/adk!RawRetrievable} per result.
 *
 * @remarks
 * - `id` is `stableId(opts.idPrefix, url)`; a result without a URL hashes `` `${kind}:${index}` ``
 *   instead.
 * - The text is the result's `title` and `content` joined with a newline; non-string fields are
 *   skipped.
 * - If the caller supplied any recommendation (`text`, then `markdown`, then `json`), the text is
 *   passed through `resolveContent`, so a `spool` hook may replace it with an artifact. With no
 *   recommendation the text stays inline and the hook is not consulted.
 * - `source` is set only for a non-empty URL; `score` only when `clampScore` yields a number.
 * - Every record in the batch shares one `createdAt` / `updatedAt` timestamp.
 *
 * @param payload - The SearXNG normalised payload (`{ results: [{ url, title, content, score }] }`).
 * A missing `results` list produces an empty array.
 * @param opts - Trust tier (default `'third-party-public'`), kind (default `'web-search-result'`),
 * id prefix, optional spool hook.
 * @param recommend - Optional artifact-resolver recommendations (the glue names no class itself).
 * @returns One `RawRetrievable` per result, in payload order.
 */
var searxngResultsToRetrievables = (payload, opts = {}, recommend = {}) => {
	const trustTier = opts.trustTier ?? "third-party-public";
	const kind = opts.kind ?? "web-search-result";
	const timestamp = nowIso();
	const toRaw = (result, index) => {
		const url = result.url ?? "";
		const id = stableId(opts.idPrefix, url || `${kind}:${index}`);
		// Keep only the string pieces so a missing title or snippet does not inject "undefined".
		const pieces = [];
		for (const piece of [result.title, result.content]) {
			if (typeof piece === "string") pieces.push(piece);
		}
		const text = pieces.join("\n");
		const artifactHint = recommend.text ?? recommend.markdown ?? recommend.json;
		const content = artifactHint ? resolveContent(id, text, opts, artifactHint) : text;
		const record = {
			id,
			content,
			trustTier,
			kind,
			createdAt: timestamp,
			updatedAt: timestamp
		};
		if (url) record.source = url;
		const clamped = clampScore(result.score);
		if (clamped !== void 0) record.score = clamped;
		return record;
	};
	const results = payload.results ?? [];
	return results.map(toRaw);
};
|
|
91
|
+
/**
 * Convert a Scrapper normalised article into a single {@link @nhtio/adk!RawRetrievable}.
 *
 * @remarks
 * - `id` is `stableId(opts.idPrefix, url)`; an article without a URL hashes `kind` instead.
 * - The text comes from `article.content` when `opts.contentSource === 'content'`, otherwise from
 *   `article.textContent`; a missing field becomes `''`.
 * - The recommended artifact resolver is `recommend.markdown` (falling back to `recommend.text`)
 *   when `opts.asMarkdown` is truthy, else `recommend.text`. Only when one is available is the text
 *   passed through `resolveContent`, letting a `spool` hook store it reader-backed; otherwise the
 *   content stays inline.
 * - `source` is set only for a non-empty URL.
 *
 * @param article - The Scrapper normalised article.
 * @param opts - Trust tier (default `'third-party-public'`), kind (default `'web-article'`), id
 * prefix, content source, markdown flag, optional spool hook.
 * @param recommend - Optional artifact-resolver recommendations.
 * @returns A single `RawRetrievable`.
 */
var scrapperArticleToRetrievable = (article, opts = {}, recommend = {}) => {
	const trustTier = opts.trustTier ?? "third-party-public";
	const kind = opts.kind ?? "web-article";
	const timestamp = nowIso();
	const url = article.url ?? "";
	const id = stableId(opts.idPrefix, url || kind);
	const field = opts.contentSource ?? "textContent";
	const body = field === "content" ? article.content : article.textContent;
	const text = body ?? "";
	let artifactHint;
	if (opts.asMarkdown) artifactHint = recommend.markdown ?? recommend.text;
	else artifactHint = recommend.text;
	const content = artifactHint ? resolveContent(id, text, opts, artifactHint) : text;
	const record = {
		id,
		content,
		trustTier,
		kind,
		createdAt: timestamp,
		updatedAt: timestamp
	};
	if (url) record.source = url;
	return record;
};
|
|
124
|
+
/**
 * Convert a Scrapper normalised links payload into one {@link @nhtio/adk!RawRetrievable} per link.
 *
 * @remarks
 * Each link's `text` becomes the inline content; a link whose `text` is `null`/`undefined` uses its
 * URL as content instead. `id` is `stableId(opts.idPrefix, url)`, or the hash of
 * `` `${kind}:${index}` `` when the link has no URL. `source` is set only for a non-empty URL. No
 * spool hook is consulted here.
 *
 * @param payload - The Scrapper normalised links payload (`{ links: [{ url, text }] }`). A missing
 * `links` list produces an empty array.
 * @param opts - Trust tier (default `'third-party-public'`), kind (default `'web-link'`), id prefix.
 * @returns One `RawRetrievable` per link, in payload order.
 */
var scrapperLinksToRetrievables = (payload, opts = {}) => {
	const trustTier = opts.trustTier ?? "third-party-public";
	const kind = opts.kind ?? "web-link";
	const timestamp = nowIso();
	const toRaw = (link, index) => {
		const url = link.url ?? "";
		const id = stableId(opts.idPrefix, url || `${kind}:${index}`);
		const content = link.text ?? url;
		const record = {
			id,
			content,
			trustTier,
			kind,
			createdAt: timestamp,
			updatedAt: timestamp
		};
		if (url) record.source = url;
		return record;
	};
	const links = payload.links ?? [];
	return links.map(toRaw);
};
|
|
153
|
+
/**
 * Resolve a {@link Resolver} of the `Retrievable` constructor (sync / async / `{ default }`).
 *
 * @remarks
 * Both a bare class and a resolver are `typeof 'function'`, and `Retrievable` is held only as an
 * `import type`, so there is no runtime value to compare against. The two are told apart by
 * behaviour: the argument is invoked without `new`.
 *
 * - If the call throws and the argument is constructible, it is taken to be the constructor itself
 *   (a real ES class always throws when called without `new`).
 * - If the call throws and the argument is NOT constructible (arrow function, async function, ...),
 *   it can only have been a resolver that failed. The failure is surfaced as a `TypeError` whose
 *   `cause` is the original error, instead of handing back a value that would later fail with an
 *   unrelated "is not a constructor".
 * - If the call returns, a Promise is awaited and a `{ default }` wrapper is unwrapped. A function
 *   result is the constructor. A non-function result means the argument was itself a
 *   callable-without-`new` constructor, which is accepted only if it is constructible.
 *
 * @param resolver - The `Retrievable` constructor, or a thunk (sync or async) returning it or a
 * module-like `{ default }` wrapping it.
 * @returns The resolved constructor.
 * @throws TypeError when `resolver` is not a function, when a non-constructible resolver throws, or
 * when nothing constructible could be resolved.
 */
var resolveRetrievableCtor = async (resolver) => {
	if (typeof resolver !== "function") throw new TypeError("retrievable must be a constructor or a resolver returning one");
	// Probe constructibility without running the function: `Reflect.construct` rejects a
	// non-constructor `newTarget` up front and never invokes it.
	const canConstruct = (fn) => {
		try {
			Reflect.construct(Object, [], fn);
			return true;
		} catch {
			return false;
		}
	};
	let resolved;
	try {
		resolved = resolver();
	} catch (err) {
		// Bare class (or other constructor) that refused to run without `new`.
		if (canConstruct(resolver)) return resolver;
		// A thunk that cannot be `new`-ed threw: report the real failure rather than masking it.
		throw new TypeError("retrievable resolver threw before yielding a constructor", { cause: err });
	}
	if (require_tool_registry.isInstanceOf(resolved, "Promise", Promise)) resolved = await resolved;
	if (resolved && typeof resolved === "object" && "default" in resolved) resolved = resolved.default;
	if (typeof resolved === "function") return resolved;
	// Non-function result: the argument was itself a constructor that tolerates a plain call.
	if (canConstruct(resolver)) return resolver;
	throw new TypeError("retrievable must be a constructor or a resolver returning one");
};
|
|
175
|
+
/**
 * Construct {@link @nhtio/adk!Retrievable}s from `RawRetrievable`s and store each via `ctx`.
 *
 * @remarks
 * The constructor is obtained once, through the injected resolver `deps.retrievable`, so this
 * module never value-imports `Retrievable`. Records are then processed strictly one at a time, in
 * input order: each is constructed, handed to `ctx.storeRetrievable`, and awaited before the next
 * one is built. If construction or storage throws, the records stored so far remain stored.
 *
 * @param ctx - Anything with a `storeRetrievable` method (a `DispatchContext`, or a stub).
 * @param raws - The plain records from the converters.
 * @param deps - `{ retrievable }`: the `Retrievable` constructor or a resolver of it.
 * @returns The constructed `Retrievable` instances, in input order.
 */
var storeRetrievables = async (ctx, raws, deps) => {
	const RetrievableCtor = await resolveRetrievableCtor(deps.retrievable);
	const stored = [];
	for (const raw of raws) {
		const instance = new RetrievableCtor(raw);
		await ctx.storeRetrievable(instance);
		stored.push(instance);
	}
	return stored;
};
|
|
200
|
+
//#endregion
|
|
201
|
+
exports.scrapperArticleToRetrievable = scrapperArticleToRetrievable;
|
|
202
|
+
exports.scrapperLinksToRetrievables = scrapperLinksToRetrievables;
|
|
203
|
+
exports.searxngResultsToRetrievables = searxngResultsToRetrievables;
|
|
204
|
+
exports.storeRetrievables = storeRetrievables;
|
|
205
|
+
|
|
206
|
+
//# sourceMappingURL=web_retrieval.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web_retrieval.cjs","names":[],"sources":["../../../src/batteries/tools/web_retrieval/index.ts"],"sourcesContent":["/**\n * RAG glue: turn web-search and web-scrape results into `Retrievable` records for a turn.\n *\n * @module @nhtio/adk/batteries/tools/web_retrieval\n *\n * @remarks\n * The seam from \"I searched / scraped something\" to \"it is in the turn as a `Retrievable`\",\n * shared by the SearXNG and Scrapper batteries. It is deliberately **decoupled** from the ADK\n * core at runtime:\n *\n * - The converters are **pure** `(payload) => RawRetrievable[]` — they build plain data objects and\n * never instantiate a core class, so the module's only core coupling is erased `import type`.\n * - The recommended spool-artifact type travels as an **open resolver**\n * ({@link @nhtio/adk/forge!ArtifactConstructorResolver}), never a closed string enum — a consumer's\n * future YAML/HTML `SpooledArtifact` subclass works with no change here. The converter hands the\n * recommendation to the caller's `spool` hook; the caller owns the actual class import.\n * - The one helper that must construct a `Retrievable` ({@link storeRetrievables}) takes the\n * constructor via a **resolver** (constructor / sync / async / dynamic-import), exactly like the\n * vector battery's `createVectorStore` `client`.\n *\n * Web content is `'third-party-public'` by default — a definitional constant for open-web data\n * (NOT inferred from the URL, which CONTRIBUTING Design Decision #12 forbids); override via\n * `trustTier` when you know better.\n */\n\nimport { sha256 } from 'js-sha256'\nimport { isInstanceOf } from '@nhtio/adk/guards'\nimport type { SpooledArtifact } from '@nhtio/adk/spooled_artifact'\nimport type { ArtifactConstructorResolver } from '@nhtio/adk/forge'\nimport type { RawRetrievable, Retrievable, RetrievableTrustTier } from '@nhtio/adk/common'\n\n/** A constructor that builds a {@link @nhtio/adk!Retrievable} from a {@link @nhtio/adk!RawRetrievable}. 
*/\nexport type RetrievableCtor = new (raw: RawRetrievable) => Retrievable\n\n/** A resolver of `T`: the value itself, or a (sync/async) thunk, optionally a module `{ default }`. */\nexport type Resolver<T> = T | (() => T | { default: T }) | (() => Promise<T | { default: T }>)\n\n/**\n * A reader-backed-artifact hook. Called by a converter for content that may be large; the\n * converter passes the artifact constructor it **recommends** for this content (an open\n * {@link @nhtio/adk/forge!ArtifactConstructorResolver}) so the caller can wrap with the right\n * subclass — preserving its forged query tools — using the caller's own core import. Return a\n * {@link @nhtio/adk!SpooledArtifact} to store the content reader-backed, or `undefined` to keep it\n * inline as a string.\n */\nexport type SpoolHook = (\n id: string,\n text: string,\n recommended: ArtifactConstructorResolver\n) => SpooledArtifact | undefined\n\n/** Options common to every converter. */\nexport interface ToRetrievableOptions {\n /**\n * Trust tier for the produced records. Default `'third-party-public'` (web content is\n * third-party by definition — this is a constant, not URL inference).\n */\n trustTier?: RetrievableTrustTier\n /** Semantic `kind` label, e.g. `'web-search-result'`, `'web-article'`, `'web-links'`. */\n kind?: string\n /** Prefix for the stable, hashed record id (namespacing across sources). */\n idPrefix?: string\n /** Optional reader-backed-artifact hook for large content. See {@link SpoolHook}. */\n spool?: SpoolHook\n}\n\n/**\n * The artifact-resolver recommendations a caller may supply so the glue names no concrete class\n * itself. Each converter asks for the relevant key; if the caller omits it, content stays inline.\n */\nexport interface ArtifactRecommendations {\n /** Recommended for plain-text / HTML content (base `SpooledArtifact`). */\n text?: ArtifactConstructorResolver\n /** Recommended for markdown content (`SpooledMarkdownArtifact`). 
*/\n markdown?: ArtifactConstructorResolver\n /** Recommended for JSON content (`SpooledJsonArtifact`). */\n json?: ArtifactConstructorResolver\n}\n\nconst nowIso = (): string => new Date().toISOString()\n\n/** A stable, unguessable id derived from a source string (URL) plus an optional prefix. */\nconst stableId = (prefix: string | undefined, source: string): string => {\n const h = sha256(source)\n return prefix ? `${prefix}:${h}` : h\n}\n\n/** Clamp a possibly-unbounded score into `[0, 1]`; drop non-finite. */\nconst clampScore = (score: unknown): number | undefined => {\n if (typeof score !== 'number' || !Number.isFinite(score)) return undefined\n if (score < 0) return 0\n if (score > 1) return 1\n return score\n}\n\n/**\n * Resolve content to either an inline string or a caller-provided {@link SpooledArtifact}. When a\n * `spool` hook is supplied it is offered the recommended resolver; whatever it returns (artifact or\n * `undefined`→inline) is used.\n */\nconst resolveContent = (\n id: string,\n text: string,\n opts: ToRetrievableOptions,\n recommended: ArtifactConstructorResolver\n): string | SpooledArtifact => {\n if (opts.spool) {\n const artifact = opts.spool(id, text, recommended)\n if (artifact) return artifact\n }\n return text\n}\n\n// ── SearXNG ──────────────────────────────────────────────────────────────────\n\n/** Minimal structural shape of a SearXNG normalised result the converter reads. */\nexport interface SearxngResultLike {\n /** Result URL (becomes the record's `source`). */\n url?: string\n /** Result title (joined into the inline content). */\n title?: string\n /** Result snippet (joined into the inline content). */\n content?: string\n /** Relevance score (clamped to `[0,1]` on the record). */\n score?: number\n}\n/** Minimal structural shape of a SearXNG normalised payload. */\nexport interface SearxngPayloadLike {\n /** The result list. 
*/\n results?: SearxngResultLike[]\n}\n\n/**\n * Convert a SearXNG normalised payload into one {@link @nhtio/adk!RawRetrievable} per result.\n *\n * @remarks\n * Snippets are short, so `content` stays an inline string (the `spool` hook, if any, is still\n * offered the `text` recommendation). `source` is the result URL; `score` is clamped to `[0,1]`.\n *\n * @param payload - The SearXNG normalised payload (`{ results: [{ url, title, content, score }] }`).\n * @param opts - Trust tier, kind, id prefix, optional spool hook.\n * @param recommend - Optional artifact-resolver recommendations (the glue names no class itself).\n * @returns One `RawRetrievable` per result.\n */\nexport const searxngResultsToRetrievables = (\n payload: SearxngPayloadLike,\n opts: ToRetrievableOptions = {},\n recommend: ArtifactRecommendations = {}\n): RawRetrievable[] => {\n const trustTier: RetrievableTrustTier = opts.trustTier ?? 'third-party-public'\n const kind = opts.kind ?? 'web-search-result'\n const created = nowIso()\n const results = payload.results ?? []\n return results.map((r, i) => {\n const source = r.url ?? ''\n const id = stableId(opts.idPrefix, source || `${kind}:${i}`)\n const text = [r.title, r.content].filter((s): s is string => typeof s === 'string').join('\\n')\n const recommended = recommend.text ?? recommend.markdown ?? recommend.json\n const content = recommended ? resolveContent(id, text, opts, recommended) : text\n const raw: RawRetrievable = {\n id,\n content,\n trustTier,\n kind,\n createdAt: created,\n updatedAt: created,\n }\n if (source) raw.source = source\n const score = clampScore(r.score)\n if (score !== undefined) raw.score = score\n return raw\n })\n}\n\n// ── Scrapper: article ──────────────────────────────────────────────────────────\n\n/** Minimal structural shape of a Scrapper normalised article. */\nexport interface ScrapperArticleLike {\n /** The page URL (becomes the record's `source`). */\n url?: string\n /** Article title. 
*/\n title?: string\n /** Article text with HTML stripped (the default content source). */\n textContent?: string\n /** Processed article HTML (the `'content'` content source). */\n content?: string\n}\n\n/** Which article text field to use as the record content. */\nexport type ArticleContentSource = 'textContent' | 'content'\n\n/** Options for {@link scrapperArticleToRetrievable}. */\nexport interface ArticleToRetrievableOptions extends ToRetrievableOptions {\n /** Which field to use as content (default `'textContent'`). `'content'` is HTML. */\n contentSource?: ArticleContentSource\n /**\n * Whether the chosen content is markdown (recommend `markdown`) rather than plain text.\n * Default false. Use when an output pipeline rendered the article to markdown.\n */\n asMarkdown?: boolean\n}\n\n/**\n * Convert a Scrapper normalised article into a single {@link @nhtio/adk!RawRetrievable}.\n *\n * @remarks\n * Long article text is exactly what a reader-backed {@link @nhtio/adk!SpooledArtifact} is for: pass a\n * `spool` hook and the converter offers it the recommended artifact resolver (markdown when\n * `asMarkdown`, else text/HTML) so the model gets the right forged query tools. Without a hook,\n * content stays inline.\n *\n * @param article - The Scrapper normalised article.\n * @param opts - Trust tier, kind, id prefix, content source, markdown flag, optional spool hook.\n * @param recommend - Optional artifact-resolver recommendations.\n * @returns A single `RawRetrievable`.\n */\nexport const scrapperArticleToRetrievable = (\n article: ScrapperArticleLike,\n opts: ArticleToRetrievableOptions = {},\n recommend: ArtifactRecommendations = {}\n): RawRetrievable => {\n const trustTier: RetrievableTrustTier = opts.trustTier ?? 'third-party-public'\n const kind = opts.kind ?? 'web-article'\n const created = nowIso()\n const source = article.url ?? ''\n const id = stableId(opts.idPrefix, source || kind)\n const field = opts.contentSource ?? 
'textContent'\n const text = (field === 'content' ? article.content : article.textContent) ?? ''\n const recommended = opts.asMarkdown ? (recommend.markdown ?? recommend.text) : recommend.text\n const content = recommended ? resolveContent(id, text, opts, recommended) : text\n const raw: RawRetrievable = {\n id,\n content,\n trustTier,\n kind,\n createdAt: created,\n updatedAt: created,\n }\n if (source) raw.source = source\n return raw\n}\n\n// ── Scrapper: links ──────────────────────────────────────────────────────────\n\n/** Minimal structural shape of a Scrapper normalised link. */\nexport interface ScrapperLinkLike {\n /** The link's target URL (becomes the record's `source`). */\n url?: string\n /** The link's anchor text (becomes the record's content). */\n text?: string\n}\n/** Minimal structural shape of a Scrapper normalised links payload. */\nexport interface ScrapperLinksLike {\n /** The page URL the links were collected from. */\n url?: string\n /** The collected links. */\n links?: ScrapperLinkLike[]\n}\n\n/**\n * Convert a Scrapper normalised links payload into one {@link @nhtio/adk!RawRetrievable} per link.\n *\n * @remarks\n * Each link's `text` becomes the (inline) content and its `url` the `source`. Link text is short,\n * so no spooling is applied.\n *\n * @param payload - The Scrapper normalised links payload (`{ links: [{ url, text }] }`).\n * @param opts - Trust tier, kind, id prefix.\n * @returns One `RawRetrievable` per link.\n */\nexport const scrapperLinksToRetrievables = (\n payload: ScrapperLinksLike,\n opts: ToRetrievableOptions = {}\n): RawRetrievable[] => {\n const trustTier: RetrievableTrustTier = opts.trustTier ?? 'third-party-public'\n const kind = opts.kind ?? 'web-link'\n const created = nowIso()\n const links = payload.links ?? []\n return links.map((l, i) => {\n const source = l.url ?? ''\n const id = stableId(opts.idPrefix, source || `${kind}:${i}`)\n const raw: RawRetrievable = {\n id,\n content: l.text ?? 
source,\n trustTier,\n kind,\n createdAt: created,\n updatedAt: created,\n }\n if (source) raw.source = source\n return raw\n })\n}\n\n// ── Store helper (the single core-touching function) ─────────────────────────\n\n/** The minimal context surface {@link storeRetrievables} needs. */\nexport interface RetrievableStoreCtx {\n /** Persist a single `Retrievable` into the turn (a `DispatchContext` method, or a stub). */\n storeRetrievable: (v: Retrievable) => unknown | Promise<unknown>\n}\n\n/**\n * Resolve a {@link Resolver} of the `Retrievable` constructor (sync / async / `{ default }`).\n *\n * @remarks\n * Both a bare class and a resolver are `typeof 'function'`, and we hold `Retrievable` only as an\n * `import type` (no runtime value to duck-type against). We disambiguate by behaviour: invoking a\n * real ES class without `new` throws, so a bare constructor is caught and returned as-is; a resolver\n * invokes cleanly and yields the constructor (possibly via a Promise and/or a `{ default }`).\n */\nconst resolveRetrievableCtor = async (\n resolver: Resolver<RetrievableCtor>\n): Promise<RetrievableCtor> => {\n if (typeof resolver !== 'function') {\n throw new TypeError('retrievable must be a constructor or a resolver returning one')\n }\n let resolved: unknown\n try {\n resolved = (resolver as () => unknown)()\n } catch {\n return resolver as RetrievableCtor // bare class: threw on no-`new` invocation\n }\n if (isInstanceOf(resolved, 'Promise', Promise)) resolved = await resolved\n if (resolved && typeof resolved === 'object' && 'default' in resolved) {\n resolved = (resolved as { default?: unknown }).default\n }\n if (typeof resolved === 'function') return resolved as RetrievableCtor\n return resolver as RetrievableCtor // resolver returned a non-function: it was itself the ctor\n}\n\n/**\n * Construct {@link @nhtio/adk!Retrievable}s from `RawRetrievable`s and store each via `ctx`.\n *\n * @remarks\n * This is the only function here that touches a core class, and 
it does so through an injected\n * **resolver** (`deps.retrievable`) so the glue itself never value-imports `Retrievable`. Each\n * record's `RawRetrievable` validation (including the required `trustTier`) fires at construction.\n * For reader-backed content, the caller's `spool` hook will typically have used\n * `ctx.storeRetrievableBytes` already; this helper just persists the records into the turn.\n *\n * @param ctx - Anything with a `storeRetrievable` method (a `DispatchContext`, or a stub).\n * @param raws - The plain records from the converters.\n * @param deps - `{ retrievable }`: the `Retrievable` constructor or a resolver of it.\n * @returns The constructed `Retrievable` instances, in input order.\n */\nexport const storeRetrievables = async (\n ctx: RetrievableStoreCtx,\n raws: RawRetrievable[],\n deps: { retrievable: Resolver<RetrievableCtor> }\n): Promise<Retrievable[]> => {\n const Ctor = await resolveRetrievableCtor(deps.retrievable)\n const out: Retrievable[] = []\n for (const raw of raws) {\n const record = new Ctor(raw)\n await ctx.storeRetrievable(record)\n out.push(record)\n }\n return 
out\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+EA,IAAM,gCAAuB,IAAI,KAAK,GAAE,YAAY;;AAGpD,IAAM,YAAY,QAA4B,WAA2B;CACvE,MAAM,KAAA,GAAA,UAAA,QAAW,MAAM;CACvB,OAAO,SAAS,GAAG,OAAO,GAAG,MAAM;AACrC;;AAGA,IAAM,cAAc,UAAuC;CACzD,IAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,GAAG,OAAO,KAAA;CACjE,IAAI,QAAQ,GAAG,OAAO;CACtB,IAAI,QAAQ,GAAG,OAAO;CACtB,OAAO;AACT;;;;;;AAOA,IAAM,kBACJ,IACA,MACA,MACA,gBAC6B;CAC7B,IAAI,KAAK,OAAO;EACd,MAAM,WAAW,KAAK,MAAM,IAAI,MAAM,WAAW;EACjD,IAAI,UAAU,OAAO;CACvB;CACA,OAAO;AACT;;;;;;;;;;;;;AAiCA,IAAa,gCACX,SACA,OAA6B,CAAC,GAC9B,YAAqC,CAAC,MACjB;CACrB,MAAM,YAAkC,KAAK,aAAa;CAC1D,MAAM,OAAO,KAAK,QAAQ;CAC1B,MAAM,UAAU,OAAO;CAEvB,QADgB,QAAQ,WAAW,CAAC,GACrB,KAAK,GAAG,MAAM;EAC3B,MAAM,SAAS,EAAE,OAAO;EACxB,MAAM,KAAK,SAAS,KAAK,UAAU,UAAU,GAAG,KAAK,GAAG,GAAG;EAC3D,MAAM,OAAO,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,MAAmB,OAAO,MAAM,QAAQ,EAAE,KAAK,IAAI;EAC7F,MAAM,cAAc,UAAU,QAAQ,UAAU,YAAY,UAAU;EAEtE,MAAM,MAAsB;GAC1B;GACA,SAHc,cAAc,eAAe,IAAI,MAAM,MAAM,WAAW,IAAI;GAI1E;GACA;GACA,WAAW;GACX,WAAW;EACb;EACA,IAAI,QAAQ,IAAI,SAAS;EACzB,MAAM,QAAQ,WAAW,EAAE,KAAK;EAChC,IAAI,UAAU,KAAA,GAAW,IAAI,QAAQ;EACrC,OAAO;CACT,CAAC;AACH;;;;;;;;;;;;;;;AA4CA,IAAa,gCACX,SACA,OAAoC,CAAC,GACrC,YAAqC,CAAC,MACnB;CACnB,MAAM,YAAkC,KAAK,aAAa;CAC1D,MAAM,OAAO,KAAK,QAAQ;CAC1B,MAAM,UAAU,OAAO;CACvB,MAAM,SAAS,QAAQ,OAAO;CAC9B,MAAM,KAAK,SAAS,KAAK,UAAU,UAAU,IAAI;CAEjD,MAAM,SADQ,KAAK,iBAAiB,mBACZ,YAAY,QAAQ,UAAU,QAAQ,gBAAgB;CAC9E,MAAM,cAAc,KAAK,aAAc,UAAU,YAAY,UAAU,OAAQ,UAAU;CAEzF,MAAM,MAAsB;EAC1B;EACA,SAHc,cAAc,eAAe,IAAI,MAAM,MAAM,WAAW,IAAI;EAI1E;EACA;EACA,WAAW;EACX,WAAW;CACb;CACA,IAAI,QAAQ,IAAI,SAAS;CACzB,OAAO;AACT;;;;;;;;;;;;AA8BA,IAAa,+BACX,SACA,OAA6B,CAAC,MACT;CACrB,MAAM,YAAkC,KAAK,aAAa;CAC1D,MAAM,OAAO,KAAK,QAAQ;CAC1B,MAAM,UAAU,OAAO;CAEvB,QADc,QAAQ,SAAS,CAAC,GACnB,KAAK,GAAG,MAAM;EACzB,MAAM,SAAS,EAAE,OAAO;EAExB,MAAM,MAAsB;GAC1B,IAFS,SAAS,KAAK,UAAU,UAAU,GAAG,KAAK,GAAG,GAEtD;GACA,SAAS,EAAE,QAAQ;GACnB;GACA;GACA,WAAW;GACX,WAAW;EACb;EACA,IAAI,QAAQ,IAAI,SAAS;EACzB,OAAO;CACT,CAAC;AACH;;;;;;;;;;AAmBA,IAAM,yBAAyB,OAC7B,aAC6B;CAC7
B,IAAI,OAAO,aAAa,YACtB,MAAM,IAAI,UAAU,+DAA+D;CAErF,IAAI;CACJ,IAAI;EACF,WAAY,SAA2B;CACzC,QAAQ;EACN,OAAO;CACT;CACA,IAAI,sBAAA,aAAa,UAAU,WAAW,OAAO,GAAG,WAAW,MAAM;CACjE,IAAI,YAAY,OAAO,aAAa,YAAY,aAAa,UAC3D,WAAY,SAAmC;CAEjD,IAAI,OAAO,aAAa,YAAY,OAAO;CAC3C,OAAO;AACT;;;;;;;;;;;;;;;;AAiBA,IAAa,oBAAoB,OAC/B,KACA,MACA,SAC2B;CAC3B,MAAM,OAAO,MAAM,uBAAuB,KAAK,WAAW;CAC1D,MAAM,MAAqB,CAAC;CAC5B,KAAK,MAAM,OAAO,MAAM;EACtB,MAAM,SAAS,IAAI,KAAK,GAAG;EAC3B,MAAM,IAAI,iBAAiB,MAAM;EACjC,IAAI,KAAK,MAAM;CACjB;CACA,OAAO;AACT"}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { s as isInstanceOf } from "../../tool_registry-791Vrjtf.mjs";
|
|
2
|
+
import "../../guards.mjs";
|
|
3
|
+
import { sha256 } from "js-sha256";
|
|
4
|
+
//#region src/batteries/tools/web_retrieval/index.ts
|
|
5
|
+
/**
|
|
6
|
+
* RAG glue: turn web-search and web-scrape results into `Retrievable` records for a turn.
|
|
7
|
+
*
|
|
8
|
+
* @module @nhtio/adk/batteries/tools/web_retrieval
|
|
9
|
+
*
|
|
10
|
+
* @remarks
|
|
11
|
+
* The seam from "I searched / scraped something" to "it is in the turn as a `Retrievable`",
|
|
12
|
+
* shared by the SearXNG and Scrapper batteries. It is deliberately **decoupled** from the ADK
|
|
13
|
+
* core at runtime:
|
|
14
|
+
*
|
|
15
|
+
* - The converters are **pure** `(payload) => RawRetrievable[]` — they build plain data objects and
|
|
16
|
+
* never instantiate a core class, so the module's only core coupling is erased `import type`.
|
|
17
|
+
* - The recommended spool-artifact type travels as an **open resolver**
|
|
18
|
+
* ({@link @nhtio/adk/forge!ArtifactConstructorResolver}), never a closed string enum — a consumer's
|
|
19
|
+
* future YAML/HTML `SpooledArtifact` subclass works with no change here. The converter hands the
|
|
20
|
+
* recommendation to the caller's `spool` hook; the caller owns the actual class import.
|
|
21
|
+
* - The one helper that must construct a `Retrievable` ({@link storeRetrievables}) takes the
|
|
22
|
+
* constructor via a **resolver** (constructor / sync / async / dynamic-import), exactly like the
|
|
23
|
+
* vector battery's `createVectorStore` `client`.
|
|
24
|
+
*
|
|
25
|
+
* Web content is `'third-party-public'` by default — a definitional constant for open-web data
|
|
26
|
+
* (NOT inferred from the URL, which CONTRIBUTING Design Decision #12 forbids); override via
|
|
27
|
+
* `trustTier` when you know better.
|
|
28
|
+
*/
|
|
29
|
+
/** Current time as an ISO-8601 string, as produced by `Date.prototype.toISOString`. */
var nowIso = () => {
	const current = new Date();
	return current.toISOString();
};
|
|
30
|
+
/**
 * Build a deterministic record id from a source string (typically a URL).
 *
 * @remarks
 * The id is whatever `sha256(source)` returns, so the same source always maps to the same id.
 * It is stable, not secret: anyone who knows the source can recompute it.
 *
 * @param prefix - Optional namespace; when truthy the id is `<prefix>:<digest>`.
 * @param source - The string to hash.
 * @returns The digest, prefixed when a prefix was given.
 */
var stableId = (prefix, source) => {
	const digest = sha256(source);
	if (!prefix) return digest;
	return `${prefix}:${digest}`;
};
|
|
35
|
+
/**
 * Normalise a relevance score into the closed range `[0, 1]`.
 *
 * @param score - Any value; only finite numbers are accepted.
 * @returns `0` for negative scores, `1` for scores above one, the score itself otherwise, or
 * `undefined` when `score` is not a finite number (NaN, ±Infinity, or a non-number).
 */
var clampScore = (score) => {
	const usable = typeof score === "number" && Number.isFinite(score);
	if (!usable) return void 0;
	return score < 0 ? 0 : score > 1 ? 1 : score;
};
|
|
42
|
+
/**
 * Decide whether a record's content is stored inline or as a caller-built artifact.
 *
 * @remarks
 * When `opts.spool` is set it is called with the id, the text and the recommended artifact
 * resolver. A truthy return value is used as the content; any falsy return keeps the text inline.
 *
 * @param id - The record id handed to the hook.
 * @param text - The content text.
 * @param opts - Converter options; only `spool` is read here.
 * @param recommended - The artifact resolver recommended for this content.
 * @returns The hook's artifact, or `text` when there is no hook or it returned nothing.
 */
var resolveContent = (id, text, opts, recommended) => {
	const spooled = opts.spool ? opts.spool(id, text, recommended) : void 0;
	return spooled ? spooled : text;
};
|
|
54
|
+
/**
 * Map every result of a SearXNG normalised payload to a plain {@link @nhtio/adk!RawRetrievable}.
 *
 * @remarks
 * - `id` comes from `stableId(opts.idPrefix, url)`; a result without a URL uses `<kind>:<index>`
 *   as the hashed source instead.
 * - The text is the string-valued `title` and `content`, joined by a newline.
 * - The first available recommendation (`text`, then `markdown`, then `json`) is passed to
 *   `resolveContent` together with `opts`; with no recommendation at all the text stays inline.
 * - `source` is only set for a non-empty URL, and `score` only when `clampScore` returns a value.
 * - All records of one call share a single `createdAt` / `updatedAt` timestamp.
 *
 * @param payload - The SearXNG normalised payload (`{ results: [{ url, title, content, score }] }`).
 * @param opts - Trust tier (default `'third-party-public'`), kind (default `'web-search-result'`),
 * id prefix, optional spool hook.
 * @param recommend - Optional artifact-resolver recommendations (the glue names no class itself).
 * @returns One `RawRetrievable` per result, in payload order.
 */
var searxngResultsToRetrievables = (payload, opts = {}, recommend = {}) => {
	const tier = opts.trustTier ?? "third-party-public";
	const label = opts.kind ?? "web-search-result";
	const stamp = nowIso();
	const hits = payload.results ?? [];
	return hits.map((hit, position) => {
		const link = hit.url ?? "";
		const id = stableId(opts.idPrefix, link ? link : `${label}:${position}`);
		const pieces = [];
		for (const piece of [hit.title, hit.content]) {
			if (typeof piece === "string") pieces.push(piece);
		}
		const text = pieces.join("\n");
		const artifactType = recommend.text ?? recommend.markdown ?? recommend.json;
		let content = text;
		if (artifactType) content = resolveContent(id, text, opts, artifactType);
		const record = {
			id,
			content,
			trustTier: tier,
			kind: label,
			createdAt: stamp,
			updatedAt: stamp
		};
		if (link) record.source = link;
		const relevance = clampScore(hit.score);
		if (relevance !== void 0) record.score = relevance;
		return record;
	});
};
|
|
89
|
+
/**
 * Convert a Scrapper normalised article into a single {@link @nhtio/adk!RawRetrievable}.
 *
 * @remarks
 * - The content text is `article.content` when `opts.contentSource === 'content'`, otherwise
 *   `article.textContent`; a missing field becomes the empty string.
 * - `id` comes from `stableId(opts.idPrefix, url)`. When the article has no URL, the hashed
 *   source is `<kind>:<text>` so that different URL-less articles do not all share one id, while
 *   the same content still maps to the same id.
 * - The recommendation offered to the `spool` hook (via `resolveContent`) is `recommend.markdown`
 *   (falling back to `recommend.text`) when `opts.asMarkdown` is set, else `recommend.text`. With
 *   no recommendation the content stays inline.
 * - `source` is only set for a non-empty URL.
 *
 * @param article - The Scrapper normalised article.
 * @param opts - Trust tier (default `'third-party-public'`), kind (default `'web-article'`), id
 * prefix, content source, markdown flag, optional spool hook.
 * @param recommend - Optional artifact-resolver recommendations.
 * @returns A single `RawRetrievable`.
 */
var scrapperArticleToRetrievable = (article, opts = {}, recommend = {}) => {
	const trustTier = opts.trustTier ?? "third-party-public";
	const kind = opts.kind ?? "web-article";
	const created = nowIso();
	const source = article.url ?? "";
	const field = opts.contentSource ?? "textContent";
	const text = (field === "content" ? article.content : article.textContent) ?? "";
	// Hashing only `kind` would give every URL-less article the same id; include the text so
	// distinct articles stay distinct.
	const id = stableId(opts.idPrefix, source || `${kind}:${text}`);
	const recommended = opts.asMarkdown ? recommend.markdown ?? recommend.text : recommend.text;
	const raw = {
		id,
		content: recommended ? resolveContent(id, text, opts, recommended) : text,
		trustTier,
		kind,
		createdAt: created,
		updatedAt: created
	};
	if (source) raw.source = source;
	return raw;
};
|
|
122
|
+
/**
 * Convert a Scrapper normalised links payload into one {@link @nhtio/adk!RawRetrievable} per link.
 *
 * @remarks
 * - Each link's `text` becomes the inline content and its `url` the `source`. No spool hook is
 *   consulted.
 * - When the anchor text is missing, empty or whitespace-only, the URL is used as content
 *   instead, so such links do not produce a blank record.
 * - `id` comes from `stableId(opts.idPrefix, url)`; a link without a URL uses `<kind>:<index>` as
 *   the hashed source.
 * - `source` is only set for a non-empty URL, and all records of one call share one timestamp.
 *
 * @param payload - The Scrapper normalised links payload (`{ links: [{ url, text }] }`).
 * @param opts - Trust tier (default `'third-party-public'`), kind (default `'web-link'`), id prefix.
 * @returns One `RawRetrievable` per link, in payload order.
 */
var scrapperLinksToRetrievables = (payload, opts = {}) => {
	const trustTier = opts.trustTier ?? "third-party-public";
	const kind = opts.kind ?? "web-link";
	const created = nowIso();
	return (payload.links ?? []).map((l, i) => {
		const source = l.url ?? "";
		const anchor = l.text ?? source;
		// `??` alone keeps "" and "   "; treat blank anchor text like a missing one.
		const isBlank = typeof anchor === "string" && anchor.trim() === "";
		const raw = {
			id: stableId(opts.idPrefix, source || `${kind}:${i}`),
			content: isBlank ? source : anchor,
			trustTier,
			kind,
			createdAt: created,
			updatedAt: created
		};
		if (source) raw.source = source;
		return raw;
	});
};
|
|
151
|
+
/**
 * Resolve a {@link Resolver} of the `Retrievable` constructor (sync / async / `{ default }`).
 *
 * @remarks
 * A bare class and a resolver thunk are both `typeof 'function'`, so they are told apart by
 * behaviour: the value is invoked without `new`.
 *
 * - If the call throws and the value is constructible, it is taken to be the constructor itself
 *   (an ES class throws when called without `new`) and returned as-is.
 * - If the call throws and the value is NOT constructible (e.g. an arrow-function thunk), the
 *   error is a genuine resolver failure and is rethrown rather than swallowed.
 * - Otherwise the result is awaited when it is a Promise, unwrapped when it is an object with a
 *   `default` key, and returned when it is a function.
 * - If the result is not a function, the value is returned as the constructor only when it is
 *   constructible; a non-constructible thunk that yielded no constructor is rejected.
 *
 * @param resolver - The `Retrievable` constructor, or a thunk returning it (optionally via a
 * Promise and/or a `{ default }` wrapper).
 * @returns The resolved constructor.
 * @throws TypeError when `resolver` is not a function, or is a non-constructible thunk that did
 * not yield a function. Errors thrown by a non-constructible thunk propagate unchanged.
 */
var resolveRetrievableCtor = async (resolver) => {
	const message = "retrievable must be a constructor or a resolver returning one";
	if (typeof resolver !== "function") throw new TypeError(message);
	// `Reflect.construct` throws for a `newTarget` that is not a constructor and never invokes
	// it, so this probes constructibility (including bound classes) without side effects.
	const canConstruct = (fn) => {
		try {
			Reflect.construct(Object, [], fn);
			return true;
		} catch {
			return false;
		}
	};
	let resolved;
	try {
		resolved = resolver();
	} catch (err) {
		if (canConstruct(resolver)) return resolver;
		throw err;
	}
	if (isInstanceOf(resolved, "Promise", Promise)) resolved = await resolved;
	if (resolved && typeof resolved === "object" && "default" in resolved) resolved = resolved.default;
	if (typeof resolved === "function") return resolved;
	// A constructible function that returned a non-function was itself the constructor.
	if (canConstruct(resolver)) return resolver;
	throw new TypeError(message);
};
|
|
173
|
+
/**
 * Build a {@link @nhtio/adk!Retrievable} from each `RawRetrievable` and hand it to `ctx`.
 *
 * @remarks
 * The constructor is obtained once through `resolveRetrievableCtor(deps.retrievable)`, so this
 * module never value-imports `Retrievable` itself. Records are constructed and stored strictly one
 * after another: each `ctx.storeRetrievable` call is awaited before the next record is built.
 * Anything thrown by the constructor or by `ctx.storeRetrievable` propagates to the caller.
 *
 * @param ctx - Anything with a `storeRetrievable` method (a `DispatchContext`, or a stub).
 * @param raws - The plain records from the converters.
 * @param deps - `{ retrievable }`: the `Retrievable` constructor or a resolver of it.
 * @returns The constructed `Retrievable` instances, in input order.
 */
var storeRetrievables = async (ctx, raws, deps) => {
	const RetrievableClass = await resolveRetrievableCtor(deps.retrievable);
	const stored = [];
	for (const entry of raws) {
		const instance = new RetrievableClass(entry);
		stored.push(instance);
		await ctx.storeRetrievable(instance);
	}
	return stored;
};
|
|
198
|
+
//#endregion
|
|
199
|
+
export { scrapperArticleToRetrievable, scrapperLinksToRetrievables, searxngResultsToRetrievables, storeRetrievables };
|
|
200
|
+
|
|
201
|
+
//# sourceMappingURL=web_retrieval.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web_retrieval.mjs","names":[],"sources":["../../../src/batteries/tools/web_retrieval/index.ts"],"sourcesContent":["/**\n * RAG glue: turn web-search and web-scrape results into `Retrievable` records for a turn.\n *\n * @module @nhtio/adk/batteries/tools/web_retrieval\n *\n * @remarks\n * The seam from \"I searched / scraped something\" to \"it is in the turn as a `Retrievable`\",\n * shared by the SearXNG and Scrapper batteries. It is deliberately **decoupled** from the ADK\n * core at runtime:\n *\n * - The converters are **pure** `(payload) => RawRetrievable[]` — they build plain data objects and\n * never instantiate a core class, so the module's only core coupling is erased `import type`.\n * - The recommended spool-artifact type travels as an **open resolver**\n * ({@link @nhtio/adk/forge!ArtifactConstructorResolver}), never a closed string enum — a consumer's\n * future YAML/HTML `SpooledArtifact` subclass works with no change here. The converter hands the\n * recommendation to the caller's `spool` hook; the caller owns the actual class import.\n * - The one helper that must construct a `Retrievable` ({@link storeRetrievables}) takes the\n * constructor via a **resolver** (constructor / sync / async / dynamic-import), exactly like the\n * vector battery's `createVectorStore` `client`.\n *\n * Web content is `'third-party-public'` by default — a definitional constant for open-web data\n * (NOT inferred from the URL, which CONTRIBUTING Design Decision #12 forbids); override via\n * `trustTier` when you know better.\n */\n\nimport { sha256 } from 'js-sha256'\nimport { isInstanceOf } from '@nhtio/adk/guards'\nimport type { SpooledArtifact } from '@nhtio/adk/spooled_artifact'\nimport type { ArtifactConstructorResolver } from '@nhtio/adk/forge'\nimport type { RawRetrievable, Retrievable, RetrievableTrustTier } from '@nhtio/adk/common'\n\n/** A constructor that builds a {@link @nhtio/adk!Retrievable} from a {@link @nhtio/adk!RawRetrievable}. 
*/\nexport type RetrievableCtor = new (raw: RawRetrievable) => Retrievable\n\n/** A resolver of `T`: the value itself, or a (sync/async) thunk, optionally a module `{ default }`. */\nexport type Resolver<T> = T | (() => T | { default: T }) | (() => Promise<T | { default: T }>)\n\n/**\n * A reader-backed-artifact hook. Called by a converter for content that may be large; the\n * converter passes the artifact constructor it **recommends** for this content (an open\n * {@link @nhtio/adk/forge!ArtifactConstructorResolver}) so the caller can wrap with the right\n * subclass — preserving its forged query tools — using the caller's own core import. Return a\n * {@link @nhtio/adk!SpooledArtifact} to store the content reader-backed, or `undefined` to keep it\n * inline as a string.\n */\nexport type SpoolHook = (\n id: string,\n text: string,\n recommended: ArtifactConstructorResolver\n) => SpooledArtifact | undefined\n\n/** Options common to every converter. */\nexport interface ToRetrievableOptions {\n /**\n * Trust tier for the produced records. Default `'third-party-public'` (web content is\n * third-party by definition — this is a constant, not URL inference).\n */\n trustTier?: RetrievableTrustTier\n /** Semantic `kind` label, e.g. `'web-search-result'`, `'web-article'`, `'web-links'`. */\n kind?: string\n /** Prefix for the stable, hashed record id (namespacing across sources). */\n idPrefix?: string\n /** Optional reader-backed-artifact hook for large content. See {@link SpoolHook}. */\n spool?: SpoolHook\n}\n\n/**\n * The artifact-resolver recommendations a caller may supply so the glue names no concrete class\n * itself. Each converter asks for the relevant key; if the caller omits it, content stays inline.\n */\nexport interface ArtifactRecommendations {\n /** Recommended for plain-text / HTML content (base `SpooledArtifact`). */\n text?: ArtifactConstructorResolver\n /** Recommended for markdown content (`SpooledMarkdownArtifact`). 
*/\n markdown?: ArtifactConstructorResolver\n /** Recommended for JSON content (`SpooledJsonArtifact`). */\n json?: ArtifactConstructorResolver\n}\n\nconst nowIso = (): string => new Date().toISOString()\n\n/** A stable, unguessable id derived from a source string (URL) plus an optional prefix. */\nconst stableId = (prefix: string | undefined, source: string): string => {\n const h = sha256(source)\n return prefix ? `${prefix}:${h}` : h\n}\n\n/** Clamp a possibly-unbounded score into `[0, 1]`; drop non-finite. */\nconst clampScore = (score: unknown): number | undefined => {\n if (typeof score !== 'number' || !Number.isFinite(score)) return undefined\n if (score < 0) return 0\n if (score > 1) return 1\n return score\n}\n\n/**\n * Resolve content to either an inline string or a caller-provided {@link SpooledArtifact}. When a\n * `spool` hook is supplied it is offered the recommended resolver; whatever it returns (artifact or\n * `undefined`→inline) is used.\n */\nconst resolveContent = (\n id: string,\n text: string,\n opts: ToRetrievableOptions,\n recommended: ArtifactConstructorResolver\n): string | SpooledArtifact => {\n if (opts.spool) {\n const artifact = opts.spool(id, text, recommended)\n if (artifact) return artifact\n }\n return text\n}\n\n// ── SearXNG ──────────────────────────────────────────────────────────────────\n\n/** Minimal structural shape of a SearXNG normalised result the converter reads. */\nexport interface SearxngResultLike {\n /** Result URL (becomes the record's `source`). */\n url?: string\n /** Result title (joined into the inline content). */\n title?: string\n /** Result snippet (joined into the inline content). */\n content?: string\n /** Relevance score (clamped to `[0,1]` on the record). */\n score?: number\n}\n/** Minimal structural shape of a SearXNG normalised payload. */\nexport interface SearxngPayloadLike {\n /** The result list. 
*/\n results?: SearxngResultLike[]\n}\n\n/**\n * Convert a SearXNG normalised payload into one {@link @nhtio/adk!RawRetrievable} per result.\n *\n * @remarks\n * Snippets are short, so `content` stays an inline string (the `spool` hook, if any, is still\n * offered the `text` recommendation). `source` is the result URL; `score` is clamped to `[0,1]`.\n *\n * @param payload - The SearXNG normalised payload (`{ results: [{ url, title, content, score }] }`).\n * @param opts - Trust tier, kind, id prefix, optional spool hook.\n * @param recommend - Optional artifact-resolver recommendations (the glue names no class itself).\n * @returns One `RawRetrievable` per result.\n */\nexport const searxngResultsToRetrievables = (\n payload: SearxngPayloadLike,\n opts: ToRetrievableOptions = {},\n recommend: ArtifactRecommendations = {}\n): RawRetrievable[] => {\n const trustTier: RetrievableTrustTier = opts.trustTier ?? 'third-party-public'\n const kind = opts.kind ?? 'web-search-result'\n const created = nowIso()\n const results = payload.results ?? []\n return results.map((r, i) => {\n const source = r.url ?? ''\n const id = stableId(opts.idPrefix, source || `${kind}:${i}`)\n const text = [r.title, r.content].filter((s): s is string => typeof s === 'string').join('\\n')\n const recommended = recommend.text ?? recommend.markdown ?? recommend.json\n const content = recommended ? resolveContent(id, text, opts, recommended) : text\n const raw: RawRetrievable = {\n id,\n content,\n trustTier,\n kind,\n createdAt: created,\n updatedAt: created,\n }\n if (source) raw.source = source\n const score = clampScore(r.score)\n if (score !== undefined) raw.score = score\n return raw\n })\n}\n\n// ── Scrapper: article ──────────────────────────────────────────────────────────\n\n/** Minimal structural shape of a Scrapper normalised article. */\nexport interface ScrapperArticleLike {\n /** The page URL (becomes the record's `source`). */\n url?: string\n /** Article title. 
*/\n title?: string\n /** Article text with HTML stripped (the default content source). */\n textContent?: string\n /** Processed article HTML (the `'content'` content source). */\n content?: string\n}\n\n/** Which article text field to use as the record content. */\nexport type ArticleContentSource = 'textContent' | 'content'\n\n/** Options for {@link scrapperArticleToRetrievable}. */\nexport interface ArticleToRetrievableOptions extends ToRetrievableOptions {\n /** Which field to use as content (default `'textContent'`). `'content'` is HTML. */\n contentSource?: ArticleContentSource\n /**\n * Whether the chosen content is markdown (recommend `markdown`) rather than plain text.\n * Default false. Use when an output pipeline rendered the article to markdown.\n */\n asMarkdown?: boolean\n}\n\n/**\n * Convert a Scrapper normalised article into a single {@link @nhtio/adk!RawRetrievable}.\n *\n * @remarks\n * Long article text is exactly what a reader-backed {@link @nhtio/adk!SpooledArtifact} is for: pass a\n * `spool` hook and the converter offers it the recommended artifact resolver (markdown when\n * `asMarkdown`, else text/HTML) so the model gets the right forged query tools. Without a hook,\n * content stays inline.\n *\n * @param article - The Scrapper normalised article.\n * @param opts - Trust tier, kind, id prefix, content source, markdown flag, optional spool hook.\n * @param recommend - Optional artifact-resolver recommendations.\n * @returns A single `RawRetrievable`.\n */\nexport const scrapperArticleToRetrievable = (\n article: ScrapperArticleLike,\n opts: ArticleToRetrievableOptions = {},\n recommend: ArtifactRecommendations = {}\n): RawRetrievable => {\n const trustTier: RetrievableTrustTier = opts.trustTier ?? 'third-party-public'\n const kind = opts.kind ?? 'web-article'\n const created = nowIso()\n const source = article.url ?? ''\n const id = stableId(opts.idPrefix, source || kind)\n const field = opts.contentSource ?? 
'textContent'\n const text = (field === 'content' ? article.content : article.textContent) ?? ''\n const recommended = opts.asMarkdown ? (recommend.markdown ?? recommend.text) : recommend.text\n const content = recommended ? resolveContent(id, text, opts, recommended) : text\n const raw: RawRetrievable = {\n id,\n content,\n trustTier,\n kind,\n createdAt: created,\n updatedAt: created,\n }\n if (source) raw.source = source\n return raw\n}\n\n// ── Scrapper: links ──────────────────────────────────────────────────────────\n\n/** Minimal structural shape of a Scrapper normalised link. */\nexport interface ScrapperLinkLike {\n /** The link's target URL (becomes the record's `source`). */\n url?: string\n /** The link's anchor text (becomes the record's content). */\n text?: string\n}\n/** Minimal structural shape of a Scrapper normalised links payload. */\nexport interface ScrapperLinksLike {\n /** The page URL the links were collected from. */\n url?: string\n /** The collected links. */\n links?: ScrapperLinkLike[]\n}\n\n/**\n * Convert a Scrapper normalised links payload into one {@link @nhtio/adk!RawRetrievable} per link.\n *\n * @remarks\n * Each link's `text` becomes the (inline) content and its `url` the `source`. Link text is short,\n * so no spooling is applied.\n *\n * @param payload - The Scrapper normalised links payload (`{ links: [{ url, text }] }`).\n * @param opts - Trust tier, kind, id prefix.\n * @returns One `RawRetrievable` per link.\n */\nexport const scrapperLinksToRetrievables = (\n payload: ScrapperLinksLike,\n opts: ToRetrievableOptions = {}\n): RawRetrievable[] => {\n const trustTier: RetrievableTrustTier = opts.trustTier ?? 'third-party-public'\n const kind = opts.kind ?? 'web-link'\n const created = nowIso()\n const links = payload.links ?? []\n return links.map((l, i) => {\n const source = l.url ?? ''\n const id = stableId(opts.idPrefix, source || `${kind}:${i}`)\n const raw: RawRetrievable = {\n id,\n content: l.text ?? 
source,\n trustTier,\n kind,\n createdAt: created,\n updatedAt: created,\n }\n if (source) raw.source = source\n return raw\n })\n}\n\n// ── Store helper (the single core-touching function) ─────────────────────────\n\n/** The minimal context surface {@link storeRetrievables} needs. */\nexport interface RetrievableStoreCtx {\n /** Persist a single `Retrievable` into the turn (a `DispatchContext` method, or a stub). */\n storeRetrievable: (v: Retrievable) => unknown | Promise<unknown>\n}\n\n/**\n * Resolve a {@link Resolver} of the `Retrievable` constructor (sync / async / `{ default }`).\n *\n * @remarks\n * Both a bare class and a resolver are `typeof 'function'`, and we hold `Retrievable` only as an\n * `import type` (no runtime value to duck-type against). We disambiguate by behaviour: invoking a\n * real ES class without `new` throws, so a bare constructor is caught and returned as-is; a resolver\n * invokes cleanly and yields the constructor (possibly via a Promise and/or a `{ default }`).\n */\nconst resolveRetrievableCtor = async (\n resolver: Resolver<RetrievableCtor>\n): Promise<RetrievableCtor> => {\n if (typeof resolver !== 'function') {\n throw new TypeError('retrievable must be a constructor or a resolver returning one')\n }\n let resolved: unknown\n try {\n resolved = (resolver as () => unknown)()\n } catch {\n return resolver as RetrievableCtor // bare class: threw on no-`new` invocation\n }\n if (isInstanceOf(resolved, 'Promise', Promise)) resolved = await resolved\n if (resolved && typeof resolved === 'object' && 'default' in resolved) {\n resolved = (resolved as { default?: unknown }).default\n }\n if (typeof resolved === 'function') return resolved as RetrievableCtor\n return resolver as RetrievableCtor // resolver returned a non-function: it was itself the ctor\n}\n\n/**\n * Construct {@link @nhtio/adk!Retrievable}s from `RawRetrievable`s and store each via `ctx`.\n *\n * @remarks\n * This is the only function here that touches a core class, and 
it does so through an injected\n * **resolver** (`deps.retrievable`) so the glue itself never value-imports `Retrievable`. Each\n * record's `RawRetrievable` validation (including the required `trustTier`) fires at construction.\n * For reader-backed content, the caller's `spool` hook will typically have used\n * `ctx.storeRetrievableBytes` already; this helper just persists the records into the turn.\n *\n * @param ctx - Anything with a `storeRetrievable` method (a `DispatchContext`, or a stub).\n * @param raws - The plain records from the converters.\n * @param deps - `{ retrievable }`: the `Retrievable` constructor or a resolver of it.\n * @returns The constructed `Retrievable` instances, in input order.\n */\nexport const storeRetrievables = async (\n ctx: RetrievableStoreCtx,\n raws: RawRetrievable[],\n deps: { retrievable: Resolver<RetrievableCtor> }\n): Promise<Retrievable[]> => {\n const Ctor = await resolveRetrievableCtor(deps.retrievable)\n const out: Retrievable[] = []\n for (const raw of raws) {\n const record = new Ctor(raw)\n await ctx.storeRetrievable(record)\n out.push(record)\n }\n return 
out\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+EA,IAAM,gCAAuB,IAAI,KAAK,GAAE,YAAY;;AAGpD,IAAM,YAAY,QAA4B,WAA2B;CACvE,MAAM,IAAI,OAAO,MAAM;CACvB,OAAO,SAAS,GAAG,OAAO,GAAG,MAAM;AACrC;;AAGA,IAAM,cAAc,UAAuC;CACzD,IAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,GAAG,OAAO,KAAA;CACjE,IAAI,QAAQ,GAAG,OAAO;CACtB,IAAI,QAAQ,GAAG,OAAO;CACtB,OAAO;AACT;;;;;;AAOA,IAAM,kBACJ,IACA,MACA,MACA,gBAC6B;CAC7B,IAAI,KAAK,OAAO;EACd,MAAM,WAAW,KAAK,MAAM,IAAI,MAAM,WAAW;EACjD,IAAI,UAAU,OAAO;CACvB;CACA,OAAO;AACT;;;;;;;;;;;;;AAiCA,IAAa,gCACX,SACA,OAA6B,CAAC,GAC9B,YAAqC,CAAC,MACjB;CACrB,MAAM,YAAkC,KAAK,aAAa;CAC1D,MAAM,OAAO,KAAK,QAAQ;CAC1B,MAAM,UAAU,OAAO;CAEvB,QADgB,QAAQ,WAAW,CAAC,GACrB,KAAK,GAAG,MAAM;EAC3B,MAAM,SAAS,EAAE,OAAO;EACxB,MAAM,KAAK,SAAS,KAAK,UAAU,UAAU,GAAG,KAAK,GAAG,GAAG;EAC3D,MAAM,OAAO,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,MAAmB,OAAO,MAAM,QAAQ,EAAE,KAAK,IAAI;EAC7F,MAAM,cAAc,UAAU,QAAQ,UAAU,YAAY,UAAU;EAEtE,MAAM,MAAsB;GAC1B;GACA,SAHc,cAAc,eAAe,IAAI,MAAM,MAAM,WAAW,IAAI;GAI1E;GACA;GACA,WAAW;GACX,WAAW;EACb;EACA,IAAI,QAAQ,IAAI,SAAS;EACzB,MAAM,QAAQ,WAAW,EAAE,KAAK;EAChC,IAAI,UAAU,KAAA,GAAW,IAAI,QAAQ;EACrC,OAAO;CACT,CAAC;AACH;;;;;;;;;;;;;;;AA4CA,IAAa,gCACX,SACA,OAAoC,CAAC,GACrC,YAAqC,CAAC,MACnB;CACnB,MAAM,YAAkC,KAAK,aAAa;CAC1D,MAAM,OAAO,KAAK,QAAQ;CAC1B,MAAM,UAAU,OAAO;CACvB,MAAM,SAAS,QAAQ,OAAO;CAC9B,MAAM,KAAK,SAAS,KAAK,UAAU,UAAU,IAAI;CAEjD,MAAM,SADQ,KAAK,iBAAiB,mBACZ,YAAY,QAAQ,UAAU,QAAQ,gBAAgB;CAC9E,MAAM,cAAc,KAAK,aAAc,UAAU,YAAY,UAAU,OAAQ,UAAU;CAEzF,MAAM,MAAsB;EAC1B;EACA,SAHc,cAAc,eAAe,IAAI,MAAM,MAAM,WAAW,IAAI;EAI1E;EACA;EACA,WAAW;EACX,WAAW;CACb;CACA,IAAI,QAAQ,IAAI,SAAS;CACzB,OAAO;AACT;;;;;;;;;;;;AA8BA,IAAa,+BACX,SACA,OAA6B,CAAC,MACT;CACrB,MAAM,YAAkC,KAAK,aAAa;CAC1D,MAAM,OAAO,KAAK,QAAQ;CAC1B,MAAM,UAAU,OAAO;CAEvB,QADc,QAAQ,SAAS,CAAC,GACnB,KAAK,GAAG,MAAM;EACzB,MAAM,SAAS,EAAE,OAAO;EAExB,MAAM,MAAsB;GAC1B,IAFS,SAAS,KAAK,UAAU,UAAU,GAAG,KAAK,GAAG,GAEtD;GACA,SAAS,EAAE,QAAQ;GACnB;GACA;GACA,WAAW;GACX,WAAW;EACb;EACA,IAAI,QAAQ,IAAI,SAAS;EACzB,OAAO;CACT,CAAC;AACH;;;;;;;;;;AAmBA,IAAM,yBAAyB,OAC7B,aAC6B;CAC7B,IAAI,OAAO,
aAAa,YACtB,MAAM,IAAI,UAAU,+DAA+D;CAErF,IAAI;CACJ,IAAI;EACF,WAAY,SAA2B;CACzC,QAAQ;EACN,OAAO;CACT;CACA,IAAI,aAAa,UAAU,WAAW,OAAO,GAAG,WAAW,MAAM;CACjE,IAAI,YAAY,OAAO,aAAa,YAAY,aAAa,UAC3D,WAAY,SAAmC;CAEjD,IAAI,OAAO,aAAa,YAAY,OAAO;CAC3C,OAAO;AACT;;;;;;;;;;;;;;;;AAiBA,IAAa,oBAAoB,OAC/B,KACA,MACA,SAC2B;CAC3B,MAAM,OAAO,MAAM,uBAAuB,KAAK,WAAW;CAC1D,MAAM,MAAqB,CAAC;CAC5B,KAAK,MAAM,OAAO,MAAM;EACtB,MAAM,SAAS,IAAI,KAAK,GAAG;EAC3B,MAAM,IAAI,iBAAiB,MAAM;EACjC,IAAI,KAAK,MAAM;CACjB;CACA,OAAO;AACT"}
|
package/batteries/tools.cjs
CHANGED
|
@@ -12,15 +12,18 @@ const require_batteries_tools_math = require("./tools/math.cjs");
|
|
|
12
12
|
const require_batteries_tools_memory = require("./tools/memory.cjs");
|
|
13
13
|
const require_batteries_tools_parsing = require("./tools/parsing.cjs");
|
|
14
14
|
const require_batteries_tools_retrievables = require("./tools/retrievables.cjs");
|
|
15
|
-
const
|
|
15
|
+
const require_scrapper = require("../scrapper-BeweWurk.js");
|
|
16
|
+
const require_searxng = require("../searxng-B_D--V5q.js");
|
|
16
17
|
const require_batteries_tools_standing_instructions = require("./tools/standing_instructions.cjs");
|
|
17
18
|
const require_batteries_tools_statistics = require("./tools/statistics.cjs");
|
|
18
19
|
const require_batteries_tools_string_processing = require("./tools/string_processing.cjs");
|
|
19
20
|
const require_batteries_tools_structured_data = require("./tools/structured_data.cjs");
|
|
20
21
|
const require_batteries_tools_text_analysis = require("./tools/text_analysis.cjs");
|
|
21
22
|
const require_batteries_tools_text_comparison = require("./tools/text_comparison.cjs");
|
|
23
|
+
const require_batteries_tools_web_retrieval = require("./tools/web_retrieval.cjs");
|
|
22
24
|
const require_batteries_tools_time = require("./tools/time.cjs");
|
|
23
25
|
const require_batteries_tools_unit_conversion = require("./tools/unit_conversion.cjs");
|
|
26
|
+
exports.E_INVALID_SCRAPPER_CONFIG = require_scrapper.E_INVALID_SCRAPPER_CONFIG;
|
|
24
27
|
exports.E_INVALID_SEARXNG_CONFIG = require_searxng.E_INVALID_SEARXNG_CONFIG;
|
|
25
28
|
exports.addStandingInstructionTool = require_batteries_tools_standing_instructions.addStandingInstructionTool;
|
|
26
29
|
exports.calculateTool = require_batteries_tools_math.calculateTool;
|
|
@@ -31,7 +34,12 @@ exports.compareRecordsTool = require_batteries_tools_comparison.compareRecordsTo
|
|
|
31
34
|
exports.compareValuesTool = require_batteries_tools_comparison.compareValuesTool;
|
|
32
35
|
exports.convertTimeTool = require_batteries_tools_time.convertTimeTool;
|
|
33
36
|
exports.convertUnitTool = require_batteries_tools_unit_conversion.convertUnitTool;
|
|
37
|
+
exports.createScrapperArticleTool = require_scrapper.createScrapperArticleTool;
|
|
38
|
+
exports.createScrapperArticleToolSync = require_scrapper.createScrapperArticleToolSync;
|
|
39
|
+
exports.createScrapperLinksTool = require_scrapper.createScrapperLinksTool;
|
|
40
|
+
exports.createScrapperLinksToolSync = require_scrapper.createScrapperLinksToolSync;
|
|
34
41
|
exports.createSearxngSearchTool = require_searxng.createSearxngSearchTool;
|
|
42
|
+
exports.createSearxngSearchToolSync = require_searxng.createSearxngSearchToolSync;
|
|
35
43
|
exports.dateAddTool = require_batteries_tools_datetime_math.dateAddTool;
|
|
36
44
|
exports.dateBusinessDaysTool = require_batteries_tools_datetime_extended.dateBusinessDaysTool;
|
|
37
45
|
exports.dateCalendarInfoTool = require_batteries_tools_datetime_extended.dateCalendarInfoTool;
|
|
@@ -63,6 +71,9 @@ exports.parseKvTool = require_batteries_tools_parsing.parseKvTool;
|
|
|
63
71
|
exports.parseYamlTool = require_batteries_tools_parsing.parseYamlTool;
|
|
64
72
|
exports.removeStandingInstructionTool = require_batteries_tools_standing_instructions.removeStandingInstructionTool;
|
|
65
73
|
exports.retrievableTools = require_batteries_tools_retrievables.retrievableTools;
|
|
74
|
+
exports.scrapperArticleToRetrievable = require_batteries_tools_web_retrieval.scrapperArticleToRetrievable;
|
|
75
|
+
exports.scrapperLinksToRetrievables = require_batteries_tools_web_retrieval.scrapperLinksToRetrievables;
|
|
76
|
+
exports.searxngResultsToRetrievables = require_batteries_tools_web_retrieval.searxngResultsToRetrievables;
|
|
66
77
|
exports.setOperationsTool = require_batteries_tools_data_structure.setOperationsTool;
|
|
67
78
|
exports.standingInstructionTools = require_batteries_tools_standing_instructions.standingInstructionTools;
|
|
68
79
|
exports.statsCorrelateTool = require_batteries_tools_statistics.statsCorrelateTool;
|
|
@@ -71,6 +82,7 @@ exports.statsHistogramTool = require_batteries_tools_statistics.statsHistogramTo
|
|
|
71
82
|
exports.statsTransformTool = require_batteries_tools_statistics.statsTransformTool;
|
|
72
83
|
exports.storeMemoryTool = require_batteries_tools_memory.storeMemoryTool;
|
|
73
84
|
exports.storeRetrievableTool = require_batteries_tools_retrievables.storeRetrievableTool;
|
|
85
|
+
exports.storeRetrievables = require_batteries_tools_web_retrieval.storeRetrievables;
|
|
74
86
|
exports.stringExtractTool = require_batteries_tools_string_processing.stringExtractTool;
|
|
75
87
|
exports.stringSimilarityTool = require_batteries_tools_text_comparison.stringSimilarityTool;
|
|
76
88
|
exports.stringTransformTool = require_batteries_tools_string_processing.stringTransformTool;
|
package/batteries/tools.mjs
CHANGED
|
@@ -10,13 +10,15 @@ import { calculateTool, evaluateKatexTool } from "./tools/math.mjs";
|
|
|
10
10
|
import { deleteMemoryTool, listMemoriesTool, memoryTools, storeMemoryTool, updateMemoryTool } from "./tools/memory.mjs";
|
|
11
11
|
import { detectDelimiterTool, parseCsvTool, parseKvTool, parseYamlTool } from "./tools/parsing.mjs";
|
|
12
12
|
import { deleteRetrievableTool, listRetrievablesTool, retrievableTools, storeRetrievableTool, updateRetrievableTool } from "./tools/retrievables.mjs";
|
|
13
|
-
import { n as
|
|
13
|
+
import { a as E_INVALID_SCRAPPER_CONFIG, i as createScrapperLinksToolSync, n as createScrapperArticleToolSync, r as createScrapperLinksTool, t as createScrapperArticleTool } from "../scrapper-BHM1mCde.mjs";
|
|
14
|
+
import { n as createSearxngSearchToolSync, r as E_INVALID_SEARXNG_CONFIG, t as createSearxngSearchTool } from "../searxng-BJFulNcK.mjs";
|
|
14
15
|
import { addStandingInstructionTool, listStandingInstructionsTool, removeStandingInstructionTool, standingInstructionTools } from "./tools/standing_instructions.mjs";
|
|
15
16
|
import { statsCorrelateTool, statsDescribeTool, statsHistogramTool, statsTransformTool } from "./tools/statistics.mjs";
|
|
16
17
|
import { stringExtractTool, stringTransformTool } from "./tools/string_processing.mjs";
|
|
17
18
|
import { formatTableTool, jsonFormatTool, validateFormatTool } from "./tools/structured_data.mjs";
|
|
18
19
|
import { textAnalyzeTool, textLinesTool } from "./tools/text_analysis.mjs";
|
|
19
20
|
import { stringSimilarityTool, textDiffTool } from "./tools/text_comparison.mjs";
|
|
21
|
+
import { scrapperArticleToRetrievable, scrapperLinksToRetrievables, searxngResultsToRetrievables, storeRetrievables } from "./tools/web_retrieval.mjs";
|
|
20
22
|
import { convertTimeTool, getCurrentTimeTool } from "./tools/time.mjs";
|
|
21
23
|
import { convertUnitTool } from "./tools/unit_conversion.mjs";
|
|
22
|
-
export { E_INVALID_SEARXNG_CONFIG, addStandingInstructionTool, calculateTool, colorAdjustTool, colorContrastTool, colorSchemeTool, compareRecordsTool, compareValuesTool, convertTimeTool, convertUnitTool, createSearxngSearchTool, dateAddTool, dateBusinessDaysTool, dateCalendarInfoTool, dateDiffTool, dateNthWeekdayTool, dateParseTool, datePeriodTool, deleteMemoryTool, deleteRetrievableTool, detectDelimiterTool, durationFormatTool, encodeTextTool, evaluateKatexTool, formatListTool, formatNumberTool, formatTableTool, geoBboxContainsTool, geoDistanceTool, geoWithinRadiusTool, getCurrentTimeTool, jsonFormatTool, jsonTransformTool, listMemoriesTool, listRetrievablesTool, listStandingInstructionsTool, memoryTools, parseCsvTool, parseKvTool, parseYamlTool, removeStandingInstructionTool, retrievableTools, setOperationsTool, standingInstructionTools, statsCorrelateTool, statsDescribeTool, statsHistogramTool, statsTransformTool, storeMemoryTool, storeRetrievableTool, stringExtractTool, stringSimilarityTool, stringTransformTool, textAnalyzeTool, textDiffTool, textEscapeTool, textLinesTool, unicodeNormalizeTool, updateMemoryTool, updateRetrievableTool, validateFormatTool };
|
|
24
|
+
export { E_INVALID_SCRAPPER_CONFIG, E_INVALID_SEARXNG_CONFIG, addStandingInstructionTool, calculateTool, colorAdjustTool, colorContrastTool, colorSchemeTool, compareRecordsTool, compareValuesTool, convertTimeTool, convertUnitTool, createScrapperArticleTool, createScrapperArticleToolSync, createScrapperLinksTool, createScrapperLinksToolSync, createSearxngSearchTool, createSearxngSearchToolSync, dateAddTool, dateBusinessDaysTool, dateCalendarInfoTool, dateDiffTool, dateNthWeekdayTool, dateParseTool, datePeriodTool, deleteMemoryTool, deleteRetrievableTool, detectDelimiterTool, durationFormatTool, encodeTextTool, evaluateKatexTool, formatListTool, formatNumberTool, formatTableTool, geoBboxContainsTool, geoDistanceTool, geoWithinRadiusTool, getCurrentTimeTool, jsonFormatTool, jsonTransformTool, listMemoriesTool, listRetrievablesTool, listStandingInstructionsTool, memoryTools, parseCsvTool, parseKvTool, parseYamlTool, removeStandingInstructionTool, retrievableTools, scrapperArticleToRetrievable, scrapperLinksToRetrievables, searxngResultsToRetrievables, setOperationsTool, standingInstructionTools, statsCorrelateTool, statsDescribeTool, statsHistogramTool, statsTransformTool, storeMemoryTool, storeRetrievableTool, storeRetrievables, stringExtractTool, stringSimilarityTool, stringTransformTool, textAnalyzeTool, textDiffTool, textEscapeTool, textLinesTool, unicodeNormalizeTool, updateMemoryTool, updateRetrievableTool, validateFormatTool };
|
package/batteries.cjs
CHANGED
|
@@ -24,13 +24,15 @@ const require_batteries_tools_math = require("./batteries/tools/math.cjs");
|
|
|
24
24
|
const require_batteries_tools_memory = require("./batteries/tools/memory.cjs");
|
|
25
25
|
const require_batteries_tools_parsing = require("./batteries/tools/parsing.cjs");
|
|
26
26
|
const require_batteries_tools_retrievables = require("./batteries/tools/retrievables.cjs");
|
|
27
|
-
const
|
|
27
|
+
const require_scrapper = require("./scrapper-BeweWurk.js");
|
|
28
|
+
const require_searxng = require("./searxng-B_D--V5q.js");
|
|
28
29
|
const require_batteries_tools_standing_instructions = require("./batteries/tools/standing_instructions.cjs");
|
|
29
30
|
const require_batteries_tools_statistics = require("./batteries/tools/statistics.cjs");
|
|
30
31
|
const require_batteries_tools_string_processing = require("./batteries/tools/string_processing.cjs");
|
|
31
32
|
const require_batteries_tools_structured_data = require("./batteries/tools/structured_data.cjs");
|
|
32
33
|
const require_batteries_tools_text_analysis = require("./batteries/tools/text_analysis.cjs");
|
|
33
34
|
const require_batteries_tools_text_comparison = require("./batteries/tools/text_comparison.cjs");
|
|
35
|
+
const require_batteries_tools_web_retrieval = require("./batteries/tools/web_retrieval.cjs");
|
|
34
36
|
const require_batteries_tools_time = require("./batteries/tools/time.cjs");
|
|
35
37
|
const require_batteries_tools_unit_conversion = require("./batteries/tools/unit_conversion.cjs");
|
|
36
38
|
require("./batteries/tools.cjs");
|
|
@@ -58,6 +60,7 @@ exports.CollectionBuilder = require_batteries_vector_schema.CollectionBuilder;
|
|
|
58
60
|
exports.E_INVALID_OLLAMA_OPTIONS = require_batteries_llm_ollama_exceptions.E_INVALID_OLLAMA_OPTIONS;
|
|
59
61
|
exports.E_INVALID_OPENAI_CHAT_COMPLETIONS_OPTIONS = require_batteries_llm_openai_chat_completions_exceptions.E_INVALID_OPENAI_CHAT_COMPLETIONS_OPTIONS;
|
|
60
62
|
exports.E_INVALID_OPENAI_EMBEDDINGS_OPTIONS = require_batteries_embeddings_openai_exceptions.E_INVALID_OPENAI_EMBEDDINGS_OPTIONS;
|
|
63
|
+
exports.E_INVALID_SCRAPPER_CONFIG = require_scrapper.E_INVALID_SCRAPPER_CONFIG;
|
|
61
64
|
exports.E_INVALID_SEARXNG_CONFIG = require_searxng.E_INVALID_SEARXNG_CONFIG;
|
|
62
65
|
exports.E_INVALID_VECTOR_RECORD = require_batteries_vector_exceptions.E_INVALID_VECTOR_RECORD;
|
|
63
66
|
exports.E_INVALID_VECTOR_STORE_CONFIG = require_batteries_vector_exceptions.E_INVALID_VECTOR_STORE_CONFIG;
|
|
@@ -121,7 +124,12 @@ exports.compareValuesTool = require_batteries_tools_comparison.compareValuesTool
|
|
|
121
124
|
exports.convertTimeTool = require_batteries_tools_time.convertTimeTool;
|
|
122
125
|
exports.convertUnitTool = require_batteries_tools_unit_conversion.convertUnitTool;
|
|
123
126
|
exports.createChatCompletionsToolCallDeltaAccumulator = require_batteries_llm_openai_chat_completions_helpers.createChatCompletionsToolCallDeltaAccumulator;
|
|
127
|
+
exports.createScrapperArticleTool = require_scrapper.createScrapperArticleTool;
|
|
128
|
+
exports.createScrapperArticleToolSync = require_scrapper.createScrapperArticleToolSync;
|
|
129
|
+
exports.createScrapperLinksTool = require_scrapper.createScrapperLinksTool;
|
|
130
|
+
exports.createScrapperLinksToolSync = require_scrapper.createScrapperLinksToolSync;
|
|
124
131
|
exports.createSearxngSearchTool = require_searxng.createSearxngSearchTool;
|
|
132
|
+
exports.createSearxngSearchToolSync = require_searxng.createSearxngSearchToolSync;
|
|
125
133
|
exports.createVectorRetrievableCallbacks = require_batteries_vector_retrievable.createVectorRetrievableCallbacks;
|
|
126
134
|
exports.createVectorStore = require_batteries_vector_factory.createVectorStore;
|
|
127
135
|
exports.dateAddTool = require_batteries_tools_datetime_math.dateAddTool;
|
|
@@ -205,6 +213,9 @@ exports.renderTrustedContent = require_helpers.renderTrustedContent;
|
|
|
205
213
|
exports.renderUntrustedContent = require_helpers.renderUntrustedContent;
|
|
206
214
|
exports.resolveClientCtor = require_batteries_vector_validation.resolveClientCtor;
|
|
207
215
|
exports.retrievableTools = require_batteries_tools_retrievables.retrievableTools;
|
|
216
|
+
exports.scrapperArticleToRetrievable = require_batteries_tools_web_retrieval.scrapperArticleToRetrievable;
|
|
217
|
+
exports.scrapperLinksToRetrievables = require_batteries_tools_web_retrieval.scrapperLinksToRetrievables;
|
|
218
|
+
exports.searxngResultsToRetrievables = require_batteries_tools_web_retrieval.searxngResultsToRetrievables;
|
|
208
219
|
exports.setOperationsTool = require_batteries_tools_data_structure.setOperationsTool;
|
|
209
220
|
exports.standingInstructionTools = require_batteries_tools_standing_instructions.standingInstructionTools;
|
|
210
221
|
exports.statsCorrelateTool = require_batteries_tools_statistics.statsCorrelateTool;
|
|
@@ -213,6 +224,7 @@ exports.statsHistogramTool = require_batteries_tools_statistics.statsHistogramTo
|
|
|
213
224
|
exports.statsTransformTool = require_batteries_tools_statistics.statsTransformTool;
|
|
214
225
|
exports.storeMemoryTool = require_batteries_tools_memory.storeMemoryTool;
|
|
215
226
|
exports.storeRetrievableTool = require_batteries_tools_retrievables.storeRetrievableTool;
|
|
227
|
+
exports.storeRetrievables = require_batteries_tools_web_retrieval.storeRetrievables;
|
|
216
228
|
exports.stringExtractTool = require_batteries_tools_string_processing.stringExtractTool;
|
|
217
229
|
exports.stringSimilarityTool = require_batteries_tools_text_comparison.stringSimilarityTool;
|
|
218
230
|
exports.stringTransformTool = require_batteries_tools_string_processing.stringTransformTool;
|