@nhtio/adk 1.20260609.0 → 1.20260609.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +54 -9
  2. package/batteries/tools/_shared/index.d.ts +121 -0
  3. package/batteries/tools/_shared.cjs +157 -0
  4. package/batteries/tools/_shared.cjs.map +1 -0
  5. package/batteries/tools/_shared.mjs +149 -0
  6. package/batteries/tools/_shared.mjs.map +1 -0
  7. package/batteries/tools/index.d.ts +2 -0
  8. package/batteries/tools/scrapper/exceptions.d.ts +21 -0
  9. package/batteries/tools/scrapper/index.d.ts +172 -0
  10. package/batteries/tools/scrapper/shared.d.ts +139 -0
  11. package/batteries/tools/scrapper.cjs +8 -0
  12. package/batteries/tools/scrapper.mjs +2 -0
  13. package/batteries/tools/searxng/index.d.ts +47 -20
  14. package/batteries/tools/searxng.cjs +2 -1
  15. package/batteries/tools/searxng.mjs +2 -2
  16. package/batteries/tools/web_retrieval/index.d.ts +186 -0
  17. package/batteries/tools/web_retrieval.cjs +206 -0
  18. package/batteries/tools/web_retrieval.cjs.map +1 -0
  19. package/batteries/tools/web_retrieval.mjs +201 -0
  20. package/batteries/tools/web_retrieval.mjs.map +1 -0
  21. package/batteries/tools.cjs +13 -1
  22. package/batteries/tools.mjs +4 -2
  23. package/batteries.cjs +13 -1
  24. package/batteries.mjs +4 -2
  25. package/common.d.ts +1 -1
  26. package/eslint/rules.cjs +1 -1
  27. package/eslint/rules.mjs +1 -1
  28. package/eslint.cjs +2 -2
  29. package/eslint.mjs +2 -2
  30. package/index.cjs +2 -2
  31. package/index.mjs +2 -2
  32. package/mcp/adk-docs-corpus.json +1 -1
  33. package/package.json +210 -195
  34. package/scrapper-BHM1mCde.mjs +432 -0
  35. package/scrapper-BHM1mCde.mjs.map +1 -0
  36. package/scrapper-BeweWurk.js +462 -0
  37. package/scrapper-BeweWurk.js.map +1 -0
  38. package/{searxng-CyA-nEu5.mjs → searxng-BJFulNcK.mjs} +74 -84
  39. package/searxng-BJFulNcK.mjs.map +1 -0
  40. package/{searxng-Bkrwhwhw.js → searxng-B_D--V5q.js} +80 -84
  41. package/searxng-B_D--V5q.js.map +1 -0
  42. package/skills/adk-assembly/SKILL.md +2 -2
  43. package/searxng-Bkrwhwhw.js.map +0 -1
  44. package/searxng-CyA-nEu5.mjs.map +0 -1
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Factories for configured Scrapper web-extraction tools (article + links).
3
+ *
4
+ * @module @nhtio/adk/batteries/tools/scrapper
5
+ *
6
+ * @remarks
7
+ * [Scrapper](https://github.com/amerkurev/scrapper) is a self-hosted service that loads a page in a
8
+ * real headless browser and extracts either the readable article (`/api/article`) or the page's
9
+ * links (`/api/links`). It gives an agent browser-grade reading power — JS-rendered pages a
10
+ * renderless fetcher can't see — but as a **stateless** HTTP call: each request runs in a fresh
11
+ * incognito context, stores no session or credentials, and shares nothing with any other call.
12
+ *
13
+ * Like the SearXNG battery, this exports **factories** (not ready-made `Tool` constants), because a
14
+ * scrape tool needs per-deployment config (instance URL + custom auth headers). Two verbs, each with
15
+ * an async factory ({@link createScrapperArticleTool} / {@link createScrapperLinksTool}, accepting a
16
+ * dynamic-import `artifact` resolver) and a sync variant ({@link createScrapperArticleToolSync} /
17
+ * {@link createScrapperLinksToolSync}). Because these are factories, they MUST NOT be bulk-registered
18
+ * via `Object.values(batteries)` — call one, then register the returned tool.
19
+ *
20
+ * @see https://github.com/amerkurev/scrapper
21
+ */
22
+ import { type ScrapperBaseConfig } from "./shared";
23
+ import type { Tool } from "../../../forge";
24
+ import type { ArtifactResolver, SyncArtifactResolver } from "../_shared/index";
25
+ export { E_INVALID_SCRAPPER_CONFIG } from "./exceptions";
26
+ export type { ScrapperRequestContext, ScrapperResponseContext, ScrapperInputMiddlewareFn, ScrapperOutputMiddlewareFn, } from "./shared";
27
+ /** Model-facing params common to both verbs (snake_case; mapped to kebab on the wire). */
28
+ export interface ScrapperCommonParams {
29
+ /** Return a cached result when available instead of re-scraping. */
30
+ cache?: boolean;
31
+ /** Capture a screenshot; the result carries a `screenshotUri`. */
32
+ screenshot?: boolean;
33
+ /** Run in an incognito browser context (no persisted browsing data). Default true upstream. */
34
+ incognito?: boolean;
35
+ /** Browser navigation timeout in ms (`0` disables). Distinct from the tool's own fetch timeout. */
36
+ timeout?: number;
37
+ /** When navigation is considered finished. */
38
+ wait_until?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
39
+ /** Wait this many ms after load before parsing. */
40
+ sleep?: number;
41
+ /** Scroll down N pixels for lazy-loading pages. Requires a positive `sleep`. */
42
+ scroll_down?: number;
43
+ /** Emulated device, e.g. `Desktop Chrome`. Overrides individual viewport/UA settings. */
44
+ device?: string;
45
+ /** Explicit user-agent (prefer `device`). */
46
+ user_agent?: string;
47
+ /** Extra headers the SCRAPER's browser sends to the TARGET site, `K:v;K2:v2` (NOT instance auth). */
48
+ extra_http_headers?: string;
49
+ /** Upstream proxy, e.g. `http://host:3128` or `socks5://host:1080`. */
50
+ proxy_server?: string;
51
+ }
52
+ /** Model-facing params for `/api/article`. */
53
+ export interface ScrapperArticleParams extends ScrapperCommonParams {
54
+ /** Populate `fullContent` with the page's full HTML. */
55
+ full_content?: boolean;
56
+ }
57
+ /** Model-facing params for `/api/links`. */
58
+ export interface ScrapperLinksParams extends ScrapperCommonParams {
59
+ /** Median link-text length threshold for the link parser. */
60
+ text_len_threshold?: number;
61
+ /** Median words-per-link threshold for the link parser. */
62
+ words_threshold?: number;
63
+ }
64
+ /** A normalised Scrapper article (loose/nullable upstream). */
65
+ export interface ScrapperArticle {
66
+ /** The page URL the article was extracted from. */
67
+ url?: string;
68
+ /** Article title. */
69
+ title?: string;
70
+ /** Author / byline metadata. */
71
+ byline?: string;
72
+ /** Short excerpt or description of the article. */
73
+ excerpt?: string;
74
+ /** Name of the site the article came from. */
75
+ siteName?: string;
76
+ /** Detected content language. */
77
+ lang?: string;
78
+ /** Character count of the extracted article text. */
79
+ length?: number;
80
+ /** Publication time, when the page exposed one. */
81
+ publishedTime?: string;
82
+ /** Scrapper's own date field for the result. */
83
+ date?: string;
84
+ /** Article text with HTML stripped. */
85
+ textContent?: string;
86
+ /** Processed article HTML; present when the caller requested it. */
87
+ content?: string;
88
+ /** Full page HTML; present only when `full_content` was set. */
89
+ fullContent?: string;
90
+ /** Screenshot URI; present only when `screenshot` was set. */
91
+ screenshotUri?: string;
92
+ }
93
+ /** A single link from `/api/links` (verified live: `{ url, text }`). */
94
+ export interface ScrapperLink {
95
+ /** The link's target URL. */
96
+ url?: string;
97
+ /** The link's anchor text. */
98
+ text?: string;
99
+ }
100
+ /** A normalised Scrapper links payload. */
101
+ export interface ScrapperLinks {
102
+ /** The page URL the links were collected from. */
103
+ url?: string;
104
+ /** The page title. */
105
+ title?: string;
106
+ /** The page's domain. */
107
+ domain?: string;
108
+ /** Scrapper's own date field for the result. */
109
+ date?: string;
110
+ /** The collected links, each `{ url, text }`. */
111
+ links: ScrapperLink[];
112
+ /** Screenshot URI; present only when `screenshot` was set. */
113
+ screenshotUri?: string;
114
+ }
115
+ /** Async-factory config for `/api/article` (full `artifact` resolver, incl. dynamic import). */
116
+ export type ScrapperArticleConfig = ScrapperBaseConfig<ScrapperArticleParams, ScrapperArticle, ArtifactResolver>;
117
+ /** Sync-factory config for `/api/article` (`artifact` narrowed to the sync subset). */
118
+ export type ScrapperArticleConfigSync = ScrapperBaseConfig<ScrapperArticleParams, ScrapperArticle, SyncArtifactResolver>;
119
+ /** Async-factory config for `/api/links`. */
120
+ export type ScrapperLinksConfig = ScrapperBaseConfig<ScrapperLinksParams, ScrapperLinks, ArtifactResolver>;
121
+ /** Sync-factory config for `/api/links`. */
122
+ export type ScrapperLinksConfigSync = ScrapperBaseConfig<ScrapperLinksParams, ScrapperLinks, SyncArtifactResolver>;
123
+ /**
124
+ * Create a configured Scrapper **article** {@link Tool} (async — accepts a dynamic-import `artifact`).
125
+ *
126
+ * @remarks
127
+ * Async because `artifact` may be an async / dynamic-import resolver, which must resolve to the sync
128
+ * `() => Ctor` that `Tool.artifactConstructor` requires before the tool is built. For the common case,
129
+ * use {@link createScrapperArticleToolSync} and skip the `await`.
130
+ *
131
+ * @warning
132
+ * Two distinct "headers": `config.headers` authenticates to the Scrapper *instance*; the
133
+ * `extra_http_headers` *parameter* is what the scraper's browser sends to the *target site* — do not
134
+ * conflate them. Also note `scroll_down` requires a positive `sleep`, and `resultUri`/`screenshotUri`
135
+ * are instance-relative and may come back `http://` even over HTTPS — do not assume they match
136
+ * `instanceUrl`.
137
+ *
138
+ * @param config - Instance URL, instance-auth headers, output policy, `artifact` resolver,
139
+ * per-parameter disposition (`fixed`/`defaults`/`fixedQuery`), and middleware pipelines.
140
+ * @returns A promise of a `Tool` ready to register in a `ToolRegistry`.
141
+ * @throws {@link E_INVALID_SCRAPPER_CONFIG} when `instanceUrl` or `artifact` is invalid.
142
+ */
143
+ export declare const createScrapperArticleTool: (config: ScrapperArticleConfig) => Promise<Tool>;
144
+ /**
145
+ * Synchronous {@link createScrapperArticleTool} — `artifact` narrowed to the sync subset.
146
+ *
147
+ * @param config - Same as {@link createScrapperArticleTool}, with a sync-only `artifact`.
148
+ * @returns A `Tool` ready to register in a `ToolRegistry`.
149
+ * @throws {@link E_INVALID_SCRAPPER_CONFIG} when `instanceUrl` or `artifact` is invalid (incl. an async resolver).
150
+ */
151
+ export declare const createScrapperArticleToolSync: (config: ScrapperArticleConfigSync) => Tool;
152
+ /**
153
+ * Create a configured Scrapper **links** {@link Tool} (async — accepts a dynamic-import `artifact`).
154
+ *
155
+ * @remarks
156
+ * See {@link createScrapperArticleTool} for the two-headers caveat and the async rationale. Each
157
+ * `links` item is `{ url, text }`.
158
+ *
159
+ * @param config - Instance URL, instance-auth headers, output policy, `artifact` resolver,
160
+ * per-parameter disposition, and middleware pipelines.
161
+ * @returns A promise of a `Tool` ready to register in a `ToolRegistry`.
162
+ * @throws {@link E_INVALID_SCRAPPER_CONFIG} when `instanceUrl` or `artifact` is invalid.
163
+ */
164
+ export declare const createScrapperLinksTool: (config: ScrapperLinksConfig) => Promise<Tool>;
165
+ /**
166
+ * Synchronous {@link createScrapperLinksTool} — `artifact` narrowed to the sync subset.
167
+ *
168
+ * @param config - Same as {@link createScrapperLinksTool}, with a sync-only `artifact`.
169
+ * @returns A `Tool` ready to register in a `ToolRegistry`.
170
+ * @throws {@link E_INVALID_SCRAPPER_CONFIG} when `instanceUrl` or `artifact` is invalid (incl. an async resolver).
171
+ */
172
+ export declare const createScrapperLinksToolSync: (config: ScrapperLinksConfigSync) => Tool;
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Internal core shared by both Scrapper verbs (article / links).
3
+ *
4
+ * @remarks
5
+ * No `@module` tag — this is a sibling of `index.ts`, relative-imported, not its own entrypoint.
6
+ * Houses the per-parameter disposition machinery (schema building from `fixed`/`defaults`), the
7
+ * snake→kebab wire mapping, the request/response contexts, and the `fetch`+pipeline handler core.
8
+ * Generic harness helpers (artifact/header resolution, pipeline runners) come from `../_shared`.
9
+ */
10
+ import { Tool } from "../../../forge";
11
+ import { type ToolHeaders, type ToolHeadersResolver, type SpooledArtifactCtor } from "../_shared/index";
12
+ import type { Schema } from '@nhtio/validation';
13
+ import type { NextFn } from '@nhtio/middleware';
14
+ /** Throw the battery-scoped config error. */
15
+ export declare const failConfig: (reason: string) => never;
16
+ /** The wire type of a Scrapper query parameter — controls serialisation. */
17
+ export type ScrapperParamType = 'string' | 'number' | 'boolean';
18
+ /**
19
+ * One curated, model-facing Scrapper parameter: its snake_case key (used in the model schema and
20
+ * in `fixed`/`defaults`), its kebab-case wire name, its type, the base validator, and a description.
21
+ */
22
+ export interface ScrapperParamSpec {
23
+ /** snake_case key as seen by the model and in `config.fixed` / `config.defaults`. */
24
+ key: string;
25
+ /** kebab-case name sent to the Scrapper API. */
26
+ wire: string;
27
+ /** Wire type, controlling string/number/boolean serialisation. */
28
+ type: ScrapperParamType;
29
+ /** Base `@nhtio/validation` schema (no `.required()`/`.default()`/`.optional()` applied yet). */
30
+ schema: Schema;
31
+ /** Human-readable description surfaced to the model. */
32
+ description: string;
33
+ }
34
+ /**
35
+ * Build the model-facing input schema from a verb's param specs and the factory's disposition.
36
+ * `url` is always required. A `fixed` param is omitted (the model can't set it); a `defaults` param
37
+ * gets `.default(value)`; everything else is `.optional()`.
38
+ */
39
+ export declare const buildScrapperSchema: (specs: ScrapperParamSpec[], fixed: Record<string, unknown> | undefined, defaults: Record<string, unknown> | undefined, extra?: Record<string, Schema>) => Schema;
40
+ /**
41
+ * Assemble the wire-kebab query params for one request: each spec's value is `fixed` (if pinned)
42
+ * else the validated model/default value; then `fixedQuery` raw passthrough is layered on. `url`
43
+ * is handled separately (it is the target page to scrape, never pinned).
44
+ */
45
+ export declare const buildWireParams: (args: Record<string, unknown>, specs: ScrapperParamSpec[], fixed: Record<string, unknown> | undefined, fixedQuery: Record<string, string> | undefined) => Record<string, string>;
46
+ /**
47
+ * Mutable context handed to each input-pipeline stage **before** the HTTP request is sent.
48
+ * Identical for both verbs.
49
+ */
50
+ export interface ScrapperRequestContext {
51
+ /** The tool's name (read-only). */
52
+ readonly toolName: string;
53
+ /** The target page URL (the `url` argument). Mutable. */
54
+ url: string;
55
+ /** Wire-kebab query params (everything except `url`). Mutable. */
56
+ params: Record<string, string>;
57
+ /** Resolved request headers sent to the SCRAPPER INSTANCE (auth). Mutable. */
58
+ headers: ToolHeaders;
59
+ /** The Scrapper instance base URL (read-only). */
60
+ readonly instanceUrl: string;
61
+ /** Cross-stage scratch space; also carried onto the response context. */
62
+ readonly stash: Map<string, unknown>;
63
+ /** Skip the fetch and return `result` verbatim as the tool's output (e.g. a cache hit). */
64
+ shortCircuit(result: string): void;
65
+ }
66
+ /**
67
+ * Mutable context handed to each output-pipeline stage **after** the response JSON is parsed.
68
+ *
69
+ * @typeParam R - The verb's normalised result type (article object or links payload).
70
+ */
71
+ export interface ScrapperResponseContext<R> {
72
+ /** The tool's name (read-only). */
73
+ readonly toolName: string;
74
+ /** The request context as it was sent (post-input-pipeline). */
75
+ readonly request: ScrapperRequestContext;
76
+ /** The parsed Scrapper JSON body. Mutable (used when `format` is `raw`). */
77
+ raw: unknown;
78
+ /** The normalised result. Mutable — reshape, redact, enrich. */
79
+ result: R;
80
+ /** The effective payload shape for this call. */
81
+ format: 'normalized' | 'raw';
82
+ /** When set, used verbatim as the tool's output (overrides serialisation). */
83
+ output?: string;
84
+ /** Cross-stage scratch space; carried over from the request context. */
85
+ readonly stash: Map<string, unknown>;
86
+ }
87
+ /** An input-pipeline stage. Onion middleware over {@link ScrapperRequestContext}. */
88
+ export type ScrapperInputMiddlewareFn = (ctx: ScrapperRequestContext, next: NextFn) => void | Promise<void>;
89
+ /** An output-pipeline stage over a verb's {@link ScrapperResponseContext}. */
90
+ export type ScrapperOutputMiddlewareFn<R> = (ctx: ScrapperResponseContext<R>, next: NextFn) => void | Promise<void>;
91
+ /** Configuration common to every Scrapper factory. `A` is the accepted `artifact` resolver type. */
92
+ export interface ScrapperBaseConfig<P, R, A> {
93
+ /** Base URL of the Scrapper instance, e.g. `https://scrapper.example.org`. Required. */
94
+ instanceUrl: string;
95
+ /** Headers sent to the Scrapper INSTANCE for auth (X-API-Key / Basic) — static or resolver. */
96
+ headers?: ToolHeaders | ToolHeadersResolver;
97
+ /** The tool's own `fetch` AbortController timeout in ms. Default `65_000` (> Scrapper's 60s browser default). */
98
+ requestTimeoutMs?: number;
99
+ /** Output shape. `normalized`/`raw` pin it; `either` (default) exposes a `format` arg to the model. */
100
+ resultFormat?: 'normalized' | 'raw' | 'either';
101
+ /** Spool-artifact resolver for the output. Default `() => SpooledJsonArtifact`. */
102
+ artifact?: A;
103
+ /** Tool name override. */
104
+ name?: string;
105
+ /** Tool description override. */
106
+ description?: string;
107
+ /** Pinned params — sent always, removed from the model schema. */
108
+ fixed?: Partial<P>;
109
+ /** Model-overridable default param values. */
110
+ defaults?: Partial<P>;
111
+ /** Raw, un-modeled wire params (kebab keys) — always sent, never model-visible. Keeps the battery generic. */
112
+ fixedQuery?: Record<string, string>;
113
+ /** Stages run before the HTTP request. See {@link ScrapperRequestContext}. */
114
+ inputPipeline?: ScrapperInputMiddlewareFn[];
115
+ /** Stages run after the response is parsed. See {@link ScrapperResponseContext}. */
116
+ outputPipeline?: ScrapperOutputMiddlewareFn<R>[];
117
+ }
118
+ /** Verb-specific wiring passed to {@link assembleScrapperTool}. */
119
+ export interface ScrapperVerb<R> {
120
+ /** Scrapper endpoint path, e.g. `/api/article`. */
121
+ endpoint: string;
122
+ /** The curated param specs for this verb. */
123
+ specs: ScrapperParamSpec[];
124
+ /** Default tool name (`scrapper_article` / `scrapper_links`). */
125
+ defaultName: string;
126
+ /** Default tool description. */
127
+ defaultDescription: string;
128
+ /** Map a parsed Scrapper body to the verb's normalised result. */
129
+ normalize: (body: Record<string, unknown>) => R;
130
+ }
131
+ /**
132
+ * Build a configured Scrapper {@link Tool} from validated config + an already-resolved sync
133
+ * artifact constructor. Shared by every verb and by both the async and sync factories.
134
+ */
135
+ export declare const assembleScrapperTool: <P, R>(verb: ScrapperVerb<R>, config: ScrapperBaseConfig<P, R, unknown>, instanceUrl: string, artifactConstructor: () => SpooledArtifactCtor) => Tool;
136
+ /** Validate `instanceUrl` and return the trailing-slash-normalised base. */
137
+ export declare const validateScrapperInstanceUrl: (config: {
138
+ instanceUrl?: string;
139
+ }) => string;
@@ -0,0 +1,8 @@
1
+ Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
+ require("../../chunk-Ble4zEEl.js");
3
+ const require_scrapper = require("../../scrapper-BeweWurk.js");
4
+ exports.E_INVALID_SCRAPPER_CONFIG = require_scrapper.E_INVALID_SCRAPPER_CONFIG;
5
+ exports.createScrapperArticleTool = require_scrapper.createScrapperArticleTool;
6
+ exports.createScrapperArticleToolSync = require_scrapper.createScrapperArticleToolSync;
7
+ exports.createScrapperLinksTool = require_scrapper.createScrapperLinksTool;
8
+ exports.createScrapperLinksToolSync = require_scrapper.createScrapperLinksToolSync;
@@ -0,0 +1,2 @@
1
+ import { a as E_INVALID_SCRAPPER_CONFIG, i as createScrapperLinksToolSync, n as createScrapperArticleToolSync, r as createScrapperLinksTool, t as createScrapperArticleTool } from "../../scrapper-BHM1mCde.mjs";
2
+ export { E_INVALID_SCRAPPER_CONFIG, createScrapperArticleTool, createScrapperArticleToolSync, createScrapperLinksTool, createScrapperLinksToolSync };
@@ -5,18 +5,19 @@
5
5
  *
6
6
  * @remarks
7
7
  * Unlike the other bundled tool categories — every one of which exports a ready-made,
8
- * stateless `Tool` constant — the SearXNG battery exports a **factory**,
9
- * {@link createSearxngSearchTool}. A search tool has to talk to a *specific* SearXNG instance,
10
- * usually behind custom authentication, so it needs per-deployment configuration (a base URL
11
- * and headers) that cannot be baked in at module load.
8
+ * stateless `Tool` constant — the SearXNG battery exports **factories**,
9
+ * {@link createSearxngSearchTool} (async) and {@link createSearxngSearchToolSync}. A search tool
10
+ * has to talk to a *specific* SearXNG instance, usually behind custom authentication, so it needs
11
+ * per-deployment configuration (a base URL and headers) that cannot be baked in at module load.
12
12
  *
13
- * Because this module exports a factory rather than a `Tool` instance, it MUST NOT be
14
- * bulk-registered via `Object.values(batteries)`. Call the factory first, then register the
15
- * returned tool: `new ToolRegistry([createSearxngSearchTool({ instanceUrl })])`.
13
+ * Because this module exports factories rather than `Tool` instances, they MUST NOT be
14
+ * bulk-registered via `Object.values(batteries)`. Call a factory first, then register the
15
+ * returned tool: `new ToolRegistry([await createSearxngSearchTool({ instanceUrl })])`.
16
16
  *
17
17
  * @see https://docs.searxng.org/dev/search_api.html
18
18
  */
19
- import { Tool, type ArtifactConstructorResolver } from "../../../common";
19
+ import { Tool } from "../../../forge";
20
+ import { type ArtifactResolver, type SyncArtifactResolver } from "../_shared/index";
20
21
  import type { NextFn } from '@nhtio/middleware';
21
22
  export { E_INVALID_SEARXNG_CONFIG } from "./exceptions";
22
23
  /** A static set of request headers (used for custom authentication). */
@@ -76,7 +77,7 @@ export interface SearxngRequestContext {
76
77
  * @remarks
77
78
  * Stages reshape, redact, enrich, or re-rank {@link SearxngResponseContext.results}, mutate the
78
79
  * raw body, or set {@link SearxngResponseContext.output} to override the serialised string
79
- * verbatim (e.g. to render markdown that matches a markdown `artifactConstructor`).
80
+ * verbatim (e.g. to render markdown that matches a markdown `artifact` resolver).
80
81
  */
81
82
  export interface SearxngResponseContext {
82
83
  /** The tool's name (read-only). */
@@ -98,8 +99,14 @@ export interface SearxngResponseContext {
98
99
  export type SearxngInputMiddlewareFn = (ctx: SearxngRequestContext, next: NextFn) => void | Promise<void>;
99
100
  /** An output-pipeline stage. Onion middleware over {@link SearxngResponseContext}. */
100
101
  export type SearxngOutputMiddlewareFn = (ctx: SearxngResponseContext, next: NextFn) => void | Promise<void>;
101
- /** Configuration for {@link createSearxngSearchTool}. */
102
- export interface SearxngToolConfig {
102
+ /**
103
+ * Configuration for {@link createSearxngSearchTool} (async) and
104
+ * {@link createSearxngSearchToolSync} (sync — `artifact` narrowed to the sync subset).
105
+ *
106
+ * @typeParam A - The {@link ArtifactResolver} variant accepted: the full resolver (async factory)
107
+ * or the sync subset ({@link createSearxngSearchToolSync}).
108
+ */
109
+ export interface SearxngToolConfig<A = ArtifactResolver> {
103
110
  /** Base URL of the SearXNG instance, e.g. `https://searx.example.org`. Required. */
104
111
  instanceUrl: string;
105
112
  /** Custom request headers — a static object or a (sync/async) resolver for refreshable auth. */
@@ -116,20 +123,26 @@ export interface SearxngToolConfig {
116
123
  /** Tool description override. */
117
124
  description?: string;
118
125
  /**
119
- * Spool artifact constructor for the tool's output. Default `() => SpooledJsonArtifact`.
120
- * Pass `() => SpooledMarkdownArtifact` (paired with an output stage that renders markdown into
121
- * `ctx.output`) or `() => SpooledArtifact` for plain text.
126
+ * Spool-artifact resolver for the tool's output. Default `() => SpooledJsonArtifact`. Accepts a
127
+ * constructor, a sync resolver, or — via {@link createSearxngSearchTool} — an async /
128
+ * dynamic-import resolver. Pass `() => SpooledMarkdownArtifact` (paired with an output stage that
129
+ * renders markdown into `ctx.output`) or `() => SpooledArtifact` for plain text.
122
130
  */
123
- artifactConstructor?: ArtifactConstructorResolver;
131
+ artifact?: A;
124
132
  /** Stages run before the HTTP request. See {@link SearxngRequestContext}. */
125
133
  inputPipeline?: SearxngInputMiddlewareFn[];
126
134
  /** Stages run after the response is parsed. See {@link SearxngResponseContext}. */
127
135
  outputPipeline?: SearxngOutputMiddlewareFn[];
128
136
  }
129
137
  /**
130
- * Create a configured SearXNG search {@link Tool}.
138
+ * Create a configured SearXNG search {@link Tool} (async — accepts a dynamic-import `artifact`).
131
139
  *
132
140
  * @remarks
141
+ * Async because `artifact` may be an async / dynamic-import resolver, which must be resolved to the
142
+ * sync `() => Ctor` that `Tool.artifactConstructor` requires before the tool is built (the
143
+ * wrap-site invokes it synchronously). For the common case where you reference the artifact class
144
+ * directly, use {@link createSearxngSearchToolSync} and skip the `await`.
145
+ *
133
146
  * The handler always requests `format=json`. Note that SearXNG ships with JSON output
134
147
  * **disabled** by default (it is abused by bots); an instance that has not enabled
135
148
  * `search.formats: [json]` in its `settings.yml` answers with HTTP 403, which the tool returns
@@ -142,9 +155,23 @@ export interface SearxngToolConfig {
142
155
  * {@link https://github.com/searxng/searxng/issues/2457 | searxng#2457}). The tool passes the
143
156
  * field through verbatim; use `results.length` as the authoritative count.
144
157
  *
145
- * @param config - The instance URL, optional custom headers, output-format policy, artifact
146
- * type, and input/output middleware pipelines. See {@link SearxngToolConfig}.
158
+ * @param config - The instance URL, optional custom headers, output-format policy, `artifact`
159
+ * resolver, and input/output middleware pipelines. See {@link SearxngToolConfig}.
160
+ * @returns A promise of a `Tool` ready to register in a `ToolRegistry`.
161
+ * @throws {@link E_INVALID_SEARXNG_CONFIG} when `instanceUrl` or `artifact` is invalid.
162
+ */
163
+ export declare const createSearxngSearchTool: (config: SearxngToolConfig<ArtifactResolver>) => Promise<Tool>;
164
+ /**
165
+ * Synchronous {@link createSearxngSearchTool} — the ergonomic common path.
166
+ *
167
+ * @remarks
168
+ * `artifact` is narrowed to the sync subset (a constructor or a sync resolver). Passing an async
169
+ * resolver is a compile-time type error and a runtime {@link E_INVALID_SEARXNG_CONFIG}; for
170
+ * dynamic-import resolvers use the async {@link createSearxngSearchTool}. See its docs for the
171
+ * `number_of_results` caveat and 403/JSON-disabled behaviour.
172
+ *
173
+ * @param config - Same as {@link SearxngToolConfig}, with `artifact` restricted to the sync subset.
147
174
  * @returns A `Tool` ready to register in a `ToolRegistry`.
148
- * @throws {@link E_INVALID_SEARXNG_CONFIG} when `instanceUrl` is missing or unparseable.
175
+ * @throws {@link E_INVALID_SEARXNG_CONFIG} when `instanceUrl` or `artifact` is invalid (incl. an async resolver).
149
176
  */
150
- export declare const createSearxngSearchTool: (config: SearxngToolConfig) => Tool;
177
+ export declare const createSearxngSearchToolSync: (config: SearxngToolConfig<SyncArtifactResolver>) => Tool;
@@ -1,5 +1,6 @@
1
1
  Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
2
  require("../../chunk-Ble4zEEl.js");
3
- const require_searxng = require("../../searxng-Bkrwhwhw.js");
3
+ const require_searxng = require("../../searxng-B_D--V5q.js");
4
4
  exports.E_INVALID_SEARXNG_CONFIG = require_searxng.E_INVALID_SEARXNG_CONFIG;
5
5
  exports.createSearxngSearchTool = require_searxng.createSearxngSearchTool;
6
+ exports.createSearxngSearchToolSync = require_searxng.createSearxngSearchToolSync;
@@ -1,2 +1,2 @@
1
- import { n as E_INVALID_SEARXNG_CONFIG, t as createSearxngSearchTool } from "../../searxng-CyA-nEu5.mjs";
2
- export { E_INVALID_SEARXNG_CONFIG, createSearxngSearchTool };
1
+ import { n as createSearxngSearchToolSync, r as E_INVALID_SEARXNG_CONFIG, t as createSearxngSearchTool } from "../../searxng-BJFulNcK.mjs";
2
+ export { E_INVALID_SEARXNG_CONFIG, createSearxngSearchTool, createSearxngSearchToolSync };
@@ -0,0 +1,186 @@
1
+ /**
2
+ * RAG glue: turn web-search and web-scrape results into `Retrievable` records for a turn.
3
+ *
4
+ * @module @nhtio/adk/batteries/tools/web_retrieval
5
+ *
6
+ * @remarks
7
+ * The seam from "I searched / scraped something" to "it is in the turn as a `Retrievable`",
8
+ * shared by the SearXNG and Scrapper batteries. It is deliberately **decoupled** from the ADK
9
+ * core at runtime:
10
+ *
11
+ * - The converters are **pure** `(payload) => RawRetrievable[]` — they build plain data objects and
12
+ * never instantiate a core class, so the module's only core coupling is erased `import type`.
13
+ * - The recommended spool-artifact type travels as an **open resolver**
14
+ * ({@link @nhtio/adk/forge!ArtifactConstructorResolver}), never a closed string enum — a consumer's
15
+ * future YAML/HTML `SpooledArtifact` subclass works with no change here. The converter hands the
16
+ * recommendation to the caller's `spool` hook; the caller owns the actual class import.
17
+ * - The one helper that must construct a `Retrievable` ({@link storeRetrievables}) takes the
18
+ * constructor via a **resolver** (constructor / sync / async / dynamic-import), exactly like the
19
+ * vector battery's `createVectorStore` `client`.
20
+ *
21
+ * Web content is `'third-party-public'` by default — a definitional constant for open-web data
22
+ * (NOT inferred from the URL, which CONTRIBUTING Design Decision #12 forbids); override via
23
+ * `trustTier` when you know better.
24
+ */
25
+ import type { SpooledArtifact } from "../../../spooled_artifact";
26
+ import type { ArtifactConstructorResolver } from "../../../forge";
27
+ import type { RawRetrievable, Retrievable, RetrievableTrustTier } from "../../../common";
28
+ /** A constructor that builds a {@link @nhtio/adk!Retrievable} from a {@link @nhtio/adk!RawRetrievable}. */
29
+ export type RetrievableCtor = new (raw: RawRetrievable) => Retrievable;
30
+ /** A resolver of `T`: the value itself, or a (sync/async) thunk, optionally a module `{ default }`. */
31
+ export type Resolver<T> = T | (() => T | {
32
+ default: T;
33
+ }) | (() => Promise<T | {
34
+ default: T;
35
+ }>);
36
+ /**
37
+ * A reader-backed-artifact hook. Called by a converter for content that may be large; the
38
+ * converter passes the artifact constructor it **recommends** for this content (an open
39
+ * {@link @nhtio/adk/forge!ArtifactConstructorResolver}) so the caller can wrap with the right
40
+ * subclass — preserving its forged query tools — using the caller's own core import. Return a
41
+ * {@link @nhtio/adk!SpooledArtifact} to store the content reader-backed, or `undefined` to keep it
42
+ * inline as a string.
43
+ */
44
+ export type SpoolHook = (id: string, text: string, recommended: ArtifactConstructorResolver) => SpooledArtifact | undefined;
45
+ /** Options common to every converter. */
46
+ export interface ToRetrievableOptions {
47
+ /**
48
+ * Trust tier for the produced records. Default `'third-party-public'` (web content is
49
+ * third-party by definition — this is a constant, not URL inference).
50
+ */
51
+ trustTier?: RetrievableTrustTier;
52
+ /** Semantic `kind` label, e.g. `'web-search-result'`, `'web-article'`, `'web-links'`. */
53
+ kind?: string;
54
+ /** Prefix for the stable, hashed record id (namespacing across sources). */
55
+ idPrefix?: string;
56
+ /** Optional reader-backed-artifact hook for large content. See {@link SpoolHook}. */
57
+ spool?: SpoolHook;
58
+ }
59
+ /**
60
+ * The artifact-resolver recommendations a caller may supply so the glue names no concrete class
61
+ * itself. Each converter asks for the relevant key; if the caller omits it, content stays inline.
62
+ */
63
+ export interface ArtifactRecommendations {
64
+ /** Recommended for plain-text / HTML content (base `SpooledArtifact`). */
65
+ text?: ArtifactConstructorResolver;
66
+ /** Recommended for markdown content (`SpooledMarkdownArtifact`). */
67
+ markdown?: ArtifactConstructorResolver;
68
+ /** Recommended for JSON content (`SpooledJsonArtifact`). */
69
+ json?: ArtifactConstructorResolver;
70
+ }
71
+ /** Minimal structural shape of a SearXNG normalised result the converter reads. */
72
+ export interface SearxngResultLike {
73
+ /** Result URL (becomes the record's `source`). */
74
+ url?: string;
75
+ /** Result title (joined into the inline content). */
76
+ title?: string;
77
+ /** Result snippet (joined into the inline content). */
78
+ content?: string;
79
+ /** Relevance score (clamped to `[0,1]` on the record). */
80
+ score?: number;
81
+ }
82
+ /** Minimal structural shape of a SearXNG normalised payload. */
83
+ export interface SearxngPayloadLike {
84
+ /** The result list. */
85
+ results?: SearxngResultLike[];
86
+ }
87
+ /**
88
+ * Convert a SearXNG normalised payload into one {@link @nhtio/adk!RawRetrievable} per result.
89
+ *
90
+ * @remarks
91
+ * Snippets are short, so `content` stays an inline string (the `spool` hook, if any, is still
92
+ * offered the `text` recommendation). `source` is the result URL; `score` is clamped to `[0,1]`.
93
+ *
94
+ * @param payload - The SearXNG normalised payload (`{ results: [{ url, title, content, score }] }`).
95
+ * @param opts - Trust tier, kind, id prefix, optional spool hook.
96
+ * @param recommend - Optional artifact-resolver recommendations (the glue names no class itself).
97
+ * @returns One `RawRetrievable` per result.
98
+ */
99
+ export declare const searxngResultsToRetrievables: (payload: SearxngPayloadLike, opts?: ToRetrievableOptions, recommend?: ArtifactRecommendations) => RawRetrievable[];
100
+ /** Minimal structural shape of a Scrapper normalised article. */
101
+ export interface ScrapperArticleLike {
102
+ /** The page URL (becomes the record's `source`). */
103
+ url?: string;
104
+ /** Article title. */
105
+ title?: string;
106
+ /** Article text with HTML stripped (the default content source). */
107
+ textContent?: string;
108
+ /** Processed article HTML (the `'content'` content source). */
109
+ content?: string;
110
+ }
111
+ /** Which article text field to use as the record content. */
112
+ export type ArticleContentSource = 'textContent' | 'content';
113
+ /** Options for {@link scrapperArticleToRetrievable}. */
114
+ export interface ArticleToRetrievableOptions extends ToRetrievableOptions {
115
+ /** Which field to use as content (default `'textContent'`). `'content'` is HTML. */
116
+ contentSource?: ArticleContentSource;
117
+ /**
118
+ * Whether the chosen content is markdown (recommend `markdown`) rather than plain text.
119
+ * Default false. Use when an output pipeline rendered the article to markdown.
120
+ */
121
+ asMarkdown?: boolean;
122
+ }
123
+ /**
124
+ * Convert a Scrapper normalised article into a single {@link @nhtio/adk!RawRetrievable}.
125
+ *
126
+ * @remarks
127
+ * Long article text is exactly what a reader-backed {@link @nhtio/adk!SpooledArtifact} is for: pass a
128
+ * `spool` hook and the converter offers it the recommended artifact resolver (markdown when
129
+ * `asMarkdown`, else text/HTML) so the model gets the right forged query tools. Without a hook,
130
+ * content stays inline.
131
+ *
132
+ * @param article - The Scrapper normalised article.
133
+ * @param opts - Trust tier, kind, id prefix, content source, markdown flag, optional spool hook.
134
+ * @param recommend - Optional artifact-resolver recommendations.
135
+ * @returns A single `RawRetrievable`.
136
+ */
137
+ export declare const scrapperArticleToRetrievable: (article: ScrapperArticleLike, opts?: ArticleToRetrievableOptions, recommend?: ArtifactRecommendations) => RawRetrievable;
138
+ /** Minimal structural shape of a Scrapper normalised link. */
139
+ export interface ScrapperLinkLike {
140
+ /** The link's target URL (becomes the record's `source`). */
141
+ url?: string;
142
+ /** The link's anchor text (becomes the record's content). */
143
+ text?: string;
144
+ }
145
+ /** Minimal structural shape of a Scrapper normalised links payload. */
146
+ export interface ScrapperLinksLike {
147
+ /** The page URL the links were collected from. */
148
+ url?: string;
149
+ /** The collected links. */
150
+ links?: ScrapperLinkLike[];
151
+ }
152
+ /**
153
+ * Convert a Scrapper normalised links payload into one {@link @nhtio/adk!RawRetrievable} per link.
154
+ *
155
+ * @remarks
156
+ * Each link's `text` becomes the (inline) content and its `url` the `source`. Link text is short,
157
+ * so no spooling is applied.
158
+ *
159
+ * @param payload - The Scrapper normalised links payload (`{ links: [{ url, text }] }`).
160
+ * @param opts - Trust tier, kind, id prefix.
161
+ * @returns One `RawRetrievable` per link.
162
+ */
163
+ export declare const scrapperLinksToRetrievables: (payload: ScrapperLinksLike, opts?: ToRetrievableOptions) => RawRetrievable[];
164
+ /** The minimal context surface {@link storeRetrievables} needs. */
165
+ export interface RetrievableStoreCtx {
166
+ /** Persist a single `Retrievable` into the turn (a `DispatchContext` method, or a stub). */
167
+ storeRetrievable: (v: Retrievable) => unknown | Promise<unknown>;
168
+ }
169
+ /**
170
+ * Construct {@link @nhtio/adk!Retrievable}s from `RawRetrievable`s and store each via `ctx`.
171
+ *
172
+ * @remarks
173
+ * This is the only function here that touches a core class, and it does so through an injected
174
+ * **resolver** (`deps.retrievable`) so the glue itself never value-imports `Retrievable`. Each
175
+ * record's `RawRetrievable` validation (including the required `trustTier`) fires at construction.
176
+ * For reader-backed content, the caller's `spool` hook will typically have used
177
+ * `ctx.storeRetrievableBytes` already; this helper just persists the records into the turn.
178
+ *
179
+ * @param ctx - Anything with a `storeRetrievable` method (a `DispatchContext`, or a stub).
180
+ * @param raws - The plain records from the converters.
181
+ * @param deps - `{ retrievable }`: the `Retrievable` constructor or a resolver of it.
182
+ * @returns The constructed `Retrievable` instances, in input order.
183
+ */
184
+ export declare const storeRetrievables: (ctx: RetrievableStoreCtx, raws: RawRetrievable[], deps: {
185
+ retrievable: Resolver<RetrievableCtor>;
186
+ }) => Promise<Retrievable[]>;