webveil 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +101 -0
- package/dist/cli.d.ts +58 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +91 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/backends/custom.d.ts +15 -0
- package/dist/core/backends/custom.d.ts.map +1 -0
- package/dist/core/backends/custom.js +106 -0
- package/dist/core/backends/custom.js.map +1 -0
- package/dist/core/backends/registry.d.ts +13 -0
- package/dist/core/backends/registry.d.ts.map +1 -0
- package/dist/core/backends/registry.js +31 -0
- package/dist/core/backends/registry.js.map +1 -0
- package/dist/core/backends/searxng.d.ts +8 -0
- package/dist/core/backends/searxng.d.ts.map +1 -0
- package/dist/core/backends/searxng.js +43 -0
- package/dist/core/backends/searxng.js.map +1 -0
- package/dist/core/backends/tavily-compat.d.ts +10 -0
- package/dist/core/backends/tavily-compat.d.ts.map +1 -0
- package/dist/core/backends/tavily-compat.js +85 -0
- package/dist/core/backends/tavily-compat.js.map +1 -0
- package/dist/core/backends/types.d.ts +48 -0
- package/dist/core/backends/types.d.ts.map +1 -0
- package/dist/core/backends/types.js +5 -0
- package/dist/core/backends/types.js.map +1 -0
- package/dist/core/config.d.ts +39 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +72 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/egress.d.ts +30 -0
- package/dist/core/egress.d.ts.map +1 -0
- package/dist/core/egress.js +87 -0
- package/dist/core/egress.js.map +1 -0
- package/dist/core/extract.d.ts +45 -0
- package/dist/core/extract.d.ts.map +1 -0
- package/dist/core/extract.js +36 -0
- package/dist/core/extract.js.map +1 -0
- package/dist/core/fetch.d.ts +42 -0
- package/dist/core/fetch.d.ts.map +1 -0
- package/dist/core/fetch.js +76 -0
- package/dist/core/fetch.js.map +1 -0
- package/dist/core/http.d.ts +8 -0
- package/dist/core/http.d.ts.map +1 -0
- package/dist/core/http.js +49 -0
- package/dist/core/http.js.map +1 -0
- package/dist/core/search.d.ts +31 -0
- package/dist/core/search.d.ts.map +1 -0
- package/dist/core/search.js +65 -0
- package/dist/core/search.js.map +1 -0
- package/dist/core/security.d.ts +35 -0
- package/dist/core/security.d.ts.map +1 -0
- package/dist/core/security.js +141 -0
- package/dist/core/security.js.map +1 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/package.json +62 -2
- package/src/cli.ts +106 -0
- package/src/core/backends/custom.ts +159 -0
- package/src/core/backends/registry.ts +41 -0
- package/src/core/backends/searxng.ts +70 -0
- package/src/core/backends/tavily-compat.ts +156 -0
- package/src/core/backends/types.ts +61 -0
- package/src/core/config.ts +106 -0
- package/src/core/egress.ts +106 -0
- package/src/core/extract.ts +82 -0
- package/src/core/fetch.ts +132 -0
- package/src/core/http.ts +62 -0
- package/src/core/search.ts +104 -0
- package/src/core/security.ts +141 -0
- package/src/index.ts +82 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * One hit returned by a backend search.
 */
export interface SearchResult {
    /** Title of the hit. */
    title: string;
    /** URL the hit points at. */
    url: string;
    /** Optional text snippet for the hit. */
    snippet?: string;
}
|
|
7
|
+
/**
 * The outcome of fetching one page: its content extracted to budget-bounded
 * markdown.
 */
export interface FetchResult {
    /** URL of the page. */
    url: string;
    /** Page title, when the content source provides one. */
    title?: string;
    /** Extracted markdown. */
    markdown: string;
    /** Truncation flag reported alongside the markdown. */
    truncated: boolean;
}
|
|
14
|
+
/** Per-call options accepted by a backend's `search`. */
export interface SearchOptions {
    /** Optional result-count setting for the search. */
    maxResults?: number;
    /** Optional abort signal for the call. */
    signal?: AbortSignal;
}
|
|
18
|
+
/** Per-call options accepted by a backend's `fetch`. */
export interface FetchOptions {
    /** Page-size preset: one of `s`, `m`, `l`, `f`. */
    size?: 's' | 'm' | 'l' | 'f';
    /** Optional abort signal for the call. */
    signal?: AbortSignal;
}
|
|
22
|
+
/**
 * What a caller may specify for one request made through the http helper.
 */
export interface HttpRequestOptions {
    /** HTTP method. */
    method?: string;
    /** Request headers as a name → value record. */
    headers?: Record<string, string>;
    /** Request body, as a string. */
    body?: string;
    /** Timeout for this request in milliseconds; the helper aborts past it. */
    timeoutMs?: number;
    /** Optional abort signal for the request. */
    signal?: AbortSignal;
}
|
|
31
|
+
/**
 * The proxied http helper that backends receive. Both methods go through the
 * egress dispatcher, so a backend is never handed un-proxied transport of its
 * own.
 */
export interface Http {
    /** Request `url` and resolve with the response typed as `T`. */
    fetchJson<T = unknown>(url: string, options?: HttpRequestOptions): Promise<T>;
    /** Request `url` and resolve with the response as a string. */
    fetchText(url: string, options?: HttpRequestOptions): Promise<string>;
}
|
|
39
|
+
/**
 * A source of search results and, optionally, page content.
 *
 * `search` is mandatory. `fetch` is optional: a backend may supply its own
 * `/extract` in place of the default distilly Extractor. Each method is given
 * the proxied `http` helper, so it cannot escape the configured egress.
 */
export interface Backend {
    /** Run `query` and resolve with the hits. */
    search(query: string, http: Http, options?: SearchOptions): Promise<SearchResult[]>;
    /** Optional backend-specific fetch of `url`. */
    fetch?(url: string, http: Http, options?: FetchOptions): Promise<FetchResult>;
}
|
|
48
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/core/backends/types.ts"],"names":[],"mappings":"AAIA,2BAA2B;AAC3B,MAAM,WAAW,YAAY;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,4DAA4D;AAC5D,MAAM,WAAW,WAAW;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC5B,IAAI,CAAC,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;IAC7B,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,4DAA4D;AAC5D,MAAM,WAAW,kBAAkB;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,IAAI;IACpB,SAAS,CAAC,CAAC,GAAG,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAC9E,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACtE;AAED;;;;GAIG;AACH,MAAM,WAAW,OAAO;IACvB,MAAM,CACL,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,IAAI,EACV,OAAO,CAAC,EAAE,aAAa,GACrB,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC3B,KAAK,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CAC9E"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
// backend seam — the contract every result source (searxng | tavily-compat |
|
|
2
|
+
// custom) implements. A Backend is HANDED a proxied `http` helper (bound to the
|
|
3
|
+
// configured egress dispatcher) so it physically cannot bypass the egress.
|
|
4
|
+
export {};
|
|
5
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/core/backends/types.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAC7E,gFAAgF;AAChF,2EAA2E"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * How outbound HTTP leaves the machine (see egress.ts): directly, or through
 * an `http` or `socks5` proxy identified by `url`.
 */
export type Egress =
    | { mode: 'direct' }
    | { mode: 'http'; url: string }
    | { mode: 'socks5'; url: string };
|
|
11
|
+
/**
 * Page-size budget preset used by fetch; the value is passed through to
 * distilly.
 */
export type FetchSize = 's' | 'm' | 'l' | 'f';
|
|
13
|
+
/**
 * The fully-resolved configuration consumed by every webveil module.
 */
export interface Config {
    /** Name of the configured backend. */
    backend: string;
    /** Base URL setting. */
    baseUrl: string;
    /** Optional API key. */
    apiKey?: string;
    /** How outbound HTTP leaves the machine. */
    egress: Egress;
    /** Page-size budget preset for fetch. */
    fetchSize: FetchSize;
}
|
|
21
|
+
/**
 * One configuration layer (a config file or the environment): any subset of
 * the resolved `Config` shape.
 */
export type PartialConfig = Partial<Config>;
|
|
23
|
+
/** Inputs that control where `resolveConfig` looks for its layers. */
export interface ResolveOptions {
    /** Starting directory for the per-folder walk. Default: `process.cwd()`. */
    cwd?: string;
    /** Environment that overrides are read from. Default: `process.env`. */
    env?: Record<string, string | undefined>;
    /**
     * Location of the global config file. Default: `~/.pi/agent/webveil.json`.
     * Tests aim this at a temp dir so the real home directory stays isolated.
     */
    globalPath?: string;
}
|
|
34
|
+
/**
 * Compute the effective config. Layers are merged key by key, with a
 * higher-precedence layer overriding a lower one:
 * env > project chain > global file > defaults.
 */
export declare function resolveConfig(options?: ResolveOptions): Config;
|
|
39
|
+
//# sourceMappingURL=config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/core/config.ts"],"names":[],"mappings":"AAUA,2DAA2D;AAC3D,MAAM,MAAM,MAAM,GACf;IAAC,IAAI,EAAE,QAAQ,CAAA;CAAC,GAChB;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAC,GAC3B;IAAC,IAAI,EAAE,QAAQ,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAC,CAAC;AAEjC,sEAAsE;AACtE,MAAM,MAAM,SAAS,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AAE9C,+DAA+D;AAC/D,MAAM,WAAW,MAAM;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;CACrB;AAED,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE5C,MAAM,WAAW,cAAc;IAC9B,4EAA4E;IAC5E,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,mEAAmE;IACnE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,CAAC;IACzC;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AA+CD;;;GAGG;AACH,wBAAgB,aAAa,CAAC,OAAO,GAAE,cAAmB,GAAG,MAAM,CAalE"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
// config seam — per-folder resolution. Precedence (highest wins):
|
|
2
|
+
// env > nearest .pi/webveil.json (walking up from cwd) > global
|
|
3
|
+
// ~/.pi/agent/webveil.json > defaults.
|
|
4
|
+
// "Per folder = per account/egress." Each layer is a partial; later (lower)
|
|
5
|
+
// layers fill gaps the higher layers leave.
|
|
6
|
+
import { readFileSync } from 'node:fs';
|
|
7
|
+
import { homedir } from 'node:os';
|
|
8
|
+
import { dirname, join, parse } from 'node:path';
|
|
9
|
+
// Lowest-precedence layer: the values used when no other layer sets a key.
const DEFAULTS = {
    backend: 'searxng',
    baseUrl: 'http://127.0.0.1:8080',
    egress: { mode: 'direct' },
    fetchSize: 'm',
};

// Per-folder config file, relative to each directory visited by the walk.
const PROJECT_FILE = join('.pi', 'webveil.json');
|
|
16
|
+
function readJson(path) {
|
|
17
|
+
let text;
|
|
18
|
+
try {
|
|
19
|
+
text = readFileSync(path, 'utf8');
|
|
20
|
+
}
|
|
21
|
+
catch {
|
|
22
|
+
return undefined; // absent file is fine; missing layers are expected
|
|
23
|
+
}
|
|
24
|
+
return JSON.parse(text);
|
|
25
|
+
}
|
|
26
|
+
/** The nearest `.pi/webveil.json` walking up from `cwd` (first found wins). */
|
|
27
|
+
function readProjectChain(cwd) {
|
|
28
|
+
let dir = cwd;
|
|
29
|
+
const { root } = parse(dir);
|
|
30
|
+
for (;;) {
|
|
31
|
+
const found = readJson(join(dir, PROJECT_FILE));
|
|
32
|
+
if (found)
|
|
33
|
+
return found;
|
|
34
|
+
if (dir === root)
|
|
35
|
+
return undefined;
|
|
36
|
+
dir = dirname(dir);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
/**
 * Build the environment layer from `WEBVEIL_*` variables.
 *
 * Unset or empty variables contribute nothing. A variable that is set to a
 * value this function does not recognise is an error rather than being
 * ignored: an ignored `WEBVEIL_EGRESS` would let resolution fall through to a
 * lower layer (ultimately the `direct` default).
 *
 * @param env Environment record to read from.
 * @returns A partial config holding only the keys the environment sets.
 * @throws Error when `WEBVEIL_FETCH_SIZE` is not one of `s`/`m`/`l`/`f`, or
 *   when `WEBVEIL_EGRESS` is not one of `direct`/`http`/`socks5`.
 */
function readEnv(env) {
    const entries = [];
    if (env.WEBVEIL_BACKEND)
        entries.push(['backend', env.WEBVEIL_BACKEND]);
    if (env.WEBVEIL_BASE_URL)
        entries.push(['baseUrl', env.WEBVEIL_BASE_URL]);
    if (env.WEBVEIL_API_KEY)
        entries.push(['apiKey', env.WEBVEIL_API_KEY]);
    const size = env.WEBVEIL_FETCH_SIZE;
    if (size) {
        if (!['s', 'm', 'l', 'f'].includes(size))
            throw new Error(`WEBVEIL_FETCH_SIZE: expected one of s, m, l, f; got ${JSON.stringify(size)}`);
        entries.push(['fetchSize', size]);
    }
    const mode = env.WEBVEIL_EGRESS;
    if (mode === 'direct')
        entries.push(['egress', { mode: 'direct' }]);
    else if (mode === 'http' || mode === 'socks5')
        // A missing url is kept as '' here; buildDispatcher rejects it later.
        entries.push(['egress', { mode, url: env.WEBVEIL_EGRESS_URL ?? '' }]);
    else if (mode)
        throw new Error(`WEBVEIL_EGRESS: expected one of direct, http, socks5; got ${JSON.stringify(mode)}`);
    return Object.fromEntries(entries);
}
|
|
56
|
+
/**
|
|
57
|
+
* Resolve the effective config. Higher-precedence layers override lower ones,
|
|
58
|
+
* key by key: env > project chain > global file > defaults.
|
|
59
|
+
*/
|
|
60
|
+
export function resolveConfig(options = {}) {
|
|
61
|
+
const cwd = options.cwd ?? process.cwd();
|
|
62
|
+
const env = options.env ?? process.env;
|
|
63
|
+
const globalPath = options.globalPath ?? join(homedir(), '.pi', 'agent', 'webveil.json');
|
|
64
|
+
const layers = [
|
|
65
|
+
DEFAULTS,
|
|
66
|
+
readJson(globalPath) ?? {},
|
|
67
|
+
readProjectChain(cwd) ?? {},
|
|
68
|
+
readEnv(env),
|
|
69
|
+
];
|
|
70
|
+
return Object.assign({}, ...layers);
|
|
71
|
+
}
|
|
72
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/core/config.ts"],"names":[],"mappings":"AAAA,kEAAkE;AAClE,kEAAkE;AAClE,yCAAyC;AACzC,4EAA4E;AAC5E,4CAA4C;AAE5C,OAAO,EAAC,YAAY,EAAC,MAAM,SAAS,CAAC;AACrC,OAAO,EAAC,OAAO,EAAC,MAAM,SAAS,CAAC;AAChC,OAAO,EAAC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC,MAAM,WAAW,CAAC;AAmC/C,MAAM,QAAQ,GAAW;IACxB,OAAO,EAAE,SAAS;IAClB,OAAO,EAAE,uBAAuB;IAChC,MAAM,EAAE,EAAC,IAAI,EAAE,QAAQ,EAAC;IACxB,SAAS,EAAE,GAAG;CACd,CAAC;AAEF,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;AAEjD,SAAS,QAAQ,CAAC,IAAY;IAC7B,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACJ,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAC,CAAC,mDAAmD;IACtE,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAkB,CAAC;AAC1C,CAAC;AAED,+EAA+E;AAC/E,SAAS,gBAAgB,CAAC,GAAW;IACpC,IAAI,GAAG,GAAG,GAAG,CAAC;IACd,MAAM,EAAC,IAAI,EAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;IAC1B,SAAS,CAAC;QACT,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC,CAAC;QAChD,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC;QACxB,IAAI,GAAG,KAAK,IAAI;YAAE,OAAO,SAAS,CAAC;QACnC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACpB,CAAC;AACF,CAAC;AAED,SAAS,OAAO,CAAC,GAAuC;IACvD,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,IAAI,GAAG,CAAC,eAAe;QAAE,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,eAAe,CAAC;IAC7D,IAAI,GAAG,CAAC,gBAAgB;QAAE,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,gBAAgB,CAAC;IAC/D,IAAI,GAAG,CAAC,eAAe;QAAE,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,eAAe,CAAC;IAC5D,IAAI,GAAG,CAAC,kBAAkB;QACzB,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,kBAA+B,CAAC;IACvD,MAAM,IAAI,GAAG,GAAG,CAAC,cAAc,CAAC;IAChC,IAAI,IAAI,KAAK,QAAQ;QAAE,KAAK,CAAC,MAAM,GAAG,EAAC,IAAI,EAAE,QAAQ,EAAC,CAAC;SAClD,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,QAAQ;QAC5C,KAAK,CAAC,MAAM,GAAG,EAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,kBAAkB,IAAI,EAAE,EAAC,CAAC;IAC1D,OAAO,KAAK,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,UAA0B,EAAE;IACzD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC;IACvC,MAAM,UAAU,GACf,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,OAAO,E
AAE,cAAc,CAAC,CAAC;IAEvE,MAAM,MAAM,GAAoB;QAC/B,QAAQ;QACR,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE;QAC1B,gBAAgB,CAAC,GAAG,CAAC,IAAI,EAAE;QAC3B,OAAO,CAAC,GAAG,CAAC;KACZ,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,MAAM,CAAW,CAAC;AAC/C,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { Agent, type Dispatcher, ProxyAgent } from 'undici';
|
|
2
|
+
import type { Config } from './config.js';
|
|
3
|
+
/**
 * Error raised when a configured egress proxy cannot be built. It is never
 * swallowed.
 */
export declare class EgressError extends Error {
    constructor(message: string, options?: { cause?: unknown });
}
|
|
9
|
+
/**
 * Produce the undici Dispatcher matching the config's egress mode.
 *
 * | mode   | result                                                   |
 * | ------ | -------------------------------------------------------- |
 * | direct | `undefined` (undici's default, un-proxied transport)     |
 * | http   | a `ProxyAgent`                                           |
 * | socks5 | a socks dispatcher (undici Agent over a socks connector) |
 *
 * Fails loud: a configured http/socks5 proxy that cannot be built throws.
 * `undefined` (direct) is NEVER returned as a fallback for a broken proxy.
 */
export declare function buildDispatcher(cfg: Config): Dispatcher | undefined;
|
|
19
|
+
/**
 * A fetch with the WHATWG `fetch` signature that is bound to one specific
 * egress dispatcher.
 */
export type EgressFetch = typeof globalThis.fetch;
|
|
21
|
+
/**
 * Create an egress-bound WHATWG `fetch`: undici's `fetch`, closed over the
 * dispatcher that buildDispatcher(cfg) returns. This is the `fetch` handed to
 * distilly/fetch, so distilly has no egress of its own.
 *
 * The fail-loud guarantee is the same as buildDispatcher's: a broken proxy
 * throws HERE, before any I/O, and never results in an un-proxied request.
 */
export declare function createEgressFetch(cfg: Config): EgressFetch;
|
|
28
|
+
export type { Dispatcher };
|
|
29
|
+
export { Agent, ProxyAgent };
|
|
30
|
+
//# sourceMappingURL=egress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"egress.d.ts","sourceRoot":"","sources":["../../src/core/egress.ts"],"names":[],"mappings":"AAQA,OAAO,EAAC,KAAK,EAAE,KAAK,UAAU,EAAE,UAAU,EAAuB,MAAM,QAAQ,CAAC;AAEhF,OAAO,KAAK,EAAC,MAAM,EAAS,MAAM,aAAa,CAAC;AAEhD,8EAA8E;AAC9E,qBAAa,WAAY,SAAQ,KAAK;gBACzB,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC;CAIxD;AAqBD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAiCnE;AAED,uEAAuE;AACvE,MAAM,MAAM,WAAW,GAAG,OAAO,UAAU,CAAC,KAAK,CAAC;AAElD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,WAAW,CAU1D;AAED,YAAY,EAAC,UAAU,EAAC,CAAC;AACzB,OAAO,EAAC,KAAK,EAAE,UAAU,EAAC,CAAC"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// egress seam — how outbound HTTP leaves the machine. Yields TWO artifacts off
|
|
2
|
+
// the SAME undici dispatcher: the proxied `http` helper (see http.ts, handed to
|
|
3
|
+
// backends) and an egress-bound WHATWG `fetch` (injected into distilly/fetch).
|
|
4
|
+
//
|
|
5
|
+
// CRITICAL anonymity invariant (docs/adr/0001): egress is fail-loud. A
|
|
6
|
+
// configured proxy that cannot be built MUST throw — it must NEVER silently
|
|
7
|
+
// fall back to un-proxied (direct) transport.
|
|
8
|
+
import { Agent, ProxyAgent, fetch as undiciFetch } from 'undici';
|
|
9
|
+
import { socksDispatcher } from 'fetch-socks';
|
|
10
|
+
/** Thrown when a configured egress proxy cannot be built. Never swallowed. */
|
|
11
|
+
export class EgressError extends Error {
|
|
12
|
+
constructor(message, options) {
|
|
13
|
+
super(message, options);
|
|
14
|
+
this.name = 'EgressError';
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
function socksFromUrl(raw) {
|
|
18
|
+
const url = new URL(raw); // throws on a malformed proxy URL → fail loud
|
|
19
|
+
const protocol = url.protocol.replace(':', '');
|
|
20
|
+
if (protocol !== 'socks5' && protocol !== 'socks' && protocol !== 'socks5h')
|
|
21
|
+
throw new EgressError(`egress socks5: expected a socks5:// proxy url, got ${raw}`);
|
|
22
|
+
const port = Number(url.port);
|
|
23
|
+
if (!url.hostname || !Number.isInteger(port) || port <= 0)
|
|
24
|
+
throw new EgressError(`egress socks5: invalid host/port in ${raw}`);
|
|
25
|
+
return socksDispatcher({
|
|
26
|
+
type: 5,
|
|
27
|
+
host: url.hostname,
|
|
28
|
+
port,
|
|
29
|
+
userId: url.username || undefined,
|
|
30
|
+
password: url.password || undefined,
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Build the undici Dispatcher for the config's egress mode:
|
|
35
|
+
* - direct → undefined (undici uses its default, un-proxied transport)
|
|
36
|
+
* - http → ProxyAgent
|
|
37
|
+
* - socks5 → socks dispatcher (undici Agent over a socks connector)
|
|
38
|
+
*
|
|
39
|
+
* Throws (fail loud) if a configured http/socks5 proxy cannot be built. It
|
|
40
|
+
* NEVER returns `undefined` (direct) as a fallback for a broken proxy.
|
|
41
|
+
*/
|
|
42
|
+
export function buildDispatcher(cfg) {
|
|
43
|
+
const egress = cfg.egress;
|
|
44
|
+
switch (egress.mode) {
|
|
45
|
+
case 'direct':
|
|
46
|
+
return undefined;
|
|
47
|
+
case 'http':
|
|
48
|
+
try {
|
|
49
|
+
if (!egress.url)
|
|
50
|
+
throw new Error('missing proxy url');
|
|
51
|
+
return new ProxyAgent(egress.url);
|
|
52
|
+
}
|
|
53
|
+
catch (cause) {
|
|
54
|
+
throw new EgressError(`egress http: could not build proxy for ${egress.url}`, { cause });
|
|
55
|
+
}
|
|
56
|
+
case 'socks5':
|
|
57
|
+
try {
|
|
58
|
+
if (!egress.url)
|
|
59
|
+
throw new Error('missing proxy url');
|
|
60
|
+
return socksFromUrl(egress.url);
|
|
61
|
+
}
|
|
62
|
+
catch (cause) {
|
|
63
|
+
if (cause instanceof EgressError)
|
|
64
|
+
throw cause;
|
|
65
|
+
throw new EgressError(`egress socks5: could not build proxy for ${egress.url}`, { cause });
|
|
66
|
+
}
|
|
67
|
+
default: {
|
|
68
|
+
const exhaustive = egress;
|
|
69
|
+
throw new EgressError(`egress: unknown mode ${JSON.stringify(exhaustive)}`);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Build an egress-bound WHATWG `fetch`: undici's `fetch` closed over the
|
|
75
|
+
* dispatcher from buildDispatcher(cfg). This is the `fetch` injected into
|
|
76
|
+
* distilly/fetch so distilly never has egress of its own. Same fail-loud
|
|
77
|
+
* guarantee: a broken proxy throws HERE (before any I/O), never goes un-proxied.
|
|
78
|
+
*/
|
|
79
|
+
export function createEgressFetch(cfg) {
|
|
80
|
+
const dispatcher = buildDispatcher(cfg);
|
|
81
|
+
return ((input, init) => undiciFetch(input, {
|
|
82
|
+
...(init ?? {}),
|
|
83
|
+
dispatcher,
|
|
84
|
+
}));
|
|
85
|
+
}
|
|
86
|
+
export { Agent, ProxyAgent };
|
|
87
|
+
//# sourceMappingURL=egress.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"egress.js","sourceRoot":"","sources":["../../src/core/egress.ts"],"names":[],"mappings":"AAAA,+EAA+E;AAC/E,gFAAgF;AAChF,+EAA+E;AAC/E,EAAE;AACF,uEAAuE;AACvE,4EAA4E;AAC5E,8CAA8C;AAE9C,OAAO,EAAC,KAAK,EAAmB,UAAU,EAAE,KAAK,IAAI,WAAW,EAAC,MAAM,QAAQ,CAAC;AAChF,OAAO,EAAC,eAAe,EAAC,MAAM,aAAa,CAAC;AAG5C,8EAA8E;AAC9E,MAAM,OAAO,WAAY,SAAQ,KAAK;IACrC,YAAY,OAAe,EAAE,OAA2B;QACvD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,aAAa,CAAC;IAC3B,CAAC;CACD;AAED,SAAS,YAAY,CAAC,GAAW;IAChC,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,8CAA8C;IACxE,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;IAC/C,IAAI,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,OAAO,IAAI,QAAQ,KAAK,SAAS;QAC1E,MAAM,IAAI,WAAW,CACpB,sDAAsD,GAAG,EAAE,CAC3D,CAAC;IACH,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC;QACxD,MAAM,IAAI,WAAW,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IACrE,OAAO,eAAe,CAAC;QACtB,IAAI,EAAE,CAAC;QACP,IAAI,EAAE,GAAG,CAAC,QAAQ;QAClB,IAAI;QACJ,MAAM,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;QACjC,QAAQ,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;KACnC,CAAC,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IAC1C,MAAM,MAAM,GAAW,GAAG,CAAC,MAAM,CAAC;IAClC,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,QAAQ;YACZ,OAAO,SAAS,CAAC;QAClB,KAAK,MAAM;YACV,IAAI,CAAC;gBACJ,IAAI,CAAC,MAAM,CAAC,GAAG;oBAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;gBACtD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACnC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,MAAM,IAAI,WAAW,CACpB,0CAA0C,MAAM,CAAC,GAAG,EAAE,EACtD,EAAC,KAAK,EAAC,CACP,CAAC;YACH,CAAC;QACF,KAAK,QAAQ;YACZ,IAAI,CAAC;gBACJ,IAAI,CAAC,MAAM,CAAC,GAAG;oBAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;gBACtD,OAAO,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,IAAI,KAAK,YAAY,WAAW;oBAAE,MAAM,KAAK,CAAC;gBAC9C,MAAM,IAAI,WAAW,CACpB,4CAA4C,MAAM,CAAC,GAAG,EAAE,EACxD,EAAC,KAAK,EAAC,CACP,CAAC;YACH,CAAC;QACF,OAAO,CAAC,CAAC,CAAC;YACT,MAAM,UAAU,GAAU,MAAM,CAAC;YACjC,MAAM,IAAI,W
AAW,CACpB,wBAAwB,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,CACpD,CAAC;QACH,CAAC;IACF,CAAC;AACF,CAAC;AAKD;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC5C,MAAM,UAAU,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;IACxC,OAAO,CAAC,CAAC,KAAwB,EAAE,IAAkB,EAAE,EAAE,CACxD,WAAW,CACV,KAAc,EACd;QACC,GAAI,CAAC,IAAI,IAAI,EAAE,CAA6B;QAC5C,UAAU;KACD,CACV,CAAgB,CAAC;AACpB,CAAC;AAGD,OAAO,EAAC,KAAK,EAAE,UAAU,EAAC,CAAC"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { Config, FetchSize } from './config.js';
|
|
2
|
+
import { type EgressFetch } from './egress.js';
|
|
3
|
+
import type { FetchResult } from './backends/types.js';
|
|
4
|
+
/**
 * The part of distilly's `urlToMarkdown` return value that webveil surfaces.
 */
interface UrlToMarkdownResult {
    /** The produced markdown. */
    markdown: string;
    /** distilly's truncation flag. */
    truncated: boolean;
}
|
|
9
|
+
/**
 * Signature of distilly's networked entrypoint, narrowed to what this seam
 * injects (`fetch`) and uses (`size`).
 */
type UrlToMarkdown = (
    url: string | URL,
    options: {
        fetch: EgressFetch;
        size?: FetchSize;
    },
) => Promise<UrlToMarkdownResult>;
|
|
14
|
+
/** Options that apply to a single `extract` call. */
export interface ExtractOptions {
    /**
     * Page-size budget for THIS call; when given it takes precedence over the
     * config's `fetchSize`. webveil's `s`/`m`/`l`/`f` preset and distilly's
     * `size` are the same enum, so the value is handed to distilly verbatim.
     */
    size?: FetchSize;
}
|
|
23
|
+
/**
 * The extractor's collaborators, exposed as a seam so it can be tested
 * WITHOUT real network or undici. A test can inject a spy `urlToMarkdown`
 * and/or a spy egress fetch and assert that distilly is called with the
 * egress fetch (never a global). When a member is omitted, the real
 * `distilly/fetch` and `createEgressFetch` are used.
 */
export interface ExtractDeps {
    /** distilly's networked `urlToMarkdown`. Default: the real `distilly/fetch`. */
    urlToMarkdown?: UrlToMarkdown;
    /** Factory for the egress-bound fetch. Default: `createEgressFetch`. */
    createEgressFetch?: (config: Config) => EgressFetch;
}
|
|
35
|
+
/**
 * Turn a URL into clean, budget-bounded markdown using distilly over
 * webveil's egress. The egress-bound `fetch` is built first (fail-loud on an
 * unbuildable proxy) and injected into distilly's `urlToMarkdown`; the
 * `s`/`m`/`l`/`f` preset is mapped to distilly's `size`; distilly's
 * `truncated` is surfaced in the result.
 *
 * @returns `{ url, markdown, truncated }`, i.e. a `FetchResult` with no `title`.
 */
export declare function extract(url: string, config: Config, options?: ExtractOptions, deps?: ExtractDeps): Promise<FetchResult>;
|
|
44
|
+
export {};
|
|
45
|
+
//# sourceMappingURL=extract.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/core/extract.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAC,MAAM,EAAE,SAAS,EAAC,MAAM,aAAa,CAAC;AACnD,OAAO,EAAoB,KAAK,WAAW,EAAC,MAAM,aAAa,CAAC;AAChE,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,qBAAqB,CAAC;AAErD,0EAA0E;AAC1E,UAAU,mBAAmB;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;CACnB;AAED,+EAA+E;AAC/E,KAAK,aAAa,GAAG,CACpB,GAAG,EAAE,MAAM,GAAG,GAAG,EACjB,OAAO,EAAE;IAAC,KAAK,EAAE,WAAW,CAAC;IAAC,IAAI,CAAC,EAAE,SAAS,CAAA;CAAC,KAC3C,OAAO,CAAC,mBAAmB,CAAC,CAAC;AAElC,kCAAkC;AAClC,MAAM,WAAW,cAAc;IAC9B;;;;OAIG;IACH,IAAI,CAAC,EAAE,SAAS,CAAC;CACjB;AAED;;;;;GAKG;AACH,MAAM,WAAW,WAAW;IAC3B,iFAAiF;IACjF,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,8EAA8E;IAC9E,iBAAiB,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,WAAW,CAAC;CACpD;AAED;;;;;;;GAOG;AACH,wBAAsB,OAAO,CAC5B,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,cAAmB,EAC5B,IAAI,GAAE,WAAgB,GACpB,OAAO,CAAC,WAAW,CAAC,CAYtB"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Extractor seam — turn a URL into clean, size-bounded markdown by calling
|
|
2
|
+
// distilly's NETWORKED `urlToMarkdown` (the `distilly/fetch` entrypoint),
|
|
3
|
+
// INJECTING webveil's egress-bound `fetch` as the only transport. distilly's
|
|
4
|
+
// network Rules (github/mdn/react.dev/vuejs.org) rewrite a matching URL to its
|
|
5
|
+
// raw `.md`/API source and fetch THAT over our egress — shorter, cleaner output;
|
|
6
|
+
// non-matching URLs run through distilly's pure core. See docs/adr/0001.
|
|
7
|
+
//
|
|
8
|
+
// THE HARD INVARIANT (load-bearing for anonymity): webveil ALWAYS injects its
|
|
9
|
+
// egress-bound `fetch` here and NEVER lets distilly use a global/default fetch.
|
|
10
|
+
// distilly throws if none is injected — the desired fail-loud. And the egress
|
|
11
|
+
// fetch itself throws (before any I/O) when a configured proxy is unbuildable
|
|
12
|
+
// (egress.ts), so a broken proxy can never become an un-proxied request.
|
|
13
|
+
//
|
|
14
|
+
// This is the DEFAULT extractor; a backend's own `/extract` (tavily-compat) may
|
|
15
|
+
// override it (wired in the core-fetch task).
|
|
16
|
+
import { urlToMarkdown as distillyUrlToMarkdown } from 'distilly/fetch';
|
|
17
|
+
import { createEgressFetch } from './egress.js';
|
|
18
|
+
/**
|
|
19
|
+
* Extract a URL to clean, budget-bounded markdown via distilly over webveil's
|
|
20
|
+
* egress. Builds the egress-bound `fetch` (fail-loud on an unbuildable proxy),
|
|
21
|
+
* injects it into distilly's `urlToMarkdown`, maps the `s`/`m`/`l`/`f` preset to
|
|
22
|
+
* distilly's `size`, and surfaces distilly's `truncated`.
|
|
23
|
+
*
|
|
24
|
+
* @returns `{ url, markdown, truncated }` (a `FetchResult` without a `title`).
|
|
25
|
+
*/
|
|
26
|
+
export async function extract(url, config, options = {}, deps = {}) {
|
|
27
|
+
const urlToMarkdown = deps.urlToMarkdown ?? distillyUrlToMarkdown;
|
|
28
|
+
const buildFetch = deps.createEgressFetch ?? createEgressFetch;
|
|
29
|
+
// Build the egress-bound fetch FIRST: a configured-but-unbuildable proxy
|
|
30
|
+
// throws here, before any network access (never an un-proxied request).
|
|
31
|
+
const fetch = buildFetch(config);
|
|
32
|
+
const size = options.size ?? config.fetchSize;
|
|
33
|
+
const { markdown, truncated } = await urlToMarkdown(url, { fetch, size });
|
|
34
|
+
return { url, markdown, truncated };
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/core/extract.ts"],"names":[],"mappings":"AAAA,2EAA2E;AAC3E,0EAA0E;AAC1E,6EAA6E;AAC7E,+EAA+E;AAC/E,iFAAiF;AACjF,yEAAyE;AACzE,EAAE;AACF,8EAA8E;AAC9E,gFAAgF;AAChF,8EAA8E;AAC9E,8EAA8E;AAC9E,yEAAyE;AACzE,EAAE;AACF,gFAAgF;AAChF,8CAA8C;AAE9C,OAAO,EAAC,aAAa,IAAI,qBAAqB,EAAC,MAAM,gBAAgB,CAAC;AAEtE,OAAO,EAAC,iBAAiB,EAAmB,MAAM,aAAa,CAAC;AAsChE;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAC5B,GAAW,EACX,MAAc,EACd,UAA0B,EAAE,EAC5B,OAAoB,EAAE;IAEtB,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,qBAAqB,CAAC;IAClE,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,IAAI,iBAAiB,CAAC;IAE/D,yEAAyE;IACzE,wEAAwE;IACxE,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IAEjC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,SAAS,CAAC;IAE9C,MAAM,EAAC,QAAQ,EAAE,SAAS,EAAC,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,EAAC,KAAK,EAAE,IAAI,EAAC,CAAC,CAAC;IACtE,OAAO,EAAC,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAC,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { Config, ResolveOptions } from './config.js';
|
|
2
|
+
import type { EgressFetch } from './egress.js';
|
|
3
|
+
import type { Dispatcher } from './egress.js';
|
|
4
|
+
import type { ExtractDeps } from './extract.js';
|
|
5
|
+
import type { Backend, FetchOptions, FetchResult, Http } from './backends/types.js';
|
|
6
|
+
/**
 * The core's collaborators, exposed as a seam so it can be tested WITHOUT
 * real config files, undici, network, or distilly. A test injects fakes to
 * assert the backend-`/extract`-vs-distilly branch, the list path, and that
 * the guarded egress fetch (never a global) is what reaches distilly. Omitted
 * members default to the real modules.
 */
export interface FetchDeps {
    /** Config resolver. */
    resolveConfig?: (options?: ResolveOptions) => Config;
    /** Backend lookup by name. */
    getBackend?: (name: string, config: Config) => Backend;
    /** Dispatcher builder for the config's egress. */
    buildDispatcher?: (config: Config) => Dispatcher | undefined;
    /** Factory for the proxied http helper over a dispatcher. */
    createHttp?: (dispatcher: Dispatcher | undefined) => Http;
    /** Factory for the egress-bound fetch. */
    createEgressFetch?: (config: Config) => EgressFetch;
    /** Wraps an egress fetch into its guarded form. */
    guardEgressFetch?: (fetch: EgressFetch, config: Config) => EgressFetch;
    /** The default extractor. */
    extract?: (
        url: string,
        config: Config,
        options: {
            size?: Config['fetchSize'];
        },
        deps: ExtractDeps,
    ) => Promise<FetchResult>;
}
|
|
24
|
+
/** Per-call fetch options plus the config-resolution knobs (cwd/env/global). */
|
|
25
|
+
// `fetchAll` reads `cwd`, `env` and `globalPath` (handed to `resolveConfig`)
// plus `size` and `signal`. `signal` is forwarded only to a backend's own
// `fetch`; the default extract path receives `{ size }` alone.
export interface FetchCoreOptions extends FetchOptions, ResolveOptions {
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Fetch a LIST of urls to clean, size-bounded markdown, in order. This is the
|
|
29
|
+
* list-ready internal (story 12): the single-URL `fetch()` below is a thin
|
|
30
|
+
* wrapper over it, so a future `web_batch_fetch` reuses this directly.
|
|
31
|
+
*
|
|
32
|
+
* Each url goes through the SAME content-source choice: a backend's own
|
|
33
|
+
* `/extract` (if the configured backend implements `fetch`) OR the default
|
|
34
|
+
* distilly Extractor with the GUARDED egress fetch injected.
|
|
35
|
+
*/
|
|
36
|
+
/**
 * Fetch every url in `urls` and resolve with one `FetchResult` per url, in the
 * same order as the input list.
 *
 * Config is resolved once per call. Urls are then processed one at a time (each
 * is awaited before the next starts), so a rejection for any url rejects the
 * whole call and no later url is attempted.
 *
 * @param urls - Urls to fetch; an empty list resolves to an empty array.
 * @param options - Per-call `size`/`signal` plus the `cwd`/`env`/`globalPath`
 *   config-resolution knobs.
 * @param deps - Optional collaborator overrides; any omitted member falls back
 *   to the real module.
 * @returns The per-url results, in input order.
 */
export declare function fetchAll(urls: string[], options?: FetchCoreOptions, deps?: FetchDeps): Promise<FetchResult[]>;
|
|
37
|
+
/**
|
|
38
|
+
* Fetch ONE url to clean, size-bounded markdown (`{ markdown, truncated, … }`).
|
|
39
|
+
* A thin single-URL wrapper over the list-ready `fetchAll` (story 12).
|
|
40
|
+
*/
|
|
41
|
+
/**
 * Fetch a single url. Delegates to `fetchAll` with a one-element list and
 * resolves with that list's only result.
 *
 * @param url - The url to fetch.
 * @param options - Passed through unchanged to `fetchAll`.
 * @param deps - Passed through unchanged to `fetchAll`.
 * @returns The result for `url`.
 */
export declare function fetch(url: string, options?: FetchCoreOptions, deps?: FetchDeps): Promise<FetchResult>;
|
|
42
|
+
//# sourceMappingURL=fetch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../src/core/fetch.ts"],"names":[],"mappings":"AAeA,OAAO,KAAK,EAAC,MAAM,EAAE,cAAc,EAAC,MAAM,aAAa,CAAC;AAExD,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,aAAa,CAAC;AAI7C,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,aAAa,CAAC;AAE5C,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,cAAc,CAAC;AAE9C,OAAO,KAAK,EACX,OAAO,EACP,YAAY,EACZ,WAAW,EACX,IAAI,EACJ,MAAM,qBAAqB,CAAC;AAE7B;;;;;;GAMG;AACH,MAAM,WAAW,SAAS;IACzB,aAAa,CAAC,EAAE,CAAC,OAAO,CAAC,EAAE,cAAc,KAAK,MAAM,CAAC;IACrD,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC;IACvD,eAAe,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,UAAU,GAAG,SAAS,CAAC;IAC7D,UAAU,CAAC,EAAE,CAAC,UAAU,EAAE,UAAU,GAAG,SAAS,KAAK,IAAI,CAAC;IAC1D,iBAAiB,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,WAAW,CAAC;IACpD,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,KAAK,WAAW,CAAC;IACvE,OAAO,CAAC,EAAE,CACT,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,EACd,OAAO,EAAE;QAAC,IAAI,CAAC,EAAE,MAAM,CAAC,WAAW,CAAC,CAAA;KAAC,EACrC,IAAI,EAAE,WAAW,KACb,OAAO,CAAC,WAAW,CAAC,CAAC;CAC1B;AAED,gFAAgF;AAChF,MAAM,WAAW,gBAAiB,SAAQ,YAAY,EAAE,cAAc;CAAG;AAEzE;;;;;;;;GAQG;AACH,wBAAsB,QAAQ,CAC7B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,gBAAqB,EAC9B,IAAI,GAAE,SAAc,GAClB,OAAO,CAAC,WAAW,EAAE,CAAC,CAsCxB;AAYD;;;GAGG;AACH,wBAAsB,KAAK,CAC1B,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,gBAAqB,EAC9B,IAAI,GAAE,SAAc,GAClB,OAAO,CAAC,WAAW,CAAC,CAGtB"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
// core fetch: the plain, framework-agnostic `fetch()` BOTH frontends (the incur
|
|
2
|
+
// CLI/MCP and the pi extension) call. Returns clean, size-bounded markdown with
|
|
3
|
+
// distilly's `truncated` flag.
|
|
4
|
+
//
|
|
5
|
+
// Flow (per URL): pick the content source (a backend's own `/extract`
|
|
6
|
+
// (tavily-compat) when the configured backend provides one, OTHERWISE the
|
|
7
|
+
// default distilly Extractor seam, urlToMarkdown over webveil's egress). The
|
|
8
|
+
// SSRF guard lives INSIDE the egress-bound fetch injected into distilly, so it
|
|
9
|
+
// covers distilly's rule-rewritten requests too (docs/adr/0001).
|
|
10
|
+
//
|
|
11
|
+
// LIST-READY INTERNALS (story 12): the work happens in `fetchAll(urls, …)`, a
|
|
12
|
+
// list-processing internal, so a future `web_batch_fetch` tool is a trivial add
|
|
13
|
+
// with no redesign. The public `fetch()` is a thin single-URL wrapper over it.
|
|
14
|
+
import { resolveConfig as defaultResolveConfig } from './config.js';
|
|
15
|
+
import { createEgressFetch as defaultCreateEgressFetch } from './egress.js';
|
|
16
|
+
import { guardEgressFetch as defaultGuardEgressFetch } from './security.js';
|
|
17
|
+
import { createHttp as defaultCreateHttp } from './http.js';
|
|
18
|
+
import { buildDispatcher as defaultBuildDispatcher } from './egress.js';
|
|
19
|
+
import { extract as defaultExtract } from './extract.js';
|
|
20
|
+
import { getBackend as defaultGetBackend } from './backends/registry.js';
|
|
21
|
+
/**
|
|
22
|
+
* Fetch a LIST of urls to clean, size-bounded markdown, in order. This is the
|
|
23
|
+
* list-ready internal (story 12): the single-URL `fetch()` below is a thin
|
|
24
|
+
* wrapper over it, so a future `web_batch_fetch` reuses this directly.
|
|
25
|
+
*
|
|
26
|
+
* Each url goes through the SAME content-source choice: a backend's own
|
|
27
|
+
* `/extract` (if the configured backend implements `fetch`) OR the default
|
|
28
|
+
* distilly Extractor with the GUARDED egress fetch injected.
|
|
29
|
+
*/
|
|
30
|
+
export async function fetchAll(urls, options = {}, deps = {}) {
|
|
31
|
+
const resolveConfig = deps.resolveConfig ?? defaultResolveConfig;
|
|
32
|
+
const getBackend = deps.getBackend ?? defaultGetBackend;
|
|
33
|
+
const buildDispatcher = deps.buildDispatcher ?? defaultBuildDispatcher;
|
|
34
|
+
const createHttp = deps.createHttp ?? defaultCreateHttp;
|
|
35
|
+
const createEgressFetch = deps.createEgressFetch ?? defaultCreateEgressFetch;
|
|
36
|
+
const guardEgressFetch = deps.guardEgressFetch ?? defaultGuardEgressFetch;
|
|
37
|
+
const extract = deps.extract ?? defaultExtract;
|
|
38
|
+
const config = resolveConfig({
|
|
39
|
+
cwd: options.cwd,
|
|
40
|
+
env: options.env,
|
|
41
|
+
globalPath: options.globalPath,
|
|
42
|
+
});
|
|
43
|
+
const backend = getBackend(config.backend, config);
|
|
44
|
+
// A backend that provides its own `/extract` (tavily-compat) OVERRIDES the
|
|
45
|
+
// distilly Extractor; it is handed only the proxied http helper (built from
|
|
46
|
+
// the SAME dispatcher as the egress fetch), so it cannot bypass egress.
|
|
47
|
+
if (backend.fetch) {
|
|
48
|
+
const http = createHttp(buildDispatcher(config));
|
|
49
|
+
const backendFetch = backend.fetch.bind(backend);
|
|
50
|
+
return runAll(urls, (url) => backendFetch(url, http, { size: options.size, signal: options.signal }));
|
|
51
|
+
}
|
|
52
|
+
// Default path: distilly Extractor over webveil's egress. Build the
|
|
53
|
+
// egress-bound fetch ONCE, wrap it with the SSRF guard, and inject THAT into
|
|
54
|
+
// distilly (never a global fetch). The guard covers distilly's rule-rewritten
|
|
55
|
+
// requests too. A configured-but-unbuildable proxy throws at build time
|
|
56
|
+
// (fail-loud), before any I/O.
|
|
57
|
+
const guardedFetch = guardEgressFetch(createEgressFetch(config), config);
|
|
58
|
+
const extractDeps = { createEgressFetch: () => guardedFetch };
|
|
59
|
+
return runAll(urls, (url) => extract(url, config, { size: options.size }, extractDeps));
|
|
60
|
+
}
|
|
61
|
+
/** Run a per-url worker over the list in order, collecting the results. */
|
|
62
|
+
async function runAll(urls, work) {
|
|
63
|
+
const out = [];
|
|
64
|
+
for (const url of urls)
|
|
65
|
+
out.push(await work(url));
|
|
66
|
+
return out;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Fetch ONE url to clean, size-bounded markdown (`{ markdown, truncated, … }`).
|
|
70
|
+
* A thin single-URL wrapper over the list-ready `fetchAll` (story 12).
|
|
71
|
+
*/
|
|
72
|
+
export async function fetch(url, options = {}, deps = {}) {
|
|
73
|
+
const [result] = await fetchAll([url], options, deps);
|
|
74
|
+
return result;
|
|
75
|
+
}
|
|
76
|
+
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../src/core/fetch.ts"],"names":[],"mappings":"AAAA,gFAAgF;AAChF,gFAAgF;AAChF,+BAA+B;AAC/B,EAAE;AACF,sEAAsE;AACtE,0EAA0E;AAC1E,6EAA6E;AAC7E,+EAA+E;AAC/E,iEAAiE;AACjE,EAAE;AACF,8EAA8E;AAC9E,gFAAgF;AAChF,+EAA+E;AAE/E,OAAO,EAAC,aAAa,IAAI,oBAAoB,EAAC,MAAM,aAAa,CAAC;AAElE,OAAO,EAAC,iBAAiB,IAAI,wBAAwB,EAAC,MAAM,aAAa,CAAC;AAE1E,OAAO,EAAC,gBAAgB,IAAI,uBAAuB,EAAC,MAAM,eAAe,CAAC;AAC1E,OAAO,EAAC,UAAU,IAAI,iBAAiB,EAAC,MAAM,WAAW,CAAC;AAC1D,OAAO,EAAC,eAAe,IAAI,sBAAsB,EAAC,MAAM,aAAa,CAAC;AAEtE,OAAO,EAAC,OAAO,IAAI,cAAc,EAAC,MAAM,cAAc,CAAC;AAEvD,OAAO,EAAC,UAAU,IAAI,iBAAiB,EAAC,MAAM,wBAAwB,CAAC;AAiCvE;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC7B,IAAc,EACd,UAA4B,EAAE,EAC9B,OAAkB,EAAE;IAEpB,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,oBAAoB,CAAC;IACjE,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,iBAAiB,CAAC;IACxD,MAAM,eAAe,GAAG,IAAI,CAAC,eAAe,IAAI,sBAAsB,CAAC;IACvE,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,iBAAiB,CAAC;IACxD,MAAM,iBAAiB,GAAG,IAAI,CAAC,iBAAiB,IAAI,wBAAwB,CAAC;IAC7E,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,uBAAuB,CAAC;IAC1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,cAAc,CAAC;IAE/C,MAAM,MAAM,GAAG,aAAa,CAAC;QAC5B,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,UAAU,EAAE,OAAO,CAAC,UAAU;KAC9B,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAEnD,2EAA2E;IAC3E,4EAA4E;IAC5E,wEAAwE;IACxE,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACnB,MAAM,IAAI,GAAG,UAAU,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACjD,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAC3B,YAAY,CAAC,GAAG,EAAE,IAAI,EAAE,EAAC,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAC,CAAC,CACrE,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,6EAA6E;IAC7E,8EAA8E;IAC9E,wEAAwE;IACxE,+BAA+B;IAC/B,MAAM,YAAY,GAAG,gBAAgB,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;IACzE,MAAM,WAAW,GAAgB,EAAC,iBAAiB,EAAE,GAAG,EAAE,CAAC,YAAY,EAAC,CAAC;IACzE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAC3B,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,EAAC,
IAAI,EAAE,OAAO,CAAC,IAAI,EAAC,EAAE,WAAW,CAAC,CACvD,CAAC;AACH,CAAC;AAED,2EAA2E;AAC3E,KAAK,UAAU,MAAM,CACpB,IAAc,EACd,IAA2C;IAE3C,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,KAAK,MAAM,GAAG,IAAI,IAAI;QAAE,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAClD,OAAO,GAAG,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAC1B,GAAW,EACX,UAA4B,EAAE,EAC9B,OAAkB,EAAE;IAEpB,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,QAAQ,CAAC,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACtD,OAAO,MAAO,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { type Dispatcher } from 'undici';
|
|
2
|
+
import type { Http } from './backends/types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Build the proxied http helper over a given dispatcher. Both methods throw on a
|
|
5
|
+
* non-2xx response so a backend never silently consumes an error body.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Build the `Http` helper (`fetchJson` / `fetchText`) whose requests all go
 * through `dispatcher`.
 *
 * Each request is aborted after `options.timeoutMs` (30 000 ms when omitted) or
 * when `options.signal` aborts. The timeout timer is cleared as soon as the
 * response arrives, so it bounds the wait for the response, not the body read.
 *
 * @param dispatcher - Dispatcher passed to every request; `undefined` is
 *   forwarded as-is.
 * @returns An `Http` whose methods reject with an `Error` on a non-2xx response.
 */
export declare function createHttp(dispatcher: Dispatcher | undefined): Http;
|
|
8
|
+
//# sourceMappingURL=http.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http.d.ts","sourceRoot":"","sources":["../../src/core/http.ts"],"names":[],"mappings":"AAKA,OAAO,EAAC,KAAK,UAAU,EAAuB,MAAM,QAAQ,CAAC;AAC7D,OAAO,KAAK,EAAC,IAAI,EAAqB,MAAM,qBAAqB,CAAC;AA8BlE;;;GAGG;AACH,wBAAgB,UAAU,CAAC,UAAU,EAAE,UAAU,GAAG,SAAS,GAAG,IAAI,CAqBnE"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// http helper — the proxied `http` handed to backends. fetchJson / fetchText
|
|
2
|
+
// apply the egress dispatcher + a per-request timeout + abort. Distinct from the
|
|
3
|
+
// egress-bound WHATWG `fetch` (egress.ts), but bound to the SAME dispatcher, so
|
|
4
|
+
// a backend physically cannot bypass the configured egress.
|
|
5
|
+
import { fetch as undiciFetch } from 'undici';
|
|
6
|
+
const DEFAULT_TIMEOUT_MS = 30_000;
|
|
7
|
+
async function request(dispatcher, url, options = {}) {
|
|
8
|
+
const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
9
|
+
const controller = new AbortController();
|
|
10
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
11
|
+
if (options.signal)
|
|
12
|
+
options.signal.addEventListener('abort', () => controller.abort(), {
|
|
13
|
+
once: true,
|
|
14
|
+
});
|
|
15
|
+
try {
|
|
16
|
+
const res = await undiciFetch(url, {
|
|
17
|
+
method: options.method,
|
|
18
|
+
headers: options.headers,
|
|
19
|
+
body: options.body,
|
|
20
|
+
signal: controller.signal,
|
|
21
|
+
dispatcher,
|
|
22
|
+
});
|
|
23
|
+
return res;
|
|
24
|
+
}
|
|
25
|
+
finally {
|
|
26
|
+
clearTimeout(timer);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Build the proxied http helper over a given dispatcher. Both methods throw on a
|
|
31
|
+
* non-2xx response so a backend never silently consumes an error body.
|
|
32
|
+
*/
|
|
33
|
+
export function createHttp(dispatcher) {
|
|
34
|
+
return {
|
|
35
|
+
async fetchJson(url, options) {
|
|
36
|
+
const res = await request(dispatcher, url, options);
|
|
37
|
+
if (!res.ok)
|
|
38
|
+
throw new Error(`http ${res.status} ${res.statusText} for ${url}`);
|
|
39
|
+
return (await res.json());
|
|
40
|
+
},
|
|
41
|
+
async fetchText(url, options) {
|
|
42
|
+
const res = await request(dispatcher, url, options);
|
|
43
|
+
if (!res.ok)
|
|
44
|
+
throw new Error(`http ${res.status} ${res.statusText} for ${url}`);
|
|
45
|
+
return await res.text();
|
|
46
|
+
},
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
//# sourceMappingURL=http.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http.js","sourceRoot":"","sources":["../../src/core/http.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAC7E,iFAAiF;AACjF,gFAAgF;AAChF,4DAA4D;AAE5D,OAAO,EAAkB,KAAK,IAAI,WAAW,EAAC,MAAM,QAAQ,CAAC;AAG7D,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC,KAAK,UAAU,OAAO,CACrB,UAAkC,EAClC,GAAW,EACX,UAA8B,EAAE;IAEhC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;IAC1D,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,SAAS,CAAC,CAAC;IAC9D,IAAI,OAAO,CAAC,MAAM;QACjB,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE;YAClE,IAAI,EAAE,IAAI;SACV,CAAC,CAAC;IACJ,IAAI,CAAC;QACJ,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YAClC,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,UAAU;SACD,CAAC,CAAC;QACZ,OAAO,GAA0B,CAAC;IACnC,CAAC;YAAS,CAAC;QACV,YAAY,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,UAAkC;IAC5D,OAAO;QACN,KAAK,CAAC,SAAS,CACd,GAAW,EACX,OAA4B;YAE5B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;YACpD,IAAI,CAAC,GAAG,CAAC,EAAE;gBACV,MAAM,IAAI,KAAK,CAAC,QAAQ,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,QAAQ,GAAG,EAAE,CAAC,CAAC;YACpE,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAM,CAAC;QAChC,CAAC;QACD,KAAK,CAAC,SAAS,CACd,GAAW,EACX,OAA4B;YAE5B,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;YACpD,IAAI,CAAC,GAAG,CAAC,EAAE;gBACV,MAAM,IAAI,KAAK,CAAC,QAAQ,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,QAAQ,GAAG,EAAE,CAAC,CAAC;YACpE,OAAO,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QACzB,CAAC;KACD,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Config, ResolveOptions } from './config.js';
|
|
2
|
+
import type { Dispatcher } from './egress.js';
|
|
3
|
+
import type { Http, SearchOptions, SearchResult } from './backends/types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Collaborators, seamed so the core is testable WITHOUT real config files,
|
|
6
|
+
* undici, or network: a test injects a fake `getBackend`/`createHttp` to assert
|
|
7
|
+
* the backend is handed only the proxied helper, and a fake backend returning
|
|
8
|
+
* duplicate/over-limit hits to assert dedup + clamp. Defaults wire the real
|
|
9
|
+
* config/egress/http/registry modules.
|
|
10
|
+
*/
|
|
11
|
+
// Every member is optional.
// NOTE(review): the search implementation is not visible from this declaration
// file — presumably each omitted member falls back to the real module; confirm
// against search.js.
export interface SearchDeps {
|
|
12
|
+
resolveConfig?: (options?: ResolveOptions) => Config;
|
|
13
|
+
buildDispatcher?: (config: Config) => Dispatcher | undefined;
|
|
14
|
+
createHttp?: (dispatcher: Dispatcher | undefined) => Http;
|
|
15
|
+
// The return type is structural: only a `search` method is required, so a test fake need not be a full backend.
getBackend?: (name: string, config: Config) => {
|
|
16
|
+
search: (query: string, http: Http, options?: SearchOptions) => Promise<SearchResult[]>;
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
/** Per-call search options plus the config-resolution knobs (cwd/env/global). */
|
|
20
|
+
// Adds no members of its own: the shape is exactly `SearchOptions` merged with
// `ResolveOptions`.
// NOTE(review): which of these fields `search` actually reads is not visible
// from this declaration file — confirm against search.js.
export interface SearchCoreOptions extends SearchOptions, ResolveOptions {
|
|
20
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Search the configured backend over the configured egress and return
|
|
24
|
+
* normalized `SearchResult[]` (deduped by url, then clamped to `maxResults`).
|
|
25
|
+
*
|
|
26
|
+
* Dedup runs BEFORE the clamp so the caller gets up to `maxResults` UNIQUE hits,
|
|
27
|
+
* not a window that duplicates eat into; for the same reason the backend is NOT
|
|
28
|
+
* asked to pre-clamp (only the abort signal is forwarded).
|
|
29
|
+
*/
|
|
30
|
+
/**
 * Search the configured backend and resolve with normalized `SearchResult[]`.
 *
 * NOTE(review): the implementation is not visible from this declaration file.
 * Per the notes shipped with this declaration, results are deduped by url and
 * then clamped to `maxResults` — confirm against search.js.
 *
 * @param query - The search query string.
 * @param options - Per-call search options plus the config-resolution knobs.
 * @param deps - Optional collaborator overrides, intended for tests.
 * @returns The search results.
 */
export declare function search(query: string, options?: SearchCoreOptions, deps?: SearchDeps): Promise<SearchResult[]>;
|
|
31
|
+
//# sourceMappingURL=search.d.ts.map
|