@pentoshi/clai 0.10.4 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -0
- package/dist/agent/runner.js +41 -3
- package/dist/agent/runner.js.map +1 -1
- package/dist/commands/providers.js +28 -0
- package/dist/commands/providers.js.map +1 -1
- package/dist/commands/search-providers.d.ts +50 -0
- package/dist/commands/search-providers.js +134 -0
- package/dist/commands/search-providers.js.map +1 -0
- package/dist/commands/update.js +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/provider.js +9 -6
- package/dist/llm/provider.js.map +1 -1
- package/dist/prompts/index.d.ts +1 -1
- package/dist/prompts/index.js +6 -0
- package/dist/prompts/index.js.map +1 -1
- package/dist/repl.d.ts +1 -0
- package/dist/repl.js +139 -113
- package/dist/repl.js.map +1 -1
- package/dist/safety/classifier.js +40 -0
- package/dist/safety/classifier.js.map +1 -1
- package/dist/store/config.d.ts +5 -0
- package/dist/store/config.js +7 -0
- package/dist/store/config.js.map +1 -1
- package/dist/store/keys.d.ts +65 -0
- package/dist/store/keys.js +164 -28
- package/dist/store/keys.js.map +1 -1
- package/dist/tools/http.d.ts +12 -1
- package/dist/tools/http.js +8 -43
- package/dist/tools/http.js.map +1 -1
- package/dist/tools/registry.js +52 -0
- package/dist/tools/registry.js.map +1 -1
- package/dist/tools/shell.d.ts +25 -0
- package/dist/tools/shell.js +155 -6
- package/dist/tools/shell.js.map +1 -1
- package/dist/tools/web/audit.d.ts +154 -0
- package/dist/tools/web/audit.js +147 -0
- package/dist/tools/web/audit.js.map +1 -0
- package/dist/tools/web/budget.d.ts +76 -0
- package/dist/tools/web/budget.js +187 -0
- package/dist/tools/web/budget.js.map +1 -0
- package/dist/tools/web/capture.d.ts +201 -0
- package/dist/tools/web/capture.js +380 -0
- package/dist/tools/web/capture.js.map +1 -0
- package/dist/tools/web/fetch-core.d.ts +66 -0
- package/dist/tools/web/fetch-core.js +1123 -0
- package/dist/tools/web/fetch-core.js.map +1 -0
- package/dist/tools/web/fetch.d.ts +42 -0
- package/dist/tools/web/fetch.js +115 -0
- package/dist/tools/web/fetch.js.map +1 -0
- package/dist/tools/web/providers/brave.d.ts +46 -0
- package/dist/tools/web/providers/brave.js +263 -0
- package/dist/tools/web/providers/brave.js.map +1 -0
- package/dist/tools/web/providers/duckduckgo.d.ts +47 -0
- package/dist/tools/web/providers/duckduckgo.js +248 -0
- package/dist/tools/web/providers/duckduckgo.js.map +1 -0
- package/dist/tools/web/providers/provider.d.ts +99 -0
- package/dist/tools/web/providers/provider.js +38 -0
- package/dist/tools/web/providers/provider.js.map +1 -0
- package/dist/tools/web/providers/tavily.d.ts +52 -0
- package/dist/tools/web/providers/tavily.js +285 -0
- package/dist/tools/web/providers/tavily.js.map +1 -0
- package/dist/tools/web/readable.d.ts +67 -0
- package/dist/tools/web/readable.js +248 -0
- package/dist/tools/web/readable.js.map +1 -0
- package/dist/tools/web/redact.d.ts +120 -0
- package/dist/tools/web/redact.js +155 -0
- package/dist/tools/web/redact.js.map +1 -0
- package/dist/tools/web/search.d.ts +51 -0
- package/dist/tools/web/search.js +389 -0
- package/dist/tools/web/search.js.map +1 -0
- package/dist/tools/web/ssrf-guard.d.ts +85 -0
- package/dist/tools/web/ssrf-guard.js +265 -0
- package/dist/tools/web/ssrf-guard.js.map +1 -0
- package/dist/tools/web/types.d.ts +331 -0
- package/dist/tools/web/types.js +71 -0
- package/dist/tools/web/types.js.map +1 -0
- package/dist/ui/keys.js +3 -2
- package/dist/ui/keys.js.map +1 -1
- package/dist/ui/spinner.js +87 -14
- package/dist/ui/spinner.js.map +1 -1
- package/package.json +3 -1
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tavily search-provider adapter for `web.search`.
|
|
3
|
+
*
|
|
4
|
+
* Implements the {@link SearchProvider} contract from `./provider.ts` and
|
|
5
|
+
* registers itself in the {@link searchProviders} registry on import. The
|
|
6
|
+
* adapter performs exactly one outbound HTTPS request per invocation
|
|
7
|
+
* (Requirement 6.7), forwards the caller-provided {@link AbortSignal} to
|
|
8
|
+
* the underlying transport so the 15-second `web.search` timeout is
|
|
9
|
+
* honored (Requirement 1.8), and returns a {@link RawProviderResponse}
|
|
10
|
+
* describing the HTTP outcome plus the raw hit list.
|
|
11
|
+
*
|
|
12
|
+
* Status-to-error-kind classification (`401/403 → auth`, `429 → rate-limit`,
|
|
13
|
+
* `5xx → server`, non-JSON → `parse`, other non-2xx → `http`) is the
|
|
14
|
+
* responsibility of the `web.search` handler; this adapter only exposes
|
|
15
|
+
* the raw HTTP `status` and the parsed (or `parseError`-flagged) hit list
|
|
16
|
+
* so that mapping can be applied uniformly across providers (Requirements
|
|
17
|
+
* 6.1, 6.2, 6.5, 6.6).
|
|
18
|
+
*
|
|
19
|
+
* Endpoint and request shape match the design's "Per-provider notes →
|
|
20
|
+
* Tavily" section (`.kiro/specs/web-search-and-fetch/design.md`):
|
|
21
|
+
*
|
|
22
|
+
* - POST `https://api.tavily.com/search`
|
|
23
|
+
* - Body: `{ api_key, query, max_results, search_depth: "basic" }`
|
|
24
|
+
* where `max_results` is clamped to `[1..20]` defensively.
|
|
25
|
+
* - Response: `{ results: [{ title, url, content }] }` mapped into
|
|
26
|
+
* `SearchResult { title, url, snippet }`.
|
|
27
|
+
*/
|
|
28
|
+
import { Buffer } from "node:buffer";
|
|
29
|
+
import https from "node:https";
|
|
30
|
+
import { searchProviders, } from "./provider.js";
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Constants
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
/** Tavily search endpoint (host + path). */
|
|
35
|
+
const TAVILY_HOST = "api.tavily.com";
|
|
36
|
+
const TAVILY_PATH = "/search";
|
|
37
|
+
/**
|
|
38
|
+
* `max_results` accepted by Tavily. The provider's documented range
|
|
39
|
+
* already matches the `maxResults` clamp the `web.search` handler
|
|
40
|
+
* enforces; we re-clamp here so the adapter is self-consistent if
|
|
41
|
+
* invoked directly (e.g. from a unit test) with an out-of-range value.
|
|
42
|
+
*/
|
|
43
|
+
const TAVILY_MIN_RESULTS = 1;
|
|
44
|
+
const TAVILY_MAX_RESULTS = 20;
|
|
45
|
+
/**
|
|
46
|
+
* Search depth passed to Tavily. The design specifies `"basic"` for
|
|
47
|
+
* cost predictability; users who want deeper retrieval can layer that
|
|
48
|
+
* on later via per-provider configuration.
|
|
49
|
+
*/
|
|
50
|
+
const TAVILY_SEARCH_DEPTH = "basic";
|
|
51
|
+
/** User-Agent sent on outbound Tavily requests. */
|
|
52
|
+
const DEFAULT_USER_AGENT = "clai-web-search/1.0";
|
|
53
|
+
/**
|
|
54
|
+
* Hard cap on the number of body bytes we read from the provider before
|
|
55
|
+
* giving up and surfacing a `parse` error. Tavily responses for 20 hits
|
|
56
|
+
* are well under 100 KiB; this cap is defensive against a misbehaving
|
|
57
|
+
* upstream that streams an unbounded body.
|
|
58
|
+
*/
|
|
59
|
+
const MAX_RESPONSE_BYTES = 1_048_576; // 1 MiB
|
|
60
|
+
let httpsRequestFn = https.request;
/**
 * Test-only hook that replaces the HTTPS transport used by this adapter.
 *
 * Passing a function makes every subsequent request go through it;
 * passing `null` or `undefined` restores the real `https.request`.
 *
 * @param {typeof https.request | null | undefined} fn - Replacement
 *   `request` implementation, or a nullish value to reset.
 * @returns {void}
 */
export function __setTavilyHttpsRequestForTesting(fn) {
    if (fn === null || fn === undefined) {
        httpsRequestFn = https.request;
        return;
    }
    httpsRequestFn = fn;
}
|
|
69
|
+
/**
 * Normalize a requested result count into the inclusive range
 * `[TAVILY_MIN_RESULTS, TAVILY_MAX_RESULTS]`.
 *
 * The fractional part is discarded (truncation toward zero) before
 * clamping. Anything `Number.isFinite` rejects — `NaN`, `±Infinity`,
 * or a non-number — yields the minimum.
 *
 * @param {number} count - Requested number of results.
 * @returns {number} An integer within the supported range.
 */
function clampMaxResults(count) {
    // No sensible integer form exists for non-finite input; use the floor.
    if (!Number.isFinite(count)) {
        return TAVILY_MIN_RESULTS;
    }
    const whole = Math.trunc(count);
    return Math.min(TAVILY_MAX_RESULTS, Math.max(TAVILY_MIN_RESULTS, whole));
}
|
|
84
|
+
/**
 * Collect a response stream into one UTF-8 string.
 *
 * The returned promise settles exactly once:
 *  - resolves with the concatenated body when the stream emits `end`;
 *  - rejects when more than {@link MAX_RESPONSE_BYTES} bytes arrive
 *    (the stream is destroyed first);
 *  - rejects when the stream emits `error`;
 *  - rejects when `signal` aborts (or is already aborted on entry), using
 *    `signal.reason` if it is an `Error` and a generic `"aborted"` error
 *    otherwise. The stream is destroyed in that case as well.
 *
 * @param {import("node:http").IncomingMessage} res - Response stream to drain.
 * @param {AbortSignal} signal - Cancellation signal supplied by the caller.
 * @returns {Promise<string>} The decoded response body.
 */
function readBody(res, signal) {
    return new Promise((resolve, reject) => {
        const parts = [];
        let received = 0;
        // Set once a failure path has fired; later stream events are ignored.
        let settled = false;

        const handleAbort = () => {
            if (settled) {
                return;
            }
            settled = true;
            res.destroy(new Error("aborted"));
            const reason = signal.reason;
            reject(reason instanceof Error ? reason : new Error("aborted"));
        };

        // Already cancelled: fail immediately without attaching stream listeners.
        if (signal.aborted) {
            handleAbort();
            return;
        }
        signal.addEventListener("abort", handleAbort, { once: true });

        const detach = () => {
            signal.removeEventListener("abort", handleAbort);
        };

        res.on("data", (chunk) => {
            if (settled) {
                return;
            }
            received += chunk.length;
            if (received > MAX_RESPONSE_BYTES) {
                settled = true;
                detach();
                res.destroy();
                reject(new Error("response body exceeded 1 MiB cap"));
                return;
            }
            parts.push(chunk);
        });

        res.on("end", () => {
            if (settled) {
                return;
            }
            detach();
            resolve(Buffer.concat(parts).toString("utf8"));
        });

        res.on("error", (err) => {
            if (settled) {
                return;
            }
            settled = true;
            detach();
            reject(err);
        });
    });
}
|
|
137
|
+
/**
 * Send one HTTPS POST to the Tavily search endpoint and wait for the
 * full response body.
 *
 * The API key travels in the JSON body (`api_key`) rather than in a
 * header, alongside the query, the result count, and the fixed
 * `search_depth` value. The caller's `signal` is handed both to the
 * transport and to {@link readBody}.
 *
 * @param {string} query - Search query text.
 * @param {number} maxResults - Result count sent as `max_results`.
 * @param {string} apiKey - Tavily API key sent as `api_key`.
 * @param {AbortSignal} signal - Cancellation signal for the request.
 * @returns {Promise<{status: number, body: string}>} HTTP status
 *   (`0` when the transport reports none) and the raw response text.
 *   Rejects when the transport throws or emits `error`, or when
 *   reading the body fails.
 */
function dispatchRequest(query, maxResults, apiKey, signal) {
    const bodyBytes = Buffer.from(JSON.stringify({
        api_key: apiKey,
        query,
        max_results: maxResults,
        search_depth: TAVILY_SEARCH_DEPTH,
    }), "utf8");

    const requestOptions = {
        method: "POST",
        host: TAVILY_HOST,
        path: TAVILY_PATH,
        signal,
        headers: {
            accept: "application/json",
            "content-type": "application/json",
            // Byte length, not character count, of the encoded payload.
            "content-length": String(bodyBytes.length),
            "user-agent": DEFAULT_USER_AGENT,
        },
    };

    return new Promise((resolve, reject) => {
        const onResponse = (res) => {
            const status = res.statusCode ?? 0;
            readBody(res, signal).then((body) => resolve({ status, body }), (err) => reject(err));
        };

        let req;
        try {
            req = httpsRequestFn(requestOptions, onResponse);
        }
        catch (err) {
            // The transport threw synchronously (e.g. invalid options).
            reject(err);
            return;
        }

        req.on("error", (err) => {
            reject(err);
        });
        req.write(bodyBytes);
        req.end();
    });
}
|
|
186
|
+
/**
 * Convert a parsed Tavily response into the adapter's raw hit list.
 *
 * Each object entry of `parsed.results` becomes one hit. Only
 * string-valued fields are copied: `title` → `title`, `url` → `url`,
 * `content` → `snippet`. Fields of any other type are left off the hit,
 * so a hit may be an empty object. Entries that are not objects are
 * dropped. No further validation happens here.
 *
 * @param {unknown} parsed - Result of `JSON.parse` on the response body.
 * @returns {Array<{title?: string, url?: string, snippet?: string}> | null}
 *   The mapped hits, or `null` when `parsed` is not an object carrying a
 *   `results` array.
 */
function extractHits(parsed) {
    const isObjectLike = (value) => Boolean(value) && typeof value === "object";
    if (!isObjectLike(parsed)) {
        return null;
    }
    const results = parsed.results;
    if (!Array.isArray(results)) {
        return null;
    }
    // [source field on the Tavily entry, target field on the hit]
    const fieldMap = [
        ["title", "title"],
        ["url", "url"],
        ["content", "snippet"],
    ];
    return results
        .filter((entry) => isObjectLike(entry))
        .map((entry) => {
            const hit = {};
            for (const [from, to] of fieldMap) {
                if (typeof entry[from] === "string") {
                    hit[to] = entry[from];
                }
            }
            return hit;
        });
}
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
// Provider definition
|
|
223
|
+
// ---------------------------------------------------------------------------
|
|
224
|
+
/**
 * Tavily search provider.
 *
 * `search` issues at most one request through {@link dispatchRequest}
 * and reports the outcome as `{ status, hits, parseError? }`:
 *
 *  - no API key        → `status: 0`, empty hits, `parseError: "missing api key"`
 *                        (no request is dispatched);
 *  - non-2xx status    → that status with empty hits;
 *  - 2xx, invalid JSON → that status, empty hits, and a `parseError`
 *                        describing the JSON failure;
 *  - 2xx, no `results` array → that status, empty hits, and a `parseError`;
 *  - otherwise         → that status and the mapped hits.
 *
 * Turning the status into an error kind is left to the caller. Errors
 * thrown by {@link dispatchRequest} are not caught here.
 */
export const tavilyProvider = {
    id: "tavily",
    displayName: "Tavily",
    needsApiKey: true,
    envVar: "TAVILY_API_KEY",
    async search(query, maxResults, auth, signal) {
        // Never send an unauthenticated request: bail out before any I/O.
        const apiKey = auth.apiKey;
        if (!apiKey) {
            return {
                status: 0,
                hits: [],
                parseError: "missing api key",
            };
        }

        const resultCount = clampMaxResults(maxResults);
        const { status, body } = await dispatchRequest(query, resultCount, apiKey, signal);

        const isNon2xx = status < 200 || status >= 300;
        if (isNon2xx) {
            return { status, hits: [] };
        }

        // Shape shared by both "the body was unusable" outcomes.
        const parseFailure = (parseError) => ({ status, hits: [], parseError });

        let parsed;
        try {
            parsed = JSON.parse(body);
        }
        catch (err) {
            return parseFailure(err instanceof Error
                ? `non-JSON response: ${err.message}`
                : "non-JSON response");
        }

        const hits = extractHits(parsed);
        return hits === null
            ? parseFailure("missing results array in Tavily response")
            : { status, hits };
    },
};
|
|
282
|
+
// Register on import so `searchProviders.tavily` is populated by the
|
|
283
|
+
// time the `web.search` handler dispatches.
|
|
284
|
+
searchProviders.tavily = tavilyProvider;
|
|
285
|
+
//# sourceMappingURL=tavily.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tavily.js","sourceRoot":"","sources":["../../../../src/tools/web/providers/tavily.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,KAAK,MAAM,YAAY,CAAC;AAG/B,OAAO,EACL,eAAe,GAGhB,MAAM,eAAe,CAAC;AAEvB,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E,4CAA4C;AAC5C,MAAM,WAAW,GAAG,gBAAgB,CAAC;AACrC,MAAM,WAAW,GAAG,SAAS,CAAC;AAE9B;;;;;GAKG;AACH,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAE9B;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,OAAO,CAAC;AAEpC,mDAAmD;AACnD,MAAM,kBAAkB,GAAG,qBAAqB,CAAC;AAEjD;;;;;GAKG;AACH,MAAM,kBAAkB,GAAG,SAAS,CAAC,CAAC,QAAQ;AAiB9C,IAAI,cAAc,GAAmB,KAAK,CAAC,OAAO,CAAC;AAEnD;;;;GAIG;AACH,MAAM,UAAU,iCAAiC,CAC/C,EAA8B;IAE9B,cAAc,GAAG,EAAE,IAAI,KAAK,CAAC,OAAO,CAAC;AACvC,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,kBAAkB,CAAC;IACvD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAClC,IAAI,OAAO,GAAG,kBAAkB;QAAE,OAAO,kBAAkB,CAAC;IAC5D,IAAI,OAAO,GAAG,kBAAkB;QAAE,OAAO,kBAAkB,CAAC;IAC5D,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;GAKG;AACH,SAAS,QAAQ,CAAC,GAAoB,EAAE,MAAmB;IACzD,OAAO,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC7C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,OAAO,GAAG,GAAS,EAAE;YACzB,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,GAAG,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;YAClC,MAAM,CACJ,MAAM,CAAC,MAAM,YAAY,KAAK;gBAC5B,CAAC,CAAC,MAAM,CAAC,MAAM;gBACf,CAAC,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CACzB,CAAC;QACJ,CAAC,CAAC;QAEF,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,OAAO,EAAE,CAAC;YACV,OAAO;QACT,CAAC;QACD,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QAE1D,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YAC/B,IAAI,OAAO;gBAAE,OAAO;YACpB,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC;YACtB,IAAI,KAAK,GAAG,kBAAkB,EAAE,CAAC;gBAC/B,OAAO,GAAG,IAAI,CAAC;gBACf,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;gBAC7C,GAAG,CAAC,OAAO,EAAE,CAAC;gBACd,MAAM,CAAC,IAAI,KAA
K,CAAC,kCAAkC,CAAC,CAAC,CAAC;gBACtD,OAAO;YACT,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;QACH,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;YACjB,IAAI,OAAO;gBAAE,OAAO;YACpB,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAC7C,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QACH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACtB,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YACf,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAC7C,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;GAMG;AACH,SAAS,eAAe,CACtB,KAAa,EACb,UAAkB,EAClB,MAAc,EACd,MAAmB;IAEnB,kEAAkE;IAClE,iEAAiE;IACjE,gEAAgE;IAChE,6CAA6C;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC;QAC7B,OAAO,EAAE,MAAM;QACf,KAAK;QACL,WAAW,EAAE,UAAU;QACvB,YAAY,EAAE,mBAAmB;KAClC,CAAC,CAAC;IACH,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAE/C,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,IAAI,GAAkB,CAAC;QACvB,IAAI,CAAC;YACH,GAAG,GAAG,cAAc,CAClB;gBACE,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,WAAW;gBACjB,MAAM;gBACN,OAAO,EAAE;oBACP,MAAM,EAAE,kBAAkB;oBAC1B,cAAc,EAAE,kBAAkB;oBAClC,gBAAgB,EAAE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC;oBAC1C,YAAY,EAAE,kBAAkB;iBACjC;aACF,EACD,CAAC,GAAG,EAAE,EAAE;gBACN,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC;gBACnC,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,CACxB,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EACnC,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CACrB,CAAC;YACJ,CAAC,CACF,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,GAAG,CAAC,CAAC;YACZ,OAAO;QACT,CAAC;QAED,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACtB,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACrB,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,WAAW,CAClB,MAAe;IAEf,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IACvD,MAAM,OAAO,GAAI,MAAgC,CAAC,OAAO,CAAC;IAC1D,IAAI,CAAC,KAAK,CAAC,OAAO,C
AAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAEzC,MAAM,IAAI,GAAgC,EAAE,CAAC;IAC7C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,SAAS;QAClD,MAAM,CAAC,GAAG,KAIT,CAAC;QACF,MAAM,GAAG,GAAwC,EAAE,CAAC;QACpD,IAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;YAAE,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;QACrD,IAAI,OAAO,CAAC,CAAC,GAAG,KAAK,QAAQ;YAAE,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC;QAC/C,IAAI,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ;YAAE,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC;QAC3D,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,CAAC,MAAM,cAAc,GAAmB;IAC5C,EAAE,EAAE,QAAQ;IACZ,WAAW,EAAE,QAAQ;IACrB,WAAW,EAAE,IAAI;IACjB,MAAM,EAAE,gBAAgB;IAExB,KAAK,CAAC,MAAM,CACV,KAAa,EACb,UAAkB,EAClB,IAAyB,EACzB,MAAmB;QAEnB,gEAAgE;QAChE,iEAAiE;QACjE,gEAAgE;QAChE,yDAAyD;QACzD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,OAAO;gBACL,MAAM,EAAE,CAAC;gBACT,IAAI,EAAE,EAAE;gBACR,UAAU,EAAE,iBAAiB;aAC9B,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;QAC5C,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,eAAe,CAC5C,KAAK,EACL,OAAO,EACP,IAAI,CAAC,MAAM,EACX,MAAM,CACP,CAAC;QAEF,iEAAiE;QACjE,wDAAwD;QACxD,+DAA+D;QAC/D,+BAA+B;QAC/B,IAAI,MAAM,GAAG,GAAG,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;YAClC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;QAC9B,CAAC;QAED,gEAAgE;QAChE,gEAAgE;QAChE,+DAA+D;QAC/D,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,MAAM;gBACN,IAAI,EAAE,EAAE;gBACR,UAAU,EACR,GAAG,YAAY,KAAK;oBAClB,CAAC,CAAC,sBAAsB,GAAG,CAAC,OAAO,EAAE;oBACrC,CAAC,CAAC,mBAAmB;aAC1B,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QACjC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YAClB,OAAO;gBACL,MAAM;gBACN,IAAI,EAAE,EAAE;gBACR,UAAU,EAAE,0CAA0C;aACvD,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;IAC1B,CAAC;CACF,CAAC;AAEF,qEAAqE;AACrE,4CAA4C;AAC5C,eAAe,CAAC,MAAM,GAAG,cAAc,CAAC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML-to-readable-text conversion and a permissive `Set-Cookie` parser.
|
|
3
|
+
*
|
|
4
|
+
* Two helpers live here so both `web.fetch` (via `capture.ts`) and the
|
|
5
|
+
* DuckDuckGo provider's lite-HTML adapter share one implementation:
|
|
6
|
+
*
|
|
7
|
+
* - {@link toReadableText} strips obvious chrome and non-rendering content
|
|
8
|
+
* from an HTML document and returns the visible prose with whitespace
|
|
9
|
+
* collapsed. It satisfies Requirements 2.4, 2.5, and 2.28 and follows
|
|
10
|
+
* the design's "HTML-to-readable-text strategy" (cheerio-based, no
|
|
11
|
+
* browser/jsdom dependency).
|
|
12
|
+
* - {@link parseSetCookie} parses one `Set-Cookie` header value into a
|
|
13
|
+
* {@link CookieInfo}, supporting only the public attributes the
|
|
14
|
+
* `web.fetch` tool surfaces. It is regex-driven and intentionally
|
|
15
|
+
* permissive: missing or malformed attributes are simply absent in the
|
|
16
|
+
* returned object instead of producing a hard error.
|
|
17
|
+
*/
|
|
18
|
+
import type { CookieInfo } from "./types.js";
|
|
19
|
+
/**
|
|
20
|
+
* Convert an HTML document into a single readable text string.
|
|
21
|
+
*
|
|
22
|
+
* The conversion:
|
|
23
|
+
* 1. Parses `html` with cheerio (no DOM/browser dependency).
|
|
24
|
+
* 2. Removes `<script>`, `<style>`, `<noscript>`, `<nav>`, `<header>`,
|
|
25
|
+
* `<footer>`, and `<aside>` subtrees outright.
|
|
26
|
+
* 3. Removes every HTML comment node anywhere in the tree.
|
|
27
|
+
* 4. Extracts the remaining text via `$.root().text()`.
|
|
28
|
+
* 5. Collapses every run of ASCII/Unicode whitespace (including newlines,
|
|
29
|
+
* tabs, NBSPs) into a single space and trims the result so the agent
|
|
30
|
+
* receives compact prose.
|
|
31
|
+
*
|
|
32
|
+
* Empty input, whitespace-only input, and input that contains only
|
|
33
|
+
* stripped elements all yield the empty string.
|
|
34
|
+
*/
|
|
35
|
+
export declare function toReadableText(html: string): string;
|
|
36
|
+
/**
|
|
37
|
+
* Parse a single `Set-Cookie` header value into a {@link CookieInfo}.
|
|
38
|
+
*
|
|
39
|
+
* The parser is intentionally permissive: it never throws for malformed
|
|
40
|
+
* input. The first `;`-separated attribute is treated as the
|
|
41
|
+
* `name=value` pair (with everything after the first `=` taken verbatim
|
|
42
|
+
* as the value, matching common server practice). Subsequent attributes
|
|
43
|
+
* are matched case-insensitively against the public RFC 6265 set the
|
|
44
|
+
* `web.fetch` tool surfaces:
|
|
45
|
+
*
|
|
46
|
+
* - `Domain` → {@link CookieInfo.domain}
|
|
47
|
+
* - `Path` → {@link CookieInfo.path}
|
|
48
|
+
* - `Expires` → {@link CookieInfo.expires} as an ISO 8601 string
|
|
49
|
+
* (omitted if the date string fails to parse)
|
|
50
|
+
* - `Max-Age` → {@link CookieInfo.maxAge} as a finite integer
|
|
51
|
+
* (omitted if not a finite integer)
|
|
52
|
+
* - `HttpOnly` → {@link CookieInfo.httpOnly} = `true`
|
|
53
|
+
* - `Secure` → {@link CookieInfo.secure} = `true`
|
|
54
|
+
* - `SameSite=…` → {@link CookieInfo.sameSite} normalized to
|
|
55
|
+
* `"Strict"`/`"Lax"`/`"None"` (omitted if value is
|
|
56
|
+
* unknown)
|
|
57
|
+
*
|
|
58
|
+
* Unknown attributes (e.g. `Priority`, `Partitioned`) are ignored. When
|
|
59
|
+
* an attribute is missing, malformed, or unrecognised, the corresponding
|
|
60
|
+
* field is simply absent from the returned object.
|
|
61
|
+
*
|
|
62
|
+
* The header value is expected to be a single cookie. Callers that
|
|
63
|
+
* receive multiple cookies in a single header (which servers must not
|
|
64
|
+
* do, but a few do) should split on the appropriate boundary before
|
|
65
|
+
* calling this function.
|
|
66
|
+
*/
|
|
67
|
+
export declare function parseSetCookie(value: string): CookieInfo;
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML-to-readable-text conversion and a permissive `Set-Cookie` parser.
|
|
3
|
+
*
|
|
4
|
+
* Two helpers live here so both `web.fetch` (via `capture.ts`) and the
|
|
5
|
+
* DuckDuckGo provider's lite-HTML adapter share one implementation:
|
|
6
|
+
*
|
|
7
|
+
* - {@link toReadableText} strips obvious chrome and non-rendering content
|
|
8
|
+
* from an HTML document and returns the visible prose with whitespace
|
|
9
|
+
* collapsed. It satisfies Requirements 2.4, 2.5, and 2.28 and follows
|
|
10
|
+
* the design's "HTML-to-readable-text strategy" (cheerio-based, no
|
|
11
|
+
* browser/jsdom dependency).
|
|
12
|
+
* - {@link parseSetCookie} parses one `Set-Cookie` header value into a
|
|
13
|
+
* {@link CookieInfo}, supporting only the public attributes the
|
|
14
|
+
* `web.fetch` tool surfaces. It is regex-driven and intentionally
|
|
15
|
+
* permissive: missing or malformed attributes are simply absent in the
|
|
16
|
+
* returned object instead of producing a hard error.
|
|
17
|
+
*/
|
|
18
|
+
import * as cheerio from "cheerio";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// HTML → readable text
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
|
|
23
|
+
* Selectors for elements whose text content should never appear in the
|
|
24
|
+
* readable view. `script`/`style`/`noscript` carry executable or styling
|
|
25
|
+
* payloads (Requirement 2.4); `nav`/`header`/`footer`/`aside` are the
|
|
26
|
+
* obvious chrome regions called out in the design's
|
|
27
|
+
* "HTML-to-readable-text strategy".
|
|
28
|
+
*/
|
|
29
|
+
const STRIPPED_SELECTORS = [
|
|
30
|
+
"script",
|
|
31
|
+
"style",
|
|
32
|
+
"noscript",
|
|
33
|
+
"nav",
|
|
34
|
+
"header",
|
|
35
|
+
"footer",
|
|
36
|
+
"aside",
|
|
37
|
+
].join(", ");
|
|
38
|
+
/**
 * Reduce an HTML document to its readable text.
 *
 * Steps, in order:
 *   1. Load `html` with cheerio.
 *   2. Delete every subtree matched by {@link STRIPPED_SELECTORS}.
 *   3. Delete comment nodes that are children of any remaining element.
 *   4. Read the text of the document root.
 *   5. Pass that text through {@link collapseWhitespace}.
 *
 * @param {string} html - HTML source to convert.
 * @returns {string} The collapsed text; `""` when `html` is not a string
 *   or is the empty string.
 */
export function toReadableText(html) {
    const hasMarkup = typeof html === "string" && html.length > 0;
    if (!hasMarkup) {
        return "";
    }
    const $ = cheerio.load(html);

    // Drop non-content subtrees before reading any text.
    $(STRIPPED_SELECTORS).remove();

    // Walk the child nodes of every remaining element and drop the ones
    // cheerio marks as comments.
    const commentNodes = $("*")
        .contents()
        .filter((_index, node) => node.type === "comment");
    commentNodes.remove();

    const rawText = $.root().text();
    return collapseWhitespace(rawText);
}
|
|
73
|
+
/**
 * Replace each contiguous run of whitespace-like characters with one
 * ASCII space, then trim both ends.
 *
 * The character class matches JavaScript's `\s` plus the zero-width
 * characters `\u200b` (zero-width space), `\u200c` (zero-width
 * non-joiner) and `\u200d` (zero-width joiner). The class also spells out
 * `\u00a0`, `\u2000-\u200a`, `\u2028`, `\u2029` and `\ufeff`; `\s`
 * already matches those in JavaScript, so listing them is redundant but
 * harmless.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} `text` with whitespace runs collapsed and edges trimmed.
 */
function collapseWhitespace(text) {
    const whitespaceRun = /[\s\u00a0\u2000-\u200a\u200b\u200c\u200d\u2028\u2029\ufeff]+/g;
    const singleSpaced = text.replace(whitespaceRun, " ");
    return singleSpaced.trim();
}
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// Set-Cookie parser
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
/**
|
|
93
|
+
* Allowed values for `SameSite`, lower-cased for case-insensitive lookup.
|
|
94
|
+
* The map preserves the canonical capitalisation used in the
|
|
95
|
+
* {@link CookieInfo.sameSite} field.
|
|
96
|
+
*/
|
|
97
|
+
const SAME_SITE_VALUES = new Map([
|
|
98
|
+
["strict", "Strict"],
|
|
99
|
+
["lax", "Lax"],
|
|
100
|
+
["none", "None"],
|
|
101
|
+
]);
|
|
102
|
+
/**
 * Turn one `Set-Cookie` header value into a {@link CookieInfo} record.
 *
 * Parsing is deliberately lenient and never throws on malformed input;
 * a non-string argument yields `{ name: "", value: "" }`.
 *
 * The text before the first `;` is the `name=value` pair. It is split at
 * the first `=`, so any later `=` characters stay in the value; when no
 * `=` is present the whole token becomes the name and the value is `""`.
 * Both halves are whitespace-trimmed.
 *
 * Every following `;`-separated attribute is matched by its
 * case-insensitive name:
 *
 *   - `Domain`   → {@link CookieInfo.domain}   (skipped when empty)
 *   - `Path`     → {@link CookieInfo.path}     (skipped when empty)
 *   - `Expires`  → {@link CookieInfo.expires}  ISO 8601 string, skipped
 *                  when the date does not parse
 *   - `Max-Age`  → {@link CookieInfo.maxAge}   integer, skipped when the
 *                  value is not a valid integer
 *   - `HttpOnly` → {@link CookieInfo.httpOnly} = `true`
 *   - `Secure`   → {@link CookieInfo.secure}   = `true`
 *   - `SameSite` → {@link CookieInfo.sameSite} canonical
 *                  `"Strict"`/`"Lax"`/`"None"`, skipped when unknown
 *
 * Anything else (e.g. `Priority`, `Partitioned`) is ignored. A field
 * that was not observed, or whose value was rejected, is left off the
 * result entirely rather than being set to `undefined`. When an
 * attribute repeats, the last accepted occurrence wins.
 *
 * The input must describe a single cookie; the string is split on `;`
 * only, never on `,`, so callers holding several cookies folded into
 * one header have to separate them first.
 *
 * @param value Raw `Set-Cookie` header value.
 * @returns The parsed cookie; always has `name` and `value`.
 */
export function parseSetCookie(value) {
    if (typeof value !== "string") {
        return { name: "", value: "" };
    }
    // Cut `text` at its first `=` and trim both sides. Without an `=` the
    // text is returned unchanged as the key, paired with an empty value.
    const splitPair = (text) => {
        const at = text.indexOf("=");
        return at === -1
            ? [text, ""]
            : [text.slice(0, at).trim(), text.slice(at + 1).trim()];
    };
    const [first = "", ...attributes] = value.split(";");
    const [name, cookieValue] = splitPair(first.trim());
    // Optional fields are assigned only when seen, so absent attributes
    // never show up as keys on the returned object.
    const cookie = { name, value: cookieValue };
    for (const raw of attributes) {
        const segment = raw.trim();
        if (segment.length === 0) {
            continue;
        }
        const [key, val] = splitPair(segment);
        const attribute = key.toLowerCase();
        if (attribute === "domain") {
            if (val.length > 0) {
                cookie.domain = val;
            }
        }
        else if (attribute === "path") {
            if (val.length > 0) {
                cookie.path = val;
            }
        }
        else if (attribute === "expires") {
            const isoDate = parseHttpDate(val);
            if (isoDate !== undefined) {
                cookie.expires = isoDate;
            }
        }
        else if (attribute === "max-age") {
            const seconds = parseMaxAge(val);
            if (seconds !== undefined) {
                cookie.maxAge = seconds;
            }
        }
        else if (attribute === "httponly") {
            cookie.httpOnly = true;
        }
        else if (attribute === "secure") {
            cookie.secure = true;
        }
        else if (attribute === "samesite") {
            const policy = SAME_SITE_VALUES.get(val.toLowerCase());
            if (policy !== undefined) {
                cookie.sameSite = policy;
            }
        }
        // Any other attribute name is intentionally dropped.
    }
    return cookie;
}
|
|
215
|
+
/**
 * Parse a cookie `Expires` date into an ISO 8601 timestamp.
 *
 * The value is interpreted with the cookie-date algorithm from
 * RFC 6265 §5.1.1 rather than `Date.parse`. `Date.parse` behaviour for
 * non-ISO strings is implementation-defined: zone-less inputs such as
 * the asctime form are read in the host's local timezone, and stray
 * tokens like `"1"` are accepted as dates. The algorithm used here is
 * deterministic and always treats the date as UTC. It covers the
 * IMF-fixdate, RFC 850 and asctime forms.
 *
 * @param value Trimmed attribute value of an `Expires` attribute.
 * @returns The instant as an ISO 8601 string, or `undefined` when the
 *   value is empty, lacks a time/day/month/year token, or names an
 *   out-of-range or non-existent date.
 */
function parseHttpDate(value) {
    if (typeof value !== "string" || value.length === 0) {
        return undefined;
    }
    // RFC 6265 §5.1.1 delimiter set: HTAB, 0x20-0x2F, 0x3B-0x40,
    // 0x5B-0x60 and 0x7B-0x7E. `:` is deliberately not a delimiter so
    // that `hh:mm:ss` survives as one token.
    const delimiters = /[\x09\x20-\x2F\x3B-\x40\x5B-\x60\x7B-\x7E]+/;
    const monthNames = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"];
    let time;
    let day;
    let month;
    let year;
    for (const token of value.split(delimiters)) {
        if (token.length === 0) {
            continue;
        }
        // Each token fills at most one still-missing field, tried in the
        // order the RFC prescribes: time, day-of-month, month, year.
        if (time === undefined) {
            const hms = /^(\d{1,2}):(\d{1,2}):(\d{1,2})(?:\D|$)/.exec(token);
            if (hms !== null) {
                time = [Number(hms[1]), Number(hms[2]), Number(hms[3])];
                continue;
            }
        }
        if (day === undefined) {
            const dom = /^(\d{1,2})(?:\D|$)/.exec(token);
            if (dom !== null) {
                day = Number(dom[1]);
                continue;
            }
        }
        if (month === undefined) {
            // Only the first three letters are significant ("November" ok).
            const index = monthNames.indexOf(token.slice(0, 3).toLowerCase());
            if (index !== -1) {
                month = index;
                continue;
            }
        }
        if (year === undefined) {
            const yr = /^(\d{2,4})(?:\D|$)/.exec(token);
            if (yr !== null) {
                year = Number(yr[1]);
            }
        }
    }
    if (time === undefined || day === undefined || month === undefined || year === undefined) {
        return undefined;
    }
    // Two-digit year pivot from the RFC: 70-99 → 19xx, 00-69 → 20xx.
    if (year >= 70 && year <= 99) {
        year += 1900;
    }
    else if (year >= 0 && year <= 69) {
        year += 2000;
    }
    const [hour, minute, second] = time;
    if (day < 1 || day > 31 || year < 1601 || hour > 23 || minute > 59 || second > 59) {
        return undefined;
    }
    const ms = Date.UTC(year, month, day, hour, minute, second);
    if (!Number.isFinite(ms)) {
        return undefined;
    }
    const parsed = new Date(ms);
    // Date.UTC silently rolls e.g. "30 Feb" over into March; treat such
    // non-existent calendar dates as unparseable instead.
    if (parsed.getUTCDate() !== day) {
        return undefined;
    }
    return parsed.toISOString();
}
|
|
228
|
+
/**
 * Parse a `Max-Age` attribute value into an integer number of seconds.
 *
 * @param value Trimmed attribute value of a `Max-Age` attribute.
 * @returns The integer, or `undefined` for empty input, anything that is
 *   not an optional `-` followed by decimal digits (so no `+`, fractions,
 *   exponents or whitespace), or a magnitude outside the safe-integer
 *   range. `"-0"` is reported as plain `0`, never as negative zero.
 */
function parseMaxAge(value) {
    // RFC 6265 §5.2.2 shape: DIGIT *DIGIT, optionally preceded by `-`.
    // The pattern also rejects the empty string.
    if (!/^-?\d+$/.test(value)) {
        return undefined;
    }
    const seconds = Number(value);
    // A digits-only string can still exceed what a double represents
    // exactly (or overflow to Infinity); isSafeInteger covers both.
    if (!Number.isSafeInteger(seconds)) {
        return undefined;
    }
    // Number("-0") is negative zero, which Object.is and deep-equality
    // checks distinguish from 0; collapse it.
    return seconds === 0 ? 0 : seconds;
}
|
|
248
|
+
//# sourceMappingURL=readable.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"readable.js","sourceRoot":"","sources":["../../../src/tools/web/readable.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAInC,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E;;;;;;GAMG;AACH,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,OAAO;IACP,UAAU;IACV,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,OAAO;CACR,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7D,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,2CAA2C;IAC3C,CAAC,CAAC,kBAAkB,CAAC,CAAC,MAAM,EAAE,CAAC;IAE/B,mEAAmE;IACnE,qEAAqE;IACrE,qEAAqE;IACrE,iBAAiB;IACjB,CAAC,CAAC,GAAG,CAAC;SACH,QAAQ,EAAE;SACV,MAAM,CAAC;QACN,OAAO,IAAI,CAAC,IAAI,KAAK,SAAS,CAAC;IACjC,CAAC,CAAC;SACD,MAAM,EAAE,CAAC;IAEZ,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC5B,OAAO,kBAAkB,CAAC,GAAG,CAAC,CAAC;AACjC,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,IAAI;SACR,OAAO,CAAC,+DAA+D,EAAE,GAAG,CAAC;SAC7E,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,gBAAgB,GAAwC,IAAI,GAAG,CAAC;IACpE,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACpB,CAAC,KAAK,EAAE,KAAK,CAAC;IACd,CAAC,MAAM,EAAE,MAAM,CAAC;CACjB,CAAC,CAAC;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;IACjC,CAAC;IAED,qEAAqE;IACrE,qEAAqE;IACrE,mEAAmE;IACnE,oCAAoC;IACpC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAErC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,IAAY,CAAC;IACjB,IAAI,WAAmB,CAAC;IACxB,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,iEAAiE;QACjE,IAAI,GAAG,IAAI,CAAC;QACZ,WAAW,GAAG,EAAE,CAAC;IACnB,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACnC,8DAA8D;QAC9D,oDAAoD;QACpD,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,IAA
I,EAAE,CAAC;IAC7C,CAAC;IAED,uEAAuE;IACvE,2DAA2D;IAC3D,MAAM,MAAM,GAAe,EAAE,IAAI,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC;IAExD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,SAAS;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEnC,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,QAAQ,GACZ,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACxE,MAAM,SAAS,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;QAEzC,QAAQ,SAAS,EAAE,CAAC;YAClB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;oBAAE,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC;gBACpD,MAAM;YACR,CAAC;YACD,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;oBAAE,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC;gBAClD,MAAM;YACR,CAAC;YACD,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,GAAG,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;gBACrC,IAAI,GAAG,KAAK,SAAS;oBAAE,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC;gBAC5C,MAAM;YACR,CAAC;YACD,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,CAAC,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;gBACjC,IAAI,CAAC,KAAK,SAAS;oBAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;gBACvC,MAAM;YACR,CAAC;YACD,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC;gBACvB,MAAM;YACR,CAAC;YACD,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,CAAC;YACD,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,SAAS,GAAG,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC;gBAChE,IAAI,SAAS,KAAK,SAAS;oBAAE,MAAM,CAAC,QAAQ,GAAG,SAAS,CAAC;gBACzD,MAAM;YACR,CAAC;YACD;gBACE,2DAA2D;gBAC3D,MAAM;QACV,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,KAAa;IAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACzC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAAE,OAAO,SA
AS,CAAC;IAC3C,OAAO,IAAI,IAAI,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;AACpC,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,KAAa;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACzC,kEAAkE;IAClE,kEAAkE;IAClE,6BAA6B;IAC7B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAC7C,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IACxB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAClE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAC/C,OAAO,CAAC,CAAC;AACX,CAAC"}
|