@askalf/deepdive 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +177 -52
- package/dist/agent.d.ts +48 -2
- package/dist/agent.d.ts.map +1 -1
- package/dist/agent.js +178 -65
- package/dist/agent.js.map +1 -1
- package/dist/cache.d.ts +16 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +62 -0
- package/dist/cache.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +164 -18
- package/dist/cli.js.map +1 -1
- package/dist/concurrency.d.ts +2 -0
- package/dist/concurrency.d.ts.map +1 -0
- package/dist/concurrency.js +38 -0
- package/dist/concurrency.js.map +1 -0
- package/dist/config.d.ts +20 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +64 -3
- package/dist/config.js.map +1 -1
- package/dist/doctor.d.ts +44 -0
- package/dist/doctor.d.ts.map +1 -0
- package/dist/doctor.js +533 -0
- package/dist/doctor.js.map +1 -0
- package/dist/index.d.ts +9 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -3
- package/dist/index.js.map +1 -1
- package/dist/llm-stream.d.ts +27 -0
- package/dist/llm-stream.d.ts.map +1 -0
- package/dist/llm-stream.js +173 -0
- package/dist/llm-stream.js.map +1 -0
- package/dist/llm.d.ts +10 -0
- package/dist/llm.d.ts.map +1 -1
- package/dist/llm.js +96 -20
- package/dist/llm.js.map +1 -1
- package/dist/plan.d.ts +7 -0
- package/dist/plan.d.ts.map +1 -1
- package/dist/plan.js +51 -0
- package/dist/plan.js.map +1 -1
- package/dist/retry.d.ts +18 -0
- package/dist/retry.d.ts.map +1 -0
- package/dist/retry.js +70 -0
- package/dist/retry.js.map +1 -0
- package/dist/robots.d.ts +26 -0
- package/dist/robots.d.ts.map +1 -0
- package/dist/robots.js +183 -0
- package/dist/robots.js.map +1 -0
- package/dist/search/duckduckgo.d.ts +2 -0
- package/dist/search/duckduckgo.d.ts.map +1 -1
- package/dist/search/duckduckgo.js +38 -13
- package/dist/search/duckduckgo.js.map +1 -1
- package/dist/search/exa.d.ts +17 -0
- package/dist/search/exa.d.ts.map +1 -0
- package/dist/search/exa.js +62 -0
- package/dist/search/exa.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +2 -1
- package/dist/search/searxng.js.map +1 -1
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +9 -1
- package/dist/search.js.map +1 -1
- package/dist/synthesize.d.ts +1 -1
- package/dist/synthesize.d.ts.map +1 -1
- package/dist/synthesize.js +11 -2
- package/dist/synthesize.js.map +1 -1
- package/dist/url-util.d.ts +4 -0
- package/dist/url-util.d.ts.map +1 -0
- package/dist/url-util.js +24 -0
- package/dist/url-util.js.map +1 -0
- package/package.json +3 -2
package/dist/robots.js
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
// robots.txt support — per-host fetch + parse + per-URL allow/deny check.
|
|
2
|
+
//
|
|
3
|
+
// Production-grade crawlers respect robots.txt. deepdive's fetch volume is
|
|
4
|
+
// low (~12 URLs per query) but it's still the polite thing; sites with
|
|
5
|
+
// explicit scraper deny rules shouldn't be surprised. --ignore-robots is
|
|
6
|
+
// provided for operators who know what they're doing.
|
|
7
|
+
//
|
|
8
|
+
// Cache is per-run (in-memory) and keyed by `<scheme>://<host>`. We don't
|
|
9
|
+
// persist to disk because the expected hit count per run is small and
|
|
10
|
+
// robots.txt content can change rapidly on the publisher's end.
|
|
11
|
+
// Default crawler product token. Nothing in this module applies it
// automatically; presumably callers pass it as `opts.userAgent` — verify
// against the call sites.
export const DEFAULT_USER_AGENT = "deepdive-bot";
|
|
12
|
+
/**
 * Create an in-memory robots.txt cache backed by a private Map.
 *
 * @returns {{ get: Function, set: Function }} `get(origin)` yields the stored
 *   entry (or `undefined` when the origin has never been stored);
 *   `set(origin, parsed)` stores an entry and returns nothing.
 */
export function createRobotsCache() {
    const entries = new Map();
    const get = (origin) => entries.get(origin);
    const set = (origin, parsed) => {
        entries.set(origin, parsed);
    };
    return { get, set };
}
|
|
19
|
+
/**
 * Decide whether `url` may be fetched according to its host's robots.txt.
 *
 * @param {string} url - Absolute URL to check.
 * @param {object} opts - Reads `cache` (optional get/set store keyed by
 *   origin), `userAgent`, and whatever `fetchAndParse` reads from it.
 * @returns {Promise<"allow"|"deny"|"unknown">} "allow" for unparsable or
 *   non-HTTP(S) URLs and for permitted paths, "deny" for disallowed paths,
 *   "unknown" when robots.txt could not be retrieved.
 */
export async function canFetch(url, opts) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        // Not a parsable URL: robots.txt has nothing to say about it.
        return "allow";
    }
    const isHttp = target.protocol === "http:" || target.protocol === "https:";
    if (!isHttp) {
        return "allow";
    }
    const origin = `${target.protocol}//${target.host}`;
    const path = `${target.pathname}${target.search}`;
    let parsed = opts.cache?.get(origin);
    if (parsed === undefined) {
        // Cache miss (or no cache): fetch once and remember the outcome,
        // including a `null` failure marker, for later calls.
        parsed = await fetchAndParse(origin, opts);
        if (opts.cache) {
            opts.cache.set(origin, parsed);
        }
    }
    if (parsed === null) {
        return "unknown"; // couldn't reach robots.txt
    }
    if (isPathAllowed(parsed, path, opts.userAgent)) {
        return "allow";
    }
    return "deny";
}
|
|
42
|
+
/**
 * Download and parse `<origin>/robots.txt`.
 *
 * @param {string} origin - `<scheme>://<host>` with no trailing slash.
 * @param {object} opts - Reads `fetchImpl` (defaults to global `fetch`),
 *   `timeoutMs` (defaults to 5000), `signal` (optional caller abort signal)
 *   and `userAgent` (sent as the `user-agent` header).
 * @returns {Promise<object|null>} The parsed robots data, `{ rules: [] }` for
 *   any HTTP status >= 400, or `null` when the request or body read failed.
 */
async function fetchAndParse(origin, opts) {
    const fetchImpl = opts.fetchImpl ?? fetch;
    const timeout = AbortSignal.timeout(opts.timeoutMs ?? 5_000);
    // Abort on whichever fires first: the caller's signal or our timeout.
    const signal = opts.signal
        ? AbortSignal.any([opts.signal, timeout])
        : timeout;
    try {
        const res = await fetchImpl(`${origin}/robots.txt`, {
            headers: { "user-agent": opts.userAgent },
            signal,
        });
        // Per RFC 9309: 4xx → no restrictions (no robots file); 5xx → treat as
        // "full disallow" conservatively. We lean permissive for 5xx too since
        // it's often transient and we don't want to lock out a run because the
        // publisher's server is flaky. Callers can pass --ignore-robots if they
        // want to bypass robots entirely.
        if (res.status >= 400) {
            // We never read the error body; cancel it so the underlying
            // connection is released now rather than whenever GC runs.
            try {
                await res.body?.cancel?.();
            }
            catch {
                // Best effort only — a failed cancel must not change the verdict.
            }
            return { rules: [] };
        }
        const text = await res.text();
        return parseRobotsTxt(text);
    }
    catch {
        // Network error, timeout or abort: report "could not determine".
        return null;
    }
}
|
|
69
|
+
// Exported for unit tests.
|
|
70
|
+
/**
 * Parse robots.txt text into rule records.
 *
 * Consecutive `User-agent` lines share the rules that follow them; a
 * `User-agent` line appearing after at least one Allow/Disallow starts a new
 * group. Rules that appear before any `User-agent` line are dropped.
 *
 * @param {string} text - Raw robots.txt body.
 * @returns {{rules: Array, crawlDelaySec: (number|undefined), _grouped: Array}}
 *   `rules` is every Allow/Disallow as `{ allow, path }` (one entry per agent
 *   it applies to); `crawlDelaySec` is the last valid non-negative
 *   `Crawl-delay` seen anywhere in the file, regardless of group; `_grouped`
 *   is the same rule list with the lower-cased `agent` attached, which is
 *   what `isPathAllowed` consumes.
 */
export function parseRobotsTxt(text) {
    const grouped = [];
    let currentAgents = [];
    let sawRuleThisGroup = false;
    let crawlDelay;
    // RFC 9309 allows CRLF, LF, or a bare CR as the line terminator. Splitting
    // on `\r?\n` alone would leave a CR-only file as one giant line that
    // matches nothing, silently allowing everything.
    for (const rawLine of text.split(/\r\n|\r|\n/)) {
        const line = stripComment(rawLine).trim();
        if (!line) {
            continue;
        }
        const match = /^([a-zA-Z-]+)\s*:\s*(.*)$/.exec(line);
        if (!match) {
            continue;
        }
        const [, key, value] = match;
        const lower = key.toLowerCase();
        if (lower === "user-agent") {
            if (sawRuleThisGroup) {
                // A User-agent line after rules opens a new group.
                currentAgents = [];
                sawRuleThisGroup = false;
            }
            currentAgents.push(value.trim().toLowerCase());
        }
        else if (lower === "disallow" || lower === "allow") {
            sawRuleThisGroup = true;
            for (const agent of currentAgents) {
                grouped.push({
                    agent,
                    allow: lower === "allow",
                    path: value.trim(),
                });
            }
        }
        else if (lower === "crawl-delay") {
            const n = Number(value.trim());
            if (Number.isFinite(n) && n >= 0) {
                crawlDelay = n;
            }
        }
    }
    return {
        rules: grouped.map(({ allow, path }) => ({ allow, path })),
        crawlDelaySec: crawlDelay,
        // Agent-tagged rules, kept alongside the public fields so that agent
        // matching can happen at check time.
        _grouped: grouped,
    };
}
|
|
119
|
+
// Exported for unit tests.
|
|
120
|
+
/**
 * Evaluate parsed robots rules for one path and one user agent.
 *
 * Rules whose agent token is a substring of the lower-cased `userAgent` are
 * used when any exist; otherwise the `*` rules are used. Among matching
 * rules the longest pattern wins, and Allow wins a length tie.
 *
 * @param {object} parsed - Result of `parseRobotsTxt` (reads `_grouped`).
 * @param {string} path - Path plus query string to test.
 * @param {string} userAgent - Crawler user-agent string.
 * @returns {boolean} `true` when fetching is permitted.
 */
export function isPathAllowed(parsed, path, userAgent) {
    const entries = parsed._grouped ?? [];
    if (entries.length === 0) {
        return true;
    }
    const token = userAgent.toLowerCase();
    // Agent-specific rules take priority; '*' is only the fallback.
    const specific = entries.filter((entry) => entry.agent && token.includes(entry.agent));
    const candidates = specific.length > 0
        ? specific
        : entries.filter((entry) => entry.agent === "*");
    if (candidates.length === 0) {
        return true;
    }
    let longest = -1;
    let verdict = true;
    for (const { path: pattern, allow } of candidates) {
        if (!pattern) {
            // An empty Disallow grants everything (at the lowest priority);
            // an empty Allow contributes nothing.
            if (!allow && longest < 0) {
                longest = 0;
                verdict = true;
            }
            continue;
        }
        if (!pathMatches(pattern, path)) {
            continue;
        }
        const isLonger = pattern.length > longest;
        const tieGoesToAllow = pattern.length === longest && allow;
        if (isLonger || tieGoesToAllow) {
            longest = pattern.length;
            verdict = allow;
        }
    }
    // No rule matched at all → permitted.
    return longest < 0 || verdict;
}
|
|
154
|
+
/**
 * Test a robots.txt path pattern against a URL path.
 *
 * Supports `*` (any run of characters, including none) and a trailing `$`
 * (end-of-path anchor); a `$` anywhere else is a literal character. A pattern
 * with neither is a plain prefix match.
 *
 * Patterns come from remote robots.txt files, so they are matched with a
 * linear segment scan rather than being compiled into a RegExp: a pattern
 * such as `/*a*a*a*…$` would otherwise trigger catastrophic backtracking.
 *
 * @param {string} pattern - Non-empty Allow/Disallow value.
 * @param {string} path - Path plus query string being checked.
 * @returns {boolean} `true` when the pattern matches from the start of `path`.
 */
function pathMatches(pattern, path) {
    const anchored = pattern.endsWith("$");
    const body = anchored ? pattern.slice(0, -1) : pattern;
    const segments = body.split("*");
    const first = segments[0];
    if (segments.length === 1) {
        // No wildcard: exact match when anchored, prefix match otherwise.
        return anchored ? path === first : path.startsWith(first);
    }
    if (!path.startsWith(first)) {
        return false;
    }
    // Literal segments between wildcards must appear in order. Taking the
    // leftmost occurrence of each leaves the most room for the ones after it.
    let pos = first.length;
    for (let i = 1; i < segments.length - 1; i++) {
        const segment = segments[i];
        const at = path.indexOf(segment, pos);
        if (at === -1) {
            return false;
        }
        pos = at + segment.length;
    }
    const last = segments[segments.length - 1];
    if (anchored) {
        // The final literal must end the path without overlapping what the
        // earlier segments already consumed.
        return path.length - last.length >= pos && path.endsWith(last);
    }
    return path.indexOf(last, pos) !== -1;
}
|
|
179
|
+
/**
 * Drop a trailing `#` comment from one robots.txt line.
 *
 * @param {string} s - A single raw line.
 * @returns {string} Everything before the first `#`, or `s` unchanged when
 *   the line contains none. Whitespace is not trimmed.
 */
function stripComment(s) {
    const hashAt = s.indexOf("#");
    if (hashAt === -1) {
        return s;
    }
    return s.slice(0, hashAt);
}
|
|
183
|
+
//# sourceMappingURL=robots.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"robots.js","sourceRoot":"","sources":["../src/robots.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAC1E,EAAE;AACF,2EAA2E;AAC3E,uEAAuE;AACvE,yEAAyE;AACzE,sDAAsD;AACtD,EAAE;AACF,0EAA0E;AAC1E,sEAAsE;AACtE,gEAAgE;AA6BhE,MAAM,CAAC,MAAM,kBAAkB,GAAG,cAAc,CAAC;AAEjD,MAAM,UAAU,iBAAiB;IAC/B,MAAM,KAAK,GAAG,IAAI,GAAG,EAA+B,CAAC;IACrD,OAAO;QACL,GAAG,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC;QAClC,GAAG,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC;KACxD,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,GAAW,EACX,IAAqB;IAErB,IAAI,MAAc,CAAC;IACnB,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ;YAAE,OAAO,OAAO,CAAC;QACtE,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACpC,IAAI,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACvC,MAAM,MAAM,GACV,MAAM,KAAK,SAAS;QAClB,CAAC,CAAC,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC;QACnC,CAAC,CAAC,MAAM,CAAC;IACb,IAAI,IAAI,CAAC,KAAK,IAAI,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEvE,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAC,CAAC,4BAA4B;IACnE,OAAO,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;AACxE,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,MAAc,EACd,IAAqB;IAErB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC;IAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC;IAC1C,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM;QACxB,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,CAAC,CAAC,OAAO,CAAC;IACZ,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,GAAG,MAAM,aAAa,EAAE;YAClD,OAAO,EAAE,EAAE,YAAY,EAAE,IAAI,CAAC,SAAS,EAAE;YACzC,MAAM;SACP,CAAC,CAAC;QACH,uEAAuE;QACvE,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kCAAkC;QAClC,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,
EAAE,CAAC;YACtB,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACvB,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAMlC,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,gBAAgB,GAAG,KAAK,CAAC;IAC7B,IAAI,UAA8B,CAAC;IAEnC,KAAK,MAAM,OAAO,IAAI,KAAK,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,KAAK,GAAG,2BAA2B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACrD,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,KAAK,CAAC;QAC7B,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,KAAK,KAAK,YAAY,EAAE,CAAC;YAC3B,IAAI,gBAAgB,EAAE,CAAC;gBACrB,YAAY;gBACZ,aAAa,GAAG,EAAE,CAAC;gBACnB,gBAAgB,GAAG,KAAK,CAAC;YAC3B,CAAC;YACD,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QACjD,CAAC;aAAM,IAAI,KAAK,KAAK,UAAU,IAAI,KAAK,KAAK,OAAO,EAAE,CAAC;YACrD,gBAAgB,GAAG,IAAI,CAAC;YACxB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC;oBACX,KAAK;oBACL,KAAK,EAAE,KAAK,KAAK,OAAO;oBACxB,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,IAAI,KAAK,KAAK,aAAa,EAAE,CAAC;YACnC,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC/B,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,UAAU,GAAG,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7D,aAAa,EAAE,UAAU;QACzB,uEAAuE;QACvE,wEAAwE;QACxE,qEAAqE;QACrE,wEAAwE;QACxE,GAAI,EAAE,QAAQ,EAAE,OAAO,EAAa;KACrB,CAAC;AACpB,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,aAAa,CAC3B,MAAoB,EACpB,IAAY,EACZ,SAAiB;IAEjB,MAAM,OAAO,GAAI,MAAsF,CAAC,QAAQ,IAAI,EAAE,CAAC;IACvH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,EAAE,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IAEnC,mEAAmE;IACnE,IAAI,UAAU,GAAG,OAAO,CAAC,MAAM,
CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,GAAG,CAAC,CAAC;IACjF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEzC,+DAA+D;IAC/D,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,IAAI,CAAC;IACrB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,mEAAmE;YACnE,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;gBAChB,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;oBAChB,OAAO,GAAG,CAAC,CAAC;oBACZ,SAAS,GAAG,IAAI,CAAC,CAAC,mCAAmC;gBACvD,CAAC;YACH,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC;YAAE,SAAS;QAC5C,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,KAAK,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/E,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YAC3B,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;AACxC,CAAC;AAED,SAAS,WAAW,CAAC,OAAe,EAAE,IAAY;IAChD,yEAAyE;IACzE,0EAA0E;IAC1E,mEAAmE;IACnE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAClC,CAAC;IACD,wDAAwD;IACxD,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,IAAI,CAAC,KAAK,GAAG;YAAE,EAAE,IAAI,IAAI,CAAC;aACrB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,EAAE,IAAI,GAAG,CAAC;;YACrD,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,oBAAoB,EAAE,MAAM,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,CAAC;QACH,OAAO,IAAI,MAAM,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACtC,CAAC"}
|
|
@@ -4,4 +4,6 @@ export declare class DuckDuckGoSearch implements SearchAdapter {
|
|
|
4
4
|
search(query: string, limit: number, signal?: AbortSignal): Promise<SearchResult[]>;
|
|
5
5
|
}
|
|
6
6
|
export declare function parseDuckDuckGoHTML(html: string, limit: number): SearchResult[];
|
|
7
|
+
export declare function decodeHtmlEntities(s: string): string;
|
|
8
|
+
export declare function stripTags(s: string): string;
|
|
7
9
|
//# sourceMappingURL=duckduckgo.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"duckduckgo.d.ts","sourceRoot":"","sources":["../../src/search/duckduckgo.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAIhE,qBAAa,gBAAiB,YAAW,aAAa;IACpD,QAAQ,CAAC,IAAI,gBAAgB;IAEvB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAiB1F;AAGD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,CAuB/E"}
|
|
1
|
+
{"version":3,"file":"duckduckgo.d.ts","sourceRoot":"","sources":["../../src/search/duckduckgo.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAIhE,qBAAa,gBAAiB,YAAW,aAAa;IACpD,QAAQ,CAAC,IAAI,gBAAgB;IAEvB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAiB1F;AAGD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,CAuB/E;AA+BD,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAsBpD;AAWD,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAE3C"}
|
|
@@ -57,7 +57,8 @@ function unwrapDDGRedirect(href) {
|
|
|
57
57
|
href = "https:" + href;
|
|
58
58
|
try {
|
|
59
59
|
const u = new URL(href);
|
|
60
|
-
|
|
60
|
+
const isDDG = u.hostname === "duckduckgo.com" || u.hostname.endsWith(".duckduckgo.com");
|
|
61
|
+
if (isDDG && u.pathname === "/l/") {
|
|
61
62
|
const uddg = u.searchParams.get("uddg");
|
|
62
63
|
if (uddg)
|
|
63
64
|
return decodeURIComponent(uddg);
|
|
@@ -77,18 +78,42 @@ function isValidHttpUrl(s) {
|
|
|
77
78
|
return false;
|
|
78
79
|
}
|
|
79
80
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
81
|
+
// Exported for unit tests. Single-pass HTML entity decoder — each `&...;`
// token is resolved exactly once with no rescan, so `&amp;#39;` decodes to
// the literal text `&#39;` rather than double-unescaping to `'`.
//
// Handles the named entities amp/lt/gt/quot/apos/nbsp (matched
// case-insensitively) plus decimal (`&#39;`) and hexadecimal (`&#x27;`)
// numeric references. Unknown names and out-of-range code points are left
// in the output untouched.
export function decodeHtmlEntities(s) {
    // A Map rather than an object literal: `name in obj` also finds inherited
    // keys, so `&constructor;` used to decode to the stringified Object
    // function. Built once per call instead of once per match.
    const named = new Map([
        ["amp", "&"],
        ["lt", "<"],
        ["gt", ">"],
        ["quot", '"'],
        ["apos", "'"],
        ["nbsp", " "],
    ]);
    return s.replace(/&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z]+);/g, (match, name) => {
        if (name.startsWith("#")) {
            const isHex = name[1] === "x" || name[1] === "X";
            const code = isHex
                ? parseInt(name.slice(2), 16)
                : parseInt(name.slice(1), 10);
            return isValidCodePoint(code) ? String.fromCodePoint(code) : match;
        }
        const low = name.toLowerCase();
        return named.has(low) ? named.get(low) : match;
    });
}
|
|
108
|
+
/**
 * Report whether `n` is a finite number within the Unicode code point range
 * 0..0x10FFFF (inclusive).
 *
 * @param {number} n - Candidate code point.
 * @returns {boolean}
 */
function isValidCodePoint(n) {
    if (!Number.isFinite(n)) {
        return false;
    }
    return n >= 0 && n <= 0x10ffff;
}
|
|
91
|
-
|
|
92
|
-
|
|
111
|
+
// Exported for unit tests. Two-step tag scrubber for plain-text output:
//   1. every well-formed `<...>` tag becomes a single space;
//   2. every `<` still remaining (e.g. an unterminated `<scrip`) becomes a
//      space too, so no tag opener reaches downstream consumers.
// Whitespace is not collapsed.
export function stripTags(s) {
    const withoutTags = s.replace(/<[^>]*>/g, " ");
    return withoutTags.replace(/</g, " ");
}
|
|
94
119
|
//# sourceMappingURL=duckduckgo.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"duckduckgo.js","sourceRoot":"","sources":["../../src/search/duckduckgo.ts"],"names":[],"mappings":"AAAA,+EAA+E;AAC/E,4EAA4E;AAC5E,6EAA6E;AAC7E,0DAA0D;AAC1D,EAAE;AACF,gFAAgF;AAIhF,MAAM,QAAQ,GAAG,mCAAmC,CAAC;AAErD,MAAM,OAAO,gBAAgB;IAClB,IAAI,GAAG,YAAY,CAAC;IAE7B,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,KAAa,EAAE,MAAoB;QAC7D,MAAM,IAAI,GAAG,IAAI,eAAe,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/C,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE;YAChC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,mCAAmC;gBACnD,YAAY,EACV,+DAA+D;oBAC/D,oDAAoD;aACvD;YACD,IAAI,EAAE,IAAI,CAAC,QAAQ,EAAE;YACrB,MAAM;SACP,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,cAAc,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QAC3E,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,mBAAmB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAC1C,CAAC;CACF;AAED,2BAA2B;AAC3B,MAAM,UAAU,mBAAmB,CAAC,IAAY,EAAE,KAAa;IAC7D,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,oEAAoE;IACpE,6DAA6D;IAC7D,MAAM,OAAO,GACX,8JAA8J,CAAC;IAEjK,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3D,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC;YAAE,SAAS;QACnC,IAAI,EAAE,CAAC;QACP,OAAO,CAAC,IAAI,CAAC;YACX,GAAG;YACH,KAAK,EAAE,SAAS,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE;YACrD,OAAO,EAAE,SAAS,CAAC,kBAAkB,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE;YACzD,IAAI;SACL,CAAC,CAAC;QACH,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;YAAE,MAAM;IACrC,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,2EAA2E;IAC3E,wCAAwC;IACxC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,IAAI,GAAG,QAAQ,GAAG,IAAI,CAAC;IAClD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,
|
|
1
|
+
{"version":3,"file":"duckduckgo.js","sourceRoot":"","sources":["../../src/search/duckduckgo.ts"],"names":[],"mappings":"AAAA,+EAA+E;AAC/E,4EAA4E;AAC5E,6EAA6E;AAC7E,0DAA0D;AAC1D,EAAE;AACF,gFAAgF;AAIhF,MAAM,QAAQ,GAAG,mCAAmC,CAAC;AAErD,MAAM,OAAO,gBAAgB;IAClB,IAAI,GAAG,YAAY,CAAC;IAE7B,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,KAAa,EAAE,MAAoB;QAC7D,MAAM,IAAI,GAAG,IAAI,eAAe,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/C,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE;YAChC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,mCAAmC;gBACnD,YAAY,EACV,+DAA+D;oBAC/D,oDAAoD;aACvD;YACD,IAAI,EAAE,IAAI,CAAC,QAAQ,EAAE;YACrB,MAAM;SACP,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,cAAc,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QAC3E,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,mBAAmB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAC1C,CAAC;CACF;AAED,2BAA2B;AAC3B,MAAM,UAAU,mBAAmB,CAAC,IAAY,EAAE,KAAa;IAC7D,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,oEAAoE;IACpE,6DAA6D;IAC7D,MAAM,OAAO,GACX,8JAA8J,CAAC;IAEjK,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3D,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC;YAAE,SAAS;QACnC,IAAI,EAAE,CAAC;QACP,OAAO,CAAC,IAAI,CAAC;YACX,GAAG;YACH,KAAK,EAAE,SAAS,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE;YACrD,OAAO,EAAE,SAAS,CAAC,kBAAkB,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE;YACzD,IAAI;SACL,CAAC,CAAC;QACH,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;YAAE,MAAM;IACrC,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,2EAA2E;IAC3E,wCAAwC;IACxC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,IAAI,GAAG,QAAQ,GAAG,IAAI,CAAC;IAClD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,KAAK,GAAG,CAAC,CAAC,QAAQ,KAAK,gBAAgB,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACxF,IAAI,KAAK,IAAI,CAAC,CAAC,QAAQ,KAAK,KAAK,EAAE,CAAC;YAClC,MAAM,IAAI,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM
,CAAC,CAAC;YACxC,IAAI,IAAI;gBAAE,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;QACD,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,cAAc,CAAC,CAAS;IAC/B,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;QACrB,OAAO,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,0EAA0E;AAC1E,2EAA2E;AAC3E,4DAA4D;AAC5D,MAAM,UAAU,kBAAkB,CAAC,CAAS;IAC1C,OAAO,CAAC,CAAC,OAAO,CAAC,0CAA0C,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;QAC3E,MAAM,KAAK,GAA2B;YACpC,GAAG,EAAE,GAAG;YACR,EAAE,EAAE,GAAG;YACP,EAAE,EAAE,GAAG;YACP,IAAI,EAAE,GAAG;YACT,IAAI,EAAE,GAAG;YACT,IAAI,EAAE,GAAG;SACV,CAAC;QACF,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAC/B,IAAI,GAAG,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC;QACpC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QACrE,CAAC;QACD,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QACrE,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,gBAAgB,CAAC,CAAS;IACjC,OAAO,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,QAAQ,CAAC;AACvD,CAAC;AAED,6EAA6E;AAC7E,wEAAwE;AACxE,iEAAiE;AACjE,2EAA2E;AAC3E,0CAA0C;AAC1C,MAAM,UAAU,SAAS,CAAC,CAAS;IACjC,OAAO,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzD,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { SearchAdapter, SearchResult } from "../search.js";
|
|
2
|
+
interface ExaResponseItem {
|
|
3
|
+
url: string;
|
|
4
|
+
title?: string | null;
|
|
5
|
+
text?: string;
|
|
6
|
+
highlights?: string[];
|
|
7
|
+
summary?: string;
|
|
8
|
+
}
|
|
9
|
+
export declare class ExaSearch implements SearchAdapter {
|
|
10
|
+
private readonly key;
|
|
11
|
+
readonly name = "exa";
|
|
12
|
+
constructor(key: string);
|
|
13
|
+
search(query: string, limit: number, signal?: AbortSignal): Promise<SearchResult[]>;
|
|
14
|
+
}
|
|
15
|
+
export declare function mapExaResults(items: ExaResponseItem[], limit: number): SearchResult[];
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=exa.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exa.d.ts","sourceRoot":"","sources":["../../src/search/exa.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEhE,UAAU,eAAe;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAMD,qBAAa,SAAU,YAAW,aAAa;IAEjC,OAAO,CAAC,QAAQ,CAAC,GAAG;IADhC,QAAQ,CAAC,IAAI,SAAS;gBACO,GAAG,EAAE,MAAM;IAElC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAqB1F;AAGD,wBAAgB,aAAa,CAAC,KAAK,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,CAOrF"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// Exa adapter. Requires DEEPDIVE_EXA_KEY. Exa is a neural search API tuned
|
|
2
|
+
// for research workloads — the planner's sub-queries (and especially the
|
|
3
|
+
// critic's gap-filling follow-ups) tend to be long, intent-rich strings,
|
|
4
|
+
// which is the shape Exa's embedding-based retrieval is built for.
|
|
5
|
+
//
|
|
6
|
+
// We request highlights (short, query-focused excerpts) for each result so
|
|
7
|
+
// the snippet shown in events/logs is informative. Full-page text is left
|
|
8
|
+
// to deepdive's normal Playwright fetch step so the downstream extract /
|
|
9
|
+
// synthesis path is identical across adapters.
|
|
10
|
+
/**
 * Search adapter for the Exa API.
 *
 * Instances carry the API `key` and a fixed `name` of "exa".
 */
export class ExaSearch {
    key;
    name = "exa";
    /**
     * @param {string} key - Exa API key, sent as the `x-api-key` header.
     */
    constructor(key) {
        this.key = key;
    }
    /**
     * POST one query to Exa and convert the response into search results.
     *
     * @param {string} query - Search query text.
     * @param {number} limit - Maximum results wanted; the request itself is
     *   capped at 100.
     * @param {AbortSignal} [signal] - Optional cancellation signal.
     * @returns {Promise<Array>} Output of `mapExaResults` for the response.
     * @throws {Error} `exa <status> <statusText>` when the response is not OK.
     */
    async search(query, limit, signal) {
        const headers = {
            "content-type": "application/json",
            accept: "application/json",
            "x-api-key": this.key,
            "x-exa-integration": "deepdive",
        };
        const payload = {
            query,
            numResults: Math.min(limit, 100),
            type: "auto",
            // Ask for short query-focused excerpts to use as snippets.
            contents: { highlights: { numSentences: 2 } },
        };
        const res = await fetch("https://api.exa.ai/search", {
            method: "POST",
            headers,
            body: JSON.stringify(payload),
            signal,
        });
        if (!res.ok) {
            throw new Error(`exa ${res.status} ${res.statusText}`);
        }
        const { results } = await res.json();
        return mapExaResults(results ?? [], limit);
    }
}
|
|
39
|
+
// Exported for unit tests. Pure, deterministic, no I/O.
|
|
40
|
+
/**
 * Convert raw Exa response items into ranked search results.
 *
 * @param {Array<object>} items - Exa result items (reads `url`, `title`, and
 *   whatever `extractSnippet` reads).
 * @param {number} limit - Maximum number of results to keep.
 * @returns {Array<{url: *, title: string, snippet: string, rank: number}>}
 *   The first `limit` items in their original order, ranked from 1; a missing
 *   or null title becomes "".
 */
export function mapExaResults(items, limit) {
    const toResult = (item, index) => {
        const { url, title } = item;
        return {
            url,
            title: title ?? "",
            snippet: extractSnippet(item),
            rank: index + 1,
        };
    };
    return items.slice(0, limit).map(toResult);
}
|
|
48
|
+
// Choose the snippet for one Exa item, first non-empty source wins:
//   1. `highlights` joined with " … "
//   2. the first 500 characters of `text`
//   3. `summary`
// Each candidate is trimmed; the result is "" when none is present.
function extractSnippet(r) {
    if (r.highlights?.length > 0) {
        return r.highlights.join(" … ").trim();
    }
    if (r.text?.length > 0) {
        return r.text.slice(0, 500).trim();
    }
    if (r.summary?.length > 0) {
        return r.summary.trim();
    }
    return "";
}
|
|
62
|
+
//# sourceMappingURL=exa.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exa.js","sourceRoot":"","sources":["../../src/search/exa.ts"],"names":[],"mappings":"AAAA,2EAA2E;AAC3E,yEAAyE;AACzE,yEAAyE;AACzE,mEAAmE;AACnE,EAAE;AACF,2EAA2E;AAC3E,0EAA0E;AAC1E,yEAAyE;AACzE,+CAA+C;AAgB/C,MAAM,OAAO,SAAS;IAES;IADpB,IAAI,GAAG,KAAK,CAAC;IACtB,YAA6B,GAAW;QAAX,QAAG,GAAH,GAAG,CAAQ;IAAG,CAAC;IAE5C,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,KAAa,EAAE,MAAoB;QAC7D,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,2BAA2B,EAAE;YACnD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,MAAM,EAAE,kBAAkB;gBAC1B,WAAW,EAAE,IAAI,CAAC,GAAG;gBACrB,mBAAmB,EAAE,UAAU;aAChC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC;gBAChC,IAAI,EAAE,MAAM;gBACZ,QAAQ,EAAE,EAAE,UAAU,EAAE,EAAE,YAAY,EAAE,CAAC,EAAE,EAAE;aAC9C,CAAC;YACF,MAAM;SACP,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,OAAO,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACpE,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAgB,CAAC;QAC/C,OAAO,aAAa,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,EAAE,KAAK,CAAC,CAAC;IAClD,CAAC;CACF;AAED,wDAAwD;AACxD,MAAM,UAAU,aAAa,CAAC,KAAwB,EAAE,KAAa;IACnE,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1C,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;QACpB,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC;QAC1B,IAAI,EAAE,CAAC,GAAG,CAAC;KACZ,CAAC,CAAC,CAAC;AACN,CAAC;AAED,wEAAwE;AACxE,wEAAwE;AACxE,SAAS,cAAc,CAAC,CAAkB;IACxC,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,OAAO,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IACD,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACrC,CAAC;IACD,IAAI,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtC,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"searxng.d.ts","sourceRoot":"","sources":["../../src/search/searxng.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"searxng.d.ts","sourceRoot":"","sources":["../../src/search/searxng.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAGhE,qBAAa,aAAc,YAAW,aAAa;IAErC,OAAO,CAAC,QAAQ,CAAC,OAAO;IADpC,QAAQ,CAAC,IAAI,aAAa;gBACG,OAAO,EAAE,MAAM;IAEtC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAoB1F"}
|
package/dist/search/searxng.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// SearXNG adapter. Points at an existing SearXNG instance (self-hosted or
|
|
2
2
|
// public). Requires DEEPDIVE_SEARXNG_URL. Uses the JSON output format.
|
|
3
|
+
import { trimTrailingSlashes } from "../url-util.js";
|
|
3
4
|
export class SearXNGSearch {
|
|
4
5
|
baseUrl;
|
|
5
6
|
name = "searxng";
|
|
@@ -7,7 +8,7 @@ export class SearXNGSearch {
|
|
|
7
8
|
this.baseUrl = baseUrl;
|
|
8
9
|
}
|
|
9
10
|
async search(query, limit, signal) {
|
|
10
|
-
const url = new URL(this.baseUrl
|
|
11
|
+
const url = new URL(trimTrailingSlashes(this.baseUrl) + "/search");
|
|
11
12
|
url.searchParams.set("q", query);
|
|
12
13
|
url.searchParams.set("format", "json");
|
|
13
14
|
const res = await fetch(url, {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"searxng.js","sourceRoot":"","sources":["../../src/search/searxng.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAC1E,uEAAuE;
|
|
1
|
+
{"version":3,"file":"searxng.js","sourceRoot":"","sources":["../../src/search/searxng.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAC1E,uEAAuE;AAGvE,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAErD,MAAM,OAAO,aAAa;IAEK;IADpB,IAAI,GAAG,SAAS,CAAC;IAC1B,YAA6B,OAAe;QAAf,YAAO,GAAP,OAAO,CAAQ;IAAG,CAAC;IAEhD,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,KAAa,EAAE,MAAoB;QAC7D,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,SAAS,CAAC,CAAC;QACnE,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACjC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACvC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC3B,OAAO,EAAE,EAAE,MAAM,EAAE,kBAAkB,EAAE;YACvC,MAAM;SACP,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,MAAM,IAAI,KAAK,CAAC,WAAW,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACxE,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAE7B,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1C,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;YACpB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;YACxB,IAAI,EAAE,CAAC,GAAG,CAAC;SACZ,CAAC,CAAC,CAAC;IACN,CAAC;CACF"}
|
package/dist/search.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CACrF;AAED,wBAAsB,oBAAoB,CACxC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,GACtC,OAAO,CAAC,aAAa,CAAC,CAkCxB;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,EAAE,CAUnE"}
|
package/dist/search.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// Search adapter interface. A "search" returns candidate URLs with metadata.
|
|
2
2
|
// Adapters live under src/search/*. Default is DuckDuckGo HTML (no API key).
|
|
3
|
+
import { dedupeKey } from "./url-util.js";
|
|
3
4
|
export async function resolveSearchAdapter(name, env) {
|
|
4
5
|
switch (name) {
|
|
5
6
|
case "duckduckgo":
|
|
@@ -28,6 +29,13 @@ export async function resolveSearchAdapter(name, env) {
|
|
|
28
29
|
throw new Error("tavily adapter requires DEEPDIVE_TAVILY_KEY");
|
|
29
30
|
return new TavilySearch(key);
|
|
30
31
|
}
|
|
32
|
+
case "exa": {
|
|
33
|
+
const { ExaSearch } = await import("./search/exa.js");
|
|
34
|
+
const key = env.DEEPDIVE_EXA_KEY;
|
|
35
|
+
if (!key)
|
|
36
|
+
throw new Error("exa adapter requires DEEPDIVE_EXA_KEY");
|
|
37
|
+
return new ExaSearch(key);
|
|
38
|
+
}
|
|
31
39
|
default:
|
|
32
40
|
throw new Error(`unknown search adapter: ${name}`);
|
|
33
41
|
}
|
|
@@ -36,7 +44,7 @@ export function dedupeByUrl(results) {
|
|
|
36
44
|
const seen = new Set();
|
|
37
45
|
const out = [];
|
|
38
46
|
for (const r of results) {
|
|
39
|
-
const key = r.url
|
|
47
|
+
const key = dedupeKey(r.url);
|
|
40
48
|
if (seen.has(key))
|
|
41
49
|
continue;
|
|
42
50
|
seen.add(key);
|
package/dist/search.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAC7E,6EAA6E;
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAC7E,6EAA6E;AAE7E,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAc1C,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,GAAuC;IAEvC,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,YAAY,CAAC;QAClB,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,MAAM,EAAE,gBAAgB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACpE,OAAO,IAAI,gBAAgB,EAAE,CAAC;QAChC,CAAC;QACD,KAAK,SAAS,CAAC,CAAC,CAAC;YACf,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;YAC9D,MAAM,GAAG,GAAG,GAAG,CAAC,oBAAoB,CAAC;YACrC,IAAI,CAAC,GAAG;gBAAE,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;YAC3E,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,CAAC;QAChC,CAAC;QACD,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;YAC1D,MAAM,GAAG,GAAG,GAAG,CAAC,kBAAkB,CAAC;YACnC,IAAI,CAAC,GAAG;gBAAE,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YACvE,OAAO,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC5D,MAAM,GAAG,GAAG,GAAG,CAAC,mBAAmB,CAAC;YACpC,IAAI,CAAC,GAAG;gBAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;YACzE,OAAO,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;QAC/B,CAAC;QACD,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACtD,MAAM,GAAG,GAAG,GAAG,CAAC,gBAAgB,CAAC;YACjC,IAAI,CAAC,GAAG;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YACnE,OAAO,IAAI,SAAS,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC;QACD;YACE,MAAM,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAAuB;IACjD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAmB,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACd,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/dist/synthesize.d.ts
CHANGED
|
@@ -3,6 +3,6 @@ import type { Source } from "./citations.js";
|
|
|
3
3
|
export interface SourceWithContent extends Source {
|
|
4
4
|
content: string;
|
|
5
5
|
}
|
|
6
|
-
export declare function synthesize(question: string, sources: SourceWithContent[], config: LLMConfig, signal?: AbortSignal): Promise<string>;
|
|
6
|
+
export declare function synthesize(question: string, sources: SourceWithContent[], config: LLMConfig, signal?: AbortSignal, onToken?: (text: string) => void): Promise<string>;
|
|
7
7
|
export declare function buildSourcePacket(sources: SourceWithContent[]): string;
|
|
8
8
|
//# sourceMappingURL=synthesize.d.ts.map
|
package/dist/synthesize.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../src/synthesize.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../src/synthesize.ts"],"names":[],"mappings":"AAOA,OAAO,EAAW,KAAK,SAAS,EAAE,MAAM,UAAU,CAAC;AAEnD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAE7C,MAAM,WAAW,iBAAkB,SAAQ,MAAM;IAC/C,OAAO,EAAE,MAAM,CAAC;CACjB;AAkBD,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,iBAAiB,EAAE,EAC5B,MAAM,EAAE,SAAS,EACjB,MAAM,CAAC,EAAE,WAAW,EACpB,OAAO,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,GAC/B,OAAO,CAAC,MAAM,CAAC,CAsBjB;AAGD,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,iBAAiB,EAAE,GAAG,MAAM,CAOtE"}
|
package/dist/synthesize.js
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
// Final answer synthesis — takes the original question and the collected
|
|
2
2
|
// sources, asks the LLM to produce a cited markdown answer. Sources are
|
|
3
3
|
// passed as a numbered list so the model can cite them inline as [1], [2].
|
|
4
|
+
//
|
|
5
|
+
// When `onToken` is provided, uses the streaming variant so tokens land in
|
|
6
|
+
// front of the user as the model writes them instead of after a 30+s wait.
|
|
4
7
|
import { callLLM } from "./llm.js";
|
|
8
|
+
import { callLLMStream } from "./llm-stream.js";
|
|
5
9
|
const SYNTH_SYSTEM = `You are a careful research assistant. You will be given:
|
|
6
10
|
1. The user's original question.
|
|
7
11
|
2. A numbered list of source documents with titles, URLs, and extracted text.
|
|
@@ -17,7 +21,7 @@ Rules:
|
|
|
17
21
|
- If the sources do not answer the question, say so — do not hallucinate.
|
|
18
22
|
- Do not include a "Sources" section yourself — the caller appends it.
|
|
19
23
|
- Length: match the complexity of the question. A one-line question can get a paragraph; a comparison question may need headers and a table.`;
|
|
20
|
-
export async function synthesize(question, sources, config, signal) {
|
|
24
|
+
export async function synthesize(question, sources, config, signal, onToken) {
|
|
21
25
|
if (sources.length === 0) {
|
|
22
26
|
return "_No sources could be fetched or extracted. Unable to answer._";
|
|
23
27
|
}
|
|
@@ -25,7 +29,12 @@ export async function synthesize(question, sources, config, signal) {
|
|
|
25
29
|
const userMessage = `Question: ${question}\n\n` +
|
|
26
30
|
`Sources (${sources.length}):\n\n${packet}\n\n` +
|
|
27
31
|
`Write the cited markdown answer now.`;
|
|
28
|
-
const
|
|
32
|
+
const messages = [{ role: "user", content: userMessage }];
|
|
33
|
+
if (onToken) {
|
|
34
|
+
const { text } = await callLLMStream(messages, SYNTH_SYSTEM, config, { onToken }, signal);
|
|
35
|
+
return text;
|
|
36
|
+
}
|
|
37
|
+
const { text } = await callLLM(messages, SYNTH_SYSTEM, config, signal);
|
|
29
38
|
return text;
|
|
30
39
|
}
|
|
31
40
|
// Exported for unit tests.
|
package/dist/synthesize.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../src/synthesize.ts"],"names":[],"mappings":"AAAA,yEAAyE;AACzE,wEAAwE;AACxE,2EAA2E;AAE3E,OAAO,EAAE,OAAO,EAAkB,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../src/synthesize.ts"],"names":[],"mappings":"AAAA,yEAAyE;AACzE,wEAAwE;AACxE,2EAA2E;AAC3E,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAE3E,OAAO,EAAE,OAAO,EAAkB,MAAM,UAAU,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAOhD,MAAM,YAAY,GAAG;;;;;;;;;;;;;;6IAcwH,CAAC;AAE9I,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,QAAgB,EAChB,OAA4B,EAC5B,MAAiB,EACjB,MAAoB,EACpB,OAAgC;IAEhC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,+DAA+D,CAAC;IACzE,CAAC;IACD,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,WAAW,GACf,aAAa,QAAQ,MAAM;QAC3B,YAAY,OAAO,CAAC,MAAM,SAAS,MAAM,MAAM;QAC/C,sCAAsC,CAAC;IACzC,MAAM,QAAQ,GAAG,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;IACnE,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,aAAa,CAClC,QAAQ,EACR,YAAY,EACZ,MAAM,EACN,EAAE,OAAO,EAAE,EACX,MAAM,CACP,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACvE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,iBAAiB,CAAC,OAA4B;IAC5D,OAAO,OAAO;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,MAAM,MAAM,GAAG,IAAI,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,KAAK,IAAI,YAAY,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC;QACjE,OAAO,GAAG,MAAM,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;IACrC,CAAC,CAAC;SACD,IAAI,CAAC,aAAa,CAAC,CAAC;AACzB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-util.d.ts","sourceRoot":"","sources":["../src/url-util.ts"],"names":[],"mappings":"AAMA,wBAAgB,mBAAmB,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAIrD;AAGD,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAGnD;AAMD,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7C"}
|
package/dist/url-util.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// URL canonicalization helpers. Kept as non-regex string operations so they
|
|
2
|
+
// are trivially linear-time even on pathological inputs — CodeQL flags
|
|
3
|
+
// equivalent regex forms (`/\/+$/`) as polynomial-ReDoS risks despite being
|
|
4
|
+
// end-anchored.
|
|
5
|
+
// Exported for unit tests.
|
|
6
|
+
export function trimTrailingSlashes(s) {
|
|
7
|
+
let i = s.length;
|
|
8
|
+
while (i > 0 && s.charCodeAt(i - 1) === 0x2f)
|
|
9
|
+
i--;
|
|
10
|
+
return s.slice(0, i);
|
|
11
|
+
}
|
|
12
|
+
// Exported for unit tests.
|
|
13
|
+
export function stripHashFragment(s) {
|
|
14
|
+
const i = s.indexOf("#");
|
|
15
|
+
return i === -1 ? s : s.slice(0, i);
|
|
16
|
+
}
|
|
17
|
+
// Exported for unit tests. Returns a dedupe key: scheme+host+path+query with
|
|
18
|
+
// any trailing slashes removed and any URL fragment stripped. Fragment-only
|
|
19
|
+
// changes shouldn't create duplicate cache entries; trailing-slash variants
|
|
20
|
+
// shouldn't either.
|
|
21
|
+
export function dedupeKey(url) {
|
|
22
|
+
return trimTrailingSlashes(stripHashFragment(url));
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=url-util.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-util.js","sourceRoot":"","sources":["../src/url-util.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,uEAAuE;AACvE,4EAA4E;AAC5E,gBAAgB;AAEhB,2BAA2B;AAC3B,MAAM,UAAU,mBAAmB,CAAC,CAAS;IAC3C,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;IACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI;QAAE,CAAC,EAAE,CAAC;IAClD,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACvB,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,iBAAiB,CAAC,CAAS;IACzC,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACtC,CAAC;AAED,6EAA6E;AAC7E,4EAA4E;AAC5E,4EAA4E;AAC5E,oBAAoB;AACpB,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,OAAO,mBAAmB,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;AACrD,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askalf/deepdive",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "A local research agent. One command, cited answer. Routes every LLM call through your own proxy (dario, Anthropic-compat, OpenAI-compat). Headless browser + pluggable search + multi-provider LLM — zero hosted dependencies.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
],
|
|
23
23
|
"scripts": {
|
|
24
24
|
"build": "tsc",
|
|
25
|
+
"typecheck": "tsc --noEmit",
|
|
25
26
|
"test": "node --test --test-concurrency=4 test/*.test.mjs",
|
|
26
27
|
"dev": "tsx src/cli.ts",
|
|
27
28
|
"start": "node dist/cli.js",
|
|
@@ -60,7 +61,7 @@
|
|
|
60
61
|
"playwright": "^1.47.0"
|
|
61
62
|
},
|
|
62
63
|
"devDependencies": {
|
|
63
|
-
"@types/node": "^
|
|
64
|
+
"@types/node": "^25.6.0",
|
|
64
65
|
"tsx": "^4.19.0",
|
|
65
66
|
"typescript": "^5.7.0"
|
|
66
67
|
}
|