open-research 0.1.26 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ import {
2
+ loadOntology
3
+ } from "./chunk-3WM33M3O.js";
4
+ import {
5
+ getConnections,
6
+ getNote,
7
+ searchNotes
8
+ } from "./chunk-IOR7G25X.js";
9
+ import "./chunk-3RG5ZIWI.js";
10
+
11
+ // src/lib/ontology/query-agent.ts
12
+ var SYSTEM_PROMPT = `You answer research questions by reading a project's ontology \u2014 a structured graph of sources, findings, claims, questions, methods, and insights connected by typed edges (supports, contradicts, derived-from, relates-to).
13
+
14
+ # How to search
15
+
16
+ Pick the right strategy based on the question type:
17
+
18
+ | Question type | Search approach | Example |
19
+ |---------------|----------------|---------|
20
+ | Structural | Use filters only (no text) | "unsupported claims" \u2192 { kind: "claim", missingEdge: "supports" } |
21
+ | Specific entity | Text query + kind filter | "Smith 2024 findings" \u2192 { queries: ["Smith 2024"], kind: "source" } then follow derived-from edges |
22
+ | Broad topic | Multiple synonym phrases | "attention efficiency" \u2192 { queries: ["attention efficiency", "quadratic scaling", "computational cost"] } |
23
+ | Evidence chain | Find the claim, then traverse | search for claim \u2192 get_connections depth 2 \u2192 follow supports/contradicts edges |
24
+
25
+ After finding any node, ALWAYS call get_connections to explore its neighborhood. The most relevant notes are often one edge away from a search result.
26
+
27
+ # Response rules
28
+
29
+ - Write for a researcher: be precise, cite specific sources and page/table numbers when available
30
+ - Always state the confidence level of claims and findings you reference
31
+ - When reporting contradictions, present BOTH sides with their edge contexts \u2014 do not take a side
32
+ - Explicitly say what's NOT in the ontology: "The ontology has no data on X" is a useful answer
33
+ - Never include raw note IDs \u2014 use source names and natural language references
34
+ - Aim for 100-300 words. Longer for complex evidence chains. Shorter for simple lookups.`;
35
+ var QUERY_TOOLS = [
36
+ {
37
+ type: "function",
38
+ function: {
39
+ name: "get_note",
40
+ description: "Retrieve a single note by ID. Returns full content, kind, confidence, all edges with their contexts, and source metadata. Use when you have a note ID from search results or edge targets and need the complete details.",
41
+ parameters: {
42
+ type: "object",
43
+ properties: {
44
+ noteId: { type: "string", description: "UUID of the note" }
45
+ },
46
+ required: ["noteId"],
47
+ additionalProperties: false
48
+ }
49
+ }
50
+ },
51
+ {
52
+ type: "function",
53
+ function: {
54
+ name: "search_notes",
55
+ description: "Find notes by text and/or structural filters. Include 2-3 synonym phrases for broad queries. Combine text with kind filter for precision. Use structural filters for graph-shape queries (e.g. unsupported claims \u2192 { kind: 'claim', missingEdge: 'supports' }).",
56
+ parameters: {
57
+ type: "object",
58
+ properties: {
59
+ queries: { type: "array", items: { type: "string" }, description: "Text search phrases (OR logic). Include synonyms: ['attention efficiency', 'quadratic scaling', 'computational cost']" },
60
+ kind: { type: "string", enum: ["source", "finding", "claim", "question", "method", "insight"] },
61
+ confidence: { type: "string", enum: ["established", "supported", "hypothesized", "questioned", "refuted"] },
62
+ hasEdge: { type: "string", enum: ["supports", "contradicts", "derived-from", "relates-to"], description: "Note must have at least one edge of this type" },
63
+ missingEdge: { type: "string", enum: ["supports", "contradicts", "derived-from", "relates-to"], description: "Note must have NO edges of this type" },
64
+ limit: { type: "number", description: "Max results. Default: 10" }
65
+ },
66
+ additionalProperties: false
67
+ }
68
+ }
69
+ },
70
+ {
71
+ type: "function",
72
+ function: {
73
+ name: "get_connections",
74
+ description: "Explore a note's neighborhood by traversing edges. Returns the note and all connected notes within N hops. ALWAYS call after search_notes finds a relevant node \u2014 the most important information is often one edge away.",
75
+ parameters: {
76
+ type: "object",
77
+ properties: {
78
+ noteId: { type: "string", description: "Starting note ID" },
79
+ depth: { type: "number", description: "Hops to traverse. Default: 1 (immediate neighbors). Use 2 for evidence chains." }
80
+ },
81
+ required: ["noteId"],
82
+ additionalProperties: false
83
+ }
84
+ }
85
+ }
86
+ ];
87
/**
 * Run one tool call requested by the query-agent model and render the outcome
 * as plain text for the follow-up `tool` message.
 *
 * @param {string} name - Tool name: "get_note", "search_notes" or "get_connections".
 * @param {object|null|undefined} args - Parsed tool arguments as produced by the model.
 * @param {object} ontology - Loaded ontology, passed through to the lookup helpers.
 * @returns {string} Text result. Unknown tools and unusable arguments are
 *   reported as text so the model can correct itself.
 */
function executeQueryTool(name, args, ontology) {
  // Model-supplied JSON can legally parse to null or a primitive ("null", "3").
  // Fall back to an empty object so the property reads below cannot throw.
  const params = args !== null && typeof args === "object" ? args : {};

  switch (name) {
    case "get_note": {
      // Without this guard a missing id would be looked up as the string "undefined".
      if (params.noteId == null) return "Error: noteId is required.";
      const note = getNote(ontology, String(params.noteId));
      if (!note) return `Note not found: ${params.noteId}`;
      return JSON.stringify(note, null, 2);
    }
    case "search_notes": {
      const results = searchNotes(ontology, {
        queries: params.queries,
        kind: params.kind,
        confidence: params.confidence,
        hasEdge: params.hasEdge,
        missingEdge: params.missingEdge,
        limit: params.limit
      });
      if (results.length === 0) return "No matching notes found.";
      return results
        .map((n) => `[${n.id}] (${n.kind}, ${n.confidence}) "${n.content}" \u2014 ${n.edges.length} edges`)
        .join("\n");
    }
    case "get_connections": {
      if (params.noteId == null) return "Error: noteId is required.";
      const { root, connected } = getConnections(
        ontology,
        String(params.noteId),
        params.depth ?? 1
      );
      if (!root) return `Note not found: ${params.noteId}`;
      const rootEdges = root.edges
        .map((e) => `${e.relation}(${e.strength}) \u2192 ${e.targetId} \u2014 "${e.context}"`)
        .join("\n ");
      const lines = [
        `Root: [${root.id}] (${root.kind}, ${root.confidence}) "${root.content}"`,
        `Edges: ${rootEdges || "none"}`,
        `Connected (${connected.length}):`,
        ...connected.map((c) => {
          // Neighbour edges are abbreviated to relation plus the first 8 id characters.
          const edgeSummary = c.edges
            .map((e) => `${e.relation} \u2192 ${e.targetId.slice(0, 8)}`)
            .join(", ");
          return ` [${c.id}] (${c.kind}, ${c.confidence}) "${c.content}" \u2014 edges: ${edgeSummary || "none"}`;
        })
      ];
      return lines.join("\n");
    }
    default:
      return `Unknown tool: ${name}`;
  }
}
129
+ var MAX_ITERATIONS = 8;
130
/**
 * Answer a research question by letting the model explore the ontology with
 * tools for up to MAX_ITERATIONS rounds, then return its final text.
 *
 * @param {object} input
 * @param {string} input.workspaceDir - Passed to loadOntology.
 * @param {string} input.query - The user's question.
 * @param {string} [input.scope] - Optional focus hint appended to the question.
 * @param {object} input.provider - Must expose `callLLMStreaming`, an async
 *   iterable of `text_delta` / `done` chunks.
 * @returns {Promise<string>} The model's answer, or a placeholder sentence when
 *   the ontology is empty or the model produced no text.
 */
async function runQueryAgent(input) {
  const ontology = await loadOntology(input.workspaceDir);
  if (ontology.notes.length === 0) {
    return "The ontology is empty \u2014 no notes have been captured yet. As you do research, the ontology will automatically populate with sources, findings, and claims.";
  }

  const userMessage = input.scope
    ? `${input.query}\n\n(Focus on: ${input.scope})`
    : input.query;
  const messages = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: userMessage }
  ];

  for (let i = 0; i < MAX_ITERATIONS; i++) {
    let fullText = "";
    let toolCalls = [];
    for await (const chunk of input.provider.callLLMStreaming({
      messages,
      tools: QUERY_TOOLS,
      model: "gpt-5.4-mini"
    })) {
      if (chunk.type === "text_delta") {
        fullText += chunk.content;
      } else if (chunk.type === "done") {
        // A text-only turn may finish without a toolCalls field at all.
        toolCalls = chunk.toolCalls ?? [];
      }
    }

    // No tool requests means the model has produced its answer.
    if (toolCalls.length === 0) {
      return fullText || "(Query agent returned empty response)";
    }

    messages.push({
      role: "assistant",
      content: fullText || null,
      tool_calls: toolCalls.map((tc) => ({
        id: tc.id,
        type: "function",
        function: { name: tc.name, arguments: tc.arguments }
      }))
    });

    for (const toolCall of toolCalls) {
      let result;
      try {
        const args = JSON.parse(toolCall.arguments || "{}");
        result = executeQueryTool(toolCall.name, args, ontology);
      } catch (err) {
        if (!(err instanceof SyntaxError)) throw err;
        // Malformed JSON from the model must not abort the whole query. Every
        // tool call still gets a reply, so report the problem and let it retry.
        result = `Error: could not parse arguments for ${toolCall.name} as JSON (${err.message}). Retry with valid JSON.`;
      }
      messages.push({
        role: "tool",
        tool_call_id: toolCall.id,
        content: result
      });
    }
  }

  // Iteration budget exhausted: ask for a summary without offering tools.
  messages.push({
    role: "user",
    content: "Summarize what you've found so far. Be concise."
  });
  let finalText = "";
  for await (const chunk of input.provider.callLLMStreaming({
    messages,
    model: "gpt-5.4-mini"
  })) {
    if (chunk.type === "text_delta") finalText += chunk.content;
  }
  return finalText || "(Query agent could not synthesize an answer)";
}
191
+ export {
192
+ runQueryAgent
193
+ };
@@ -0,0 +1,13 @@
1
+ import {
2
+ findExistingSource,
3
+ getConnections,
4
+ getNote,
5
+ searchNotes
6
+ } from "./chunk-IOR7G25X.js";
7
+ import "./chunk-3RG5ZIWI.js";
8
+ export {
9
+ findExistingSource,
10
+ getConnections,
11
+ getNote,
12
+ searchNotes
13
+ };
@@ -0,0 +1,74 @@
1
+ import "./chunk-3RG5ZIWI.js";
2
+
3
+ // src/lib/ontology/relevance-agent.ts
4
/**
 * Decide whether a user message is worth a relevance-selection LLM call.
 *
 * Skips when the ontology has no notes, when the message is a slash command,
 * when it is shorter than 15 characters, or when it opens with a
 * greeting/acknowledgement word.
 *
 * @param {string} message - Raw user message.
 * @param {{ notes: Array }} ontology - Loaded ontology.
 * @returns {boolean} True when the relevance agent should run.
 */
function shouldRunRelevanceAgent(message, ontology) {
  const skip =
    ontology.notes.length === 0 ||
    message.startsWith("/") ||
    message.length < 15 ||
    /^(hi|hello|hey|thanks|thank you|ok|yes|no|sure|got it)\b/i.test(message);
  return !skip;
}
11
+ var SYSTEM_PROMPT = `You select which notes from a research ontology are relevant to the user's current message.
12
+
13
+ Return a JSON array of note IDs. Nothing else \u2014 no markdown, no explanation, no text outside the array.
14
+
15
+ # Selection criteria
16
+
17
+ Include a note if ANY of these apply:
18
+ - It is about the same topic or concept (even using different terminology)
19
+ - It provides evidence for or against what the user is discussing
20
+ - It is a source, finding, or method referenced directly or indirectly
21
+ - It contains a contradiction or open question the user should be aware of
22
+
23
+ Prioritize: claims with contradictions > claims the user is building on > directly relevant findings > contextual sources > tangential notes.
24
+
25
+ # Output
26
+
27
+ - Return 5-15 IDs (fewer if few are relevant, more if the topic is well-covered)
28
+ - Return [] if nothing is relevant
29
+ - Output ONLY a JSON array: ["id1", "id2", ...]`;
30
/**
 * Render notes as one line each, in the form `[id] "content" (kind)`.
 *
 * @param {Array<{id: string, content: string, kind: string}>} notes
 * @returns {string} Newline-joined listing; empty string for an empty array.
 */
function buildNoteList(notes) {
  const rows = [];
  for (const { id, content, kind } of notes) {
    rows.push(`[${id}] "${content}" (${kind})`);
  }
  return rows.join("\n");
}
33
/**
 * Ask the model which ontology notes are relevant to the user's message.
 *
 * Best-effort: any failure while preparing the prompt, calling the provider or
 * parsing the reply yields an empty list rather than an exception.
 *
 * @param {object} input
 * @param {string} input.userMessage - The user's current message.
 * @param {{ notes: Array }} input.ontology - Loaded ontology.
 * @param {object} input.provider - Must expose `callLLM`, resolving to `{ content }`.
 * @returns {Promise<string[]>} Up to 15 distinct note IDs that exist in the ontology.
 */
async function runRelevanceAgent(input) {
  const { userMessage, ontology, provider } = input;
  if (!shouldRunRelevanceAgent(userMessage, ontology)) {
    return [];
  }
  try {
    let candidates = ontology.notes;
    if (candidates.length > 100) {
      // Keep only the 100 most recently updated notes. A missing updatedAt
      // sorts as the oldest value instead of throwing.
      const stamp = (note) => String(note.updatedAt ?? "");
      candidates = [...candidates]
        .sort((a, b) => stamp(b).localeCompare(stamp(a)))
        .slice(0, 100);
    }
    const noteList = buildNoteList(candidates);
    const response = await provider.callLLM({
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        {
          role: "user",
          content: `## User's message\n${userMessage}\n\n## Notes in ontology\n${noteList}`
        }
      ],
      model: "gpt-5.4-mini",
      maxTokens: 500,
      temperature: 0
    });
    const raw = response.content.trim();
    // Take the outermost [...] span so a code fence before the array or
    // commentary after it does not spoil the parse.
    const jsonStr = raw.match(/\[[\s\S]*\]/)?.[0];
    if (!jsonStr) return [];
    const parsed = JSON.parse(jsonStr);
    if (!Array.isArray(parsed)) return [];
    const noteIds = new Set(ontology.notes.map((n) => n.id));
    const valid = parsed.filter((id) => typeof id === "string" && noteIds.has(id));
    // The model may repeat an ID; return each note once.
    return [...new Set(valid)].slice(0, 15);
  } catch {
    // Deliberate best-effort: relevance selection must never break the chat turn.
    return [];
  }
}
71
+ export {
72
+ runRelevanceAgent,
73
+ shouldRunRelevanceAgent
74
+ };
@@ -0,0 +1,90 @@
1
+ import {
2
+ getNote
3
+ } from "./chunk-IOR7G25X.js";
4
+ import "./chunk-3RG5ZIWI.js";
5
+
6
+ // src/lib/ontology/scaffolding.ts
7
/**
 * Build a short markdown summary of the ontology notes relevant to the current
 * topic, for injection into the main agent's context.
 *
 * @param {{ notes: Array }} ontology - Loaded ontology.
 * @param {string[]} relevantIds - Note IDs to summarise.
 * @returns {string|null} The summary, or null when no ID resolves to a note.
 */
function buildScaffoldingContext(ontology, relevantIds) {
  if (relevantIds.length === 0) return null;

  // Resolve IDs, drop misses (null or undefined), and collapse repeats so a
  // note listed twice is not rendered or counted twice.
  const notes = [
    ...new Set(relevantIds.map((id) => getNote(ontology, id)).filter((n) => n != null))
  ];
  if (notes.length === 0) return null;

  const byKind = /* @__PURE__ */ new Map();
  for (const note of notes) {
    const group = byKind.get(note.kind) ?? [];
    group.push(note);
    byKind.set(note.kind, group);
  }
  const ofKind = (kind) => byKind.get(kind) ?? [];
  const plural = (n) => (n !== 1 ? "s" : "");

  // Inbound evidence is tallied over the whole ontology, not just the relevant notes.
  const supportCount = /* @__PURE__ */ new Map();
  const contradictCount = /* @__PURE__ */ new Map();
  for (const note of ontology.notes) {
    for (const edge of note.edges) {
      if (edge.relation === "supports") {
        supportCount.set(edge.targetId, (supportCount.get(edge.targetId) ?? 0) + 1);
      }
      if (edge.relation === "contradicts") {
        contradictCount.set(edge.targetId, (contradictCount.get(edge.targetId) ?? 0) + 1);
      }
    }
  }

  const lines = [
    "## Ontology Context",
    "",
    "Your project ontology contains the following related to this topic:",
    ""
  ];

  const claims = ofKind("claim");
  for (const claim of claims) {
    const s = supportCount.get(claim.id) ?? 0;
    const c = contradictCount.get(claim.id) ?? 0;
    const evidence = [];
    if (s > 0) evidence.push(`${s} supporting`);
    if (c > 0) evidence.push(`${c} contradicting`);
    const evidenceStr = evidence.length > 0 ? `, ${evidence.join(", ")}` : "";
    lines.push(`- CLAIM: "${truncate(claim.content, 120)}" (${claim.confidence}${evidenceStr})`);
  }

  const findings = ofKind("finding");
  if (findings.length > 0) {
    // Name the sources the findings were derived from, each once.
    const sourceNames = /* @__PURE__ */ new Set();
    for (const f of findings) {
      for (const edge of f.edges) {
        if (edge.relation !== "derived-from") continue;
        const source = getNote(ontology, edge.targetId);
        if (source) sourceNames.add(truncate(source.content, 40));
      }
    }
    const from = sourceNames.size > 0 ? ` from ${[...sourceNames].join(", ")}` : "";
    lines.push(`- ${findings.length} finding${plural(findings.length)}${from}`);
  }

  const sources = ofKind("source");
  if (sources.length > 0) {
    lines.push(`- ${sources.length} source${plural(sources.length)}: ${sources.map((s) => truncate(s.content, 40)).join(", ")}`);
  }

  for (const q of ofKind("question")) {
    lines.push(`- QUESTION: "${truncate(q.content, 100)}"`);
  }

  const methods = ofKind("method");
  if (methods.length > 0) {
    lines.push(`- ${methods.length} method${plural(methods.length)}`);
  }

  for (const ins of ofKind("insight")) {
    lines.push(`- INSIGHT: "${truncate(ins.content, 100)}"`);
  }

  const hasContradictions = claims.some((c) => (contradictCount.get(c.id) ?? 0) > 0);
  if (hasContradictions) {
    lines.push("");
    lines.push("\u26A0 There is contradicting evidence on one or more claims.");
  }
  const unsupported = claims.filter((c) => (supportCount.get(c.id) ?? 0) === 0);
  if (unsupported.length > 0) {
    lines.push(`\u26A0 ${unsupported.length} claim${unsupported.length !== 1 ? "s have" : " has"} no supporting evidence yet.`);
  }

  lines.push("");
  lines.push("Use query_ontology to get full details on any of the above.");
  return lines.join("\n");
}
85
/**
 * Shorten text to at most `max` UTF-16 code units and append an ellipsis when
 * anything was cut.
 *
 * @param {string} text - Text to shorten.
 * @param {number} max - Maximum number of code units kept from `text`.
 * @returns {string} `text` unchanged when it fits, otherwise the prefix plus "…".
 */
function truncate(text, max) {
  if (text.length <= max) return text;
  let end = max;
  // If the cut would land between the two halves of a surrogate pair, back off
  // by one unit so the output never ends in a lone high surrogate.
  const lastKept = text.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return text.slice(0, end) + "\u2026";
}
88
+ export {
89
+ buildScaffoldingContext
90
+ };
@@ -2,7 +2,9 @@ import {
2
2
  appendSessionEvent,
3
3
  listSessions,
4
4
  loadSessionHistory
5
- } from "./chunk-AYB7CAO5.js";
5
+ } from "./chunk-ZUSIRA5S.js";
6
+ import "./chunk-I5NVYKG7.js";
7
+ import "./chunk-3RG5ZIWI.js";
6
8
  export {
7
9
  appendSessionEvent,
8
10
  listSessions,
@@ -0,0 +1,120 @@
1
+ import {
2
+ NOTE_KINDS
3
+ } from "./chunk-KJHM7ZW2.js";
4
+ import "./chunk-3RG5ZIWI.js";
5
+
6
+ // src/lib/ontology/status.ts
7
/**
 * Produce a plain-text overview of the ontology: total notes, a per-kind
 * breakdown in NOTE_KINDS order, and counts of contradictions, unsupported
 * claims and open questions.
 *
 * @param {{ notes: Array }} ontology - Loaded ontology.
 * @returns {string} Multi-line status text.
 */
function getOntologyStatus(ontology) {
  const allNotes = ontology.notes;
  if (allNotes.length === 0) {
    return "Ontology: empty \u2014 no notes yet.";
  }

  const tally = { source: 0, finding: 0, claim: 0, question: 0, method: 0, insight: 0 };
  for (const entry of allNotes) {
    tally[entry.kind]++;
  }

  // A contradiction is counted once per unordered pair of note IDs.
  const conflictKeys = /* @__PURE__ */ new Set();
  const supportedIds = /* @__PURE__ */ new Set();
  for (const entry of allNotes) {
    for (const link of entry.edges) {
      if (link.relation === "contradicts") {
        conflictKeys.add([entry.id, link.targetId].sort().join(":"));
      } else if (link.relation === "supports") {
        supportedIds.add(link.targetId);
      }
    }
  }

  const unsupportedTotal = allNotes.filter(
    (n) => n.kind === "claim" && !supportedIds.has(n.id)
  ).length;
  const questionTotal = allNotes.filter((n) => n.kind === "question").length;
  const refutedTotal = allNotes.filter(
    (n) => n.kind === "claim" && n.confidence === "refuted"
  ).length;

  const report = [`Ontology: ${allNotes.length} notes`];
  for (const kind of NOTE_KINDS) {
    const count = tally[kind];
    if (count === 0) continue;
    const qualifiers = [];
    if (kind === "claim") {
      if (unsupportedTotal > 0) qualifiers.push(`${unsupportedTotal} unsupported`);
      if (refutedTotal > 0) qualifiers.push(`${refutedTotal} refuted`);
    }
    const suffix = qualifiers.length > 0 ? ` (${qualifiers.join(", ")})` : "";
    const label = `${kind.charAt(0).toUpperCase()}${kind.slice(1)}s`;
    report.push(` ${label}: ${count}${suffix}`);
  }

  const attention = [];
  if (conflictKeys.size > 0) attention.push(`Contradictions: ${conflictKeys.size}`);
  if (unsupportedTotal > 0) attention.push(`Unsupported claims: ${unsupportedTotal}`);
  if (questionTotal > 0) attention.push(`Open questions: ${questionTotal}`);
  if (attention.length > 0) {
    report.push("", ...attention);
  }
  return report.join("\n");
}
67
/**
 * List every claim with its confidence and the number of inbound `supports`
 * and `contradicts` edges found anywhere in the ontology.
 *
 * @param {{ notes: Array }} ontology - Loaded ontology.
 * @returns {string} One two-line entry per claim, separated by blank lines, or
 *   a fixed sentence when there are no claims.
 */
function formatClaims(ontology) {
  const claimNotes = ontology.notes.filter((n) => n.kind === "claim");
  if (claimNotes.length === 0) return "No claims in ontology.";

  // relation -> (target note id -> number of inbound edges of that relation)
  const inbound = /* @__PURE__ */ new Map([
    ["supports", /* @__PURE__ */ new Map()],
    ["contradicts", /* @__PURE__ */ new Map()]
  ]);
  for (const { edges } of ontology.notes) {
    for (const { relation, targetId } of edges) {
      const bucket = inbound.get(relation);
      if (bucket) bucket.set(targetId, (bucket.get(targetId) ?? 0) + 1);
    }
  }
  const countFor = (relation, id) => inbound.get(relation).get(id) ?? 0;

  const entries = [];
  for (const claim of claimNotes) {
    const header = `[${claim.id.slice(0, 8)}] "${claim.content}" (${claim.confidence})`;
    const tallies = `Supports: ${countFor("supports", claim.id)} Contradicts: ${countFor("contradicts", claim.id)}`;
    entries.push(`${header}\n${tallies}`);
  }
  return entries.join("\n\n");
}
89
/**
 * Render every contradiction in the ontology as a numbered "A vs. B" entry.
 *
 * A pair of notes is reported once, from the first `contradicts` edge found
 * between them in either direction. Edges whose target note does not exist
 * are skipped.
 *
 * @param {{ notes: Array }} ontology - Loaded ontology.
 * @returns {string} Entries separated by blank lines, or a fixed sentence when
 *   there are none.
 */
function formatConflicts(ontology) {
  // Index notes by id once, rather than scanning the array for every edge.
  // The first note wins for a duplicated id, matching a front-to-back search.
  const byId = /* @__PURE__ */ new Map();
  for (const note of ontology.notes) {
    if (!byId.has(note.id)) byId.set(note.id, note);
  }

  const pairs = [];
  const seen = /* @__PURE__ */ new Set();
  for (const note of ontology.notes) {
    for (const edge of note.edges) {
      if (edge.relation !== "contradicts") continue;
      // Order-independent key so A->B and B->A collapse into one entry.
      const pairKey = [note.id, edge.targetId].sort().join(":");
      if (seen.has(pairKey)) continue;
      seen.add(pairKey);
      const target = byId.get(edge.targetId);
      if (!target) continue;
      pairs.push({ a: note.content, b: target.content, context: edge.context });
    }
  }

  if (pairs.length === 0) return "No contradictions found.";
  return pairs
    .map((p, i) => `CONTRADICTION ${i + 1}:\n"${p.a}"\nvs.\n"${p.b}"\nContext: ${p.context}`)
    .join("\n\n");
}
116
+ export {
117
+ formatClaims,
118
+ formatConflicts,
119
+ getOntologyStatus
120
+ };
@@ -0,0 +1,13 @@
1
+ import {
2
+ cleanupStaleTmp,
3
+ getOntologyPath,
4
+ loadOntology,
5
+ saveOntology
6
+ } from "./chunk-3WM33M3O.js";
7
+ import "./chunk-3RG5ZIWI.js";
8
+ export {
9
+ cleanupStaleTmp,
10
+ getOntologyPath,
11
+ loadOntology,
12
+ saveOntology
13
+ };
@@ -0,0 +1,177 @@
1
+ import {
2
+ USER_AGENT,
3
+ extractBatch,
4
+ fetchAndParseContent,
5
+ formatExtractionResults,
6
+ loadOpenResearchConfig
7
+ } from "./chunk-TQSQRNX6.js";
8
+ import "./chunk-I5NVYKG7.js";
9
+ import "./chunk-3RG5ZIWI.js";
10
+
11
+ // src/lib/search/duckduckgo.ts
12
+ import { load as loadCheerio } from "cheerio";
13
+ var DDG_URL = "https://html.duckduckgo.com/html/";
14
+ var TIMEOUT_MS = 1e4;
15
/**
 * Query DuckDuckGo's HTML endpoint and scrape the result list.
 *
 * Best-effort: a non-OK response, a timeout (TIMEOUT_MS) or any other error
 * yields an empty array.
 *
 * @param {string} query - Search phrase.
 * @param {number} [numResults=10] - Maximum number of results to return.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 */
async function searchDuckDuckGo(query, numResults = 10) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
  try {
    const response = await fetch(DDG_URL, {
      method: "POST",
      headers: {
        "User-Agent": USER_AGENT,
        "Content-Type": "application/x-www-form-urlencoded"
      },
      body: `q=${encodeURIComponent(query)}`,
      redirect: "follow",
      signal: controller.signal
    });
    if (!response.ok) return [];

    const html = await response.text();
    const $ = loadCheerio(html);
    const results = [];
    $(".result").each((_, el) => {
      // Returning false stops the iteration once enough results are collected.
      if (results.length >= numResults) return false;
      const $el = $(el);
      const titleEl = $el.find(".result__a").first();
      const snippetEl = $el.find(".result__snippet").first();
      const title = titleEl.text().trim();
      let url = titleEl.attr("href") ?? "";
      if (url.includes("uddg=")) {
        // Result links are redirects that carry the real target in `uddg`.
        try {
          const parsed = new URL(url, "https://duckduckgo.com");
          // searchParams.get() already percent-decodes the value. Decoding it a
          // second time would corrupt targets containing escapes such as %20 or
          // %25, or throw on a bare "%".
          url = parsed.searchParams.get("uddg") ?? url;
        } catch {
          // Unparseable href: keep the raw value; the http check below decides.
        }
      }
      const snippet = snippetEl.text().trim();
      if (title && url && url.startsWith("http")) {
        results.push({ title, url, snippet });
      }
    });
    return results;
  } catch {
    // Deliberate best-effort: network errors and aborts mean "no results".
    return [];
  } finally {
    clearTimeout(timer);
  }
}
59
+
60
+ // src/lib/search/brave.ts
61
+ var BRAVE_API_URL = "https://api.search.brave.com/res/v1/web/search";
62
+ var TIMEOUT_MS2 = 1e4;
63
/**
 * Query the Brave web-search API.
 *
 * Best-effort: a non-OK response, a timeout (TIMEOUT_MS2) or any other error
 * yields an empty array.
 *
 * @param {string} query - Search phrase.
 * @param {string} apiKey - Sent as the X-Subscription-Token header.
 * @param {number} [numResults=10] - Requested result count; capped at 20 in the request.
 * @returns {Promise<Array<{title, url, description, age, pageAge, hostname}>>}
 */
async function searchBrave(query, apiKey, numResults = 10) {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), TIMEOUT_MS2);
  try {
    const search = new URLSearchParams({
      q: query,
      count: String(Math.min(numResults, 20)),
      text_decorations: "false"
    });
    const requestHeaders = {
      Accept: "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": apiKey
    };
    const response = await fetch(`${BRAVE_API_URL}?${search}`, {
      headers: requestHeaders,
      signal: abort.signal
    });
    if (!response.ok) return [];

    const payload = await response.json();
    const hits = payload.web?.results ?? [];
    // Reshape API fields (snake_case, nested hostname) into the local result shape.
    return hits.map((hit) => {
      const { title, url, description, age } = hit;
      return {
        title,
        url,
        description,
        age,
        pageAge: hit.page_age,
        hostname: hit.meta_url?.hostname
      };
    });
  } catch {
    return [];
  } finally {
    clearTimeout(deadline);
  }
}
97
+
98
+ // src/lib/agent/tools/web-search.ts
99
/**
 * Find web results for a query, preferring Brave when an API key is configured
 * and it returns at least one hit, otherwise falling back to DuckDuckGo.
 *
 * Both backends are asked for three more results than requested.
 *
 * @param {string} query - Search phrase.
 * @param {number} numResults - Number of results the caller intends to use.
 * @returns {Promise<{results: Array<{title, url, snippet}>, backend: string}>}
 */
async function discoverWebResults(query, numResults) {
  // A config that fails to load simply means no Brave key is available.
  const config = await loadOpenResearchConfig().catch(() => null);
  const braveKey = config?.apiKeys?.brave;
  const wanted = numResults + 3;

  if (braveKey) {
    const braveHits = await searchBrave(query, braveKey, wanted);
    if (braveHits.length > 0) {
      const results = braveHits.map(({ title, url, description }) => ({
        title,
        url,
        snippet: description
      }));
      return { results, backend: "brave" };
    }
  }

  const ddgHits = await searchDuckDuckGo(query, wanted);
  return {
    results: ddgHits.map(({ title, url, snippet }) => ({ title, url, snippet })),
    backend: "duckduckgo"
  };
}
117
/**
 * Web-search tool: discover results for a query and, when an LLM provider is
 * available, fetch each page and extract what is relevant to `args.target`.
 *
 * @param {object} args
 * @param {string} args.target - What to extract from the pages; required.
 * @param {string} args.query - Search phrase; required.
 * @param {number} [args.num_results=5] - Desired result count, clamped to 1..8.
 * @param {object} [provider] - LLM provider for extraction. Without it the
 *   plain search snippets are returned.
 * @returns {Promise<{result: string}>} Text for the calling agent.
 */
async function executeWebSearch(args, provider) {
  if (!args.target || !args.query) {
    return { result: "Error: both target and query are required." };
  }

  // Clamp to 1..8. An unbounded 0, negative or non-numeric value would turn
  // the slices below into empty or tail-dropping ranges.
  const requested = Number(args.num_results ?? 5);
  const numResults = Number.isFinite(requested)
    ? Math.min(Math.max(Math.trunc(requested), 1), 8)
    : 5;

  const formatSnippets = (hits) =>
    hits.map((r, i) => `${i + 1}. ${r.title}\n${r.url}\n${r.snippet}`).join("\n\n");

  const { results: searchResults, backend } = await discoverWebResults(args.query, numResults);
  if (searchResults.length === 0) {
    return { result: "No web results found. Try a different query." };
  }

  const toFetch = searchResults.slice(0, numResults);
  if (!provider) {
    return { result: `[${backend}] ${formatSnippets(toFetch)}` };
  }

  // Fetch all pages concurrently. A failed or empty fetch drops only that hit.
  const contentResults = await Promise.allSettled(
    toFetch.map(async (hit) => {
      const content = await fetchAndParseContent(hit.url);
      return content ? { hit, text: content.text } : null;
    })
  );

  const extractionInputs = [];
  const titleMap = /* @__PURE__ */ new Map();
  for (const outcome of contentResults) {
    if (outcome.status !== "fulfilled" || !outcome.value) continue;
    const { hit, text } = outcome.value;
    extractionInputs.push({
      title: hit.title,
      content: text,
      url: hit.url,
      target: args.target
    });
    titleMap.set(hit.url, hit.title);
  }

  if (extractionInputs.length === 0) {
    return {
      result: `Could not fetch page content. Search snippets:\n\n${formatSnippets(toFetch)}`
    };
  }

  const extractions = await extractBatch(extractionInputs, provider);
  const extracted = [];
  for (const [url, extraction] of extractions) {
    // Only extractions scoring at least 2 are passed on to the formatter.
    if (extraction.relevanceScore >= 2) {
      extracted.push({ title: titleMap.get(url) ?? url, url, extraction });
    }
  }
  return { result: formatExtractionResults(extracted) };
}
175
+ export {
176
+ executeWebSearch
177
+ };