@steel-dev/atlas 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/LICENSE +21 -0
  2. package/README.md +219 -0
  3. package/dist/agent.d.ts +34 -0
  4. package/dist/agent.js +133 -0
  5. package/dist/async.d.ts +19 -0
  6. package/dist/async.js +172 -0
  7. package/dist/atlas.d.ts +19 -0
  8. package/dist/atlas.js +69 -0
  9. package/dist/budget.d.ts +64 -0
  10. package/dist/budget.js +336 -0
  11. package/dist/checklist.d.ts +115 -0
  12. package/dist/checklist.js +297 -0
  13. package/dist/cli.js +38700 -0
  14. package/dist/config.d.ts +80 -0
  15. package/dist/config.js +109 -0
  16. package/dist/context.d.ts +26 -0
  17. package/dist/context.js +250 -0
  18. package/dist/custom-tools.d.ts +26 -0
  19. package/dist/custom-tools.js +33 -0
  20. package/dist/defaults.d.ts +10 -0
  21. package/dist/defaults.js +37 -0
  22. package/dist/economy.d.ts +12 -0
  23. package/dist/economy.js +6 -0
  24. package/dist/env.d.ts +1 -0
  25. package/dist/env.js +8 -0
  26. package/dist/errors.d.ts +6 -0
  27. package/dist/errors.js +11 -0
  28. package/dist/event-hub.d.ts +11 -0
  29. package/dist/event-hub.js +83 -0
  30. package/dist/events.d.ts +105 -0
  31. package/dist/events.js +1 -0
  32. package/dist/html-extract.d.ts +21 -0
  33. package/dist/html-extract.js +459 -0
  34. package/dist/index.d.ts +59 -0
  35. package/dist/index.js +26 -0
  36. package/dist/memory.d.ts +2 -0
  37. package/dist/memory.js +38 -0
  38. package/dist/model.d.ts +49 -0
  39. package/dist/model.js +630 -0
  40. package/dist/orchestrate.d.ts +5 -0
  41. package/dist/orchestrate.js +277 -0
  42. package/dist/pdf-extract.d.ts +5 -0
  43. package/dist/pdf-extract.js +20 -0
  44. package/dist/prompts.d.ts +2 -0
  45. package/dist/prompts.js +6 -0
  46. package/dist/providers/domain/arxiv.d.ts +6 -0
  47. package/dist/providers/domain/arxiv.js +83 -0
  48. package/dist/providers/domain/clinicaltrials.d.ts +6 -0
  49. package/dist/providers/domain/clinicaltrials.js +104 -0
  50. package/dist/providers/domain/edgar.d.ts +10 -0
  51. package/dist/providers/domain/edgar.js +92 -0
  52. package/dist/providers/domain/index.d.ts +14 -0
  53. package/dist/providers/domain/index.js +7 -0
  54. package/dist/providers/domain/openalex.d.ts +7 -0
  55. package/dist/providers/domain/openalex.js +128 -0
  56. package/dist/providers/domain/pubmed.d.ts +8 -0
  57. package/dist/providers/domain/pubmed.js +123 -0
  58. package/dist/providers/domain/semantic-scholar.d.ts +6 -0
  59. package/dist/providers/domain/semantic-scholar.js +112 -0
  60. package/dist/providers/domain/shared.d.ts +12 -0
  61. package/dist/providers/domain/shared.js +39 -0
  62. package/dist/providers/domain/wikipedia.d.ts +6 -0
  63. package/dist/providers/domain/wikipedia.js +71 -0
  64. package/dist/providers/exa-agent.d.ts +9 -0
  65. package/dist/providers/exa-agent.js +67 -0
  66. package/dist/providers/fetch.d.ts +66 -0
  67. package/dist/providers/fetch.js +675 -0
  68. package/dist/providers/parallel-agent.d.ts +11 -0
  69. package/dist/providers/parallel-agent.js +100 -0
  70. package/dist/providers/perplexity-agent.d.ts +17 -0
  71. package/dist/providers/perplexity-agent.js +86 -0
  72. package/dist/providers/search.d.ts +65 -0
  73. package/dist/providers/search.js +433 -0
  74. package/dist/providers/store.d.ts +48 -0
  75. package/dist/providers/store.js +217 -0
  76. package/dist/researcher.d.ts +20 -0
  77. package/dist/researcher.js +3 -0
  78. package/dist/robots.d.ts +16 -0
  79. package/dist/robots.js +146 -0
  80. package/dist/roles.d.ts +6 -0
  81. package/dist/roles.js +4 -0
  82. package/dist/run.d.ts +65 -0
  83. package/dist/run.js +371 -0
  84. package/dist/safe-dispatcher.d.ts +16 -0
  85. package/dist/safe-dispatcher.js +32 -0
  86. package/dist/safety.d.ts +23 -0
  87. package/dist/safety.js +206 -0
  88. package/dist/sandbox.d.ts +22 -0
  89. package/dist/sandbox.js +228 -0
  90. package/dist/search-normalize.d.ts +2 -0
  91. package/dist/search-normalize.js +13 -0
  92. package/dist/source-documents.d.ts +77 -0
  93. package/dist/source-documents.js +421 -0
  94. package/dist/sources.d.ts +57 -0
  95. package/dist/sources.js +1 -0
  96. package/dist/spine.d.ts +19 -0
  97. package/dist/spine.js +722 -0
  98. package/dist/state.d.ts +90 -0
  99. package/dist/state.js +27 -0
  100. package/dist/structured.d.ts +7 -0
  101. package/dist/structured.js +18 -0
  102. package/dist/tools.d.ts +33 -0
  103. package/dist/tools.js +1187 -0
  104. package/dist/trace-digest.d.ts +11 -0
  105. package/dist/trace-digest.js +309 -0
  106. package/dist/trace.d.ts +225 -0
  107. package/dist/trace.js +278 -0
  108. package/dist/trail.d.ts +15 -0
  109. package/dist/trail.js +74 -0
  110. package/dist/url.d.ts +1 -0
  111. package/dist/url.js +25 -0
  112. package/package.json +107 -0
@@ -0,0 +1,128 @@
1
+ import { readEnv } from "../../env.js";
2
+ import { errorMessage } from "../../errors.js";
3
+ import { safeDomain, } from "../search.js";
4
+ import { buildContent, clampLimit, collapse, fetchJson } from "./shared.js";
5
/** OpenAlex "works" endpoint queried by this provider. */
const ENDPOINT = "https://api.openalex.org/works";
/**
 * Create the OpenAlex search provider.
 *
 * - `opts.defaultLimit` (default 5, normalised by `clampLimit`) is used when a
 *   search call supplies no `maxResults`.
 * - `opts.email` falls back to the `ATLAS_OPENALEX_EMAIL` environment value
 *   and, when present, is sent as the `mailto` query parameter.
 * - `opts.sort` of "date" or "citations" selects a descending sort; any other
 *   value sends no `sort` parameter.
 *
 * @returns an object with `id: "openalex"` and an async `search` function.
 *   `search` resolves to `[]` for a blank query, otherwise to the rows built
 *   by `toResults`; it throws `Error("openalex: request failed: …")` when
 *   `fetchJson` rejects.
 */
export function openalex(opts = {}) {
    const defaultLimit = clampLimit(opts.defaultLimit ?? 5);
    const email = opts.email ?? readEnv("ATLAS_OPENALEX_EMAIL");
    let sort;
    if (opts.sort === "date") {
        sort = "publication_date:desc";
    }
    else if (opts.sort === "citations") {
        sort = "cited_by_count:desc";
    }
    const search = async ({ query, maxResults, signal }) => {
        const term = query.trim();
        if (term === "") {
            return [];
        }
        const perPage = clampLimit(maxResults ?? defaultLimit);
        const params = new URLSearchParams({
            search: term,
            per_page: String(perPage),
        });
        if (sort) {
            params.set("sort", sort);
        }
        if (email) {
            params.set("mailto", email);
        }
        let payload;
        try {
            payload = await fetchJson(`${ENDPOINT}?${params.toString()}`, signal);
        }
        catch (err) {
            // Re-throw with a provider prefix so callers can tell which source failed.
            throw new Error(`openalex: request failed: ${errorMessage(err)}`);
        }
        return toResults(payload);
    };
    return { id: "openalex", search };
}
39
/**
 * Convert an OpenAlex works response into search-result rows.
 *
 * Rows without a title or without a usable URL (see `workUrl`) are skipped.
 * `position` is 1-based and counts only the rows that were kept.
 *
 * @param data parsed JSON response; anything without a `results` array yields `[]`.
 * @returns array of `{ position, title, url, snippet, domain, meta }` objects.
 */
function toResults(data) {
    const works = data !== null && typeof data === "object" ? data.results : undefined;
    if (!Array.isArray(works)) {
        return [];
    }
    const rows = [];
    for (const entry of works) {
        const work = entry ?? {};
        const title = collapse(String(work.title ?? work.display_name ?? ""));
        const url = workUrl(work);
        if (title === "" || url === "") {
            continue;
        }
        const abstract = reconstructAbstract(work.abstract_inverted_index);
        // Keep at most the first 12 non-empty author names.
        const authors = Array.isArray(work.authorships)
            ? work.authorships
                .flatMap((a) => {
                    const name = collapse(String(a?.author?.display_name ?? ""));
                    return name ? [name] : [];
                })
                .slice(0, 12)
            : [];
        const venue = collapse(String(work.primary_location?.source?.display_name ?? ""));
        const year = work.publication_year ? String(work.publication_year) : "";
        // "Venue (Year)", "Venue", "(Year)" or nothing, depending on what is known.
        const venueLine = venue && year
            ? `${venue} (${year})`
            : venue || (year ? `(${year})` : "");
        const meta = [];
        if (venueLine) {
            meta.push(venueLine);
        }
        if (typeof work.cited_by_count === "number") {
            meta.push(`Cited by ${work.cited_by_count}`);
        }
        const snippetParts = [meta.join(" · "), abstract].filter(Boolean);
        rows.push({
            position: rows.length + 1,
            title,
            url,
            snippet: collapse(snippetParts.join(" — ")),
            domain: safeDomain(url),
            meta: {
                openUrls: openAccessUrls(work),
                fallbackText: buildContent({ title, authors, meta, abstract }),
            },
        });
    }
    return rows;
}
84
/**
 * Collect the open-access URLs advertised for an OpenAlex work.
 *
 * Candidates are taken, in priority order, from `best_oa_location`
 * (PDF, then landing page), `open_access.oa_url`, and every entry of
 * `oa_locations` (PDF, then landing page). Only `http(s)` strings are kept.
 *
 * The same URL commonly appears in more than one of these fields, so the
 * result is de-duplicated while preserving first-seen order; callers no
 * longer receive repeated entries.
 *
 * @param w work record from the OpenAlex response.
 * @returns unique open-access URLs, highest priority first.
 */
function openAccessUrls(w) {
    // A Set keeps insertion order, which gives ordered de-duplication for free.
    const unique = new Set();
    const push = (u) => {
        if (typeof u === "string" && /^https?:\/\//.test(u))
            unique.add(u);
    };
    const best = w.best_oa_location;
    push(best?.pdf_url);
    push(best?.landing_page_url);
    push(w.open_access?.oa_url);
    const locs = w.oa_locations;
    if (Array.isArray(locs)) {
        for (const loc of locs) {
            push(loc?.pdf_url);
            push(loc?.landing_page_url);
        }
    }
    return [...unique];
}
104
/**
 * Pick the canonical URL for an OpenAlex work.
 *
 * Preference order: DOI, then `primary_location.landing_page_url`, then the
 * work's own `id`. Returns "" when none of them is usable.
 *
 * DOI handling: surrounding whitespace is ignored and a leading `doi:` prefix
 * is stripped case-insensitively (matching how the Semantic Scholar provider
 * treats DOIs). A DOI that is empty after normalisation is skipped, so the
 * result is never the bare resolver root `https://doi.org/`.
 *
 * @param w work record from the OpenAlex response.
 * @returns an http(s) URL, or "" if the work has no usable link.
 */
function workUrl(w) {
    const doi = typeof w.doi === "string" ? w.doi.trim() : "";
    if (doi) {
        if (/^https?:\/\//.test(doi))
            return doi;
        const bare = doi.replace(/^doi:\s*/i, "");
        if (bare)
            return `https://doi.org/${bare}`;
    }
    const landing = w.primary_location?.landing_page_url;
    if (typeof landing === "string" && /^https?:\/\//.test(landing))
        return landing;
    const id = w.id;
    return typeof id === "string" && /^https?:\/\//.test(id) ? id : "";
}
116
/**
 * Rebuild plain abstract text from an inverted index of the form
 * `{ word: [position, …] }`.
 *
 * Words are placed at their positions and joined with single spaces; gaps in
 * the position sequence are simply skipped. When two words claim the same
 * position, the one that appears later in the index wins.
 *
 * Positions are collected into a Map and sorted rather than written into a
 * sparse array: a single very large position in the response would otherwise
 * inflate the array length and make the final pass walk every hole.
 * Positions that are not non-negative integers are ignored.
 *
 * @param inv inverted index, or any non-object value.
 * @returns the reconstructed text, or "" when `inv` is not an object.
 */
function reconstructAbstract(inv) {
    if (!inv || typeof inv !== "object")
        return "";
    const wordAt = new Map();
    for (const [word, positions] of Object.entries(inv)) {
        if (!Array.isArray(positions))
            continue;
        for (const p of positions) {
            if (Number.isInteger(p) && p >= 0)
                wordAt.set(p, word);
        }
    }
    const ordered = [...wordAt.keys()]
        .sort((a, b) => a - b)
        .map((p) => wordAt.get(p));
    return collapse(ordered.join(" "));
}
@@ -0,0 +1,8 @@
1
+ import { type SearchProvider } from "../search.js";
2
/** Options accepted by the `pubmed` search-provider factory. */
+ export interface PubmedOptions {
3
/** Result count used when a search call passes no `maxResults`; the implementation defaults it to 5 and normalises it with `clampLimit`. */
+ defaultLimit?: number;
4
/** "date" is sent to ESearch as `sort=pub_date`; any other value (or none) is sent as `sort=relevance`. */
+ sort?: "relevance" | "date";
5
/** NCBI API key, sent as the `api_key` query parameter; falls back to the `ATLAS_NCBI_API_KEY` environment value. */
+ apiKey?: string;
6
/** Contact address, sent as the `email` query parameter; falls back to the `ATLAS_NCBI_EMAIL` environment value. */
+ email?: string;
7
+ }
8
/** Create the PubMed search provider (`id: "pubmed"`), which runs an ESearch followed by an EFetch per query. */
+ export declare function pubmed(opts?: PubmedOptions): SearchProvider;
@@ -0,0 +1,123 @@
1
+ import * as cheerio from "cheerio";
2
+ import { readEnv } from "../../env.js";
3
+ import { errorMessage } from "../../errors.js";
4
+ import { safeDomain, } from "../search.js";
5
+ import { buildContent, clampLimit, collapse, fetchJson, fetchText, } from "./shared.js";
6
/** Base URL of the NCBI E-utilities endpoints used below. */
const EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
/**
 * Create the PubMed search provider.
 *
 * A search is two requests: `esearch.fcgi` (JSON) to obtain matching IDs,
 * then `efetch.fcgi` (XML) to load the records, which `toResults` parses.
 *
 * - `opts.defaultLimit` (default 5, normalised by `clampLimit`) applies when
 *   a call passes no `maxResults`.
 * - `opts.sort === "date"` sorts by `pub_date`; anything else by `relevance`.
 * - `opts.apiKey` / `opts.email` fall back to `ATLAS_NCBI_API_KEY` /
 *   `ATLAS_NCBI_EMAIL` and are added to both requests when set.
 *
 * @returns an object with `id: "pubmed"` and an async `search` function that
 *   resolves to `[]` for a blank query or when no IDs match, and throws
 *   `Error("pubmed: search failed: …")` or `Error("pubmed: fetch failed: …")`
 *   depending on which request failed.
 */
export function pubmed(opts = {}) {
    const defaultLimit = clampLimit(opts.defaultLimit ?? 5);
    const sort = opts.sort === "date" ? "pub_date" : "relevance";
    const apiKey = opts.apiKey ?? readEnv("ATLAS_NCBI_API_KEY");
    const email = opts.email ?? readEnv("ATLAS_NCBI_EMAIL");
    // Query parameters shared by both E-utilities calls.
    const common = () => ({
        tool: "atlas",
        ...(email ? { email } : {}),
        ...(apiKey ? { api_key: apiKey } : {}),
    });
    const search = async ({ query, maxResults, signal }) => {
        const term = query.trim();
        if (term === "") {
            return [];
        }
        const limit = clampLimit(maxResults ?? defaultLimit);
        let ids;
        try {
            const searchParams = new URLSearchParams({
                db: "pubmed",
                term,
                retmode: "json",
                retmax: String(limit),
                sort,
                ...common(),
            });
            const found = await fetchJson(`${EUTILS}/esearch.fcgi?${searchParams.toString()}`, signal);
            ids = extractIds(found);
        }
        catch (err) {
            throw new Error(`pubmed: search failed: ${errorMessage(err)}`);
        }
        if (ids.length === 0) {
            return [];
        }
        let xml;
        try {
            const fetchParams = new URLSearchParams({
                db: "pubmed",
                id: ids.join(","),
                rettype: "abstract",
                retmode: "xml",
                ...common(),
            });
            xml = await fetchText(`${EUTILS}/efetch.fcgi?${fetchParams.toString()}`, signal, "application/xml");
        }
        catch (err) {
            throw new Error(`pubmed: fetch failed: ${errorMessage(err)}`);
        }
        return toResults(xml);
    };
    return { id: "pubmed", search };
}
63
/**
 * Pull the list of record IDs out of an ESearch JSON response.
 *
 * @param data parsed response; expected shape is `{ esearchresult: { idlist } }`.
 * @returns the string entries of `idlist`, or `[]` when the shape does not match.
 */
function extractIds(data) {
    if (!data || typeof data !== "object") {
        return [];
    }
    const idlist = data.esearchresult?.idlist;
    if (!Array.isArray(idlist)) {
        return [];
    }
    // Drop anything that is not a string.
    return idlist.filter((x) => typeof x === "string");
}
71
/**
 * Parse an EFetch XML document into search-result rows.
 *
 * One row is produced per `PubmedArticle` that has both a PMID and a title;
 * `position` is 1-based and counts only the rows that were kept. Labelled
 * abstract sections are rendered as "Label: text", one per line. At most the
 * first 12 authors are kept.
 *
 * The venue line is "Journal (Year)", "Journal", or, when only the year is
 * known, "(Year)" — the same three-way rule the other providers in this
 * package use, so a year is no longer dropped when the journal title is
 * missing.
 *
 * @param xml raw XML text returned by `efetch.fcgi`.
 * @returns array of `{ position, title, url, snippet, domain, meta }` objects.
 */
function toResults(xml) {
    const $ = cheerio.load(xml, { xml: true });
    // Text of the first element matching `selector` under `node`.
    const firstText = (node, selector) => node.find(selector).first().text();
    const out = [];
    $("PubmedArticle").each((_, el) => {
        const art = $(el);
        const pmid = firstText(art, "MedlineCitation > PMID").trim();
        const title = collapse(firstText(art, "ArticleTitle"));
        if (!pmid || !title)
            return;
        const abstract = art
            .find("Abstract > AbstractText")
            .map((_i, t) => {
                const label = $(t).attr("Label");
                const text = collapse($(t).text());
                return label && text ? `${label}: ${text}` : text;
            })
            .get()
            .filter(Boolean)
            .join("\n");
        const authors = art
            .find("AuthorList > Author")
            .map((_i, a) => {
                const last = collapse(firstText($(a), "LastName"));
                const fore = collapse(firstText($(a), "ForeName"));
                return [fore, last].filter(Boolean).join(" ");
            })
            .get()
            .filter(Boolean)
            .slice(0, 12);
        const journal = collapse(firstText(art, "Journal > Title"));
        // Prefer the structured year; otherwise take the first four characters of MedlineDate.
        const year = firstText(art, "PubDate > Year").trim() ||
            firstText(art, "PubDate > MedlineDate").trim().slice(0, 4);
        let venue = "";
        if (journal && year)
            venue = `${journal} (${year})`;
        else if (journal)
            venue = journal;
        else if (year)
            venue = `(${year})`;
        const url = `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
        out.push({
            position: out.length + 1,
            title,
            url,
            snippet: collapse([venue, abstract].filter(Boolean).join(" — ")),
            domain: safeDomain(url),
            meta: {
                openUrls: [],
                fallbackText: buildContent({
                    title,
                    authors,
                    meta: venue ? [venue] : [],
                    abstract,
                }),
            },
        });
    });
    return out;
}
@@ -0,0 +1,6 @@
1
+ import { type ResearchTool } from "../../custom-tools.js";
2
/** Options accepted by the `semanticScholar` research-tool factory. */
+ export interface SemanticScholarOptions {
3
/** Number of papers requested per query (`limit` parameter); the implementation defaults it to 5 and normalises it with `clampLimit`. */
+ defaultLimit?: number;
4
/** API key sent in the `x-api-key` request header; falls back to the `ATLAS_S2_API_KEY` environment value. */
+ apiKey?: string;
5
+ }
6
/** Create the Semantic Scholar research tool, which searches papers and registers each hit through `ctx.addSource`. */
+ export declare function semanticScholar(opts?: SemanticScholarOptions): ResearchTool;
@@ -0,0 +1,112 @@
1
+ import { jsonSchema } from "ai";
2
+ import { researchTool, } from "../../custom-tools.js";
3
+ import { readEnv } from "../../env.js";
4
+ import { errorMessage } from "../../errors.js";
5
+ import { buildContent, clampLimit, collapse, manifest, USER_AGENT, } from "./shared.js";
6
/** Semantic Scholar Graph API paper-search endpoint. */
const ENDPOINT = "https://api.semanticscholar.org/graph/v1/paper/search";
/** Comma-separated list of paper fields requested from the API. */
const FIELDS = [
    "title",
    "abstract",
    "authors",
    "year",
    "venue",
    "citationCount",
    "tldr",
    "externalIds",
    "url",
].join(",");
/** Sentinel error message `search` throws on HTTP 429 so the tool can report it specially. */
const RATE_LIMITED = "rate-limited";
/**
 * Create the Semantic Scholar research tool.
 *
 * The tool takes `{ query }`, searches the paper index, registers every
 * usable hit through `ctx.addSource` (see `ingest`), and returns a text
 * manifest of what was submitted. Failures are reported as returned strings
 * rather than thrown: an empty query, a rate-limit response, or any other
 * request error each produce a "semantic-scholar: …" message.
 *
 * - `opts.defaultLimit` (default 5, normalised by `clampLimit`) is the number
 *   of papers requested.
 * - `opts.apiKey` falls back to the `ATLAS_S2_API_KEY` environment value.
 */
export function semanticScholar(opts = {}) {
    const defaultLimit = clampLimit(opts.defaultLimit ?? 5);
    const apiKey = opts.apiKey ?? readEnv("ATLAS_S2_API_KEY");
    const inputSchema = jsonSchema({
        type: "object",
        properties: {
            query: { type: "string", description: "Search query" },
        },
        required: ["query"],
        additionalProperties: false,
    });
    const execute = async (input, ctx) => {
        const query = String(input.query ?? "").trim();
        if (query === "") {
            return "semantic-scholar: empty query";
        }
        const params = new URLSearchParams({
            query,
            limit: String(defaultLimit),
            fields: FIELDS,
        });
        let data;
        try {
            data = await search(`${ENDPOINT}?${params.toString()}`, apiKey, ctx.signal);
        }
        catch (err) {
            const message = errorMessage(err);
            return message === RATE_LIMITED
                ? "semantic-scholar: rate limited — set ATLAS_S2_API_KEY (or pass { apiKey }) for higher limits."
                : `semantic-scholar: request failed: ${message}`;
        }
        return manifest("semantic-scholar", query, ingest(data, ctx));
    };
    return researchTool({
        description: "Search Semantic Scholar, an AI-powered index of scientific papers across all fields. Returns abstracts, one-line TL;DR summaries and citation counts as cited sources.",
        inputSchema,
        execute,
    });
}
45
/**
 * Perform the Semantic Scholar HTTP request and parse the JSON body.
 *
 * @param url fully built request URL.
 * @param apiKey optional key; when set it is sent as the `x-api-key` header.
 * @param signal optional abort signal forwarded to `fetch`.
 * @returns the parsed JSON response.
 * @throws Error with message `RATE_LIMITED` on HTTP 429, or
 *   `HTTP <status> <statusText>` on any other non-OK response.
 */
async function search(url, apiKey, signal) {
    const headers = {
        "user-agent": USER_AGENT,
        accept: "application/json",
        ...(apiKey ? { "x-api-key": apiKey } : {}),
    };
    const resp = await fetch(url, { signal, headers });
    // 429 is checked first so rate limiting gets its own sentinel message.
    if (resp.status === 429) {
        throw new Error(RATE_LIMITED);
    }
    if (resp.ok) {
        return resp.json();
    }
    throw new Error(`HTTP ${resp.status} ${resp.statusText}`.trim());
}
59
/**
 * Register every usable paper in a Semantic Scholar response as a source.
 *
 * Papers without a title or without a URL (see `paperUrl`) are skipped. For
 * each remaining paper `ctx.addSource` receives the URL, the title, and a text
 * body assembled by `buildContent` from the authors (first 12), a venue/year
 * line, the citation count, the TL;DR, and the abstract.
 *
 * @param data parsed JSON response; anything without a `data` array yields `[]`.
 * @param ctx tool context providing `addSource`.
 * @returns titles of the papers that were submitted, in response order.
 */
function ingest(data, ctx) {
    const papers = data !== null && typeof data === "object" ? data.data : undefined;
    if (!Array.isArray(papers)) {
        return [];
    }
    const submitted = [];
    for (const entry of papers) {
        const paper = entry ?? {};
        const title = collapse(String(paper.title ?? ""));
        const url = paperUrl(paper);
        if (title === "" || url === "") {
            continue;
        }
        const authors = Array.isArray(paper.authors)
            ? paper.authors
                .flatMap((a) => {
                    const name = collapse(String(a?.name ?? ""));
                    return name ? [name] : [];
                })
                .slice(0, 12)
            : [];
        const venue = collapse(String(paper.venue ?? ""));
        const year = paper.year ? String(paper.year) : "";
        const tldr = collapse(String(paper.tldr?.text ?? ""));
        const abstract = collapse(String(paper.abstract ?? ""));
        // "Venue (Year)", "Venue", "(Year)" or nothing, depending on what is known.
        const venueLine = venue && year
            ? `${venue} (${year})`
            : venue || (year ? `(${year})` : "");
        const meta = [];
        if (venueLine) {
            meta.push(venueLine);
        }
        if (typeof paper.citationCount === "number") {
            meta.push(`Cited by ${paper.citationCount}`);
        }
        if (tldr) {
            meta.push(`TL;DR: ${tldr}`);
        }
        const content = buildContent({ title, authors, meta, abstract });
        ctx.addSource({ url, title, content });
        submitted.push(title);
    }
    return submitted;
}
102
/**
 * Pick the canonical URL for a Semantic Scholar paper.
 *
 * Preference order: DOI (as a `https://doi.org/` link), then the paper's own
 * http(s) `url`, then a semanticscholar.org link built from `paperId`.
 * Returns "" when none is available.
 *
 * The DOI is trimmed before the optional `doi:` prefix is removed, so a padded
 * value such as " doi:10.1/x" is normalised correctly, and a DOI that is empty
 * after normalisation falls through to the next candidate instead of
 * producing the bare resolver root.
 *
 * @param p paper record from the API response.
 * @returns a URL string, or "" if the paper has no usable link.
 */
function paperUrl(p) {
    const rawDoi = p.externalIds?.DOI;
    if (typeof rawDoi === "string") {
        const doi = rawDoi.trim().replace(/^doi:\s*/i, "");
        if (doi)
            return `https://doi.org/${doi}`;
    }
    if (typeof p.url === "string" && /^https?:\/\//.test(p.url))
        return p.url;
    const id = p.paperId;
    return typeof id === "string" && id
        ? `https://www.semanticscholar.org/paper/${id}`
        : "";
}
@@ -0,0 +1,12 @@
1
/** User-Agent header value sent with provider requests. */
+ export declare const USER_AGENT = "atlas-research/0.1 (+https://github.com/steel-experiments/atlas)";
2
/** Replace every whitespace run in `text` with a single space and trim both ends. */
+ export declare function collapse(text: string): string;
3
/** Floor `n` and clamp it to `[1, max]` (`max` defaults to 25 in the implementation); a non-finite `n` yields 5. */
+ export declare function clampLimit(n: number, max?: number): number;
4
/** GET `url` with the shared User-Agent and the given `accept` header; resolves to the body text and rejects with `HTTP <status> <statusText>` on a non-OK response. */
+ export declare function fetchText(url: string, signal: AbortSignal | undefined, accept: string): Promise<string>;
5
/** `fetchText` with `accept: application/json`, followed by `JSON.parse`; the result is not validated. */
+ export declare function fetchJson(url: string, signal: AbortSignal | undefined): Promise<unknown>;
6
/** Build a plain-text source body: title line, optional "Authors: …" line, one line per non-empty `meta` entry, then a blank line and the trimmed abstract when present. */
+ export declare function buildContent(parts: {
7
+ title: string;
8
+ authors?: string[];
9
+ meta?: string[];
10
+ abstract?: string;
11
+ }): string;
12
/** Summarise a tool run as text: a "no results" line when `titles` is empty, otherwise a count line followed by one "- title" bullet per entry. */
+ export declare function manifest(tool: string, query: string, titles: string[]): string;
@@ -0,0 +1,39 @@
1
/** User-Agent header value sent with every provider request. */
export const USER_AGENT = "atlas-research/0.1 (+https://github.com/steel-experiments/atlas)";
/**
 * Normalise whitespace: every run of whitespace becomes one space and the
 * result has no leading or trailing space.
 *
 * @param text input string.
 * @returns the whitespace-normalised string.
 */
export function collapse(text) {
    // Splitting on whitespace runs and dropping the empty edge pieces is
    // equivalent to replacing each run with a space and trimming.
    const words = text.split(/\s+/).filter((piece) => piece !== "");
    return words.join(" ");
}
5
/**
 * Normalise a requested result count.
 *
 * @param n requested count; non-finite values fall back to 5.
 * @param max upper bound (default 25).
 * @returns `n` rounded down and clamped to the range `[1, max]`.
 */
export function clampLimit(n, max = 25) {
    if (!Number.isFinite(n)) {
        return 5;
    }
    const whole = Math.floor(n);
    const atLeastOne = Math.max(1, whole);
    return Math.min(atLeastOne, max);
}
10
/**
 * GET a URL and return the response body as text.
 *
 * @param url request URL.
 * @param signal optional abort signal forwarded to `fetch`.
 * @param accept value for the `accept` request header.
 * @returns the response body text.
 * @throws Error `HTTP <status> <statusText>` when the response is not OK.
 */
export async function fetchText(url, signal, accept) {
    const headers = { "user-agent": USER_AGENT, accept };
    const resp = await fetch(url, { signal, headers });
    if (resp.ok) {
        return resp.text();
    }
    throw new Error(`HTTP ${resp.status} ${resp.statusText}`.trim());
}
19
/**
 * GET a URL with `accept: application/json` and parse the body as JSON.
 *
 * @param url request URL.
 * @param signal optional abort signal forwarded to `fetchText`.
 * @returns the parsed value; it is not validated against any schema.
 * @throws whatever `fetchText` or `JSON.parse` throws.
 */
export async function fetchJson(url, signal) {
    const body = await fetchText(url, signal, "application/json");
    return JSON.parse(body);
}
22
/**
 * Assemble the plain-text body stored for a source.
 *
 * Layout: the title, an "Authors: a, b" line when authors are given, one line
 * per non-empty `meta` entry, and finally a blank line followed by the
 * trimmed abstract when there is one.
 *
 * @param parts `{ title, authors?, meta?, abstract? }`.
 * @returns the lines joined with "\n".
 */
export function buildContent(parts) {
    const { title, authors, meta, abstract } = parts;
    const lines = [title];
    if (authors?.length) {
        lines.push(`Authors: ${authors.join(", ")}`);
    }
    lines.push(...[...(meta ?? [])].filter(Boolean));
    const body = abstract?.trim();
    // The empty string produces the blank separator line before the abstract.
    return (body ? [...lines, "", body] : lines).join("\n");
}
34
/**
 * Produce the text summary a research tool returns after a search.
 *
 * @param tool tool name used as the message prefix.
 * @param query the query that was run.
 * @param titles titles of the sources that were submitted.
 * @returns a "no results" line when `titles` is empty, otherwise a count line
 *   followed by one "- title" bullet per entry.
 */
export function manifest(tool, query, titles) {
    const count = titles.length;
    if (count === 0) {
        return `${tool}: no results for "${query}"`;
    }
    const bullets = [];
    for (const title of titles) {
        bullets.push(`- ${title}`);
    }
    return `${tool}: found ${count} result(s) for "${query}"; submitted as sources:\n${bullets.join("\n")}`;
}
@@ -0,0 +1,6 @@
1
+ import { type ResearchTool } from "../../custom-tools.js";
2
/** Options accepted by the `wikipedia` research-tool factory. */
+ export interface WikipediaOptions {
3
/** Number of articles requested per query (`gsrlimit`); the implementation defaults it to 3 and clamps it to at most 10. */
+ defaultLimit?: number;
4
/** Wikipedia language edition; lower-cased and used as the host prefix (`<lang>.wikipedia.org`). Defaults to "en". */
+ lang?: string;
5
+ }
6
/** Create the Wikipedia research tool, which searches articles and registers each intro extract through `ctx.addSource`. */
+ export declare function wikipedia(opts?: WikipediaOptions): ResearchTool;
@@ -0,0 +1,71 @@
1
+ import { jsonSchema } from "ai";
2
+ import { researchTool, } from "../../custom-tools.js";
3
+ import { errorMessage } from "../../errors.js";
4
+ import { buildContent, clampLimit, collapse, fetchJson, manifest, } from "./shared.js";
5
/**
 * Create the Wikipedia research tool.
 *
 * The tool takes `{ query }`, runs a search-generator query that also returns
 * plain-text intro extracts, registers each usable page through
 * `ctx.addSource` (see `ingest`), and returns a text manifest. An empty query
 * or a failed request is reported as a returned "wikipedia: …" string rather
 * than thrown.
 *
 * - `opts.defaultLimit` (default 3, clamped to at most 10) is the number of
 *   articles requested.
 * - `opts.lang` (default "en") is lower-cased and used as the host prefix.
 *   NOTE(review): the value is interpolated into the host name without
 *   validation — confirm callers only pass language codes.
 */
export function wikipedia(opts = {}) {
    const defaultLimit = clampLimit(opts.defaultLimit ?? 3, 10);
    const lang = (opts.lang ?? "en").toLowerCase();
    const api = `https://${lang}.wikipedia.org/w/api.php`;
    const inputSchema = jsonSchema({
        type: "object",
        properties: { query: { type: "string", description: "Search query" } },
        required: ["query"],
        additionalProperties: false,
    });
    const execute = async (input, ctx) => {
        const query = String(input.query ?? "").trim();
        if (query === "") {
            return "wikipedia: empty query";
        }
        const params = new URLSearchParams({
            action: "query",
            generator: "search",
            gsrsearch: query,
            gsrlimit: String(defaultLimit),
            prop: "extracts",
            exintro: "1",
            explaintext: "1",
            format: "json",
            formatversion: "2",
        });
        let data;
        try {
            data = await fetchJson(`${api}?${params.toString()}`, ctx.signal);
        }
        catch (err) {
            return `wikipedia: request failed: ${errorMessage(err)}`;
        }
        return manifest("wikipedia", query, ingest(data, lang, ctx));
    };
    return researchTool({
        description: "Search Wikipedia for encyclopedic background on people, places, organizations, concepts, and events. Returns article introductions as cited sources.",
        inputSchema,
        execute,
    });
}
43
/**
 * Register every usable page of a Wikipedia query response as a source.
 *
 * Pages lacking a title or an extract are dropped. The rest are ordered by
 * their numeric `index` field (missing or non-numeric counts as 0) and
 * submitted through `ctx.addSource` with a URL on `<lang>.wikipedia.org`.
 *
 * @param data parsed JSON response; anything without a `query.pages` array yields `[]`.
 * @param lang language code used to build article URLs.
 * @param ctx tool context providing `addSource`.
 * @returns titles of the submitted pages, in the sorted order.
 */
function ingest(data, lang, ctx) {
    const pages = data !== null && typeof data === "object" ? data.query?.pages : undefined;
    if (!Array.isArray(pages)) {
        return [];
    }
    const usable = [];
    for (const page of pages) {
        const rec = page ?? {};
        const title = String(rec.title ?? "");
        const extract = collapse(String(rec.extract ?? ""));
        if (!title || !extract) {
            continue;
        }
        usable.push({
            title,
            extract,
            index: typeof rec.index === "number" ? rec.index : 0,
        });
    }
    usable.sort((a, b) => a.index - b.index);
    return usable.map((row) => {
        // Article paths use underscores instead of spaces.
        const slug = encodeURIComponent(row.title.replace(/ /g, "_"));
        ctx.addSource({
            url: `https://${lang}.wikipedia.org/wiki/${slug}`,
            title: row.title,
            content: buildContent({ title: row.title, abstract: row.extract }),
        });
        return row.title;
    });
}
@@ -0,0 +1,9 @@
1
+ import type { Researcher } from "../researcher.js";
2
/** Options accepted by the `exaAgent` researcher factory. */
+ export interface ExaAgentOptions {
3
/** Exa API key; falls back to the `ATLAS_EXA_API_KEY` / `EXA_API_KEY` environment values. `research` throws when none is available. */
+ apiKey?: string;
4
/** Passed as the second argument to the `Exa` client constructor. */
+ baseUrl?: string;
5
/** Research model to request; the implementation defaults to "exa-research". */
+ model?: "exa-research-fast" | "exa-research" | "exa-research-pro";
6
/** Timeout handed to `pollUntilFinished`; the implementation defaults to 10 minutes. */
+ timeoutMs?: number;
7
/** Overrides the researcher's default description text. */
+ description?: string;
8
+ }
9
/** Create a researcher that delegates a query to Exa's research API and returns its report plus the URLs found in it. */
+ export declare function exaAgent(opts?: ExaAgentOptions): Researcher;
@@ -0,0 +1,67 @@
1
+ import { Exa } from "exa-js";
2
+ import { readEnv } from "../env.js";
3
/** Description used for the researcher when the caller supplies none. */
const DEFAULT_DESCRIPTION = "Exa's agentic deep-research (exa-research): autonomously searches, reads, and synthesizes a grounded report. Strong on shopping/product comparison, personalized and recency-heavy queries.";
/** Default polling timeout: ten minutes, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 10 * 60 * 1000;
/** Matches http(s) URLs up to the next whitespace, `)`, `<`, `>` or `]`. */
const URL_RE = /https?:\/\/[^\s)<>\]]+/g;
/**
 * Produce the error to throw for an aborted signal.
 *
 * @param signal the abort signal.
 * @returns `signal.reason` when it is an `Error`, otherwise a new
 *   `DOMException` named "AbortError".
 */
function abortError(signal) {
    const { reason } = signal;
    if (reason instanceof Error) {
        return reason;
    }
    return new DOMException("The operation was aborted", "AbortError");
}
11
/**
 * Await `p`, but reject early if `signal` aborts first.
 *
 * Without a signal this simply resolves to `p`. With one, the call rejects
 * with `abortError(signal)` if the signal is already aborted or aborts before
 * `p` settles. The abort listener is always removed once the race is over.
 *
 * @param p the promise to wait for.
 * @param signal optional abort signal.
 * @returns the value `p` resolves to.
 */
async function raceAbort(p, signal) {
    if (!signal) {
        return p;
    }
    // Attach a no-op handler so that a rejection of `p` arriving after we have
    // already bailed out on abort is not reported as unhandled.
    p.catch(() => { });
    if (signal.aborted) {
        throw abortError(signal);
    }
    let onAbort;
    const whenAborted = new Promise((_resolve, reject) => {
        onAbort = () => {
            reject(abortError(signal));
        };
        signal.addEventListener("abort", onAbort, { once: true });
    });
    try {
        const winner = await Promise.race([p, whenAborted]);
        return winner;
    }
    finally {
        signal.removeEventListener("abort", onAbort);
    }
}
29
/**
 * Extract the distinct URLs mentioned in a report, in first-seen order.
 *
 * `URL_RE` deliberately stops at ")" so that markdown links such as
 * `[label](https://example.com/a)` do not swallow the closing parenthesis.
 * That alone would truncate URLs that legitimately contain balanced
 * parentheses (for example `https://en.wikipedia.org/wiki/Foo_(bar)`), so
 * after each match a ")" is re-attached — together with any URL characters
 * that follow it — for as long as the URL still has an unmatched "(".
 * Trailing `.`, `,`, `;` and `:` are then stripped as sentence punctuation.
 *
 * @param text report text to scan.
 * @returns array of `{ url }` objects without duplicates.
 */
function sourcesFromText(text) {
    // Same character class as URL_RE, sticky so it only matches at `lastIndex`.
    const tail = /[^\s)<>\]]*/y;
    const seen = new Set();
    const out = [];
    for (const match of text.matchAll(URL_RE)) {
        const start = match.index ?? 0;
        let end = start + match[0].length;
        // The match itself can never contain ")", so every "(" in it is still open.
        let open = match[0].split("(").length - 1;
        while (open > 0 && text[end] === ")") {
            open -= 1;
            end += 1;
            tail.lastIndex = end;
            const more = tail.exec(text)?.[0] ?? "";
            open += more.split("(").length - 1;
            end += more.length;
        }
        const url = text.slice(start, end).replace(/[.,;:]+$/, "");
        if (!seen.has(url)) {
            seen.add(url);
            out.push({ url });
        }
    }
    return out;
}
41
/**
 * Create a researcher backed by Exa's research API.
 *
 * `research(query, ctx)` creates a research task, polls it until it finishes
 * (both steps are abandoned early if `ctx.signal` aborts), and returns the
 * report text, the URLs found in that text, and the cost when the API
 * reports one.
 *
 * - `opts.apiKey` falls back to `ATLAS_EXA_API_KEY` / `EXA_API_KEY`; the key is
 *   only checked when `research` is called.
 * - `opts.model` defaults to "exa-research"; `opts.timeoutMs` defaults to
 *   `DEFAULT_TIMEOUT_MS` and is passed to the polling call.
 *
 * @throws (from `research`) when no API key is configured or the task ends in
 *   a status other than "completed".
 */
export function exaAgent(opts = {}) {
    const apiKey = opts.apiKey ?? readEnv("ATLAS_EXA_API_KEY", "EXA_API_KEY");
    const description = opts.description ?? DEFAULT_DESCRIPTION;
    const research = async (query, ctx) => {
        if (!apiKey) {
            throw new Error("exa.agent: no Exa API key (set ATLAS_EXA_API_KEY / EXA_API_KEY or pass { apiKey })");
        }
        const client = new Exa(apiKey, opts.baseUrl);
        const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        const creation = client.research.create({
            instructions: query,
            model: opts.model ?? "exa-research",
        });
        const created = await raceAbort(creation, ctx.signal);
        const polling = client.research.pollUntilFinished(created.researchId, { timeoutMs });
        const result = await raceAbort(polling, ctx.signal);
        if (result.status !== "completed") {
            throw new Error(`exa.agent: research ${result.status}`);
        }
        const report = (result.output?.content ?? "").trim();
        const sources = sourcesFromText(report);
        const cost = result.costDollars?.total;
        const hasCost = cost != null;
        ctx.log(`exa.agent: ${sources.length} sources${hasCost ? `, $${cost}` : ""}`);
        // `cost` is only included in the result when the API reported one.
        return hasCost ? { report, sources, cost } : { report, sources };
    };
    return { description, research };
}
@@ -0,0 +1,66 @@
1
+ import type { Dispatcher } from "undici";
2
+ import type { SourceExtractionAttempt, SourceExtractionMetadata } from "../sources.js";
3
/** Predicate over page text; accepts `undefined`/`null`. NOTE(review): presumably detects bot-block / challenge pages — the implementation is not part of this declaration file, confirm in fetch.js. */
+ export declare function looksBlocked(text: string | undefined | null): boolean;
4
/** Page-level variant taking the extracted markdown and, optionally, the raw response. NOTE(review): how `raw` is used is not visible here — confirm in fetch.js. */
+ export declare function looksBlockedPage(markdown: string, raw?: string): boolean;
5
/** A successfully fetched page as returned by a fetch provider. */
+ export interface FetchedPage {
6
/** NOTE(review): presumably the URL after redirects — confirm in fetch.js. */
+ finalUrl: string;
7
/** Page title, or `null` when none is available. */
+ title: string | null;
8
/** Page content as markdown. */
+ markdown: string;
9
+ metadata: SourceExtractionMetadata;
10
/** NOTE(review): presumably identifies the provider/renderer that produced the page — confirm in fetch.js. */
+ renderedWith: string;
11
+ }
12
/**
 * Result of one provider's fetch, discriminated on `ok`: the success variant
 * carries the `page`, the failure variant carries an `escalate` flag. Both
 * carry the `attempt` record.
 * NOTE(review): `escalate` presumably tells the chain whether to try the next
 * provider — confirm in fetch.js.
 */
+ export type FetchAttempt = {
13
+ ok: true;
14
+ page: FetchedPage;
15
+ attempt: SourceExtractionAttempt;
16
+ } | {
17
+ ok: false;
18
+ attempt: SourceExtractionAttempt;
19
+ escalate: boolean;
20
+ };
21
/** Input handed to a fetch provider. */
+ export interface FetchRequest {
22
/** URL to fetch. */
+ url: string;
23
/** Optional abort signal for the request. */
+ signal?: AbortSignal | undefined;
24
/** Optional callback receiving a retry delay in seconds. NOTE(review): presumably invoked when the remote responds with a rate-limit status — confirm in fetch.js. */
+ onRateLimit?: ((retryAfterSeconds: number) => void) | undefined;
25
/** Optional async check for a URL that resolves to `{ ok: true }` or `{ ok: false, reason }`. NOTE(review): by its name it vets redirect targets before they are followed — confirm in fetch.js. */
+ guardRedirect?: ((url: string) => Promise<{
26
+ ok: true;
27
+ } | {
28
+ ok: false;
29
+ reason: string;
30
+ }>) | undefined;
31
/** Optional undici `Dispatcher` for the request. */
+ dispatcher?: Dispatcher | undefined;
32
+ }
33
/** A page-fetching backend: an identifier plus a `fetch` method that resolves to a `FetchAttempt`. */
+ export interface FetchProvider {
34
+ readonly id: string;
35
+ fetch(req: FetchRequest): Promise<FetchAttempt>;
36
+ }
37
/** Predicate on a URL string. NOTE(review): presumably a heuristic on the URL alone (e.g. a `.pdf` path) — confirm in fetch.js. */
+ export declare function isLikelyPdfUrl(url: string): boolean;
38
/** Create a fetch provider that takes no options. NOTE(review): presumably a plain HTTP fetch — confirm in fetch.js. */
+ export declare function basicFetch(): FetchProvider;
39
/** Options for the `steel` fetch provider. NOTE(review): defaults and environment fallbacks are not visible here — confirm in fetch.js. */
+ export interface SteelOptions {
40
+ apiKey?: string;
41
+ baseUrl?: string;
42
+ proxy?: boolean;
43
+ }
44
/** Create the Steel-backed fetch provider. */
+ export declare function steel(opts?: SteelOptions): FetchProvider;
45
/** Shape of a Steel scrape response as consumed by `steelAttemptFromResponse`; every field is optional. */
+ export interface SteelScrapeResponse {
46
+ content?: {
47
+ html?: string;
48
+ markdown?: string;
49
+ };
50
+ metadata?: {
51
+ statusCode?: number;
52
+ canonical?: string;
53
+ title?: string;
54
+ };
55
+ }
56
/** Convert a Steel scrape response for `url` into a `FetchAttempt`. */
+ export declare function steelAttemptFromResponse(response: SteelScrapeResponse, url: string): FetchAttempt;
57
/** Create a fetch provider configured with an optional API key and base URL. NOTE(review): by its name it is backed by Exa's contents API — confirm in fetch.js. */
+ export declare function exaContents(opts?: {
58
+ apiKey?: string;
59
+ baseUrl?: string;
60
+ }): FetchProvider;
61
/** Return the default list of fetch providers. NOTE(review): which providers and in what order is not visible here — confirm in fetch.js. */
+ export declare function defaultFetchProviders(): FetchProvider[];
62
/** Outcome of running a request through a provider chain: the fetched page, or `null`, plus the recorded attempts. */
+ export interface ChainFetchOutcome {
63
+ page: FetchedPage | null;
64
+ attempts: SourceExtractionAttempt[];
65
+ }
66
/** Run `req` against the providers in `chain` and resolve to the outcome. NOTE(review): presumably tries providers in order until one succeeds or declines to escalate — confirm in fetch.js. */
+ export declare function fetchThroughChain(chain: FetchProvider[], req: FetchRequest): Promise<ChainFetchOutcome>;