@steel-dev/atlas 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +219 -0
- package/dist/agent.d.ts +34 -0
- package/dist/agent.js +133 -0
- package/dist/async.d.ts +19 -0
- package/dist/async.js +172 -0
- package/dist/atlas.d.ts +19 -0
- package/dist/atlas.js +69 -0
- package/dist/budget.d.ts +64 -0
- package/dist/budget.js +336 -0
- package/dist/checklist.d.ts +115 -0
- package/dist/checklist.js +297 -0
- package/dist/cli.js +38700 -0
- package/dist/config.d.ts +80 -0
- package/dist/config.js +109 -0
- package/dist/context.d.ts +26 -0
- package/dist/context.js +250 -0
- package/dist/custom-tools.d.ts +26 -0
- package/dist/custom-tools.js +33 -0
- package/dist/defaults.d.ts +10 -0
- package/dist/defaults.js +37 -0
- package/dist/economy.d.ts +12 -0
- package/dist/economy.js +6 -0
- package/dist/env.d.ts +1 -0
- package/dist/env.js +8 -0
- package/dist/errors.d.ts +6 -0
- package/dist/errors.js +11 -0
- package/dist/event-hub.d.ts +11 -0
- package/dist/event-hub.js +83 -0
- package/dist/events.d.ts +105 -0
- package/dist/events.js +1 -0
- package/dist/html-extract.d.ts +21 -0
- package/dist/html-extract.js +459 -0
- package/dist/index.d.ts +59 -0
- package/dist/index.js +26 -0
- package/dist/memory.d.ts +2 -0
- package/dist/memory.js +38 -0
- package/dist/model.d.ts +49 -0
- package/dist/model.js +630 -0
- package/dist/orchestrate.d.ts +5 -0
- package/dist/orchestrate.js +277 -0
- package/dist/pdf-extract.d.ts +5 -0
- package/dist/pdf-extract.js +20 -0
- package/dist/prompts.d.ts +2 -0
- package/dist/prompts.js +6 -0
- package/dist/providers/domain/arxiv.d.ts +6 -0
- package/dist/providers/domain/arxiv.js +83 -0
- package/dist/providers/domain/clinicaltrials.d.ts +6 -0
- package/dist/providers/domain/clinicaltrials.js +104 -0
- package/dist/providers/domain/edgar.d.ts +10 -0
- package/dist/providers/domain/edgar.js +92 -0
- package/dist/providers/domain/index.d.ts +14 -0
- package/dist/providers/domain/index.js +7 -0
- package/dist/providers/domain/openalex.d.ts +7 -0
- package/dist/providers/domain/openalex.js +128 -0
- package/dist/providers/domain/pubmed.d.ts +8 -0
- package/dist/providers/domain/pubmed.js +123 -0
- package/dist/providers/domain/semantic-scholar.d.ts +6 -0
- package/dist/providers/domain/semantic-scholar.js +112 -0
- package/dist/providers/domain/shared.d.ts +12 -0
- package/dist/providers/domain/shared.js +39 -0
- package/dist/providers/domain/wikipedia.d.ts +6 -0
- package/dist/providers/domain/wikipedia.js +71 -0
- package/dist/providers/exa-agent.d.ts +9 -0
- package/dist/providers/exa-agent.js +67 -0
- package/dist/providers/fetch.d.ts +66 -0
- package/dist/providers/fetch.js +675 -0
- package/dist/providers/parallel-agent.d.ts +11 -0
- package/dist/providers/parallel-agent.js +100 -0
- package/dist/providers/perplexity-agent.d.ts +17 -0
- package/dist/providers/perplexity-agent.js +86 -0
- package/dist/providers/search.d.ts +65 -0
- package/dist/providers/search.js +433 -0
- package/dist/providers/store.d.ts +48 -0
- package/dist/providers/store.js +217 -0
- package/dist/researcher.d.ts +20 -0
- package/dist/researcher.js +3 -0
- package/dist/robots.d.ts +16 -0
- package/dist/robots.js +146 -0
- package/dist/roles.d.ts +6 -0
- package/dist/roles.js +4 -0
- package/dist/run.d.ts +65 -0
- package/dist/run.js +371 -0
- package/dist/safe-dispatcher.d.ts +16 -0
- package/dist/safe-dispatcher.js +32 -0
- package/dist/safety.d.ts +23 -0
- package/dist/safety.js +206 -0
- package/dist/sandbox.d.ts +22 -0
- package/dist/sandbox.js +228 -0
- package/dist/search-normalize.d.ts +2 -0
- package/dist/search-normalize.js +13 -0
- package/dist/source-documents.d.ts +77 -0
- package/dist/source-documents.js +421 -0
- package/dist/sources.d.ts +57 -0
- package/dist/sources.js +1 -0
- package/dist/spine.d.ts +19 -0
- package/dist/spine.js +722 -0
- package/dist/state.d.ts +90 -0
- package/dist/state.js +27 -0
- package/dist/structured.d.ts +7 -0
- package/dist/structured.js +18 -0
- package/dist/tools.d.ts +33 -0
- package/dist/tools.js +1187 -0
- package/dist/trace-digest.d.ts +11 -0
- package/dist/trace-digest.js +309 -0
- package/dist/trace.d.ts +225 -0
- package/dist/trace.js +278 -0
- package/dist/trail.d.ts +15 -0
- package/dist/trail.js +74 -0
- package/dist/url.d.ts +1 -0
- package/dist/url.js +25 -0
- package/package.json +107 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Context object handed to `Researcher.research` as its second argument.
 */
export interface ResearcherContext {
|
|
2
|
+
/** Budget figures for the call; `maxUSD` is a number (presumably a USD spending cap — confirm with the caller). */
budget: {
|
|
3
|
+
maxUSD: number;
|
|
4
|
+
};
|
|
5
|
+
/** Optional abort signal the researcher may observe. */
readonly signal?: AbortSignal | undefined;
|
|
6
|
+
/** Sink for free-form log messages. */
log(message: string): void;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Value a `Researcher` resolves with: the report text, the sources it lists,
 * and an optional numeric cost.
 */
export interface ResearchReport {
|
|
9
|
+
report: string;
|
|
10
|
+
/** Each source carries a URL and, optionally, a title. */
sources: {
|
|
11
|
+
url: string;
|
|
12
|
+
title?: string;
|
|
13
|
+
}[];
|
|
14
|
+
/** Optional cost figure; unit is not stated here (presumably USD — confirm). */
cost?: number;
|
|
15
|
+
}
|
|
16
|
+
/**
 * A pluggable researcher: a textual description plus an async `research`
 * method that turns a query into a `ResearchReport`.
 */
export interface Researcher {
|
|
17
|
+
description: string;
|
|
18
|
+
research(query: string, ctx: ResearcherContext): Promise<ResearchReport>;
|
|
19
|
+
}
|
|
20
|
+
/** Takes a `Researcher` and returns a `Researcher` (presumably a typed identity helper — implementation not shown here). */
export declare function researcher(r: Researcher): Researcher;
|
package/dist/robots.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { Dispatcher } from "undici";
|
|
2
|
+
/**
 * One parsed robots.txt rule. `allow` is true for an `Allow` line and false
 * for a `Disallow` line; `pattern` is the rule's path pattern as written.
 */
export interface RobotsRule {
|
|
3
|
+
allow: boolean;
|
|
4
|
+
pattern: string;
|
|
5
|
+
}
|
|
6
|
+
/** Parses robots.txt `text` and returns the rules that apply to `agentToken` (an empty array when none apply). */
export declare function parseRobots(text: string, agentToken: string): RobotsRule[];
|
|
7
|
+
/** Returns whether `path` is allowed under `rules`; the longest matching pattern wins and an empty rule list allows everything. */
export declare function robotsAllows(rules: RobotsRule[], path: string): boolean;
|
|
8
|
+
/**
 * Per-origin robots.txt checker returned by `createRobotsCache`.
 */
export interface RobotsCache {
|
|
9
|
+
/** Resolves to true when `url` may be fetched; `signal` and `dispatcher` are forwarded to the robots.txt request. */
allows(url: string, signal?: AbortSignal, dispatcher?: Dispatcher): Promise<boolean>;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Options for `createRobotsCache`.
 */
export interface RobotsCacheOptions {
|
|
12
|
+
/** Token matched against `User-agent` lines when selecting a rule group. */
agentToken: string;
|
|
13
|
+
/** Value sent as the `user-agent` request header when fetching robots.txt. */
userAgent: string;
|
|
14
|
+
/** Optional fetch replacement; the global `fetch` is used when omitted. */
fetchImpl?: typeof fetch;
|
|
15
|
+
}
|
|
16
|
+
/** Creates a `RobotsCache` that fetches and memoizes robots.txt rules per origin. */
export declare function createRobotsCache(opts: RobotsCacheOptions): RobotsCache;
|
package/dist/robots.js
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
const ROBOTS_FETCH_TIMEOUT_MS = 5_000;
|
|
2
|
+
const ROBOTS_MAX_CHARS = 512_000;
|
|
3
|
+
/**
 * Tests a robots.txt path pattern against a request path.
 *
 * `*` matches any run of characters (including none). A trailing `$` anchors
 * the pattern to the end of the path; without it the pattern only has to
 * match a prefix of the path.
 *
 * @param pattern Rule pattern as written in robots.txt.
 * @param path Path (plus query string) being checked.
 * @returns true when the pattern matches.
 */
function robotsMatches(pattern, path) {
    const mustReachEnd = pattern.endsWith("$");
    const glob = mustReachEnd ? pattern.slice(0, -1) : pattern;
    const globLen = glob.length;
    const pathLen = path.length;
    let pathPos = 0;
    let globPos = 0;
    // Position of the most recent `*` in the glob, and the path position that
    // star is currently assumed to extend to (used for backtracking).
    let lastStar = -1;
    let resumeAt = 0;
    while (pathPos < pathLen) {
        const want = globPos < globLen ? glob[globPos] : undefined;
        if (want === "*") {
            lastStar = globPos;
            resumeAt = pathPos;
            globPos += 1;
        }
        else if (want !== undefined && want === path[pathPos]) {
            globPos += 1;
            pathPos += 1;
        }
        else if (lastStar !== -1) {
            // Mismatch after a star: let the star swallow one more character.
            globPos = lastStar + 1;
            resumeAt += 1;
            pathPos = resumeAt;
        }
        else {
            return false;
        }
        // Unanchored patterns are prefix matches: done once the glob is consumed.
        if (!mustReachEnd && globPos === globLen) {
            return true;
        }
    }
    // Trailing stars can match the empty remainder.
    while (globPos < globLen && glob[globPos] === "*") {
        globPos += 1;
    }
    const globConsumed = globPos === globLen;
    return mustReachEnd ? globConsumed && pathPos === pathLen : globConsumed;
}
|
|
37
|
+
export function parseRobots(text, agentToken) {
|
|
38
|
+
const token = agentToken.toLowerCase();
|
|
39
|
+
const groups = [];
|
|
40
|
+
let current = null;
|
|
41
|
+
let agentsOpen = false;
|
|
42
|
+
for (const rawLine of text.slice(0, ROBOTS_MAX_CHARS).split(/\r?\n/)) {
|
|
43
|
+
const line = rawLine.split("#")[0].trim();
|
|
44
|
+
if (!line)
|
|
45
|
+
continue;
|
|
46
|
+
const separator = line.indexOf(":");
|
|
47
|
+
if (separator < 0)
|
|
48
|
+
continue;
|
|
49
|
+
const field = line.slice(0, separator).trim().toLowerCase();
|
|
50
|
+
const value = line.slice(separator + 1).trim();
|
|
51
|
+
if (field === "user-agent") {
|
|
52
|
+
if (!agentsOpen || !current) {
|
|
53
|
+
current = { agents: [], rules: [] };
|
|
54
|
+
groups.push(current);
|
|
55
|
+
agentsOpen = true;
|
|
56
|
+
}
|
|
57
|
+
current.agents.push(value.toLowerCase());
|
|
58
|
+
}
|
|
59
|
+
else if (field === "allow" || field === "disallow") {
|
|
60
|
+
if (!current)
|
|
61
|
+
continue;
|
|
62
|
+
agentsOpen = false;
|
|
63
|
+
if (value) {
|
|
64
|
+
current.rules.push({
|
|
65
|
+
allow: field === "allow",
|
|
66
|
+
pattern: value,
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
agentsOpen = false;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
let best = null;
|
|
75
|
+
let bestAgentLength = -1;
|
|
76
|
+
let wildcard = null;
|
|
77
|
+
for (const group of groups) {
|
|
78
|
+
for (const agent of group.agents) {
|
|
79
|
+
if (agent === "*") {
|
|
80
|
+
wildcard = [...(wildcard ?? []), ...group.rules];
|
|
81
|
+
}
|
|
82
|
+
else if (token.includes(agent) && agent.length > bestAgentLength) {
|
|
83
|
+
best = group.rules;
|
|
84
|
+
bestAgentLength = agent.length;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
return best ?? wildcard ?? [];
|
|
89
|
+
}
|
|
90
|
+
export function robotsAllows(rules, path) {
|
|
91
|
+
let verdict = true;
|
|
92
|
+
let matchedLength = -1;
|
|
93
|
+
for (const rule of rules) {
|
|
94
|
+
if (!robotsMatches(rule.pattern, path))
|
|
95
|
+
continue;
|
|
96
|
+
if (rule.pattern.length > matchedLength ||
|
|
97
|
+
(rule.pattern.length === matchedLength && rule.allow && !verdict)) {
|
|
98
|
+
matchedLength = rule.pattern.length;
|
|
99
|
+
verdict = rule.allow;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
return verdict;
|
|
103
|
+
}
|
|
104
|
+
/**
 * Creates a robots.txt checker that fetches and memoizes rules per origin.
 *
 * The robots.txt request is bounded by `ROBOTS_FETCH_TIMEOUT_MS` and by the
 * caller's signal. A non-OK response or a failed request is treated as
 * "no rules" (everything allowed) and that outcome is cached.
 *
 * The one exception is a request that failed while the caller's own signal
 * was aborted: that outcome says nothing about the origin, so the cache entry
 * is evicted and the next call fetches again. Callers already awaiting the
 * cancelled entry still receive "no rules" for that call.
 *
 * @param opts Agent token, user-agent header value and optional fetch override.
 * @returns A `RobotsCache` whose `allows(url, signal, dispatcher)` resolves to
 *   true when the URL may be fetched (also for unparseable URLs).
 */
export function createRobotsCache(opts) {
    const fetchImpl = opts.fetchImpl ?? fetch;
    const byOrigin = new Map();
    // Fetches and parses robots.txt for one origin; rejects on network failure.
    async function fetchRules(origin, signal, dispatcher) {
        const timeout = AbortSignal.timeout(ROBOTS_FETCH_TIMEOUT_MS);
        const response = await fetchImpl(`${origin}/robots.txt`, {
            signal: signal ? AbortSignal.any([signal, timeout]) : timeout,
            headers: { "user-agent": opts.userAgent },
            ...(dispatcher ? { dispatcher } : {}),
        });
        if (!response.ok)
            return null;
        const text = await response.text();
        return parseRobots(text, opts.agentToken);
    }
    // Returns the memoized (possibly still pending) rules for an origin.
    function load(origin, signal, dispatcher) {
        const cached = byOrigin.get(origin);
        if (cached)
            return cached;
        let cancelledByCaller = false;
        const pending = fetchRules(origin, signal, dispatcher).catch(() => {
            cancelledByCaller = signal?.aborted === true;
            return null;
        });
        byOrigin.set(origin, pending);
        // Evict only after the entry is stored, and only if it is still ours.
        void pending.then(() => {
            if (cancelledByCaller && byOrigin.get(origin) === pending) {
                byOrigin.delete(origin);
            }
        });
        return pending;
    }
    return {
        async allows(url, signal, dispatcher) {
            let parsed;
            try {
                parsed = new URL(url);
            }
            catch {
                return true;
            }
            const rules = await load(parsed.origin, signal, dispatcher);
            if (!rules || rules.length === 0)
                return true;
            return robotsAllows(rules, parsed.pathname + parsed.search);
        },
    };
}
|
package/dist/roles.d.ts
ADDED
package/dist/roles.js
ADDED
package/dist/run.d.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import type { FlexibleSchema } from "ai";
|
|
2
|
+
import { type AtlasConfig, type ResearchOptions } from "./config.js";
|
|
3
|
+
import type { Citation, ResearchEvent, RunStats, StopReason } from "./events.js";
|
|
4
|
+
import { type ReplayCache } from "./providers/store.js";
|
|
5
|
+
import type { RunTrace } from "./trace.js";
|
|
6
|
+
/** Lifecycle state reported by `ResearchRun.status()`. */
export type RunStatus = "running" | "completed" | "failed" | "aborted" | "paused";
|
|
7
|
+
/**
 * One source entry in `ResearchResult.sources`.
 */
export interface SourceRecord {
|
|
8
|
+
id: string;
|
|
9
|
+
url: string;
|
|
10
|
+
/** Final URL from the stored document's metadata when available; otherwise the same as `url`. */
finalUrl: string;
|
|
11
|
+
title: string;
|
|
12
|
+
/** How the source was obtained (a method or provider label). */
via: string;
|
|
13
|
+
/** Stored character count; 0 when unknown. */
chars: number;
|
|
14
|
+
/** Quality warnings recorded for the document, when any. */
warnings?: string[];
|
|
15
|
+
}
|
|
16
|
+
/**
 * Final value a run's `result()` promise resolves with.
 *
 * NOTE(review): the compiled `executeRun` also assigns an `object` field when
 * a schema was supplied; that field is not declared on this interface.
 */
export interface ResearchResult {
|
|
17
|
+
runId: string;
|
|
18
|
+
question: string;
|
|
19
|
+
report: string;
|
|
20
|
+
note: string;
|
|
21
|
+
sources: SourceRecord[];
|
|
22
|
+
citations: Citation[];
|
|
23
|
+
unboundCitations: string[];
|
|
24
|
+
warnings: string[];
|
|
25
|
+
stats: RunStats;
|
|
26
|
+
/** Present only when the run had a trace recorder. */
trace?: RunTrace;
|
|
27
|
+
eventVersion: string;
|
|
28
|
+
}
|
|
29
|
+
/**
 * Handle for a run started by `startRun` / `resumeRun`.
 */
export interface ResearchRun {
|
|
30
|
+
readonly id: string;
|
|
31
|
+
/** Async stream of events emitted by the run. */
events(): AsyncIterable<ResearchEvent>;
|
|
32
|
+
/** Resolves with the final result; rejects when the run fails, is aborted, paused or times out. */
result(): Promise<ResearchResult>;
|
|
33
|
+
/** Hard-aborts the run and resolves once it has settled. */
abort(): Promise<void>;
|
|
34
|
+
/** Marks the run as paused, aborts it and resolves once it has settled. */
pause(): Promise<void>;
|
|
35
|
+
/** Raises the stop signal (a request to wrap up) and resolves once the run has settled. */
finish(): Promise<void>;
|
|
36
|
+
status(): RunStatus;
|
|
37
|
+
/** Snapshot of the trace recorder, or undefined when none has been captured. */
trace(): RunTrace | undefined;
|
|
38
|
+
}
|
|
39
|
+
/**
 * Argument of `startRun`.
 */
export interface StartRunOptions {
|
|
40
|
+
config: AtlasConfig;
|
|
41
|
+
/** Research question; must be non-empty after trimming. */
question: string;
|
|
42
|
+
options: ResearchOptions;
|
|
43
|
+
/** When set, a structured object is extracted from the finished report. */
schema?: FlexibleSchema<unknown> | undefined;
|
|
44
|
+
/** Replay cache from an earlier journaled run (set by `resumeRun`). */
replay?: ReplayCache | undefined;
|
|
45
|
+
/** Timestamp used for the run's "today" date instead of the actual start time. */
anchorStartedAt?: number | undefined;
|
|
46
|
+
/** Clock override; `Date.now` is used when omitted. */
now?: (() => number) | undefined;
|
|
47
|
+
}
|
|
48
|
+
/** Starts a research run and returns its handle immediately; throws synchronously when the question is empty. */
export declare function startRun(start: StartRunOptions): ResearchRun;
|
|
49
|
+
/** Citation counts compared by `acceptsRepair`. */
interface RepairBalance {
|
|
50
|
+
citationsUnsupported: number;
|
|
51
|
+
citationsBound: number;
|
|
52
|
+
}
|
|
53
|
+
/** True when `after` has strictly fewer unsupported citations than `before` and at least as many bound ones. */
export declare function acceptsRepair(before: RepairBalance, after: RepairBalance): boolean;
|
|
54
|
+
/** Flags consumed by `deriveStopReason`, listed in the order they take precedence. */
export interface StopReasonInputs {
|
|
55
|
+
finished: boolean;
|
|
56
|
+
budgetExhausted: boolean;
|
|
57
|
+
tokensExhausted: boolean;
|
|
58
|
+
timedOut: boolean;
|
|
59
|
+
}
|
|
60
|
+
/** Maps the flags to a stop reason: finished, then budget, tokens, timeout; "completed" when none is set. */
export declare function deriveStopReason(inputs: StopReasonInputs): StopReason;
|
|
61
|
+
/** Options accepted by `resumeRun`: only the abort `signal` of `ResearchOptions`. */
export type ResumeOptions = Pick<ResearchOptions, "signal">;
|
|
62
|
+
/**
 * Restarts a journaled run from `config.store`, reusing its recorded question
 * and limits. Rejects when no store is configured or no run is journaled
 * under `runId`.
 */
export declare function resumeRun(runId: string, config: AtlasConfig, resume?: ResumeOptions & {
|
|
63
|
+
now?: () => number;
|
|
64
|
+
}): Promise<ResearchRun>;
|
|
65
|
+
export {};
|
package/dist/run.js
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { resolveRunConfig, } from "./config.js";
|
|
3
|
+
import { assembleRun } from "./context.js";
|
|
4
|
+
import { AtlasError, errorMessage } from "./errors.js";
|
|
5
|
+
import { EventHub } from "./event-hub.js";
|
|
6
|
+
import { EVENT_SCHEMA_VERSION } from "./events.js";
|
|
7
|
+
import { totalFreshTokens } from "./model.js";
|
|
8
|
+
import { runOrchestrated } from "./orchestrate.js";
|
|
9
|
+
import { isoDate } from "./prompts.js";
|
|
10
|
+
import { JournalWriter, loadReplayCache, loadRunMeta, memoryStore, } from "./providers/store.js";
|
|
11
|
+
import { runSpine } from "./spine.js";
|
|
12
|
+
import { extractStructured } from "./structured.js";
|
|
13
|
+
import { computeDigest } from "./trace-digest.js";
|
|
14
|
+
const EXTRACTION_FRACTION = 0.1;
|
|
15
|
+
const EXTRACTION_MIN_USD = 0.02;
|
|
16
|
+
/**
 * Starts a research run and returns a handle to it.
 *
 * The run executes asynchronously. Two abort controllers steer it: the "hard"
 * one cancels the run (caller abort, pause, external signal, deadline) and
 * the "stop" one is raised by `finish()`. When `maxDurationMs` is a positive
 * finite number, a timer hard-aborts the run at that deadline.
 *
 * @param start Config, question, options and optional schema / replay / clock.
 * @returns The run handle (`id`, `events`, `result`, `status`, `trace`,
 *   `abort`, `pause`, `finish`).
 * @throws AtlasError with code "config" when the question is empty.
 */
export function startRun(start) {
    const question = start.question?.trim();
    if (!question) {
        throw new AtlasError("research question is required", "config");
    }
    const resolved = resolveRunConfig(start.config, start.options);
    const runId = start.options.runId ?? `run_${randomUUID().replace(/-/g, "").slice(0, 16)}`;
    const store = start.config.store ?? memoryStore();
    const hub = new EventHub();
    const hardController = new AbortController();
    const stopController = new AbortController();
    // Mutable run state shared by the closures below.
    const state = { status: "running", pauseRequested: false, deadlineHit: false };
    let recorder;
    let deadlineTimer;
    const limitMs = resolved.maxDurationMs;
    if (limitMs !== undefined && Number.isFinite(limitMs) && limitMs > 0) {
        deadlineTimer = setTimeout(() => {
            state.deadlineHit = true;
            hardController.abort();
        }, limitMs);
    }
    // Mirror an externally supplied signal onto the hard controller.
    const externalSignal = start.options.signal;
    const onExternalAbort = () => hardController.abort(externalSignal?.reason);
    if (externalSignal?.aborted) {
        hardController.abort(externalSignal.reason);
    }
    else if (externalSignal) {
        externalSignal.addEventListener("abort", onExternalAbort, {
            once: true,
        });
    }
    const resultPromise = (async () => {
        await Promise.resolve();
        const journal = new JournalWriter(store, runId);
        // Publishes a non-recoverable run.error to both the hub and the journal.
        const announceFailure = (message) => {
            const event = {
                type: "run.error",
                message,
                recoverable: false,
            };
            hub.emit(event);
            journal.event(event.type, event);
        };
        try {
            const result = await executeRun({
                runId,
                question,
                resolved,
                config: start.config,
                schema: start.schema,
                journal,
                replay: start.replay,
                hub,
                hardSignal: hardController.signal,
                stopSignal: stopController.signal,
                now: start.now ?? Date.now,
                anchorStartedAt: start.anchorStartedAt,
                captureRecorder: (r) => {
                    recorder = r;
                },
            });
            state.status = "completed";
            return result;
        }
        catch (err) {
            // Precedence: pause request, then deadline, then plain abort, then failure.
            if (state.pauseRequested) {
                state.status = "paused";
                journal.event("run.paused", { runId });
                throw new AtlasError("run paused; resume with atlas.resume()", "paused");
            }
            if (state.deadlineHit) {
                state.status = "failed";
                announceFailure("run exceeded maxDurationMs before completing");
                throw new AtlasError("run exceeded maxDurationMs", "timeout");
            }
            if (hardController.signal.aborted) {
                state.status = "aborted";
                throw new AtlasError("run aborted", "aborted");
            }
            state.status = "failed";
            announceFailure(errorMessage(err));
            throw err;
        }
        finally {
            if (deadlineTimer) {
                clearTimeout(deadlineTimer);
            }
            externalSignal?.removeEventListener("abort", onExternalAbort);
            await journal.flush();
            hub.close();
        }
    })();
    // Keep an unobserved rejection from surfacing as an unhandled rejection.
    resultPromise.catch(() => { });
    const untilSettled = () => resultPromise.catch(() => { });
    return {
        id: runId,
        events: () => hub.iterable(),
        result: () => resultPromise,
        status: () => state.status,
        trace: () => recorder?.snapshot(),
        abort: async () => {
            hardController.abort();
            await untilSettled();
        },
        pause: async () => {
            state.pauseRequested = true;
            hardController.abort();
            await untilSettled();
        },
        finish: async () => {
            stopController.abort();
            await untilSettled();
        },
    };
}
|
|
134
|
+
/**
 * Runs one research job from assembly to final result.
 *
 * Steps, in order: assemble the run context, journal the run metadata, emit
 * `run.started` (and `run_code.unavailable` when the sandbox tool is off),
 * release the synthesis grant, reserve an extraction grant when a schema is
 * given, run either the orchestrated or the spine pipeline, emit
 * `report.completed`, optionally extract a structured object, build stats,
 * finalize the trace recorder and emit `run.completed`.
 *
 * @param args Run identifiers, resolved config, journal, hub, signals, clock
 *   and optional schema / replay / recorder callback.
 * @returns The research result; includes `object` when a schema was given.
 */
async function executeRun(args) {
    const { resolved, question, runId } = args;
    const startedAt = args.now();
    const assembled = await assembleRun({
        runId,
        question,
        todayISO: isoDate(args.anchorStartedAt ?? startedAt),
        resolved,
        config: args.config,
        journal: args.journal,
        replay: args.replay,
        hub: args.hub,
        hardSignal: args.hardSignal,
        stopSignal: args.stopSignal,
        now: args.now,
        startedAt,
    });
    const { rctx, meter, synthesisGrant } = assembled;
    const emit = rctx.emit;
    args.captureRecorder?.(rctx.recorder);
    // Optional limits are journaled only when they are set.
    const durationLimit = resolved.maxDurationMs !== undefined
        ? { maxDurationMs: resolved.maxDurationMs }
        : {};
    const filterPart = resolved.sourceFilter
        ? { sourceFilter: resolved.sourceFilter }
        : {};
    args.journal.meta({
        runId,
        question,
        effort: resolved.effort,
        budgetUSD: resolved.budgetUSD,
        maxTokens: resolved.maxTokens,
        ...durationLimit,
        maxSources: resolved.maxSources,
        ...filterPart,
        eventVersion: EVENT_SCHEMA_VERSION,
        startedAt,
    });
    emit({
        type: "run.started",
        runId,
        question,
        effort: resolved.effort,
        budgetUSD: resolved.budgetUSD,
    });
    if (!rctx.runCodeEnabled) {
        emit({
            type: "run_code.unavailable",
            detail: 'the optional "isolated-vm" sandbox dependency is not installed or failed to build, ' +
                "so the run_code tool is omitted this run; install it to let agents compute over source text. " +
                "Research continues without it.",
        });
    }
    synthesisGrant.release();
    let extractionGrant = null;
    if (args.schema) {
        extractionGrant = meter.grant({
            fraction: EXTRACTION_FRACTION,
            minUSD: EXTRACTION_MIN_USD,
        });
    }
    // Custom researchers switch the run to the orchestrated pipeline.
    const hasResearchers = Object.keys(resolved.researchers).length > 0;
    const out = hasResearchers
        ? await runOrchestrated(rctx, resolved.researchers)
        : await runSpine(rctx, { meter });
    emit({ type: "report.completed", report: out.report });
    let structured;
    if (args.schema) {
        try {
            const writer = rctx.bindModel("write", extractionGrant ?? meter);
            structured = await extractStructured(writer, question, out.report, args.schema, args.hardSignal);
        }
        finally {
            extractionGrant?.release();
        }
    }
    const durationMs = args.now() - startedAt;
    const stats = buildStats({
        rctx,
        bound: {
            citationsBound: out.citations.length,
            citationsUnsupported: out.unboundCitations.length,
        },
        durationMs,
        finished: args.stopSignal.aborted,
    });
    if (rctx.recorder) {
        const digest = computeDigest(rctx.recorder.spans, rctx.recorder.steps, {
            runId,
            wallMs: durationMs,
            costUSD: stats.costUSD,
            freshTokens: totalFreshTokens(rctx.usage),
            replayedUSD: rctx.usage.replayedUSD,
            gateLimitModel: resolved.maxConcurrentModelCalls,
            gateLimitIo: resolved.maxConcurrentIo,
        });
        rctx.recorder.finalize(digest);
    }
    // Sources reported by the pipeline itself get sequential ids.
    const describeReported = (source, index) => ({
        id: `source_${index + 1}`,
        url: source.url,
        finalUrl: source.url,
        title: source.title,
        via: source.via,
        chars: source.chars ?? 0,
    });
    // Otherwise fall back to fetched sources, enriched from the stored document.
    const describeFetched = (source) => {
        const document = source.sourceId
            ? rctx.sources.byId.get(source.sourceId)
            : undefined;
        const warningPart = document?.metadata.qualityWarnings
            ? { warnings: document.metadata.qualityWarnings }
            : {};
        return {
            id: source.sourceId ?? "",
            url: source.url,
            finalUrl: document?.metadata.finalUrl ?? source.url,
            title: source.title,
            via: document?.metadata.method ?? "unknown",
            chars: document?.storedChars ?? 0,
            ...warningPart,
        };
    };
    const sources = out.sources
        ? out.sources.map((source, index) => describeReported(source, index))
        : rctx.sources.fetchedSources.map((source) => describeFetched(source));
    const result = {
        runId,
        question,
        report: out.report,
        note: out.note,
        sources,
        citations: out.citations,
        unboundCitations: out.unboundCitations,
        warnings: out.warnings ?? [],
        stats,
        ...(rctx.recorder ? { trace: rctx.recorder.snapshot() } : {}),
        eventVersion: EVENT_SCHEMA_VERSION,
        // `object` is present whenever a schema was requested, even if undefined.
        ...(args.schema ? { object: structured } : {}),
    };
    emit({ type: "run.completed", stats });
    return result;
}
|
|
266
|
+
/**
 * Decides whether a citation repair pass should be kept.
 *
 * @param before Citation counts before the repair.
 * @param after Citation counts after the repair.
 * @returns true only when unsupported citations strictly decreased and bound
 *   citations did not decrease.
 */
export function acceptsRepair(before, after) {
    const fewerUnsupported = after.citationsUnsupported < before.citationsUnsupported;
    if (!fewerUnsupported) {
        return false;
    }
    return after.citationsBound >= before.citationsBound;
}
|
|
270
|
+
/**
 * Picks the stop reason for a run from its terminal flags.
 *
 * Precedence: "finished", then "budget", "tokens", "timeout"; when no flag is
 * set the reason is "completed".
 *
 * @param inputs Terminal flags of the run.
 * @returns The stop reason label.
 */
export function deriveStopReason(inputs) {
    const byPrecedence = [
        ["finished", inputs.finished],
        ["budget", inputs.budgetExhausted],
        ["tokens", inputs.tokensExhausted],
        ["timeout", inputs.timedOut],
    ];
    for (const [reason, raised] of byPrecedence) {
        if (raised) {
            return reason;
        }
    }
    return "completed";
}
|
|
281
|
+
/**
 * Assembles the run statistics from the run context.
 *
 * @param opts `rctx` (run context), `bound` (citation counts), `durationMs`
 *   and `finished` (whether the stop signal was raised).
 * @returns The stats object, including per-role token totals, the cost in USD
 *   rounded to four decimals and the derived stop reason.
 */
function buildStats(opts) {
    const { rctx, bound, durationMs, finished } = opts;
    const { usage, meter, counters, config } = rctx;
    // Input tokens include cache reads and cache writes.
    const tokens = {};
    for (const [role, spent] of usage.byRole) {
        const input = spent.input + spent.cacheRead + spent.cacheWrite;
        tokens[role] = { input, output: spent.output };
    }
    const budgetExhausted = meter.exhausted();
    const tokensExhausted = totalFreshTokens(usage) >= config.maxTokens;
    const timedOut = rctx.deadlineAt !== undefined && rctx.now() >= rctx.deadlineAt;
    // Replayed spend is not charged again; never report a negative cost.
    const freshUSD = Math.max(0, meter.totalSpentUSD() - usage.replayedUSD);
    const costUSD = Math.round(freshUSD * 10_000) / 10_000;
    const stopReason = deriveStopReason({
        finished,
        budgetExhausted,
        tokensExhausted,
        timedOut,
    });
    return {
        effort: config.effort,
        searches: counters.searches,
        searchCacheHits: counters.searchCacheHits,
        modelCacheHits: counters.modelCacheHits,
        modelGatePeakWidth: counters.modelGatePeakWidth,
        sourcesFetched: counters.sourcesFetched,
        sourcesFailed: counters.sourcesFailed,
        citationsBound: bound.citationsBound,
        citationsUnsupported: bound.citationsUnsupported,
        tokens,
        costUSD,
        durationMs,
        budgetExhausted,
        tokensExhausted,
        stopReason,
    };
}
|
|
317
|
+
/**
 * Rebuilds a source filter from journaled run metadata.
 *
 * Only `includeDomains` and `excludeDomains` are restored, and each only when
 * it is an array made up entirely of strings.
 *
 * @param value The journaled `sourceFilter` value (untrusted shape).
 * @returns The restored filter, or undefined when nothing usable was found.
 */
function sourceFilterFromMeta(value) {
    if (!value || typeof value !== "object") {
        return undefined;
    }
    const isDomainList = (candidate) => Array.isArray(candidate) &&
        candidate.every((domain) => typeof domain === "string");
    const kept = ["includeDomains", "excludeDomains"]
        .map((key) => [key, value[key]])
        .filter(([, list]) => isDomainList(list));
    if (kept.length === 0) {
        return undefined;
    }
    return Object.fromEntries(kept);
}
|
|
331
|
+
/**
 * Resumes a previously journaled run.
 *
 * Loads the run's metadata and replay cache from `config.store`, restores the
 * recorded effort, limits and source filter (each only when it has the
 * expected type), and starts a new run under the same id.
 *
 * @param runId Id of the journaled run.
 * @param config Atlas config; `config.store` must be the store the run was
 *   journaled to.
 * @param resume Optional abort `signal` and clock override `now`.
 * @returns The handle of the restarted run.
 * @throws AtlasError with code "resume" when no store is configured or no
 *   journaled run with a string question exists for `runId`.
 */
export async function resumeRun(runId, config, resume = {}) {
    const store = config.store;
    if (!store) {
        throw new AtlasError("resume requires config.store (the store the original run journaled to)", "resume");
    }
    const meta = await loadRunMeta(store, runId);
    if (!meta || typeof meta.question !== "string") {
        throw new AtlasError(`no journaled run found for "${runId}"`, "resume");
    }
    const replay = await loadReplayCache(store, runId);
    // Journaled field -> budget option; only numeric values are carried over.
    const limitFields = [
        ["budgetUSD", "maxUSD"],
        ["maxTokens", "maxTokens"],
        ["maxDurationMs", "maxDurationMs"],
        ["maxSources", "maxSources"],
    ];
    const budget = Object.fromEntries(limitFields
        .filter(([metaKey]) => typeof meta[metaKey] === "number")
        .map(([metaKey, budgetKey]) => [budgetKey, meta[metaKey]]));
    const sources = sourceFilterFromMeta(meta.sourceFilter);
    const effortPart = typeof meta.effort === "string" ? { effort: meta.effort } : {};
    const budgetPart = Object.keys(budget).length > 0 ? { budget } : {};
    const sourcesPart = sources ? { sources } : {};
    const signalPart = resume.signal ? { signal: resume.signal } : {};
    const options = {
        runId,
        ...effortPart,
        ...budgetPart,
        ...sourcesPart,
        ...signalPart,
    };
    // Anchor the run's date to the original start time when it was journaled.
    const anchorStartedAt = typeof meta.startedAt === "number" ? meta.startedAt : undefined;
    return startRun({
        config,
        question: meta.question,
        options,
        replay,
        anchorStartedAt,
        now: resume.now,
    });
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { Agent } from "undici";
|
|
2
|
+
import { type SafetyPolicy } from "./safety.js";
|
|
3
|
+
/** Dispatcher type returned by `createSafeDispatcher`; an alias of undici's `Agent`. */
export type SafeDispatcher = Agent;
|
|
4
|
+
/** Lookup options accepted by the function `safeLookup` returns; they are forwarded to `dns.lookup`. */
type SafeLookupOptions = {
|
|
5
|
+
/** When true the callback receives the full address list instead of the first entry. */
all?: boolean;
|
|
6
|
+
family?: number;
|
|
7
|
+
hints?: number;
|
|
8
|
+
verbatim?: boolean;
|
|
9
|
+
};
|
|
10
|
+
/** Callback of the lookup function: an error, or either one address with its family or a list of address/family pairs. */
type SafeLookupCallback = (err: NodeJS.ErrnoException | null, address: string | Array<{
|
|
11
|
+
address: string;
|
|
12
|
+
family: number;
|
|
13
|
+
}>, family?: number) => void;
|
|
14
|
+
/** Builds a DNS lookup function that fails with code "ESSRFBLOCKED" when a host resolves to a private address, unless the policy allows private networks. */
export declare function safeLookup(policy: SafetyPolicy): (hostname: string, options: SafeLookupOptions, callback: SafeLookupCallback) => void;
|
|
15
|
+
/** Creates an undici `Agent` whose connections resolve hostnames through `safeLookup(policy)`. */
export declare function createSafeDispatcher(policy: SafetyPolicy): SafeDispatcher;
|
|
16
|
+
export {};
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { lookup as dnsLookup } from "node:dns";
|
|
2
|
+
import { Agent } from "undici";
|
|
3
|
+
import { isPrivateAddress } from "./safety.js";
|
|
4
|
+
/**
 * Builds a `dns.lookup`-compatible function that refuses private addresses.
 *
 * The returned function always resolves the full address list. Unless
 * `policy.allowPrivateNetworks` is set, the lookup fails with an error whose
 * `code` is "ESSRFBLOCKED" as soon as any resolved address is private.
 * Otherwise it answers in the shape the caller asked for: the whole list when
 * `options.all` is set, else the first address and its family.
 *
 * @param policy Safety policy; only `allowPrivateNetworks` is read here.
 * @returns A lookup function `(hostname, options, callback)`.
 */
export function safeLookup(policy) {
    return (hostname, options, callback) => {
        // Always ask for every address so none can slip past the check.
        const lookupOptions = { ...options, all: true };
        dnsLookup(hostname, lookupOptions, (err, addresses) => {
            if (err) {
                callback(err, "", 0);
                return;
            }
            const enforce = !policy.allowPrivateNetworks;
            const blocked = enforce
                ? addresses.find((entry) => isPrivateAddress(entry.address))
                : undefined;
            if (blocked) {
                const denial = Object.assign(new Error(`SSRF blocked: ${hostname} resolves to private address ${blocked.address}`), { code: "ESSRFBLOCKED" });
                callback(denial, "", 0);
                return;
            }
            if (options.all) {
                callback(null, addresses);
                return;
            }
            const first = addresses[0];
            callback(null, first.address, first.family);
        });
    };
}
|
|
27
|
+
/**
 * Creates an undici `Agent` whose connections resolve hostnames through
 * `safeLookup(policy)`.
 *
 * @param policy Safety policy forwarded to `safeLookup`.
 * @returns The configured agent.
 */
export function createSafeDispatcher(policy) {
    const lookup = safeLookup(policy);
    return new Agent({ connect: { lookup } });
}
|