open-research 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +145 -22
- package/dist/cli.js +337 -38
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -74,36 +74,159 @@ It has tools that coding agents don't: federated academic paper search, PDF extr
|
|
|
74
74
|
|
|
75
75
|
Everything stays local. Your workspace is a directory with `sources/`, `notes/`, `papers/`, `experiments/`. The agent reads and writes to it. Risky edits go to a review queue.
|
|
76
76
|
|
|
77
|
-
##
|
|
77
|
+
## Agent Modes
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
Open Research operates in three modes. Cycle with `Shift+Tab`:
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
- **devils-advocate** — stress-test claims and assumptions
|
|
83
|
-
- **methodology-critic** — critique research methodology
|
|
84
|
-
- **evidence-adjudicator** — evaluate evidence quality
|
|
85
|
-
- **experiment-designer** — design experiments
|
|
86
|
-
- **draft-paper** — draft LaTeX papers from workspace evidence
|
|
87
|
-
- **paper-explainer** — explain complex papers
|
|
88
|
-
- **synthesis-updater** — update syntheses with new findings
|
|
81
|
+
### Manual Review (default)
|
|
89
82
|
|
|
90
|
-
|
|
83
|
+
The agent proposes changes. You review and accept (`a`) or reject (`r`) each one. Best for sensitive work where every edit matters.
|
|
84
|
+
|
|
85
|
+
### Auto-Approve
|
|
86
|
+
|
|
87
|
+
All file writes are applied immediately without review. Best for exploratory work where speed matters more than control.
|
|
88
|
+
|
|
89
|
+
### Auto-Research
|
|
90
|
+
|
|
91
|
+
The most powerful mode. A two-phase autonomous research workflow:
|
|
92
|
+
|
|
93
|
+
**Phase 1 — Planning.** The agent enters read-only planning mode. It reads your workspace, searches academic databases, and asks you clarifying questions. It then produces a **Research Charter** — a structured contract defining:
|
|
94
|
+
|
|
95
|
+
- The research question (precisely stated)
|
|
96
|
+
- Success criteria (what "done" looks like)
|
|
97
|
+
- Scope boundaries (what's explicitly out of scope)
|
|
98
|
+
- Known starting points (papers, data, leads)
|
|
99
|
+
- Proposed investigation steps
|
|
100
|
+
|
|
101
|
+
You review the charter and either approve it, send it back for revision, or cancel.
|
|
102
|
+
|
|
103
|
+
**Phase 2 — Execution.** Once approved, the agent executes the charter autonomously — searching papers, reading sources, running analysis code, writing notes, and producing artifacts. It runs until the success criteria are met or it hits a dead end and reports what it found.
|
|
104
|
+
|
|
105
|
+
## Research Skills
|
|
106
|
+
|
|
107
|
+
Skills are pluggable research methodologies — detailed workflow prompts that guide the agent through a specific research task. Type `/<skill-name>` to activate.
|
|
108
|
+
|
|
109
|
+
### Discovery & Reading
|
|
110
|
+
|
|
111
|
+
| Skill | What it does |
|
|
112
|
+
|---|---|
|
|
113
|
+
| **`/source-scout`** | Systematically finds papers the workspace is missing. Searches with multiple query variations, evaluates relevance by citation count and venue, fetches key papers, produces a prioritized scout report with gap analysis. |
|
|
114
|
+
| **`/paper-explainer`** | Deep-reads a paper and produces a structured breakdown: one-sentence summary, problem & motivation, key contributions, method explained at two levels (intuitive + technical), experimental results, limitations, and connections to your workspace. |
|
|
115
|
+
| **`/literature-reviewer`** | Produces a structured literature review: inventories all sources, clusters by theme, synthesizes each theme chronologically, maps relationships between papers, performs gap analysis (methodological, empirical, theoretical), and writes the review with optional PRISMA systematic review support. |
|
|
116
|
+
|
|
117
|
+
### Critical Evaluation
|
|
118
|
+
|
|
119
|
+
| Skill | What it does |
|
|
120
|
+
|---|---|
|
|
121
|
+
| **`/devils-advocate`** | Stress-tests every claim in the workspace. Attacks each one through six lenses: evidence gap, logical gap, scope overclaim, alternative explanation, replication concern, and statistical concern. Actively searches for counter-evidence. Rates each weakness as Critical/Significant/Minor. |
|
|
122
|
+
| **`/methodology-critic`** | Reviews study design, sample selection, controls, measurement validity, statistical methods, and reporting completeness. If code is available, reproduces the analysis to verify results. Rates each study Rigorous/Acceptable/Concerning/Flawed. |
|
|
123
|
+
| **`/evidence-adjudicator`** | Judges conflicting claims using a formal evidence hierarchy (meta-analysis → RCT → cohort → case study → opinion). Checks for bias and conflicts of interest. Delivers a clear verdict with evidence ratings: Strong/Moderate/Weak/Insufficient. |
|
|
124
|
+
|
|
125
|
+
### Analysis & Experimentation
|
|
126
|
+
|
|
127
|
+
| Skill | What it does |
|
|
128
|
+
|---|---|
|
|
129
|
+
| **`/experiment-designer`** | Autonomous proof engine. Takes a hypothesis and runs the full loop: formalize → design minimal experiment → write code → run it → analyze results → iterate (up to 5x) until proven or disproven. All artifacts saved to `experiments/` with versioned scripts. |
|
|
130
|
+
| **`/data-analyst`** | End-to-end statistical analysis: explore data (distributions, missing values) → clean (with documented decisions) → analyze (appropriate tests, mandatory effect sizes and confidence intervals) → visualize (matplotlib/seaborn) → interpret with honest caveats. |
|
|
131
|
+
|
|
132
|
+
### Synthesis & Writing
|
|
133
|
+
|
|
134
|
+
| Skill | What it does |
|
|
135
|
+
|---|---|
|
|
136
|
+
| **`/synthesis-updater`** | Living-document management. Integrates new evidence into existing notes with full provenance tracking (`[Source: Author Year]`), confidence labels (`[Strong]`, `[Moderate]`, `[Weak]`, `[Contested]`), change trails, and a synthesis changelog. |
|
|
137
|
+
| **`/draft-paper`** | Drafts a publication-quality LaTeX paper: gathers workspace evidence → outlines the argument → writes each section (intro through conclusion) → generates BibTeX from sources → self-reviews for unsupported claims and argument flow. |
|
|
138
|
+
|
|
139
|
+
### Meta
|
|
140
|
+
|
|
141
|
+
| Skill | What it does |
|
|
142
|
+
|---|---|
|
|
143
|
+
| **`/skill-creator`** | Create your own custom skills in `~/.open-research/skills/`. Each skill is a markdown file with a workflow prompt — no code needed. |
|
|
144
|
+
|
|
145
|
+
## Memory
|
|
146
|
+
|
|
147
|
+
The agent learns about you automatically. After each conversation, a background process identifies facts worth remembering — your research field, preferred tools, current projects, methodological preferences.
|
|
148
|
+
|
|
149
|
+
Memories persist in `~/.open-research/memory.json` across sessions. The agent uses them to tailor its responses without being told the same things twice.
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
/memory View all stored memories
|
|
153
|
+
/memory clear Delete everything
|
|
154
|
+
/memory delete <id> Remove a specific memory
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Live LaTeX Preview
|
|
158
|
+
|
|
159
|
+
When the agent drafts a paper, preview it instantly:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
/preview papers/draft.tex
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Opens a localhost server in your browser with:
|
|
166
|
+
- Sections, math (KaTeX), citations, lists rendered as styled HTML
|
|
167
|
+
- Auto-reload — the page refreshes every time the file changes
|
|
168
|
+
- Dark theme matching the CLI aesthetic
|
|
169
|
+
- No LaTeX installation required for preview
|
|
170
|
+
|
|
171
|
+
For final PDF output, the agent compiles with `pdflatex` or `tectonic` via `run_command`.
|
|
91
172
|
|
|
92
173
|
## Tools
|
|
93
174
|
|
|
175
|
+
The agent has 13 tools with full filesystem and shell access:
|
|
176
|
+
|
|
94
177
|
| Tool | Description |
|
|
95
178
|
|---|---|
|
|
96
|
-
| `read_file` | Read any file
|
|
97
|
-
| `read_pdf` | Extract text from PDFs |
|
|
98
|
-
| `run_command` | Shell execution — Python, R, LaTeX, anything |
|
|
99
|
-
| `list_directory` | Explore directory trees |
|
|
100
|
-
| `search_external_sources` | arXiv + Semantic Scholar + OpenAlex |
|
|
101
|
-
| `fetch_url` | Fetch web pages and APIs |
|
|
179
|
+
| `read_file` | Read any file — streaming, binary detection, `~` expansion |
|
|
180
|
+
| `read_pdf` | Extract text from PDFs with page-range selection |
|
|
181
|
+
| `run_command` | Shell execution — Python, R, LaTeX, curl, git, anything |
|
|
182
|
+
| `list_directory` | Explore directory trees with depth control |
|
|
183
|
+
| `search_external_sources` | Federated search: arXiv + Semantic Scholar + OpenAlex |
|
|
184
|
+
| `fetch_url` | Fetch web pages and APIs, HTML auto-converted to text via cheerio |
|
|
102
185
|
| `write_new_file` | Create workspace files |
|
|
103
|
-
| `update_existing_file` | Edit with review policy |
|
|
104
|
-
| `ask_user` | Pause and ask
|
|
105
|
-
| `search_workspace` | Full-text search across files |
|
|
106
|
-
| `create_paper` | Create LaTeX drafts |
|
|
186
|
+
| `update_existing_file` | Edit existing files with review policy |
|
|
187
|
+
| `ask_user` | Pause and ask the user a question with selectable options |
|
|
188
|
+
| `search_workspace` | Full-text search across workspace files |
|
|
189
|
+
| `create_paper` | Create LaTeX paper drafts |
|
|
190
|
+
| `load_skill` | Activate a research skill |
|
|
191
|
+
| `read_skill_reference` | Read reference materials from active skills |
|
|
192
|
+
|
|
193
|
+
## Commands
|
|
194
|
+
|
|
195
|
+
| Command | Description |
|
|
196
|
+
|---|---|
|
|
197
|
+
| `/auth` | Connect OpenAI account via browser |
|
|
198
|
+
| `/auth-codex` | Import existing Codex CLI auth |
|
|
199
|
+
| `/init` | Initialize workspace in current directory |
|
|
200
|
+
| `/skills` | List available research skills |
|
|
201
|
+
| `/preview <file>` | Live-preview a LaTeX file in browser |
|
|
202
|
+
| `/memory` | View or manage stored memories |
|
|
203
|
+
| `/config` | View or change settings (model, theme, mode) |
|
|
204
|
+
| `/resume` | Resume a previous session |
|
|
205
|
+
| `/clear` | Start a new conversation |
|
|
206
|
+
| `/help` | Show all commands |
|
|
207
|
+
|
|
208
|
+
## Workspace
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
my-research/
|
|
212
|
+
sources/ # PDFs, papers, raw data
|
|
213
|
+
notes/ # Research notes, syntheses, reviews
|
|
214
|
+
artifacts/ # Generated outputs
|
|
215
|
+
papers/ # LaTeX paper drafts
|
|
216
|
+
experiments/ # Analysis scripts, results, hypotheses
|
|
217
|
+
.open-research/ # Workspace metadata and session logs
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Features
|
|
221
|
+
|
|
222
|
+
- **Terminal markdown** — bold, italic, code blocks, headings rendered natively
|
|
223
|
+
- **Autocomplete** — slash commands and skills in an arrow-key navigable dropdown
|
|
224
|
+
- **@file mentions** — reference workspace files inline in prompts
|
|
225
|
+
- **Shift+Enter** — multi-line input
|
|
226
|
+
- **Context management** — automatic compaction when history exceeds 80% of the model's context window (or 250k tokens, for models whose window is larger than that)
|
|
227
|
+
- **Token tracking** — context usage visible in the status bar
|
|
228
|
+
- **Tool activity streaming** — real-time display of what the agent is doing
|
|
229
|
+
- **Update notifications** — checks for new versions on launch
|
|
107
230
|
|
|
108
231
|
## Development
|
|
109
232
|
|
|
@@ -112,7 +235,7 @@ git clone https://github.com/gangj277/open-research.git
|
|
|
112
235
|
cd open-research
|
|
113
236
|
npm install
|
|
114
237
|
npm run dev # dev mode
|
|
115
|
-
npm test #
|
|
238
|
+
npm test # 80 tests
|
|
116
239
|
npm run build # production build
|
|
117
240
|
```
|
|
118
241
|
|
package/dist/cli.js
CHANGED
|
@@ -270,7 +270,7 @@ import { createHash, randomBytes } from "crypto";
|
|
|
270
270
|
var OPENAI_AUTH_URL = "https://auth.openai.com/oauth/authorize";
|
|
271
271
|
var OPENAI_TOKEN_URL = "https://auth.openai.com/oauth/token";
|
|
272
272
|
var OPENAI_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann";
|
|
273
|
-
var OPENAI_SCOPES = "openid profile email offline_access
|
|
273
|
+
var OPENAI_SCOPES = "openid profile email offline_access";
|
|
274
274
|
function getRedirectUri(port) {
|
|
275
275
|
return `http://localhost:${port}/auth/callback`;
|
|
276
276
|
}
|
|
@@ -294,7 +294,7 @@ function buildAuthorizationUrl(input2) {
|
|
|
294
294
|
code_challenge_method: "S256",
|
|
295
295
|
id_token_add_organizations: "true",
|
|
296
296
|
codex_cli_simplified_flow: "true",
|
|
297
|
-
originator: "
|
|
297
|
+
originator: "open-research"
|
|
298
298
|
});
|
|
299
299
|
return `${OPENAI_AUTH_URL}?${params.toString()}`;
|
|
300
300
|
}
|
|
@@ -1482,8 +1482,11 @@ function TextInput({
|
|
|
1482
1482
|
} else if (key.rightArrow) {
|
|
1483
1483
|
if (showCursor) nextCursor++;
|
|
1484
1484
|
} else if (!key.ctrl && !key.meta) {
|
|
1485
|
-
|
|
1486
|
-
|
|
1485
|
+
const clean = input2.replace(/\x1b\[[?>=!]*[0-9;]*[a-zA-Z]/g, "").replace(/[\x00-\x08\x0e-\x1f]/g, "");
|
|
1486
|
+
if (clean) {
|
|
1487
|
+
nextValue = originalValue.slice(0, cursorOffset) + clean + originalValue.slice(cursorOffset);
|
|
1488
|
+
nextCursor += clean.length;
|
|
1489
|
+
}
|
|
1487
1490
|
}
|
|
1488
1491
|
nextCursor = Math.max(0, Math.min(nextCursor, nextValue.length));
|
|
1489
1492
|
setCursorOffset(nextCursor);
|
|
@@ -1696,10 +1699,14 @@ function createOpenAIAuthProvider(credentials, onTokenRefresh, onValidationChang
|
|
|
1696
1699
|
}
|
|
1697
1700
|
return creds.accessToken;
|
|
1698
1701
|
}
|
|
1702
|
+
const sessionId = crypto.randomUUID();
|
|
1699
1703
|
function buildHeaders3(token) {
|
|
1700
1704
|
const headers = {
|
|
1701
1705
|
Authorization: `Bearer ${token}`,
|
|
1702
|
-
"Content-Type": "application/json"
|
|
1706
|
+
"Content-Type": "application/json",
|
|
1707
|
+
originator: "open-research",
|
|
1708
|
+
"User-Agent": `open-research/${process.env.npm_package_version ?? "0.1.0"} (${process.platform} ${process.arch})`,
|
|
1709
|
+
session_id: sessionId
|
|
1703
1710
|
};
|
|
1704
1711
|
if (creds.accountId) {
|
|
1705
1712
|
headers["ChatGPT-Account-Id"] = creds.accountId;
|
|
@@ -1779,11 +1786,17 @@ function createOpenAIAuthProvider(credentials, onTokenRefresh, onValidationChang
|
|
|
1779
1786
|
} else if (event.type === "response.completed") {
|
|
1780
1787
|
const resp = event.data.response;
|
|
1781
1788
|
if (resp?.usage) {
|
|
1782
|
-
const
|
|
1789
|
+
const u = resp.usage;
|
|
1790
|
+
const inputDetails = u.input_tokens_details;
|
|
1791
|
+
const outputDetails = u.output_tokens_details;
|
|
1792
|
+
const inputTokens = u.input_tokens ?? 0;
|
|
1793
|
+
const outputTokens = u.output_tokens ?? 0;
|
|
1783
1794
|
usageData = {
|
|
1784
|
-
promptTokens:
|
|
1785
|
-
completionTokens:
|
|
1786
|
-
totalTokens:
|
|
1795
|
+
promptTokens: inputTokens,
|
|
1796
|
+
completionTokens: outputTokens,
|
|
1797
|
+
totalTokens: u.total_tokens ?? inputTokens + outputTokens,
|
|
1798
|
+
cachedTokens: inputDetails?.cached_tokens ?? 0,
|
|
1799
|
+
reasoningTokens: outputDetails?.reasoning_tokens ?? 0
|
|
1787
1800
|
};
|
|
1788
1801
|
}
|
|
1789
1802
|
if (resp?.model) {
|
|
@@ -1895,11 +1908,17 @@ function createOpenAIAuthProvider(credentials, onTokenRefresh, onValidationChang
|
|
|
1895
1908
|
case "response.completed": {
|
|
1896
1909
|
const resp = event.data.response;
|
|
1897
1910
|
if (resp?.usage) {
|
|
1898
|
-
const
|
|
1911
|
+
const u = resp.usage;
|
|
1912
|
+
const inputDetails = u.input_tokens_details;
|
|
1913
|
+
const outputDetails = u.output_tokens_details;
|
|
1914
|
+
const inputTokens = u.input_tokens ?? 0;
|
|
1915
|
+
const outputTokens = u.output_tokens ?? 0;
|
|
1899
1916
|
usage = {
|
|
1900
|
-
promptTokens:
|
|
1901
|
-
completionTokens:
|
|
1902
|
-
totalTokens:
|
|
1917
|
+
promptTokens: inputTokens,
|
|
1918
|
+
completionTokens: outputTokens,
|
|
1919
|
+
totalTokens: u.total_tokens ?? inputTokens + outputTokens,
|
|
1920
|
+
cachedTokens: inputDetails?.cached_tokens ?? 0,
|
|
1921
|
+
reasoningTokens: outputDetails?.reasoning_tokens ?? 0
|
|
1903
1922
|
};
|
|
1904
1923
|
}
|
|
1905
1924
|
break;
|
|
@@ -4507,35 +4526,65 @@ var MODEL_CONTEXT_WINDOWS = {
|
|
|
4507
4526
|
"o4-mini": 2e5
|
|
4508
4527
|
};
|
|
4509
4528
|
var DEFAULT_CONTEXT_WINDOW = 128e3;
|
|
4510
|
-
var
|
|
4529
|
+
var AUTO_COMPACT_TOKEN_LIMIT = 25e4;
|
|
4511
4530
|
function getContextWindow(model) {
|
|
4512
4531
|
return MODEL_CONTEXT_WINDOWS[model] ?? DEFAULT_CONTEXT_WINDOW;
|
|
4513
4532
|
}
|
|
4514
4533
|
function getCompactThreshold(model) {
|
|
4515
|
-
|
|
4534
|
+
const window = getContextWindow(model);
|
|
4535
|
+
return window > AUTO_COMPACT_TOKEN_LIMIT ? AUTO_COMPACT_TOKEN_LIMIT : Math.floor(window * 0.8);
|
|
4536
|
+
}
|
|
4537
|
+
function emptyBreakdown() {
|
|
4538
|
+
return { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 }, total: 0 };
|
|
4516
4539
|
}
|
|
4517
4540
|
function createSessionUsage() {
|
|
4518
4541
|
return {
|
|
4542
|
+
cumulative: emptyBreakdown(),
|
|
4543
|
+
lastTurn: emptyBreakdown(),
|
|
4544
|
+
estimatedCurrentTokens: 0,
|
|
4545
|
+
compactionCount: 0,
|
|
4519
4546
|
inputTokens: 0,
|
|
4520
4547
|
outputTokens: 0,
|
|
4521
4548
|
totalTokens: 0,
|
|
4522
|
-
lastTurnTokens: 0
|
|
4523
|
-
estimatedCurrentTokens: 0,
|
|
4524
|
-
compactionCount: 0
|
|
4549
|
+
lastTurnTokens: 0
|
|
4525
4550
|
};
|
|
4526
4551
|
}
|
|
4527
4552
|
function updateUsageFromApi(usage, apiUsage) {
|
|
4528
|
-
|
|
4529
|
-
|
|
4530
|
-
|
|
4553
|
+
const cached = apiUsage.cachedTokens ?? 0;
|
|
4554
|
+
const reasoning = apiUsage.reasoningTokens ?? 0;
|
|
4555
|
+
const adjustedInput = Math.max(0, apiUsage.promptTokens - cached);
|
|
4556
|
+
const adjustedOutput = Math.max(0, apiUsage.completionTokens - reasoning);
|
|
4557
|
+
usage.cumulative.input += adjustedInput;
|
|
4558
|
+
usage.cumulative.output += adjustedOutput;
|
|
4559
|
+
usage.cumulative.reasoning += reasoning;
|
|
4560
|
+
usage.cumulative.cache.read += cached;
|
|
4561
|
+
usage.cumulative.total += apiUsage.totalTokens;
|
|
4562
|
+
usage.lastTurn = {
|
|
4563
|
+
input: adjustedInput,
|
|
4564
|
+
output: adjustedOutput,
|
|
4565
|
+
reasoning,
|
|
4566
|
+
cache: { read: cached, write: 0 },
|
|
4567
|
+
total: apiUsage.totalTokens
|
|
4568
|
+
};
|
|
4569
|
+
usage.inputTokens = usage.cumulative.input;
|
|
4570
|
+
usage.outputTokens = usage.cumulative.output;
|
|
4571
|
+
usage.totalTokens = usage.cumulative.total;
|
|
4531
4572
|
usage.lastTurnTokens = apiUsage.totalTokens;
|
|
4532
4573
|
}
|
|
4533
4574
|
var PRUNE_PROTECT_TOKENS = 4e4;
|
|
4534
4575
|
var PRUNE_MIN_SAVINGS = 2e4;
|
|
4576
|
+
var PRUNE_SKIP_RECENT_USER_TURNS = 2;
|
|
4535
4577
|
function pruneToolOutputs(messages) {
|
|
4578
|
+
const userIndices = [];
|
|
4579
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
4580
|
+
if (messages[i].role === "user") userIndices.push(i);
|
|
4581
|
+
}
|
|
4582
|
+
const protectBoundary = userIndices.length >= PRUNE_SKIP_RECENT_USER_TURNS ? userIndices[PRUNE_SKIP_RECENT_USER_TURNS - 1] : 0;
|
|
4536
4583
|
const toolIndices = [];
|
|
4537
4584
|
for (let i = 0; i < messages.length; i++) {
|
|
4538
|
-
if (messages[i].role === "tool"
|
|
4585
|
+
if (messages[i].role === "tool" && i < protectBoundary) {
|
|
4586
|
+
toolIndices.push(i);
|
|
4587
|
+
}
|
|
4539
4588
|
}
|
|
4540
4589
|
if (toolIndices.length === 0) return { messages, savedTokens: 0 };
|
|
4541
4590
|
let protectedTokens = 0;
|
|
@@ -4555,44 +4604,105 @@ function pruneToolOutputs(messages) {
|
|
|
4555
4604
|
const idx = toolIndices[i];
|
|
4556
4605
|
const msg = result[idx];
|
|
4557
4606
|
const oldTokens = estimateMessageTokens(msg);
|
|
4558
|
-
const
|
|
4559
|
-
savedTokens += oldTokens - estimateTokens(
|
|
4560
|
-
result[idx] = { ...msg, content:
|
|
4607
|
+
const stub = "[output pruned \u2014 use read_file to re-read if needed]";
|
|
4608
|
+
savedTokens += oldTokens - estimateTokens(stub);
|
|
4609
|
+
result[idx] = { ...msg, content: stub };
|
|
4561
4610
|
}
|
|
4562
4611
|
if (savedTokens < PRUNE_MIN_SAVINGS) {
|
|
4563
4612
|
return { messages, savedTokens: 0 };
|
|
4564
4613
|
}
|
|
4565
4614
|
return { messages: result, savedTokens };
|
|
4566
4615
|
}
|
|
4567
|
-
|
|
4616
|
+
var COMPACTION_SYSTEM_PROMPT = `You are a conversation summarizer for a research agent. Your job is to create a handoff summary that another agent instance can use to seamlessly continue the work.
|
|
4617
|
+
|
|
4618
|
+
Do not respond to any questions in the conversation. Only output the summary.
|
|
4619
|
+
Respond in the same language the user used.`;
|
|
4620
|
+
var COMPACTION_USER_TEMPLATE = `Provide a detailed summary of our conversation above for handoff to another agent that will continue the work.
|
|
4621
|
+
|
|
4622
|
+
Stick to this template:
|
|
4623
|
+
|
|
4624
|
+
## Goal
|
|
4625
|
+
[What is the user trying to accomplish? Be specific.]
|
|
4626
|
+
|
|
4627
|
+
## Instructions
|
|
4628
|
+
- [Important instructions or preferences the user gave]
|
|
4629
|
+
- [Research methodology constraints or requirements]
|
|
4630
|
+
- [If there is a research charter or plan, summarize its key points]
|
|
4631
|
+
|
|
4632
|
+
## Discoveries
|
|
4633
|
+
- [Key findings from paper searches, data analysis, or experiments]
|
|
4634
|
+
- [Important facts, numbers, or evidence discovered]
|
|
4635
|
+
- [Any surprising or contradicting results]
|
|
4636
|
+
|
|
4637
|
+
## Accomplished
|
|
4638
|
+
- [What work has been completed]
|
|
4639
|
+
- [What is currently in progress]
|
|
4640
|
+
- [What remains to be done]
|
|
4641
|
+
|
|
4642
|
+
## Relevant Files
|
|
4643
|
+
[List workspace files that were read, created, or modified. Include what each contains.]
|
|
4644
|
+
- path/to/file.md \u2014 description of contents
|
|
4645
|
+
- experiments/script.py \u2014 what it does and its results
|
|
4646
|
+
|
|
4647
|
+
## Active Context
|
|
4648
|
+
- [Current research question or hypothesis being investigated]
|
|
4649
|
+
- [Which skills are active]
|
|
4650
|
+
- [Any pending user decisions or questions]
|
|
4651
|
+
|
|
4652
|
+
## Next Steps
|
|
4653
|
+
1. [Most immediate next action]
|
|
4654
|
+
2. [Following action]
|
|
4655
|
+
3. [And so on]
|
|
4656
|
+
|
|
4657
|
+
{CUSTOM_INSTRUCTIONS}`;
|
|
4658
|
+
async function compactConversation(messages, provider, model, customInstructions, signal) {
|
|
4568
4659
|
const systemMsg = messages.find((m) => m.role === "system");
|
|
4569
4660
|
const conversationMsgs = messages.filter((m) => m.role !== "system");
|
|
4570
4661
|
const conversationText = conversationMsgs.map((m) => {
|
|
4571
4662
|
const role = m.role === "assistant" ? "Agent" : m.role === "user" ? "User" : "Tool";
|
|
4572
|
-
|
|
4573
|
-
|
|
4663
|
+
let content;
|
|
4664
|
+
if (typeof m.content === "string") {
|
|
4665
|
+
content = m.content.length > 3e3 ? m.content.slice(0, 3e3) + "\n[... truncated]" : m.content;
|
|
4666
|
+
} else if (m.content) {
|
|
4667
|
+
content = JSON.stringify(m.content).slice(0, 1e3);
|
|
4668
|
+
} else if (m.tool_calls?.length) {
|
|
4669
|
+
content = m.tool_calls.map((tc) => `[tool: ${tc.function.name}]`).join(", ");
|
|
4670
|
+
} else {
|
|
4671
|
+
content = "[empty]";
|
|
4672
|
+
}
|
|
4673
|
+
return `[${role}]: ${content}`;
|
|
4574
4674
|
}).join("\n\n");
|
|
4675
|
+
const customBlock = customInstructions ? `
|
|
4676
|
+
|
|
4677
|
+
Additional instructions: ${customInstructions}` : "";
|
|
4678
|
+
const userPrompt = COMPACTION_USER_TEMPLATE.replace("{CUSTOM_INSTRUCTIONS}", customBlock);
|
|
4679
|
+
const compactionModel = model.includes("5.4") ? "gpt-5.4-mini" : model;
|
|
4575
4680
|
const summaryResponse = await provider.callLLM({
|
|
4576
4681
|
messages: [
|
|
4577
|
-
{
|
|
4578
|
-
role: "system",
|
|
4579
|
-
content: "You are performing a CONTEXT COMPACTION. Summarize the conversation into a concise handoff document. Include:\n1. **Goal**: What the user is trying to accomplish\n2. **Key discoveries**: Important findings, file paths, data points\n3. **Work completed**: What has been done so far\n4. **Next steps**: What should happen next\n5. **Active files**: Key file paths and their contents summary\n\nBe concise but preserve all actionable information. This summary will replace the full conversation history."
|
|
4580
|
-
},
|
|
4682
|
+
{ role: "system", content: COMPACTION_SYSTEM_PROMPT },
|
|
4581
4683
|
{
|
|
4582
4684
|
role: "user",
|
|
4583
|
-
content: `
|
|
4685
|
+
content: `Here is the conversation to summarize:
|
|
4686
|
+
|
|
4687
|
+
${conversationText.slice(0, 12e4)}
|
|
4584
4688
|
|
|
4585
|
-
|
|
4689
|
+
---
|
|
4690
|
+
|
|
4691
|
+
${userPrompt}`
|
|
4586
4692
|
}
|
|
4587
4693
|
],
|
|
4588
|
-
model,
|
|
4694
|
+
model: compactionModel,
|
|
4589
4695
|
maxTokens: 4096
|
|
4590
4696
|
});
|
|
4591
4697
|
const compacted = [];
|
|
4592
4698
|
if (systemMsg) compacted.push(systemMsg);
|
|
4593
4699
|
compacted.push({
|
|
4594
4700
|
role: "user",
|
|
4595
|
-
content: "
|
|
4701
|
+
content: "What have we accomplished so far in this research session?"
|
|
4702
|
+
});
|
|
4703
|
+
compacted.push({
|
|
4704
|
+
role: "assistant",
|
|
4705
|
+
content: summaryResponse.content
|
|
4596
4706
|
});
|
|
4597
4707
|
return compacted;
|
|
4598
4708
|
}
|
|
@@ -4610,7 +4720,17 @@ async function maybeCompact(messages, model, provider, usage, signal) {
|
|
|
4610
4720
|
usage.compactionCount++;
|
|
4611
4721
|
return { messages: pruned, didCompact: true };
|
|
4612
4722
|
}
|
|
4613
|
-
const compacted = await compactConversation(pruned, provider, model, signal);
|
|
4723
|
+
const compacted = await compactConversation(pruned, provider, model, void 0, signal);
|
|
4724
|
+
usage.estimatedCurrentTokens = estimateConversationTokens(compacted);
|
|
4725
|
+
usage.compactionCount++;
|
|
4726
|
+
return { messages: compacted, didCompact: true };
|
|
4727
|
+
}
|
|
4728
|
+
async function manualCompact(messages, model, provider, usage, customInstructions, signal) {
|
|
4729
|
+
if (messages.length <= 2) {
|
|
4730
|
+
return { messages, didCompact: false };
|
|
4731
|
+
}
|
|
4732
|
+
const { messages: pruned } = pruneToolOutputs(messages);
|
|
4733
|
+
const compacted = await compactConversation(pruned, provider, model, customInstructions, signal);
|
|
4614
4734
|
usage.estimatedCurrentTokens = estimateConversationTokens(compacted);
|
|
4615
4735
|
usage.compactionCount++;
|
|
4616
4736
|
return { messages: compacted, didCompact: true };
|
|
@@ -5630,6 +5750,13 @@ var SLASH_COMMANDS = [
|
|
|
5630
5750
|
{ name: "clear", aliases: ["/new"], description: "Clear conversation and start fresh", category: "session" },
|
|
5631
5751
|
{ name: "help", aliases: ["/commands"], description: "Show available commands", category: "system" },
|
|
5632
5752
|
{ name: "config", aliases: ["/settings"], description: "View or change settings (e.g. /config theme dark)", category: "system" },
|
|
5753
|
+
{ name: "compact", aliases: [], description: "Manually compress conversation to save context (e.g. /compact keep the statistics)", category: "session" },
|
|
5754
|
+
{ name: "cost", aliases: ["/tokens", "/usage"], description: "Show token usage and cost for the current session", category: "system" },
|
|
5755
|
+
{ name: "context", aliases: [], description: "Show context window usage \u2014 how full it is", category: "system" },
|
|
5756
|
+
{ name: "btw", aliases: ["/aside"], description: "Ask a side question without affecting the main conversation", category: "session" },
|
|
5757
|
+
{ name: "export", aliases: [], description: "Export conversation as markdown to a file", category: "session" },
|
|
5758
|
+
{ name: "diff", aliases: ["/changes"], description: "Show files the agent has changed in this session", category: "workspace" },
|
|
5759
|
+
{ name: "doctor", aliases: [], description: "Diagnose auth, connectivity, and tool availability", category: "system" },
|
|
5633
5760
|
{ name: "preview", aliases: [], description: "Live preview a LaTeX file in browser (e.g. /preview papers/draft.tex)", category: "workspace" },
|
|
5634
5761
|
{ name: "memory", aliases: ["/memories"], description: "View or clear stored memories about you", category: "system" },
|
|
5635
5762
|
{ name: "exit", aliases: ["/quit", "/q"], description: "Exit Open Research", category: "system" }
|
|
@@ -6458,6 +6585,178 @@ function App({
|
|
|
6458
6585
|
addSystemMessage(" Esc unfocus prompt");
|
|
6459
6586
|
break;
|
|
6460
6587
|
}
|
|
6588
|
+
case "compact": {
|
|
6589
|
+
if (history.length === 0) {
|
|
6590
|
+
addSystemMessage("Nothing to compact \u2014 conversation is empty.");
|
|
6591
|
+
break;
|
|
6592
|
+
}
|
|
6593
|
+
const customInstructions = args || void 0;
|
|
6594
|
+
addSystemMessage(customInstructions ? `Compacting conversation (preserving: ${customInstructions})...` : "Compacting conversation...");
|
|
6595
|
+
setBusy(true);
|
|
6596
|
+
try {
|
|
6597
|
+
const provider = await createProviderFromStoredAuth({ homeDir });
|
|
6598
|
+
const msgs = [{ role: "system", content: "compaction" }, ...history.map((m) => m)];
|
|
6599
|
+
const { messages: compacted, didCompact } = await manualCompact(
|
|
6600
|
+
msgs,
|
|
6601
|
+
config?.defaults.model ?? "gpt-5.4",
|
|
6602
|
+
provider,
|
|
6603
|
+
sessionTokens,
|
|
6604
|
+
customInstructions
|
|
6605
|
+
);
|
|
6606
|
+
if (didCompact) {
|
|
6607
|
+
const newHistory = compacted.filter((m) => m.role !== "system").map((m) => ({
|
|
6608
|
+
role: m.role,
|
|
6609
|
+
content: m.content
|
|
6610
|
+
}));
|
|
6611
|
+
setHistory(newHistory);
|
|
6612
|
+
const k = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}k` : String(n);
|
|
6613
|
+
setTokenDisplay(`${k(sessionTokens.estimatedCurrentTokens)} ctx \xB7 ${k(sessionTokens.totalTokens)} total`);
|
|
6614
|
+
addSystemMessage(`Compacted. Context reduced to ~${Math.round(sessionTokens.estimatedCurrentTokens / 1e3)}k tokens.`);
|
|
6615
|
+
} else {
|
|
6616
|
+
addSystemMessage("Nothing to compact \u2014 conversation too short.");
|
|
6617
|
+
}
|
|
6618
|
+
} catch (err) {
|
|
6619
|
+
addSystemMessage(`Compaction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
6620
|
+
} finally {
|
|
6621
|
+
setBusy(false);
|
|
6622
|
+
}
|
|
6623
|
+
break;
|
|
6624
|
+
}
|
|
6625
|
+
case "cost": {
|
|
6626
|
+
const k = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}k` : String(n);
|
|
6627
|
+
const c = sessionTokens.cumulative;
|
|
6628
|
+
addSystemMessage("Session token usage:");
|
|
6629
|
+
addSystemMessage(` Input: ${k(c.input)} tokens`);
|
|
6630
|
+
addSystemMessage(` Output: ${k(c.output)} tokens`);
|
|
6631
|
+
if (c.reasoning > 0) addSystemMessage(` Reasoning: ${k(c.reasoning)} tokens`);
|
|
6632
|
+
if (c.cache.read > 0) addSystemMessage(` Cache read: ${k(c.cache.read)} tokens`);
|
|
6633
|
+
if (c.cache.write > 0) addSystemMessage(` Cache write: ${k(c.cache.write)} tokens`);
|
|
6634
|
+
addSystemMessage(` Total: ${k(c.total)} tokens`);
|
|
6635
|
+
addSystemMessage(` Context: ~${k(sessionTokens.estimatedCurrentTokens)} (current window)`);
|
|
6636
|
+
addSystemMessage(` Compactions: ${sessionTokens.compactionCount}`);
|
|
6637
|
+
break;
|
|
6638
|
+
}
|
|
6639
|
+
case "context": {
|
|
6640
|
+
const model = config?.defaults.model ?? "gpt-5.4";
|
|
6641
|
+
const window = getContextWindow(model);
|
|
6642
|
+
const threshold = getCompactThreshold(model);
|
|
6643
|
+
const current = sessionTokens.estimatedCurrentTokens || estimateConversationTokens(
|
|
6644
|
+
history.map((m) => m)
|
|
6645
|
+
);
|
|
6646
|
+
const pct = Math.round(current / window * 100);
|
|
6647
|
+
const barWidth = 40;
|
|
6648
|
+
const filled = Math.round(pct / 100 * barWidth);
|
|
6649
|
+
const bar = "\u2588".repeat(filled) + "\u2591".repeat(barWidth - filled);
|
|
6650
|
+
const color = pct > 90 ? "red" : pct > 70 ? "yellow" : "green";
|
|
6651
|
+
addSystemMessage(`Context window: ${model} (${(window / 1e3).toFixed(0)}k)`);
|
|
6652
|
+
addSystemMessage(` [${bar}] ${pct}%`);
|
|
6653
|
+
addSystemMessage(` ${(current / 1e3).toFixed(1)}k / ${(window / 1e3).toFixed(0)}k tokens used`);
|
|
6654
|
+
addSystemMessage(` Auto-compact at ${(threshold / 1e3).toFixed(0)}k (90%)`);
|
|
6655
|
+
if (pct > 80) {
|
|
6656
|
+
addSystemMessage(" Tip: run /compact to free space, or /clear to start fresh.");
|
|
6657
|
+
}
|
|
6658
|
+
break;
|
|
6659
|
+
}
|
|
6660
|
+
case "btw": {
|
|
6661
|
+
if (!args) {
|
|
6662
|
+
addSystemMessage("Usage: /btw <your side question>");
|
|
6663
|
+
break;
|
|
6664
|
+
}
|
|
6665
|
+
if (!hasAuth) {
|
|
6666
|
+
addSystemMessage("Not connected. Run /auth first.");
|
|
6667
|
+
break;
|
|
6668
|
+
}
|
|
6669
|
+
addSystemMessage(`Side question: ${args}`);
|
|
6670
|
+
setBusy(true);
|
|
6671
|
+
try {
|
|
6672
|
+
const provider = await createProviderFromStoredAuth({ homeDir });
|
|
6673
|
+
const response = await provider.callLLM({
|
|
6674
|
+
messages: [
|
|
6675
|
+
{ role: "system", content: "Answer this quick side question concisely. Do not reference any prior conversation." },
|
|
6676
|
+
{ role: "user", content: args }
|
|
6677
|
+
],
|
|
6678
|
+
model: config?.defaults.model ?? "gpt-5.4",
|
|
6679
|
+
maxTokens: 1e3
|
|
6680
|
+
});
|
|
6681
|
+
addSystemMessage(`Answer: ${response.content}`);
|
|
6682
|
+
} catch (err) {
|
|
6683
|
+
addSystemMessage(`Error: ${err instanceof Error ? err.message : String(err)}`);
|
|
6684
|
+
} finally {
|
|
6685
|
+
setBusy(false);
|
|
6686
|
+
}
|
|
6687
|
+
break;
|
|
6688
|
+
}
|
|
6689
|
+
case "export": {
|
|
6690
|
+
const fileName = args?.trim() || "conversation-export.md";
|
|
6691
|
+
const exportPath = __require("path").resolve(workspacePath ?? process.cwd(), fileName);
|
|
6692
|
+
const lines = [`# Open Research \u2014 Conversation Export
|
|
6693
|
+
`];
|
|
6694
|
+
for (const msg of messages) {
|
|
6695
|
+
if (msg.role === "user") lines.push(`## You
|
|
6696
|
+
${msg.text}
|
|
6697
|
+
`);
|
|
6698
|
+
else if (msg.role === "assistant") lines.push(`## Agent
|
|
6699
|
+
${msg.text}
|
|
6700
|
+
`);
|
|
6701
|
+
else lines.push(`> ${msg.text}
|
|
6702
|
+
`);
|
|
6703
|
+
}
|
|
6704
|
+
try {
|
|
6705
|
+
const fsModule = __require("fs/promises");
|
|
6706
|
+
await fsModule.writeFile(exportPath, lines.join("\n"), "utf8");
|
|
6707
|
+
addSystemMessage(`Exported ${messages.length} messages to ${exportPath}`);
|
|
6708
|
+
} catch (err) {
|
|
6709
|
+
addSystemMessage(`Export failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
6710
|
+
}
|
|
6711
|
+
break;
|
|
6712
|
+
}
|
|
6713
|
+
case "diff": {
|
|
6714
|
+
if (!workspacePath) {
|
|
6715
|
+
addSystemMessage("No workspace active.");
|
|
6716
|
+
break;
|
|
6717
|
+
}
|
|
6718
|
+
try {
|
|
6719
|
+
const { execSync } = __require("child_process");
|
|
6720
|
+
const gitStatus = execSync("git status --short 2>/dev/null || echo 'Not a git repo'", {
|
|
6721
|
+
cwd: workspacePath,
|
|
6722
|
+
encoding: "utf8"
|
|
6723
|
+
}).trim();
|
|
6724
|
+
if (!gitStatus || gitStatus === "Not a git repo") {
|
|
6725
|
+
addSystemMessage("No changes detected (not a git repo or no modifications).");
|
|
6726
|
+
} else {
|
|
6727
|
+
addSystemMessage("Changed files:");
|
|
6728
|
+
for (const line of gitStatus.split("\n")) {
|
|
6729
|
+
addSystemMessage(` ${line}`);
|
|
6730
|
+
}
|
|
6731
|
+
}
|
|
6732
|
+
} catch {
|
|
6733
|
+
addSystemMessage("Could not check changes.");
|
|
6734
|
+
}
|
|
6735
|
+
break;
|
|
6736
|
+
}
|
|
6737
|
+
case "doctor": {
|
|
6738
|
+
addSystemMessage("Running diagnostics...");
|
|
6739
|
+
const authResult = await getAuthStatus({ homeDir });
|
|
6740
|
+
addSystemMessage(` Auth: ${authResult.connected ? "connected" : "not connected"} \u2014 ${authResult.message}`);
|
|
6741
|
+
addSystemMessage(` Workspace: ${workspacePath ? workspacePath : "none"}`);
|
|
6742
|
+
addSystemMessage(` Files: ${workspaceFiles.length}`);
|
|
6743
|
+
addSystemMessage(` Skills: ${skills2.length} loaded`);
|
|
6744
|
+
const mems = await loadMemories({ homeDir });
|
|
6745
|
+
addSystemMessage(` Memories: ${mems.length} stored`);
|
|
6746
|
+
addSystemMessage(` Node: ${process.version}`);
|
|
6747
|
+
const toolChecks = ["python3 --version", "pdflatex --version", "git --version"];
|
|
6748
|
+
for (const cmd2 of toolChecks) {
|
|
6749
|
+
try {
|
|
6750
|
+
const { execSync } = __require("child_process");
|
|
6751
|
+
const out = execSync(cmd2 + " 2>&1", { encoding: "utf8", timeout: 3e3 }).trim().split("\n")[0];
|
|
6752
|
+
addSystemMessage(` ${cmd2.split(" ")[0]}: ${out}`);
|
|
6753
|
+
} catch {
|
|
6754
|
+
addSystemMessage(` ${cmd2.split(" ")[0]}: not found`);
|
|
6755
|
+
}
|
|
6756
|
+
}
|
|
6757
|
+
addSystemMessage("Diagnostics complete.");
|
|
6758
|
+
break;
|
|
6759
|
+
}
|
|
6461
6760
|
case "preview": {
|
|
6462
6761
|
if (!args) {
|
|
6463
6762
|
addSystemMessage("Usage: /preview <path-to-tex-file>");
|
|
@@ -7182,7 +7481,7 @@ program.name("open-research").description("Local-first research CLI powered by C
|
|
|
7182
7481
|
}),
|
|
7183
7482
|
{
|
|
7184
7483
|
kittyKeyboard: {
|
|
7185
|
-
mode: "
|
|
7484
|
+
mode: "enabled",
|
|
7186
7485
|
flags: ["disambiguateEscapeCodes", "reportAlternateKeys"]
|
|
7187
7486
|
}
|
|
7188
7487
|
}
|
package/package.json
CHANGED