open-research 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +145 -22
- package/dist/cli.js +324 -32
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -74,36 +74,159 @@ It has tools that coding agents don't: federated academic paper search, PDF extr
|
|
|
74
74
|
|
|
75
75
|
Everything stays local. Your workspace is a directory with `sources/`, `notes/`, `papers/`, `experiments/`. The agent reads and writes to it. Risky edits go to a review queue.
|
|
76
76
|
|
|
77
|
-
##
|
|
77
|
+
## Agent Modes
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
Open Research operates in three modes. Cycle with `Shift+Tab`:
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
- **devils-advocate** — stress-test claims and assumptions
|
|
83
|
-
- **methodology-critic** — critique research methodology
|
|
84
|
-
- **evidence-adjudicator** — evaluate evidence quality
|
|
85
|
-
- **experiment-designer** — design experiments
|
|
86
|
-
- **draft-paper** — draft LaTeX papers from workspace evidence
|
|
87
|
-
- **paper-explainer** — explain complex papers
|
|
88
|
-
- **synthesis-updater** — update syntheses with new findings
|
|
81
|
+
### Manual Review (default)
|
|
89
82
|
|
|
90
|
-
|
|
83
|
+
The agent proposes changes. You review and accept (`a`) or reject (`r`) each one. Best for sensitive work where every edit matters.
|
|
84
|
+
|
|
85
|
+
### Auto-Approve
|
|
86
|
+
|
|
87
|
+
All file writes are applied immediately without review. Best for exploratory work where speed matters more than control.
|
|
88
|
+
|
|
89
|
+
### Auto-Research
|
|
90
|
+
|
|
91
|
+
The most powerful mode. A two-phase autonomous research workflow:
|
|
92
|
+
|
|
93
|
+
**Phase 1 — Planning.** The agent enters read-only planning mode. It reads your workspace, searches academic databases, and asks you clarifying questions. It then produces a **Research Charter** — a structured contract defining:
|
|
94
|
+
|
|
95
|
+
- The research question (precisely stated)
|
|
96
|
+
- Success criteria (what "done" looks like)
|
|
97
|
+
- Scope boundaries (what's explicitly out of scope)
|
|
98
|
+
- Known starting points (papers, data, leads)
|
|
99
|
+
- Proposed investigation steps
|
|
100
|
+
|
|
101
|
+
You review the charter and either approve it, send it back for revision, or cancel.
|
|
102
|
+
|
|
103
|
+
**Phase 2 — Execution.** Once approved, the agent executes the charter autonomously — searching papers, reading sources, running analysis code, writing notes, and producing artifacts. It runs until the success criteria are met or it hits a dead end and reports what it found.
|
|
104
|
+
|
|
105
|
+
## Research Skills
|
|
106
|
+
|
|
107
|
+
Skills are pluggable research methodologies — detailed workflow prompts that guide the agent through a specific research task. Type `/<skill-name>` to activate.
|
|
108
|
+
|
|
109
|
+
### Discovery & Reading
|
|
110
|
+
|
|
111
|
+
| Skill | What it does |
|
|
112
|
+
|---|---|
|
|
113
|
+
| **`/source-scout`** | Systematically finds papers the workspace is missing. Searches with multiple query variations, evaluates relevance by citation count and venue, fetches key papers, produces a prioritized scout report with gap analysis. |
|
|
114
|
+
| **`/paper-explainer`** | Deep-reads a paper and produces a structured breakdown: one-sentence summary, problem & motivation, key contributions, method explained at two levels (intuitive + technical), experimental results, limitations, and connections to your workspace. |
|
|
115
|
+
| **`/literature-reviewer`** | Produces a structured literature review: inventories all sources, clusters by theme, synthesizes each theme chronologically, maps relationships between papers, performs gap analysis (methodological, empirical, theoretical), and writes the review with optional PRISMA systematic review support. |
|
|
116
|
+
|
|
117
|
+
### Critical Evaluation
|
|
118
|
+
|
|
119
|
+
| Skill | What it does |
|
|
120
|
+
|---|---|
|
|
121
|
+
| **`/devils-advocate`** | Stress-tests every claim in the workspace. Attacks each one through six lenses: evidence gap, logical gap, scope overclaim, alternative explanation, replication concern, and statistical concern. Actively searches for counter-evidence. Rates each weakness as Critical/Significant/Minor. |
|
|
122
|
+
| **`/methodology-critic`** | Reviews study design, sample selection, controls, measurement validity, statistical methods, and reporting completeness. If code is available, reproduces the analysis to verify results. Rates each study Rigorous/Acceptable/Concerning/Flawed. |
|
|
123
|
+
| **`/evidence-adjudicator`** | Judges conflicting claims using a formal evidence hierarchy (meta-analysis → RCT → cohort → case study → opinion). Checks for bias and conflicts of interest. Delivers a clear verdict with evidence ratings: Strong/Moderate/Weak/Insufficient. |
|
|
124
|
+
|
|
125
|
+
### Analysis & Experimentation
|
|
126
|
+
|
|
127
|
+
| Skill | What it does |
|
|
128
|
+
|---|---|
|
|
129
|
+
| **`/experiment-designer`** | Autonomous proof engine. Takes a hypothesis and runs the full loop: formalize → design minimal experiment → write code → run it → analyze results → iterate (up to 5x) until proven or disproven. All artifacts saved to `experiments/` with versioned scripts. |
|
|
130
|
+
| **`/data-analyst`** | End-to-end statistical analysis: explore data (distributions, missing values) → clean (with documented decisions) → analyze (appropriate tests, mandatory effect sizes and confidence intervals) → visualize (matplotlib/seaborn) → interpret with honest caveats. |
|
|
131
|
+
|
|
132
|
+
### Synthesis & Writing
|
|
133
|
+
|
|
134
|
+
| Skill | What it does |
|
|
135
|
+
|---|---|
|
|
136
|
+
| **`/synthesis-updater`** | Living-document management. Integrates new evidence into existing notes with full provenance tracking (`[Source: Author Year]`), confidence labels (`[Strong]`, `[Moderate]`, `[Weak]`, `[Contested]`), change trails, and a synthesis changelog. |
|
|
137
|
+
| **`/draft-paper`** | Drafts a publication-quality LaTeX paper: gathers workspace evidence → outlines the argument → writes each section (intro through conclusion) → generates BibTeX from sources → self-reviews for unsupported claims and argument flow. |
|
|
138
|
+
|
|
139
|
+
### Meta
|
|
140
|
+
|
|
141
|
+
| Skill | What it does |
|
|
142
|
+
|---|---|
|
|
143
|
+
| **`/skill-creator`** | Create your own custom skills in `~/.open-research/skills/`. Each skill is a markdown file with a workflow prompt — no code needed. |
|
|
144
|
+
|
|
145
|
+
## Memory
|
|
146
|
+
|
|
147
|
+
The agent learns about you automatically. After each conversation, a background process identifies facts worth remembering — your research field, preferred tools, current projects, methodological preferences.
|
|
148
|
+
|
|
149
|
+
Memories persist in `~/.open-research/memory.json` across sessions. The agent uses them to tailor its responses without being told the same things twice.
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
/memory View all stored memories
|
|
153
|
+
/memory clear Delete everything
|
|
154
|
+
/memory delete <id> Remove a specific memory
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Live LaTeX Preview
|
|
158
|
+
|
|
159
|
+
When the agent drafts a paper, preview it instantly:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
/preview papers/draft.tex
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Opens a localhost server in your browser with:
|
|
166
|
+
- Sections, math (KaTeX), citations, lists rendered as styled HTML
|
|
167
|
+
- Auto-reload — the page refreshes every time the file changes
|
|
168
|
+
- Dark theme matching the CLI aesthetic
|
|
169
|
+
- No LaTeX installation required for preview
|
|
170
|
+
|
|
171
|
+
For final PDF output, the agent compiles with `pdflatex` or `tectonic` via `run_command`.
|
|
91
172
|
|
|
92
173
|
## Tools
|
|
93
174
|
|
|
175
|
+
The agent has 13 tools with full filesystem and shell access:
|
|
176
|
+
|
|
94
177
|
| Tool | Description |
|
|
95
178
|
|---|---|
|
|
96
|
-
| `read_file` | Read any file
|
|
97
|
-
| `read_pdf` | Extract text from PDFs |
|
|
98
|
-
| `run_command` | Shell execution — Python, R, LaTeX, anything |
|
|
99
|
-
| `list_directory` | Explore directory trees |
|
|
100
|
-
| `search_external_sources` | arXiv + Semantic Scholar + OpenAlex |
|
|
101
|
-
| `fetch_url` | Fetch web pages and APIs |
|
|
179
|
+
| `read_file` | Read any file — streaming, binary detection, `~` expansion |
|
|
180
|
+
| `read_pdf` | Extract text from PDFs with page-range selection |
|
|
181
|
+
| `run_command` | Shell execution — Python, R, LaTeX, curl, git, anything |
|
|
182
|
+
| `list_directory` | Explore directory trees with depth control |
|
|
183
|
+
| `search_external_sources` | Federated search: arXiv + Semantic Scholar + OpenAlex |
|
|
184
|
+
| `fetch_url` | Fetch web pages and APIs, HTML auto-converted to text via cheerio |
|
|
102
185
|
| `write_new_file` | Create workspace files |
|
|
103
|
-
| `update_existing_file` | Edit with review policy |
|
|
104
|
-
| `ask_user` | Pause and ask
|
|
105
|
-
| `search_workspace` | Full-text search across files |
|
|
106
|
-
| `create_paper` | Create LaTeX drafts |
|
|
186
|
+
| `update_existing_file` | Edit existing files with review policy |
|
|
187
|
+
| `ask_user` | Pause and ask the user a question with selectable options |
|
|
188
|
+
| `search_workspace` | Full-text search across workspace files |
|
|
189
|
+
| `create_paper` | Create LaTeX paper drafts |
|
|
190
|
+
| `load_skill` | Activate a research skill |
|
|
191
|
+
| `read_skill_reference` | Read reference materials from active skills |
|
|
192
|
+
|
|
193
|
+
## Commands
|
|
194
|
+
|
|
195
|
+
| Command | Description |
|
|
196
|
+
|---|---|
|
|
197
|
+
| `/auth` | Connect OpenAI account via browser |
|
|
198
|
+
| `/auth-codex` | Import existing Codex CLI auth |
|
|
199
|
+
| `/init` | Initialize workspace in current directory |
|
|
200
|
+
| `/skills` | List available research skills |
|
|
201
|
+
| `/preview <file>` | Live-preview a LaTeX file in browser |
|
|
202
|
+
| `/memory` | View or manage stored memories |
|
|
203
|
+
| `/config` | View or change settings (model, theme, mode) |
|
|
204
|
+
| `/resume` | Resume a previous session |
|
|
205
|
+
| `/clear` | Start a new conversation |
|
|
206
|
+
| `/help` | Show all commands |
|
|
207
|
+
|
|
208
|
+
## Workspace
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
my-research/
|
|
212
|
+
sources/ # PDFs, papers, raw data
|
|
213
|
+
notes/ # Research notes, syntheses, reviews
|
|
214
|
+
artifacts/ # Generated outputs
|
|
215
|
+
papers/ # LaTeX paper drafts
|
|
216
|
+
experiments/ # Analysis scripts, results, hypotheses
|
|
217
|
+
.open-research/ # Workspace metadata and session logs
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Features
|
|
221
|
+
|
|
222
|
+
- **Terminal markdown** — bold, italic, code blocks, headings rendered natively
|
|
223
|
+
- **Autocomplete** — slash commands and skills in an arrow-key navigable dropdown
|
|
224
|
+
- **@file mentions** — reference workspace files inline in prompts
|
|
225
|
+
- **Shift+Enter** — multi-line input
|
|
226
|
+
- **Context management** — automatic compaction when history reaches 80% of the model's context window (capped at 250k tokens)
|
|
227
|
+
- **Token tracking** — context usage visible in the status bar
|
|
228
|
+
- **Tool activity streaming** — real-time display of what the agent is doing
|
|
229
|
+
- **Update notifications** — checks for new versions on launch
|
|
107
230
|
|
|
108
231
|
## Development
|
|
109
232
|
|
|
@@ -112,7 +235,7 @@ git clone https://github.com/gangj277/open-research.git
|
|
|
112
235
|
cd open-research
|
|
113
236
|
npm install
|
|
114
237
|
npm run dev # dev mode
|
|
115
|
-
npm test #
|
|
238
|
+
npm test # 80 tests
|
|
116
239
|
npm run build # production build
|
|
117
240
|
```
|
|
118
241
|
|
package/dist/cli.js
CHANGED
|
@@ -1779,11 +1779,17 @@ function createOpenAIAuthProvider(credentials, onTokenRefresh, onValidationChang
|
|
|
1779
1779
|
} else if (event.type === "response.completed") {
|
|
1780
1780
|
const resp = event.data.response;
|
|
1781
1781
|
if (resp?.usage) {
|
|
1782
|
-
const
|
|
1782
|
+
const u = resp.usage;
|
|
1783
|
+
const inputDetails = u.input_tokens_details;
|
|
1784
|
+
const outputDetails = u.output_tokens_details;
|
|
1785
|
+
const inputTokens = u.input_tokens ?? 0;
|
|
1786
|
+
const outputTokens = u.output_tokens ?? 0;
|
|
1783
1787
|
usageData = {
|
|
1784
|
-
promptTokens:
|
|
1785
|
-
completionTokens:
|
|
1786
|
-
totalTokens:
|
|
1788
|
+
promptTokens: inputTokens,
|
|
1789
|
+
completionTokens: outputTokens,
|
|
1790
|
+
totalTokens: u.total_tokens ?? inputTokens + outputTokens,
|
|
1791
|
+
cachedTokens: inputDetails?.cached_tokens ?? 0,
|
|
1792
|
+
reasoningTokens: outputDetails?.reasoning_tokens ?? 0
|
|
1787
1793
|
};
|
|
1788
1794
|
}
|
|
1789
1795
|
if (resp?.model) {
|
|
@@ -1895,11 +1901,17 @@ function createOpenAIAuthProvider(credentials, onTokenRefresh, onValidationChang
|
|
|
1895
1901
|
case "response.completed": {
|
|
1896
1902
|
const resp = event.data.response;
|
|
1897
1903
|
if (resp?.usage) {
|
|
1898
|
-
const
|
|
1904
|
+
const u = resp.usage;
|
|
1905
|
+
const inputDetails = u.input_tokens_details;
|
|
1906
|
+
const outputDetails = u.output_tokens_details;
|
|
1907
|
+
const inputTokens = u.input_tokens ?? 0;
|
|
1908
|
+
const outputTokens = u.output_tokens ?? 0;
|
|
1899
1909
|
usage = {
|
|
1900
|
-
promptTokens:
|
|
1901
|
-
completionTokens:
|
|
1902
|
-
totalTokens:
|
|
1910
|
+
promptTokens: inputTokens,
|
|
1911
|
+
completionTokens: outputTokens,
|
|
1912
|
+
totalTokens: u.total_tokens ?? inputTokens + outputTokens,
|
|
1913
|
+
cachedTokens: inputDetails?.cached_tokens ?? 0,
|
|
1914
|
+
reasoningTokens: outputDetails?.reasoning_tokens ?? 0
|
|
1903
1915
|
};
|
|
1904
1916
|
}
|
|
1905
1917
|
break;
|
|
@@ -4507,35 +4519,65 @@ var MODEL_CONTEXT_WINDOWS = {
|
|
|
4507
4519
|
"o4-mini": 2e5
|
|
4508
4520
|
};
|
|
4509
4521
|
var DEFAULT_CONTEXT_WINDOW = 128e3;
|
|
4510
|
-
var
|
|
4522
|
+
var AUTO_COMPACT_TOKEN_LIMIT = 25e4;
|
|
4511
4523
|
function getContextWindow(model) {
|
|
4512
4524
|
return MODEL_CONTEXT_WINDOWS[model] ?? DEFAULT_CONTEXT_WINDOW;
|
|
4513
4525
|
}
|
|
4514
4526
|
function getCompactThreshold(model) {
|
|
4515
|
-
|
|
4527
|
+
const window = getContextWindow(model);
|
|
4528
|
+
return window > AUTO_COMPACT_TOKEN_LIMIT ? AUTO_COMPACT_TOKEN_LIMIT : Math.floor(window * 0.8);
|
|
4529
|
+
}
|
|
4530
|
+
function emptyBreakdown() {
|
|
4531
|
+
return { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 }, total: 0 };
|
|
4516
4532
|
}
|
|
4517
4533
|
function createSessionUsage() {
|
|
4518
4534
|
return {
|
|
4535
|
+
cumulative: emptyBreakdown(),
|
|
4536
|
+
lastTurn: emptyBreakdown(),
|
|
4537
|
+
estimatedCurrentTokens: 0,
|
|
4538
|
+
compactionCount: 0,
|
|
4519
4539
|
inputTokens: 0,
|
|
4520
4540
|
outputTokens: 0,
|
|
4521
4541
|
totalTokens: 0,
|
|
4522
|
-
lastTurnTokens: 0
|
|
4523
|
-
estimatedCurrentTokens: 0,
|
|
4524
|
-
compactionCount: 0
|
|
4542
|
+
lastTurnTokens: 0
|
|
4525
4543
|
};
|
|
4526
4544
|
}
|
|
4527
4545
|
function updateUsageFromApi(usage, apiUsage) {
|
|
4528
|
-
|
|
4529
|
-
|
|
4530
|
-
|
|
4546
|
+
const cached = apiUsage.cachedTokens ?? 0;
|
|
4547
|
+
const reasoning = apiUsage.reasoningTokens ?? 0;
|
|
4548
|
+
const adjustedInput = Math.max(0, apiUsage.promptTokens - cached);
|
|
4549
|
+
const adjustedOutput = Math.max(0, apiUsage.completionTokens - reasoning);
|
|
4550
|
+
usage.cumulative.input += adjustedInput;
|
|
4551
|
+
usage.cumulative.output += adjustedOutput;
|
|
4552
|
+
usage.cumulative.reasoning += reasoning;
|
|
4553
|
+
usage.cumulative.cache.read += cached;
|
|
4554
|
+
usage.cumulative.total += apiUsage.totalTokens;
|
|
4555
|
+
usage.lastTurn = {
|
|
4556
|
+
input: adjustedInput,
|
|
4557
|
+
output: adjustedOutput,
|
|
4558
|
+
reasoning,
|
|
4559
|
+
cache: { read: cached, write: 0 },
|
|
4560
|
+
total: apiUsage.totalTokens
|
|
4561
|
+
};
|
|
4562
|
+
usage.inputTokens = usage.cumulative.input;
|
|
4563
|
+
usage.outputTokens = usage.cumulative.output;
|
|
4564
|
+
usage.totalTokens = usage.cumulative.total;
|
|
4531
4565
|
usage.lastTurnTokens = apiUsage.totalTokens;
|
|
4532
4566
|
}
|
|
4533
4567
|
var PRUNE_PROTECT_TOKENS = 4e4;
|
|
4534
4568
|
var PRUNE_MIN_SAVINGS = 2e4;
|
|
4569
|
+
var PRUNE_SKIP_RECENT_USER_TURNS = 2;
|
|
4535
4570
|
function pruneToolOutputs(messages) {
|
|
4571
|
+
const userIndices = [];
|
|
4572
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
4573
|
+
if (messages[i].role === "user") userIndices.push(i);
|
|
4574
|
+
}
|
|
4575
|
+
const protectBoundary = userIndices.length >= PRUNE_SKIP_RECENT_USER_TURNS ? userIndices[PRUNE_SKIP_RECENT_USER_TURNS - 1] : 0;
|
|
4536
4576
|
const toolIndices = [];
|
|
4537
4577
|
for (let i = 0; i < messages.length; i++) {
|
|
4538
|
-
if (messages[i].role === "tool"
|
|
4578
|
+
if (messages[i].role === "tool" && i < protectBoundary) {
|
|
4579
|
+
toolIndices.push(i);
|
|
4580
|
+
}
|
|
4539
4581
|
}
|
|
4540
4582
|
if (toolIndices.length === 0) return { messages, savedTokens: 0 };
|
|
4541
4583
|
let protectedTokens = 0;
|
|
@@ -4555,44 +4597,105 @@ function pruneToolOutputs(messages) {
|
|
|
4555
4597
|
const idx = toolIndices[i];
|
|
4556
4598
|
const msg = result[idx];
|
|
4557
4599
|
const oldTokens = estimateMessageTokens(msg);
|
|
4558
|
-
const
|
|
4559
|
-
savedTokens += oldTokens - estimateTokens(
|
|
4560
|
-
result[idx] = { ...msg, content:
|
|
4600
|
+
const stub = "[output pruned \u2014 use read_file to re-read if needed]";
|
|
4601
|
+
savedTokens += oldTokens - estimateTokens(stub);
|
|
4602
|
+
result[idx] = { ...msg, content: stub };
|
|
4561
4603
|
}
|
|
4562
4604
|
if (savedTokens < PRUNE_MIN_SAVINGS) {
|
|
4563
4605
|
return { messages, savedTokens: 0 };
|
|
4564
4606
|
}
|
|
4565
4607
|
return { messages: result, savedTokens };
|
|
4566
4608
|
}
|
|
4567
|
-
|
|
4609
|
+
var COMPACTION_SYSTEM_PROMPT = `You are a conversation summarizer for a research agent. Your job is to create a handoff summary that another agent instance can use to seamlessly continue the work.
|
|
4610
|
+
|
|
4611
|
+
Do not respond to any questions in the conversation. Only output the summary.
|
|
4612
|
+
Respond in the same language the user used.`;
|
|
4613
|
+
var COMPACTION_USER_TEMPLATE = `Provide a detailed summary of our conversation above for handoff to another agent that will continue the work.
|
|
4614
|
+
|
|
4615
|
+
Stick to this template:
|
|
4616
|
+
|
|
4617
|
+
## Goal
|
|
4618
|
+
[What is the user trying to accomplish? Be specific.]
|
|
4619
|
+
|
|
4620
|
+
## Instructions
|
|
4621
|
+
- [Important instructions or preferences the user gave]
|
|
4622
|
+
- [Research methodology constraints or requirements]
|
|
4623
|
+
- [If there is a research charter or plan, summarize its key points]
|
|
4624
|
+
|
|
4625
|
+
## Discoveries
|
|
4626
|
+
- [Key findings from paper searches, data analysis, or experiments]
|
|
4627
|
+
- [Important facts, numbers, or evidence discovered]
|
|
4628
|
+
- [Any surprising or contradicting results]
|
|
4629
|
+
|
|
4630
|
+
## Accomplished
|
|
4631
|
+
- [What work has been completed]
|
|
4632
|
+
- [What is currently in progress]
|
|
4633
|
+
- [What remains to be done]
|
|
4634
|
+
|
|
4635
|
+
## Relevant Files
|
|
4636
|
+
[List workspace files that were read, created, or modified. Include what each contains.]
|
|
4637
|
+
- path/to/file.md \u2014 description of contents
|
|
4638
|
+
- experiments/script.py \u2014 what it does and its results
|
|
4639
|
+
|
|
4640
|
+
## Active Context
|
|
4641
|
+
- [Current research question or hypothesis being investigated]
|
|
4642
|
+
- [Which skills are active]
|
|
4643
|
+
- [Any pending user decisions or questions]
|
|
4644
|
+
|
|
4645
|
+
## Next Steps
|
|
4646
|
+
1. [Most immediate next action]
|
|
4647
|
+
2. [Following action]
|
|
4648
|
+
3. [And so on]
|
|
4649
|
+
|
|
4650
|
+
{CUSTOM_INSTRUCTIONS}`;
|
|
4651
|
+
async function compactConversation(messages, provider, model, customInstructions, signal) {
|
|
4568
4652
|
const systemMsg = messages.find((m) => m.role === "system");
|
|
4569
4653
|
const conversationMsgs = messages.filter((m) => m.role !== "system");
|
|
4570
4654
|
const conversationText = conversationMsgs.map((m) => {
|
|
4571
4655
|
const role = m.role === "assistant" ? "Agent" : m.role === "user" ? "User" : "Tool";
|
|
4572
|
-
|
|
4573
|
-
|
|
4656
|
+
let content;
|
|
4657
|
+
if (typeof m.content === "string") {
|
|
4658
|
+
content = m.content.length > 3e3 ? m.content.slice(0, 3e3) + "\n[... truncated]" : m.content;
|
|
4659
|
+
} else if (m.content) {
|
|
4660
|
+
content = JSON.stringify(m.content).slice(0, 1e3);
|
|
4661
|
+
} else if (m.tool_calls?.length) {
|
|
4662
|
+
content = m.tool_calls.map((tc) => `[tool: ${tc.function.name}]`).join(", ");
|
|
4663
|
+
} else {
|
|
4664
|
+
content = "[empty]";
|
|
4665
|
+
}
|
|
4666
|
+
return `[${role}]: ${content}`;
|
|
4574
4667
|
}).join("\n\n");
|
|
4668
|
+
const customBlock = customInstructions ? `
|
|
4669
|
+
|
|
4670
|
+
Additional instructions: ${customInstructions}` : "";
|
|
4671
|
+
// Use a replacer function so that "$" sequences in user-supplied instructions (e.g. "$&", "$$") are inserted literally rather than expanded as replacement patterns.
const userPrompt = COMPACTION_USER_TEMPLATE.replace("{CUSTOM_INSTRUCTIONS}", () => customBlock);
|
|
4672
|
+
const compactionModel = model.includes("5.4") ? "gpt-5.4-mini" : model;
|
|
4575
4673
|
const summaryResponse = await provider.callLLM({
|
|
4576
4674
|
messages: [
|
|
4577
|
-
{
|
|
4578
|
-
role: "system",
|
|
4579
|
-
content: "You are performing a CONTEXT COMPACTION. Summarize the conversation into a concise handoff document. Include:\n1. **Goal**: What the user is trying to accomplish\n2. **Key discoveries**: Important findings, file paths, data points\n3. **Work completed**: What has been done so far\n4. **Next steps**: What should happen next\n5. **Active files**: Key file paths and their contents summary\n\nBe concise but preserve all actionable information. This summary will replace the full conversation history."
|
|
4580
|
-
},
|
|
4675
|
+
{ role: "system", content: COMPACTION_SYSTEM_PROMPT },
|
|
4581
4676
|
{
|
|
4582
4677
|
role: "user",
|
|
4583
|
-
content: `
|
|
4678
|
+
content: `Here is the conversation to summarize:
|
|
4679
|
+
|
|
4680
|
+
${conversationText.slice(0, 12e4)}
|
|
4681
|
+
|
|
4682
|
+
---
|
|
4584
4683
|
|
|
4585
|
-
${
|
|
4684
|
+
${userPrompt}`
|
|
4586
4685
|
}
|
|
4587
4686
|
],
|
|
4588
|
-
model,
|
|
4687
|
+
model: compactionModel,
|
|
4589
4688
|
maxTokens: 4096
|
|
4590
4689
|
});
|
|
4591
4690
|
const compacted = [];
|
|
4592
4691
|
if (systemMsg) compacted.push(systemMsg);
|
|
4593
4692
|
compacted.push({
|
|
4594
4693
|
role: "user",
|
|
4595
|
-
content: "
|
|
4694
|
+
content: "What have we accomplished so far in this research session?"
|
|
4695
|
+
});
|
|
4696
|
+
compacted.push({
|
|
4697
|
+
role: "assistant",
|
|
4698
|
+
content: summaryResponse.content
|
|
4596
4699
|
});
|
|
4597
4700
|
return compacted;
|
|
4598
4701
|
}
|
|
@@ -4610,7 +4713,17 @@ async function maybeCompact(messages, model, provider, usage, signal) {
|
|
|
4610
4713
|
usage.compactionCount++;
|
|
4611
4714
|
return { messages: pruned, didCompact: true };
|
|
4612
4715
|
}
|
|
4613
|
-
const compacted = await compactConversation(pruned, provider, model, signal);
|
|
4716
|
+
const compacted = await compactConversation(pruned, provider, model, void 0, signal);
|
|
4717
|
+
usage.estimatedCurrentTokens = estimateConversationTokens(compacted);
|
|
4718
|
+
usage.compactionCount++;
|
|
4719
|
+
return { messages: compacted, didCompact: true };
|
|
4720
|
+
}
|
|
4721
|
+
async function manualCompact(messages, model, provider, usage, customInstructions, signal) {
|
|
4722
|
+
if (messages.length <= 2) {
|
|
4723
|
+
return { messages, didCompact: false };
|
|
4724
|
+
}
|
|
4725
|
+
const { messages: pruned } = pruneToolOutputs(messages);
|
|
4726
|
+
const compacted = await compactConversation(pruned, provider, model, customInstructions, signal);
|
|
4614
4727
|
usage.estimatedCurrentTokens = estimateConversationTokens(compacted);
|
|
4615
4728
|
usage.compactionCount++;
|
|
4616
4729
|
return { messages: compacted, didCompact: true };
|
|
@@ -5630,6 +5743,13 @@ var SLASH_COMMANDS = [
|
|
|
5630
5743
|
{ name: "clear", aliases: ["/new"], description: "Clear conversation and start fresh", category: "session" },
|
|
5631
5744
|
{ name: "help", aliases: ["/commands"], description: "Show available commands", category: "system" },
|
|
5632
5745
|
{ name: "config", aliases: ["/settings"], description: "View or change settings (e.g. /config theme dark)", category: "system" },
|
|
5746
|
+
{ name: "compact", aliases: [], description: "Manually compress conversation to save context (e.g. /compact keep the statistics)", category: "session" },
|
|
5747
|
+
{ name: "cost", aliases: ["/tokens", "/usage"], description: "Show token usage and cost for the current session", category: "system" },
|
|
5748
|
+
{ name: "context", aliases: [], description: "Show context window usage \u2014 how full it is", category: "system" },
|
|
5749
|
+
{ name: "btw", aliases: ["/aside"], description: "Ask a side question without affecting the main conversation", category: "session" },
|
|
5750
|
+
{ name: "export", aliases: [], description: "Export conversation as markdown to a file", category: "session" },
|
|
5751
|
+
{ name: "diff", aliases: ["/changes"], description: "Show files the agent has changed in this session", category: "workspace" },
|
|
5752
|
+
{ name: "doctor", aliases: [], description: "Diagnose auth, connectivity, and tool availability", category: "system" },
|
|
5633
5753
|
{ name: "preview", aliases: [], description: "Live preview a LaTeX file in browser (e.g. /preview papers/draft.tex)", category: "workspace" },
|
|
5634
5754
|
{ name: "memory", aliases: ["/memories"], description: "View or clear stored memories about you", category: "system" },
|
|
5635
5755
|
{ name: "exit", aliases: ["/quit", "/q"], description: "Exit Open Research", category: "system" }
|
|
@@ -6458,6 +6578,178 @@ function App({
|
|
|
6458
6578
|
addSystemMessage(" Esc unfocus prompt");
|
|
6459
6579
|
break;
|
|
6460
6580
|
}
|
|
6581
|
+
case "compact": {
|
|
6582
|
+
if (history.length === 0) {
|
|
6583
|
+
addSystemMessage("Nothing to compact \u2014 conversation is empty.");
|
|
6584
|
+
break;
|
|
6585
|
+
}
|
|
6586
|
+
const customInstructions = args || void 0;
|
|
6587
|
+
addSystemMessage(customInstructions ? `Compacting conversation (preserving: ${customInstructions})...` : "Compacting conversation...");
|
|
6588
|
+
setBusy(true);
|
|
6589
|
+
try {
|
|
6590
|
+
const provider = await createProviderFromStoredAuth({ homeDir });
|
|
6591
|
+
const msgs = [{ role: "system", content: "compaction" }, ...history.map((m) => m)];
|
|
6592
|
+
const { messages: compacted, didCompact } = await manualCompact(
|
|
6593
|
+
msgs,
|
|
6594
|
+
config?.defaults.model ?? "gpt-5.4",
|
|
6595
|
+
provider,
|
|
6596
|
+
sessionTokens,
|
|
6597
|
+
customInstructions
|
|
6598
|
+
);
|
|
6599
|
+
if (didCompact) {
|
|
6600
|
+
const newHistory = compacted.filter((m) => m.role !== "system").map((m) => ({
|
|
6601
|
+
role: m.role,
|
|
6602
|
+
content: m.content
|
|
6603
|
+
}));
|
|
6604
|
+
setHistory(newHistory);
|
|
6605
|
+
const k = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}k` : String(n);
|
|
6606
|
+
setTokenDisplay(`${k(sessionTokens.estimatedCurrentTokens)} ctx \xB7 ${k(sessionTokens.totalTokens)} total`);
|
|
6607
|
+
addSystemMessage(`Compacted. Context reduced to ~${Math.round(sessionTokens.estimatedCurrentTokens / 1e3)}k tokens.`);
|
|
6608
|
+
} else {
|
|
6609
|
+
addSystemMessage("Nothing to compact \u2014 conversation too short.");
|
|
6610
|
+
}
|
|
6611
|
+
} catch (err) {
|
|
6612
|
+
addSystemMessage(`Compaction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
6613
|
+
} finally {
|
|
6614
|
+
setBusy(false);
|
|
6615
|
+
}
|
|
6616
|
+
break;
|
|
6617
|
+
}
|
|
6618
|
+
case "cost": {
|
|
6619
|
+
const k = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}k` : String(n);
|
|
6620
|
+
const c = sessionTokens.cumulative;
|
|
6621
|
+
addSystemMessage("Session token usage:");
|
|
6622
|
+
addSystemMessage(` Input: ${k(c.input)} tokens`);
|
|
6623
|
+
addSystemMessage(` Output: ${k(c.output)} tokens`);
|
|
6624
|
+
if (c.reasoning > 0) addSystemMessage(` Reasoning: ${k(c.reasoning)} tokens`);
|
|
6625
|
+
if (c.cache.read > 0) addSystemMessage(` Cache read: ${k(c.cache.read)} tokens`);
|
|
6626
|
+
if (c.cache.write > 0) addSystemMessage(` Cache write: ${k(c.cache.write)} tokens`);
|
|
6627
|
+
addSystemMessage(` Total: ${k(c.total)} tokens`);
|
|
6628
|
+
addSystemMessage(` Context: ~${k(sessionTokens.estimatedCurrentTokens)} (current window)`);
|
|
6629
|
+
addSystemMessage(` Compactions: ${sessionTokens.compactionCount}`);
|
|
6630
|
+
break;
|
|
6631
|
+
}
|
|
6632
|
+
case "context": {
|
|
6633
|
+
const model = config?.defaults.model ?? "gpt-5.4";
|
|
6634
|
+
const window = getContextWindow(model);
|
|
6635
|
+
const threshold = getCompactThreshold(model);
|
|
6636
|
+
const current = sessionTokens.estimatedCurrentTokens || estimateConversationTokens(
|
|
6637
|
+
history.map((m) => m)
|
|
6638
|
+
);
|
|
6639
|
+
const pct = Math.round(current / window * 100);
|
|
6640
|
+
const barWidth = 40;
|
|
6641
|
+
// Clamp to [0, barWidth]: when usage exceeds 100% of the window, `barWidth - filled` would go negative and String.prototype.repeat would throw a RangeError.
const filled = Math.min(barWidth, Math.max(0, Math.round(pct / 100 * barWidth)));
|
|
6642
|
+
const bar = "\u2588".repeat(filled) + "\u2591".repeat(barWidth - filled);
|
|
6643
|
+
const color = pct > 90 ? "red" : pct > 70 ? "yellow" : "green";
|
|
6644
|
+
addSystemMessage(`Context window: ${model} (${(window / 1e3).toFixed(0)}k)`);
|
|
6645
|
+
addSystemMessage(` [${bar}] ${pct}%`);
|
|
6646
|
+
addSystemMessage(` ${(current / 1e3).toFixed(1)}k / ${(window / 1e3).toFixed(0)}k tokens used`);
|
|
6647
|
+
// Derive the percentage from the actual threshold instead of hard-coding it; the threshold is a fraction of the window or a fixed token cap depending on the model.
addSystemMessage(`  Auto-compact at ${(threshold / 1e3).toFixed(0)}k (${Math.round(threshold / window * 100)}%)`);
|
|
6648
|
+
if (pct > 80) {
|
|
6649
|
+
addSystemMessage(" Tip: run /compact to free space, or /clear to start fresh.");
|
|
6650
|
+
}
|
|
6651
|
+
break;
|
|
6652
|
+
}
|
|
6653
|
+
case "btw": {
|
|
6654
|
+
if (!args) {
|
|
6655
|
+
addSystemMessage("Usage: /btw <your side question>");
|
|
6656
|
+
break;
|
|
6657
|
+
}
|
|
6658
|
+
if (!hasAuth) {
|
|
6659
|
+
addSystemMessage("Not connected. Run /auth first.");
|
|
6660
|
+
break;
|
|
6661
|
+
}
|
|
6662
|
+
addSystemMessage(`Side question: ${args}`);
|
|
6663
|
+
setBusy(true);
|
|
6664
|
+
try {
|
|
6665
|
+
const provider = await createProviderFromStoredAuth({ homeDir });
|
|
6666
|
+
const response = await provider.callLLM({
|
|
6667
|
+
messages: [
|
|
6668
|
+
{ role: "system", content: "Answer this quick side question concisely. Do not reference any prior conversation." },
|
|
6669
|
+
{ role: "user", content: args }
|
|
6670
|
+
],
|
|
6671
|
+
model: config?.defaults.model ?? "gpt-5.4",
|
|
6672
|
+
maxTokens: 1e3
|
|
6673
|
+
});
|
|
6674
|
+
addSystemMessage(`Answer: ${response.content}`);
|
|
6675
|
+
} catch (err) {
|
|
6676
|
+
addSystemMessage(`Error: ${err instanceof Error ? err.message : String(err)}`);
|
|
6677
|
+
} finally {
|
|
6678
|
+
setBusy(false);
|
|
6679
|
+
}
|
|
6680
|
+
break;
|
|
6681
|
+
}
|
|
6682
|
+
case "export": {
|
|
6683
|
+
const fileName = args?.trim() || "conversation-export.md";
|
|
6684
|
+
const exportPath = __require("path").resolve(workspacePath ?? process.cwd(), fileName);
|
|
6685
|
+
const lines = [`# Open Research \u2014 Conversation Export
|
|
6686
|
+
`];
|
|
6687
|
+
for (const msg of messages) {
|
|
6688
|
+
if (msg.role === "user") lines.push(`## You
|
|
6689
|
+
${msg.text}
|
|
6690
|
+
`);
|
|
6691
|
+
else if (msg.role === "assistant") lines.push(`## Agent
|
|
6692
|
+
${msg.text}
|
|
6693
|
+
`);
|
|
6694
|
+
else lines.push(`> ${msg.text}
|
|
6695
|
+
`);
|
|
6696
|
+
}
|
|
6697
|
+
try {
|
|
6698
|
+
const fsModule = __require("fs/promises");
|
|
6699
|
+
await fsModule.writeFile(exportPath, lines.join("\n"), "utf8");
|
|
6700
|
+
addSystemMessage(`Exported ${messages.length} messages to ${exportPath}`);
|
|
6701
|
+
} catch (err) {
|
|
6702
|
+
addSystemMessage(`Export failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
6703
|
+
}
|
|
6704
|
+
break;
|
|
6705
|
+
}
|
|
6706
|
+
case "diff": {
|
|
6707
|
+
if (!workspacePath) {
|
|
6708
|
+
addSystemMessage("No workspace active.");
|
|
6709
|
+
break;
|
|
6710
|
+
}
|
|
6711
|
+
try {
|
|
6712
|
+
const { execSync } = __require("child_process");
|
|
6713
|
+
const gitStatus = execSync("git status --short 2>/dev/null || echo 'Not a git repo'", {
|
|
6714
|
+
cwd: workspacePath,
|
|
6715
|
+
encoding: "utf8"
|
|
6716
|
+
}).trim();
|
|
6717
|
+
if (!gitStatus || gitStatus === "Not a git repo") {
|
|
6718
|
+
addSystemMessage("No changes detected (not a git repo or no modifications).");
|
|
6719
|
+
} else {
|
|
6720
|
+
addSystemMessage("Changed files:");
|
|
6721
|
+
for (const line of gitStatus.split("\n")) {
|
|
6722
|
+
addSystemMessage(` ${line}`);
|
|
6723
|
+
}
|
|
6724
|
+
}
|
|
6725
|
+
} catch {
|
|
6726
|
+
addSystemMessage("Could not check changes.");
|
|
6727
|
+
}
|
|
6728
|
+
break;
|
|
6729
|
+
}
|
|
6730
|
+
case "doctor": {
|
|
6731
|
+
addSystemMessage("Running diagnostics...");
|
|
6732
|
+
const authResult = await getAuthStatus({ homeDir });
|
|
6733
|
+
addSystemMessage(` Auth: ${authResult.connected ? "connected" : "not connected"} \u2014 ${authResult.message}`);
|
|
6734
|
+
addSystemMessage(` Workspace: ${workspacePath ? workspacePath : "none"}`);
|
|
6735
|
+
addSystemMessage(` Files: ${workspaceFiles.length}`);
|
|
6736
|
+
addSystemMessage(` Skills: ${skills2.length} loaded`);
|
|
6737
|
+
const mems = await loadMemories({ homeDir });
|
|
6738
|
+
addSystemMessage(` Memories: ${mems.length} stored`);
|
|
6739
|
+
addSystemMessage(` Node: ${process.version}`);
|
|
6740
|
+
const toolChecks = ["python3 --version", "pdflatex --version", "git --version"];
|
|
6741
|
+
for (const cmd2 of toolChecks) {
|
|
6742
|
+
try {
|
|
6743
|
+
const { execSync } = __require("child_process");
|
|
6744
|
+
const out = execSync(cmd2 + " 2>&1", { encoding: "utf8", timeout: 3e3 }).trim().split("\n")[0];
|
|
6745
|
+
addSystemMessage(` ${cmd2.split(" ")[0]}: ${out}`);
|
|
6746
|
+
} catch {
|
|
6747
|
+
addSystemMessage(` ${cmd2.split(" ")[0]}: not found`);
|
|
6748
|
+
}
|
|
6749
|
+
}
|
|
6750
|
+
addSystemMessage("Diagnostics complete.");
|
|
6751
|
+
break;
|
|
6752
|
+
}
|
|
6461
6753
|
case "preview": {
|
|
6462
6754
|
if (!args) {
|
|
6463
6755
|
addSystemMessage("Usage: /preview <path-to-tex-file>");
|
package/package.json
CHANGED