open-research 0.1.26 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -85
- package/dist/chunk-3RG5ZIWI.js +10 -0
- package/dist/chunk-3WM33M3O.js +38 -0
- package/dist/chunk-I5NVYKG7.js +37 -0
- package/dist/chunk-IOR7G25X.js +215 -0
- package/dist/chunk-KJHM7ZW2.js +15 -0
- package/dist/chunk-TQSQRNX6.js +515 -0
- package/dist/{chunk-AYB7CAO5.js → chunk-ZUSIRA5S.js} +6 -47
- package/dist/cli.js +528 -452
- package/dist/manager-queue-F4VVZMTE.js +608 -0
- package/dist/query-agent-LRUUJR4F.js +193 -0
- package/dist/read-tools-GHBKBZFE.js +13 -0
- package/dist/relevance-agent-CCN7JGTM.js +74 -0
- package/dist/scaffolding-MSAICMWV.js +90 -0
- package/dist/{sessions-FMB5GHSR.js → sessions-GRES2MUV.js} +3 -1
- package/dist/status-GEEAGLPF.js +120 -0
- package/dist/store-LT5EGDOI.js +13 -0
- package/dist/web-search-B7D5WMHU.js +177 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,11 +15,6 @@
|
|
|
15
15
|
|
|
16
16
|
## Install
|
|
17
17
|
|
|
18
|
-
```bash
|
|
19
|
-
# curl
|
|
20
|
-
curl -fsSL https://raw.githubusercontent.com/gangj277/open-research/main/install.sh | bash
|
|
21
|
-
```
|
|
22
|
-
|
|
23
18
|
```bash
|
|
24
19
|
# npm
|
|
25
20
|
npm install -g open-research
|
|
@@ -64,170 +59,228 @@ Then ask anything:
|
|
|
64
59
|
and identify gaps in the literature
|
|
65
60
|
```
|
|
66
61
|
|
|
67
|
-
The agent searches arXiv, Semantic Scholar, and OpenAlex — reads papers, runs analysis scripts, writes source-grounded notes, and drafts artifacts in your local workspace.
|
|
62
|
+
The agent searches arXiv, Semantic Scholar, and OpenAlex — reads papers (including PDFs), extracts evidence for and against your research target, runs analysis scripts, writes source-grounded notes, and drafts artifacts in your local workspace.
|
|
68
63
|
|
|
69
64
|
## How is this different from Cursor / Claude Code?
|
|
70
65
|
|
|
71
66
|
Those are coding agents. Open Research is a **research agent**.
|
|
72
67
|
|
|
73
|
-
It has tools that coding agents don't: federated academic paper search
|
|
68
|
+
It has tools that coding agents don't: federated academic paper search with target extraction, web search with evidence analysis, PDF parsing from URLs, a research knowledge graph (ontology), sub-agent delegation, and pluggable research skills.
|
|
74
69
|
|
|
75
70
|
Everything stays local. Your workspace is a directory with `sources/`, `notes/`, `papers/`, `experiments/`. The agent reads and writes to it. Risky edits go to a review queue.
|
|
76
71
|
|
|
77
|
-
##
|
|
72
|
+
## Research Ontology
|
|
78
73
|
|
|
79
|
-
|
|
74
|
+
The agent automatically builds a **structured knowledge graph** as you research. Every paper read, claim made, finding extracted, and method discovered gets captured as typed, connected notes.
|
|
80
75
|
|
|
81
|
-
###
|
|
76
|
+
### How it works
|
|
82
77
|
|
|
83
|
-
|
|
78
|
+
You don't manage the ontology manually — it emerges from conversation:
|
|
84
79
|
|
|
85
|
-
|
|
80
|
+
1. **After each turn**, a background ontology manager extracts knowledge from the conversation and tool outputs
|
|
81
|
+
2. **Before each turn**, a relevance agent selects notes related to your current question and injects them as context
|
|
82
|
+
3. **During a turn**, the agent can query the ontology for evidence, contradictions, and connections
|
|
86
83
|
|
|
87
|
-
|
|
84
|
+
### Note types
|
|
88
85
|
|
|
89
|
-
|
|
86
|
+
| Kind | What it captures |
|
|
87
|
+
|------|-----------------|
|
|
88
|
+
| `source` | Citable origin — paper, URL, dataset, book |
|
|
89
|
+
| `finding` | Specific result extracted from a source |
|
|
90
|
+
| `claim` | Argument or assertion in the research |
|
|
91
|
+
| `question` | Open gap, uncertainty, research question |
|
|
92
|
+
| `method` | Methodology or analytical technique |
|
|
93
|
+
| `insight` | Synthesis connecting multiple findings |
|
|
90
94
|
|
|
91
|
-
|
|
95
|
+
### Connections
|
|
92
96
|
|
|
93
|
-
|
|
97
|
+
Notes are linked with typed edges: `supports`, `contradicts`, `derived-from`, `relates-to` — each with a strength (strong/moderate/weak) and a context explaining *why* the connection exists.
|
|
94
98
|
|
|
95
|
-
|
|
96
|
-
- Success criteria (what "done" looks like)
|
|
97
|
-
- Scope boundaries (what's explicitly out of scope)
|
|
98
|
-
- Known starting points (papers, data, leads)
|
|
99
|
-
- Proposed investigation steps
|
|
99
|
+
### Slash commands
|
|
100
100
|
|
|
101
|
-
|
|
101
|
+
```
|
|
102
|
+
/ontology Overview — note counts, contradictions, open questions
|
|
103
|
+
/ontology claims List all claims with evidence counts
|
|
104
|
+
/ontology conflicts Show all contradiction pairs
|
|
105
|
+
/ontology around <term> Find notes related to a topic with their edges
|
|
106
|
+
/ontology delete <id> Remove a note and its edges
|
|
107
|
+
```
|
|
102
108
|
|
|
103
|
-
|
|
109
|
+
### Agent tools
|
|
104
110
|
|
|
105
|
-
|
|
111
|
+
| Tool | What it does |
|
|
112
|
+
|------|-------------|
|
|
113
|
+
| `query_ontology` | Ask research questions — a sub-agent traverses the graph and returns a synthesized answer |
|
|
114
|
+
| `ontology_status` | Get a snapshot: note counts, contradictions, unsupported claims, open questions |
|
|
115
|
+
|
|
116
|
+
## Search with Target Extraction
|
|
106
117
|
|
|
107
|
-
|
|
118
|
+
Both search tools use a **target extraction pipeline**: discover sources → fetch content (PDFs, HTML, abstracts) → extract evidence with gpt-5.4-mini → return structured findings. The main agent never sees raw page content.
|
|
119
|
+
|
|
120
|
+
### Academic search
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
search_external_sources(
|
|
124
|
+
target: "What speedups do efficient attention methods achieve",
|
|
125
|
+
searches: [{ query: "transformer attention efficiency" }]
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Returns structured findings per paper:
|
|
130
|
+
- **Supports**: Evidence supporting your target
|
|
131
|
+
- **Contradicts**: Evidence challenging your target
|
|
132
|
+
- **Related**: Relevant context (methods, definitions, frameworks)
|
|
133
|
+
- **Summary**: One-paragraph synthesis
|
|
134
|
+
- **Relevance score**: 0-10
|
|
135
|
+
|
|
136
|
+
The pipeline handles PDFs from URLs (arXiv, open-access journals): it downloads the file, parses it with pdfjs, and extracts text from the first 5 pages. For arXiv papers, the abstract is used directly (zero network cost).
|
|
137
|
+
|
|
138
|
+
### Web search
|
|
108
139
|
|
|
109
140
|
```
|
|
110
|
-
|
|
141
|
+
web_search(
|
|
142
|
+
target: "Best practices for PyTorch DataLoader multi-GPU",
|
|
143
|
+
query: "pytorch dataloader num_workers multi gpu"
|
|
144
|
+
)
|
|
111
145
|
```
|
|
112
146
|
|
|
113
|
-
|
|
147
|
+
Same extraction pipeline, different discovery backend:
|
|
148
|
+
- **Default**: DuckDuckGo HTML scraping (zero config, no API key)
|
|
149
|
+
- **Upgrade**: Brave Search API for better results — set via `/api-keys brave <key>` (~1,000 free queries/month)
|
|
150
|
+
|
|
151
|
+
## Agent Modes
|
|
152
|
+
|
|
153
|
+
Three modes. Cycle with `Shift+Tab`:
|
|
154
|
+
|
|
155
|
+
- **Manual Review** (default) — agent proposes changes, you accept (`a`) or reject (`r`)
|
|
156
|
+
- **Auto-Approve** — all file writes applied immediately
|
|
157
|
+
- **Auto-Research** — two-phase: planning (produces a Research Charter) → autonomous execution
|
|
158
|
+
|
|
159
|
+
## Sub-Agents
|
|
160
|
+
|
|
161
|
+
The main agent delegates exploration to lightweight sub-agents, each running in its own context window.
|
|
162
|
+
|
|
163
|
+
The **explore** sub-agent (gpt-5.4-mini, high reasoning) has read-only tools and returns concise findings. The main agent gets answers without burning its context on raw file reads.
|
|
164
|
+
|
|
165
|
+
## Task Tracking
|
|
114
166
|
|
|
115
|
-
|
|
167
|
+
For multi-step research, the agent creates a visible task checklist:
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
⠋ Searching for chain-of-thought papers...
|
|
171
|
+
○ Read and extract from top papers
|
|
172
|
+
○ Build comparison table
|
|
173
|
+
✓ 1 completed
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Tasks are injected into the agent's context on every turn — it always knows what it's done and what's next. Toggle with `Ctrl+T`.
|
|
116
177
|
|
|
117
178
|
## Research Skills
|
|
118
179
|
|
|
119
|
-
Skills are pluggable research methodologies
|
|
180
|
+
Skills are pluggable research methodologies. Type `/<skill-name>` to activate.
|
|
120
181
|
|
|
121
182
|
### Ideation & Discovery
|
|
122
183
|
|
|
123
184
|
| Skill | What it does |
|
|
124
185
|
|---|---|
|
|
125
|
-
| **`/novelty-checker`** | Quick "has this been done?" assessment
|
|
126
|
-
| **`/source-scout`** |
|
|
127
|
-
| **`/paper-explainer`** |
|
|
186
|
+
| **`/novelty-checker`** | Quick "has this been done?" assessment with verdict: Novel, Partially novel, Incremental, or Already done. |
|
|
187
|
+
| **`/source-scout`** | Finds papers the workspace is missing with gap analysis and prioritized scout report. |
|
|
188
|
+
| **`/paper-explainer`** | Single paper deep read with red flags, or multi-paper comparison table (Elicit-style). |
|
|
128
189
|
|
|
129
190
|
### Critical Evaluation
|
|
130
191
|
|
|
131
192
|
| Skill | What it does |
|
|
132
193
|
|---|---|
|
|
133
|
-
| **`/devils-advocate`** | Stress-tests
|
|
134
|
-
| **`/methodology-critic`** | Reviews study design,
|
|
135
|
-
| **`/evidence-adjudicator`** | Judges conflicting claims using
|
|
194
|
+
| **`/devils-advocate`** | Stress-tests claims through six lenses. Actively searches for counter-evidence. |
|
|
195
|
+
| **`/methodology-critic`** | Reviews study design, statistical methods, reproducibility. Rates Rigorous to Flawed. |
|
|
196
|
+
| **`/evidence-adjudicator`** | Judges conflicting claims using formal evidence hierarchy. Delivers verdict with ratings. |
|
|
136
197
|
|
|
137
198
|
### Analysis & Experimentation
|
|
138
199
|
|
|
139
200
|
| Skill | What it does |
|
|
140
201
|
|---|---|
|
|
141
|
-
| **`/experiment-designer`** | Autonomous proof engine
|
|
142
|
-
| **`/data-analyst`** | End-to-end statistical analysis
|
|
202
|
+
| **`/experiment-designer`** | Autonomous proof engine: hypothesis → experiment → code → run → iterate. |
|
|
203
|
+
| **`/data-analyst`** | End-to-end statistical analysis with mandatory effect sizes and confidence intervals. |
|
|
143
204
|
|
|
144
205
|
### Writing & Revision
|
|
145
206
|
|
|
146
207
|
| Skill | What it does |
|
|
147
208
|
|---|---|
|
|
148
|
-
| **`/draft-paper`** | Drafts
|
|
149
|
-
| **`/reviewer-response`** | Parses peer review
|
|
209
|
+
| **`/draft-paper`** | Drafts publication-quality LaTeX with BibTeX from workspace sources. |
|
|
210
|
+
| **`/reviewer-response`** | Parses peer review, generates point-by-point response letter with revision tracking. |
|
|
150
211
|
|
|
151
212
|
### Meta
|
|
152
213
|
|
|
153
214
|
| Skill | What it does |
|
|
154
215
|
|---|---|
|
|
155
|
-
| **`/skill-creator`** | Create custom skills
|
|
216
|
+
| **`/skill-creator`** | Create custom skills with full format guide and validation. |
|
|
156
217
|
|
|
157
218
|
## Memory
|
|
158
219
|
|
|
159
|
-
The agent learns about you automatically
|
|
220
|
+
The agent learns about you automatically — research field, preferred tools, methodological preferences.
|
|
160
221
|
|
|
161
|
-
|
|
162
|
-
- **Global** (`~/.open-research/memory.json`) — your profile, preferences
|
|
222
|
+
Two levels:
|
|
223
|
+
- **Global** (`~/.open-research/memory.json`) — your profile, preferences
|
|
163
224
|
- **Project** (`<workspace>/.open-research/memory.json`) — project-specific context
|
|
164
225
|
|
|
165
|
-
Only relevant memories are injected each turn based on query similarity, keeping the context window efficient.
|
|
166
|
-
|
|
167
226
|
```
|
|
168
|
-
/memory View
|
|
227
|
+
/memory View stored memories
|
|
169
228
|
/memory clear Delete everything
|
|
170
|
-
/memory delete <id> Remove
|
|
229
|
+
/memory delete <id> Remove one
|
|
171
230
|
```
|
|
172
231
|
|
|
173
232
|
## Live LaTeX Preview
|
|
174
233
|
|
|
175
|
-
When the agent drafts a paper, preview it instantly:
|
|
176
|
-
|
|
177
234
|
```
|
|
178
235
|
/preview papers/draft.tex
|
|
179
236
|
```
|
|
180
237
|
|
|
181
|
-
Opens a localhost server
|
|
182
|
-
- Sections, math (KaTeX), citations, lists rendered as styled HTML
|
|
183
|
-
- Auto-reload — the page refreshes every time the file changes
|
|
184
|
-
- Dark theme matching the CLI aesthetic
|
|
185
|
-
- No LaTeX installation required for preview
|
|
186
|
-
|
|
187
|
-
For final PDF output, the agent compiles with `pdflatex` or `tectonic` via `run_command`.
|
|
238
|
+
Opens a localhost server with KaTeX math, auto-reload on file changes, and dark theme. No LaTeX installation required.
|
|
188
239
|
|
|
189
240
|
## Tools
|
|
190
241
|
|
|
191
|
-
The agent has 14 tools with full filesystem and shell access:
|
|
192
|
-
|
|
193
242
|
| Tool | Description |
|
|
194
243
|
|---|---|
|
|
195
244
|
| `read_file` | Read any file — streaming, binary detection, `~` expansion |
|
|
196
245
|
| `read_pdf` | Extract text from PDFs with page-range selection |
|
|
197
246
|
| `run_command` | Shell execution — Python, R, LaTeX, curl, git, anything |
|
|
198
247
|
| `list_directory` | Explore directory trees with depth control |
|
|
199
|
-
| `search_external_sources` |
|
|
200
|
-
| `
|
|
248
|
+
| `search_external_sources` | Academic search with target extraction (arXiv + Semantic Scholar + OpenAlex) |
|
|
249
|
+
| `web_search` | Web search with target extraction (DuckDuckGo or Brave) |
|
|
250
|
+
| `fetch_url` | Fetch a specific URL, HTML auto-converted to text |
|
|
201
251
|
| `write_new_file` | Create workspace files |
|
|
202
252
|
| `update_existing_file` | Edit existing files with review policy |
|
|
203
|
-
| `ask_user` | Pause and ask the user a question
|
|
253
|
+
| `ask_user` | Pause and ask the user a question |
|
|
204
254
|
| `search_workspace` | Full-text search across workspace files |
|
|
205
255
|
| `create_paper` | Create LaTeX paper drafts |
|
|
206
256
|
| `load_skill` | Activate a research skill |
|
|
207
|
-
| `
|
|
208
|
-
| `
|
|
257
|
+
| `launch_subagent` | Delegate tasks to lightweight sub-agents |
|
|
258
|
+
| `create_tasks` | Create a research task checklist |
|
|
259
|
+
| `update_task` | Update task status and details |
|
|
260
|
+
| `query_ontology` | Query the research knowledge graph |
|
|
261
|
+
| `ontology_status` | Get ontology overview — notes, contradictions, gaps |
|
|
209
262
|
|
|
210
263
|
## Commands
|
|
211
264
|
|
|
212
265
|
| Command | Description |
|
|
213
266
|
|---|---|
|
|
214
267
|
| `/auth` | Connect OpenAI account via browser |
|
|
215
|
-
| `/auth-codex` | Import existing Codex CLI auth |
|
|
216
268
|
| `/init` | Initialize workspace in current directory |
|
|
217
269
|
| `/skills` | List available research skills |
|
|
270
|
+
| `/ontology` | View or manage the research ontology |
|
|
218
271
|
| `/preview <file>` | Live-preview a LaTeX file in browser |
|
|
219
272
|
| `/memory` | View or manage stored memories |
|
|
220
|
-
| `/api-keys` | Set API keys
|
|
221
|
-
| `/config` |
|
|
222
|
-
| `/compact` |
|
|
223
|
-
| `/cost` |
|
|
224
|
-
| `/context` |
|
|
225
|
-
| `/btw` |
|
|
273
|
+
| `/api-keys` | Set API keys (Semantic Scholar, OpenAlex, Brave) |
|
|
274
|
+
| `/config` | Settings (model, theme, mode, apikey) |
|
|
275
|
+
| `/compact` | Compress conversation to save context |
|
|
276
|
+
| `/cost` | Token usage for the session |
|
|
277
|
+
| `/context` | Context window usage |
|
|
278
|
+
| `/btw` | Side question without affecting main conversation |
|
|
226
279
|
| `/export` | Export conversation as markdown |
|
|
227
|
-
| `/diff` |
|
|
228
|
-
| `/doctor` | Diagnose auth, connectivity,
|
|
280
|
+
| `/diff` | Files changed this session |
|
|
281
|
+
| `/doctor` | Diagnose auth, connectivity, tools |
|
|
229
282
|
| `/resume` | Resume a previous session |
|
|
230
|
-
| `/clear` | Start
|
|
283
|
+
| `/clear` | Start fresh |
|
|
231
284
|
| `/help` | Show all commands |
|
|
232
285
|
|
|
233
286
|
## Workspace
|
|
@@ -239,24 +292,30 @@ my-research/
|
|
|
239
292
|
artifacts/ # Generated outputs
|
|
240
293
|
papers/ # LaTeX paper drafts
|
|
241
294
|
experiments/ # Analysis scripts, results, hypotheses
|
|
242
|
-
.open-research/
|
|
243
|
-
AGENTS.md # Auto-generated project context
|
|
295
|
+
.open-research/
|
|
296
|
+
AGENTS.md # Auto-generated project context
|
|
297
|
+
ontology.json # Research knowledge graph
|
|
298
|
+
tasks.json # Task tracking state
|
|
299
|
+
memory.json # Project-scoped memories
|
|
300
|
+
sessions/ # Chat history
|
|
244
301
|
```
|
|
245
302
|
|
|
246
303
|
## Features
|
|
247
304
|
|
|
248
|
-
- **
|
|
249
|
-
- **
|
|
250
|
-
- **
|
|
251
|
-
- **
|
|
252
|
-
- **
|
|
253
|
-
- **
|
|
305
|
+
- **Research ontology** — automatic knowledge graph that captures sources, findings, claims, contradictions, and connections as you work
|
|
306
|
+
- **Target extraction search** — academic and web search that returns structured evidence (supports/contradicts/related), not raw pages
|
|
307
|
+
- **PDF parsing from URLs** — fetches and extracts text from academic PDFs directly during search
|
|
308
|
+
- **Task tracking** — visible checklist for multi-step work, injected into agent context every turn
|
|
309
|
+
- **Sub-agent delegation** — explore agent navigates the workspace on its own context, returns summaries
|
|
310
|
+
- **Init banner** — version, model, context window, workspace info at launch
|
|
311
|
+
- **Terminal markdown** — bold, italic, code blocks, headings rendered natively
|
|
312
|
+
- **Autocomplete** — commands, skills, and @file mentions in a scrollable dropdown
|
|
313
|
+
- **Condensed tool activity** — grouped summary per turn, Ctrl+O to expand
|
|
254
314
|
- **Slash command highlighting** — commands appear in blue as you type
|
|
255
315
|
- **Context management** — automatic two-phase compaction at 90% of context window
|
|
256
|
-
- **Token tracking** — context usage
|
|
257
|
-
- **
|
|
258
|
-
- **
|
|
259
|
-
- **Update notifications** — checks for new versions on launch
|
|
316
|
+
- **Token tracking** — context usage in the status bar
|
|
317
|
+
- **Two-tier memory** — global + project-level, selective retrieval per turn
|
|
318
|
+
- **AGENTS.md** — auto-generated project context, injected into system prompt
|
|
260
319
|
|
|
261
320
|
## Development
|
|
262
321
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
export {
|
|
9
|
+
__require
|
|
10
|
+
};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// src/lib/ontology/store.ts
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import path from "path";
|
|
4
|
+
/**
 * Path of the ontology file for a workspace.
 *
 * @param {string} workspaceDir - Workspace root directory.
 * @returns {string} `<workspaceDir>/.open-research/ontology.json`.
 */
function getOntologyPath(workspaceDir) {
  return path.join(workspaceDir, ".open-research", "ontology.json");
}

// Template for an ontology with no notes. Never hand this object (or its
// `notes` array) to callers directly — always build a fresh copy.
var EMPTY_ONTOLOGY = { version: 1, notes: [] };

/**
 * Load the ontology stored in a workspace.
 *
 * Best-effort by design: a missing file, unreadable file, invalid JSON, or a
 * document without a `notes` array all yield an empty ontology instead of
 * throwing.
 *
 * @param {string} workspaceDir - Workspace root directory.
 * @returns {Promise<object>} The parsed ontology, or a fresh empty one.
 */
async function loadOntology(workspaceDir) {
  try {
    const raw = await fs.readFile(getOntologyPath(workspaceDir), "utf8");
    const parsed = JSON.parse(raw);
    // A spread alone is a shallow copy and would share the template's `notes`
    // array between every empty ontology ever returned, so a push into one
    // would leak into all later ones. Give each result its own array.
    if (!Array.isArray(parsed?.notes)) return { ...EMPTY_ONTOLOGY, notes: [] };
    return parsed;
  } catch {
    return { ...EMPTY_ONTOLOGY, notes: [] };
  }
}

/**
 * Persist the ontology for a workspace.
 *
 * Writes pretty-printed JSON to `<ontology path>.tmp` and then renames it over
 * the real file, so the final file is swapped in one step rather than being
 * written in place. Creates the `.open-research` directory if needed.
 *
 * @param {object} ontology - Ontology to serialise.
 * @param {string} workspaceDir - Workspace root directory.
 * @returns {Promise<void>}
 * @throws Propagates any error from mkdir / writeFile / rename.
 */
async function saveOntology(ontology, workspaceDir) {
  const filePath = getOntologyPath(workspaceDir);
  const tmpPath = filePath + ".tmp";
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  await fs.writeFile(tmpPath, JSON.stringify(ontology, null, 2), "utf8");
  await fs.rename(tmpPath, filePath);
}

/**
 * Remove a leftover `<ontology path>.tmp` file, e.g. from an interrupted save.
 *
 * Best-effort: any failure (including "file does not exist") is ignored.
 *
 * @param {string} workspaceDir - Workspace root directory.
 * @returns {Promise<void>}
 */
async function cleanupStaleTmp(workspaceDir) {
  const tmpPath = getOntologyPath(workspaceDir) + ".tmp";
  try {
    await fs.unlink(tmpPath);
  } catch {
    // Intentionally ignored: nothing to clean up, or cleanup is not possible.
  }
}
|
|
32
|
+
|
|
33
|
+
export {
|
|
34
|
+
getOntologyPath,
|
|
35
|
+
loadOntology,
|
|
36
|
+
saveOntology,
|
|
37
|
+
cleanupStaleTmp
|
|
38
|
+
};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// src/lib/fs/paths.ts
|
|
2
|
+
import os from "os";
|
|
3
|
+
import path from "path";
|
|
4
|
+
/**
 * Home directory to use: `options.homeDir` when provided, else the OS home.
 */
function resolveHomeDir(options) {
  const overriddenHome = options?.homeDir;
  return overriddenHome ?? os.homedir();
}

/** Per-user data root: `<home>/.open-research`. */
function getOpenResearchRoot(options) {
  const home = resolveHomeDir(options);
  return path.join(home, ".open-research");
}

/** `<home>/.open-research/auth.json`. */
function getOpenResearchAuthFile(options) {
  const root = getOpenResearchRoot(options);
  return path.join(root, "auth.json");
}

/** `<home>/.open-research/config.json`. */
function getOpenResearchConfigFile(options) {
  const root = getOpenResearchRoot(options);
  return path.join(root, "config.json");
}

/** `<home>/.open-research/skills`. */
function getOpenResearchSkillsDir(options) {
  const root = getOpenResearchRoot(options);
  return path.join(root, "skills");
}

/** Per-workspace metadata directory: `<workspaceDir>/.open-research`. */
function getWorkspaceMetaDir(workspaceDir) {
  const segments = [workspaceDir, ".open-research"];
  return path.join(...segments);
}

/** `<workspaceDir>/.open-research/project.json`. */
function getWorkspaceProjectFile(workspaceDir) {
  const metaDir = getWorkspaceMetaDir(workspaceDir);
  return path.join(metaDir, "project.json");
}

/** `<workspaceDir>/.open-research/sessions`. */
function getWorkspaceSessionsDir(workspaceDir) {
  const metaDir = getWorkspaceMetaDir(workspaceDir);
  return path.join(metaDir, "sessions");
}
|
|
28
|
+
|
|
29
|
+
export {
|
|
30
|
+
getOpenResearchRoot,
|
|
31
|
+
getOpenResearchAuthFile,
|
|
32
|
+
getOpenResearchConfigFile,
|
|
33
|
+
getOpenResearchSkillsDir,
|
|
34
|
+
getWorkspaceMetaDir,
|
|
35
|
+
getWorkspaceProjectFile,
|
|
36
|
+
getWorkspaceSessionsDir
|
|
37
|
+
};
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
// src/lib/ontology/read-tools.ts
|
|
2
|
+
// Common English function words that are dropped during tokenization.
var STOP_WORDS = /* @__PURE__ */ new Set([
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could", "should",
  "may", "might", "can", "shall", "to", "of", "in", "for", "on", "with",
  "at", "by", "from", "as", "into", "through", "about", "and", "but", "or",
  "not", "no", "if", "then", "than", "so", "that", "this", "it", "its",
  "i", "me", "my", "we", "our", "you", "your", "what", "which", "who",
  "how", "when", "where", "why"
]);

/**
 * Split text into lowercase search tokens.
 *
 * Keeps letters, combining marks, digits and hyphens; everything else becomes
 * a separator. Tokens of one or two characters and stop words are discarded.
 *
 * The letter/digit classes are Unicode-aware. An ASCII-only filter would split
 * or drop non-English text (e.g. "Müller" would become "ller"). NFKC
 * normalisation makes composed and decomposed accents, and compatibility forms
 * such as the "ﬁ" ligature, tokenize the same way. Pure-ASCII input produces
 * the same tokens as an ASCII-only filter.
 *
 * @param {string | null | undefined} text - Text to tokenize; nullish yields `[]`.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
function tokenize(text) {
  if (text == null) return [];
  return String(text)
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s-]/gu, " ")
    .split(/\s+/)
    .filter((w) => w.length > 2 && !STOP_WORDS.has(w));
}
|
|
71
|
+
/**
 * Look up a note by id.
 *
 * @param {{ notes: Array<{ id: string }> }} ontology - Ontology to search.
 * @param {string} noteId - Id to look for.
 * @returns {object | null} The first note whose `id` equals `noteId`, or `null`.
 */
function getNote(ontology, noteId) {
  for (const candidate of ontology.notes) {
    if (candidate.id === noteId) {
      return candidate;
    }
  }
  return null;
}
|
|
74
|
+
/**
 * Whether some other note points at `noteId` with a "mutual" edge of the given
 * relation. A mutual edge is stored on only one endpoint, so this lets the
 * target of such an edge count as having that relation too.
 */
function hasMutualIncoming(ontology, noteId, relation) {
  return ontology.notes.some(
    (other) => other.id !== noteId && other.edges.some(
      (e) => e.targetId === noteId && e.relation === relation && e.direction === "mutual"
    )
  );
}

/**
 * Filter and rank ontology notes.
 *
 * Filters (all optional, combined with AND):
 *   - `kind` / `confidence`: exact match on the note's field.
 *   - `hasEdge`: note has an outgoing edge with that relation, or is the
 *     target of a mutual edge with that relation.
 *   - `missingEdge`: the negation of the `hasEdge` condition.
 *
 * Ranking:
 *   - No `queries`: most recently updated first (by `updatedAt`).
 *   - With `queries`: BM25 (k1 = 1.2, b = 0.75) over the tokenized note
 *     content, computed against the filtered candidate set. A note's score is
 *     its best score over all queries. `source` notes additionally get +0.5
 *     per query token found in their authors / venue / year metadata (best
 *     query only). Notes scoring 0 are dropped.
 *
 * The ontology is never modified: ranking always works on a copy.
 *
 * @param {{ notes: object[] }} ontology - Ontology to search.
 * @param {object} params
 * @param {string[]} [params.queries] - Free-text queries.
 * @param {string} [params.kind]
 * @param {string} [params.confidence]
 * @param {string} [params.hasEdge] - Relation name that must be present.
 * @param {string} [params.missingEdge] - Relation name that must be absent.
 * @param {number} [params.limit=10] - Maximum number of notes returned.
 * @returns {object[]} Matching notes, best first.
 */
function searchNotes(ontology, params) {
  const { queries, kind, confidence, hasEdge, missingEdge, limit = 10 } = params;
  let candidates = ontology.notes;
  if (kind) {
    candidates = candidates.filter((n) => n.kind === kind);
  }
  if (confidence) {
    candidates = candidates.filter((n) => n.confidence === confidence);
  }
  if (hasEdge) {
    candidates = candidates.filter(
      (n) => n.edges.some((e) => e.relation === hasEdge) || hasMutualIncoming(ontology, n.id, hasEdge)
    );
  }
  if (missingEdge) {
    candidates = candidates.filter(
      (n) => !n.edges.some((e) => e.relation === missingEdge) && !hasMutualIncoming(ontology, n.id, missingEdge)
    );
  }
  if (!queries || queries.length === 0) {
    // When no filter ran, `candidates` IS `ontology.notes`; Array#sort works in
    // place, so sort a copy to avoid silently reordering the caller's ontology.
    return [...candidates].sort((a, b) => b.updatedAt.localeCompare(a.updatedAt)).slice(0, limit);
  }
  const queryTokenSets = queries.map((q) => tokenize(q));
  const N = candidates.length;
  if (N === 0) return [];

  // Tokenize every candidate once; reused for document frequency and scoring.
  const docTokensCache = /* @__PURE__ */ new Map();
  let totalDocLen = 0;
  for (const note of candidates) {
    const tokens = tokenize(note.content);
    docTokensCache.set(note.id, tokens);
    totalDocLen += tokens.length;
  }
  const avgDocLen = totalDocLen / N;

  // Document frequency: number of candidates containing each token.
  const df = /* @__PURE__ */ new Map();
  for (const note of candidates) {
    const uniqueTokens = new Set(docTokensCache.get(note.id));
    for (const token of uniqueTokens) {
      df.set(token, (df.get(token) ?? 0) + 1);
    }
  }

  const k1 = 1.2;
  const lengthWeight = 0.75;
  const scored = candidates.map((note) => {
    const noteTokens = docTokensCache.get(note.id);
    const docLen = noteTokens.length;
    const tf = /* @__PURE__ */ new Map();
    for (const token of noteTokens) {
      tf.set(token, (tf.get(token) ?? 0) + 1);
    }
    let bestBM25 = 0;
    for (const queryTokens of queryTokenSets) {
      let score = 0;
      for (const qt of queryTokens) {
        const termFreq = tf.get(qt) ?? 0;
        const docFreq = df.get(qt) ?? 0;
        // Skipping absent terms also guarantees docLen (and avgDocLen) > 0 below.
        if (termFreq === 0) continue;
        const idf = Math.log((N - docFreq + 0.5) / (docFreq + 0.5) + 1);
        const tfNorm = termFreq * (k1 + 1) / (termFreq + k1 * (1 - lengthWeight + lengthWeight * docLen / avgDocLen));
        score += idf * tfNorm;
      }
      bestBM25 = Math.max(bestBM25, score);
    }
    let metaBonus = 0;
    if (note.kind === "source" && note.meta) {
      const metaText = [
        note.meta.authors,
        note.meta.venue,
        note.meta.year?.toString()
      ].filter(Boolean).join(" ");
      const metaTokens = new Set(tokenize(metaText));
      for (const queryTokens of queryTokenSets) {
        const hits = queryTokens.filter((qt) => metaTokens.has(qt)).length;
        metaBonus = Math.max(metaBonus, hits * 0.5);
      }
    }
    return { note, score: bestBM25 + metaBonus };
  });
  return scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.note);
}
|
|
160
|
+
/**
 * Collect the notes reachable from a root note within `depth` hops.
 *
 * A hop follows (a) any outgoing edge of the current note and (b) any edge
 * marked `direction: "mutual"` that another note points at the current note.
 * Incoming edges that are not mutual are not followed. Edges whose target does
 * not exist are ignored.
 *
 * Notes and incoming mutual edges are indexed once up front, so the traversal
 * does not rescan the whole ontology for every visited note.
 *
 * @param {{ notes: object[] }} ontology - Ontology to traverse.
 * @param {string} noteId - Id of the root note.
 * @param {number} [depth=1] - Number of hops; clamped to the range 1..3.
 * @returns {{ root: object | null, connected: object[] }} The root note and
 *   the reachable notes in discovery order (root excluded). When the root does
 *   not exist: `{ root: null, connected: [] }`.
 */
function getConnections(ontology, noteId, depth = 1) {
  const clampedDepth = Math.min(Math.max(depth, 1), 3);

  // id -> note (first occurrence wins, matching a front-to-back lookup) and
  // targetId -> ids of notes holding a mutual edge to it, in ontology order.
  const notesById = /* @__PURE__ */ new Map();
  const mutualSourcesByTarget = /* @__PURE__ */ new Map();
  for (const note of ontology.notes) {
    if (!notesById.has(note.id)) notesById.set(note.id, note);
    for (const edge of note.edges ?? []) {
      if (edge.direction !== "mutual") continue;
      const sources = mutualSourcesByTarget.get(edge.targetId);
      if (sources) {
        sources.push(note.id);
      } else {
        mutualSourcesByTarget.set(edge.targetId, [note.id]);
      }
    }
  }

  const root = notesById.get(noteId) ?? null;
  if (!root) return { root: null, connected: [] };

  // Breadth-first expansion, one level per iteration.
  const visited = /* @__PURE__ */ new Set([noteId]);
  let frontier = [noteId];
  for (let d = 0; d < clampedDepth && frontier.length > 0; d++) {
    const nextFrontier = [];
    const visit = (id) => {
      if (visited.has(id)) return;
      visited.add(id);
      nextFrontier.push(id);
    };
    for (const currentId of frontier) {
      const current = notesById.get(currentId);
      // Dangling edge target: nothing to expand from.
      if (!current) continue;
      for (const edge of current.edges ?? []) visit(edge.targetId);
      for (const sourceId of mutualSourcesByTarget.get(currentId) ?? []) visit(sourceId);
    }
    frontier = nextFrontier;
  }

  visited.delete(noteId);
  const connected = [];
  for (const id of visited) {
    const note = notesById.get(id);
    if (note) connected.push(note);
  }
  return { root, connected };
}
|
|
195
|
+
/**
 * Normalise free text for equality comparison: NFKC-normalised, lowercased,
 * punctuation removed, whitespace collapsed and trimmed.
 *
 * Letters and digits are matched with Unicode classes so that non-Latin text
 * keeps its content instead of collapsing to an empty string.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeTitle(text) {
  return String(text)
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Find an existing `source` note that describes the same origin as `meta`.
 *
 * For each source note, in ontology order, the first rule that matches wins:
 *   1. same DOI (compared case-insensitively, ignoring a `doi:` or
 *      `https://doi.org/` prefix, since DOI names are case-insensitive);
 *   2. same URL (exact string match);
 *   3. same `year` and same normalised `authors` string.
 *
 * Rule 3 requires the normalised author string to be non-empty, so two author
 * lists that both normalise to nothing are not treated as equal.
 * NOTE(review): rule 3 cannot tell apart two different works by the same
 * authors in the same year — confirm callers accept that.
 *
 * @param {{ notes: object[] }} ontology - Ontology to search.
 * @param {{ doi?: string, url?: string, authors?: string, year?: number | string }} meta
 *   Metadata of the candidate source.
 * @returns {object | null} The matching source note, or `null`.
 */
function findExistingSource(ontology, meta) {
  const canonicalDoi = (doi) => String(doi)
    .trim()
    .toLowerCase()
    .replace(/^(?:https?:\/\/(?:dx\.)?doi\.org\/|doi:\s*)/, "");

  // Normalise the wanted values once instead of once per note.
  const wantedDoi = meta.doi ? canonicalDoi(meta.doi) : "";
  const wantedAuthors = meta.authors && meta.year ? normalizeTitle(meta.authors) : "";

  for (const note of ontology.notes) {
    if (note.kind !== "source" || !note.meta) continue;
    if (wantedDoi && note.meta.doi && wantedDoi === canonicalDoi(note.meta.doi)) return note;
    if (meta.url && note.meta.url && meta.url === note.meta.url) return note;
    if (
      wantedAuthors &&
      note.meta.authors &&
      note.meta.year &&
      meta.year === note.meta.year &&
      wantedAuthors === normalizeTitle(note.meta.authors)
    ) {
      return note;
    }
  }
  return null;
}
|
|
209
|
+
|
|
210
|
+
export {
|
|
211
|
+
getNote,
|
|
212
|
+
searchNotes,
|
|
213
|
+
getConnections,
|
|
214
|
+
findExistingSource
|
|
215
|
+
};
|