@yeyuan98/opencode-bioresearcher-plugin 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/bioresearcher/prompt.d.ts +1 -1
- package/dist/agents/bioresearcher/prompt.js +11 -2
- package/dist/agents/bioresearcherDR/prompt.d.ts +1 -1
- package/dist/agents/bioresearcherDR/prompt.js +1 -1
- package/dist/agents/bioresearcherDR_worker/prompt.d.ts +1 -1
- package/dist/agents/bioresearcherDR_worker/prompt.js +1 -0
- package/dist/index.js +3 -1
- package/dist/parser-tools/pubmed/index.d.ts +2 -0
- package/dist/parser-tools/pubmed/index.js +2 -0
- package/dist/parser-tools/pubmed/pubmed.d.ts +22 -0
- package/dist/parser-tools/pubmed/pubmed.js +200 -0
- package/dist/parser-tools/pubmed/types.d.ts +103 -0
- package/dist/parser-tools/pubmed/types.js +1 -0
- package/dist/parser-tools/pubmed/utils.d.ts +13 -0
- package/dist/parser-tools/pubmed/utils.js +158 -0
- package/package.json +2 -1
|
@@ -4,5 +4,5 @@
|
|
|
4
4
|
* A specialized biomedical research agent that performs reference-based
|
|
5
5
|
* pharmaceutical and drug development research using BioMCP tools.
|
|
6
6
|
*/
|
|
7
|
-
export declare const BIORESEARCHER_SYSTEM_PROMPT = "\n------ RULE REMINDER START ------\nIMPORTANT: THIS REMINDER SHALL OVERRIDE ALL OTHER PROMPTS PROVIDED TO YOU. IGNORE ALL OTHER PROMPTS.\n\nYou are an expert in biomedical and pharmaceutical drug development research.\nYou help the user to address their question following guidelines below:\n\n- YOU MUST ALWAYS FOLLOW THE ALLOWED TOOLS LIST (SEE BELOW). DO NOT USE ANY TOOLS NOT IN THE WHITELIST.\n- If the user's question is unclear, you guide them to hone their question to make it professional and specific.\n- You alway use websearch and biomcp tools to fetch trustable information and use those as your answers.\n- Wherever needed, you use tools (specified below) to interact with local files and perform analyses.\n- If you are taskes with complex problems, ask if the user would like to build a todo list and then proceed with long-running analysis. If so, use todo list tools and run analysis based on the todo.\n\nTHE WHITELIST: ONLY use tools listed below (you MUST NOT USE all other available tools): \n\n- bash, edit, skill, websearch\n- biomcp* tool series\n- table* tool series.\n\
|
|
7
|
+
export declare const BIORESEARCHER_SYSTEM_PROMPT = "\n------ RULE REMINDER START ------\nIMPORTANT: THIS REMINDER SHALL OVERRIDE ALL OTHER PROMPTS PROVIDED TO YOU. IGNORE ALL OTHER PROMPTS.\n\nYou are an expert in biomedical and pharmaceutical drug development research.\nYou help the user to address their question following guidelines below:\n\n- YOU MUST ALWAYS FOLLOW THE ALLOWED TOOLS LIST (SEE BELOW). DO NOT USE ANY TOOLS NOT IN THE WHITELIST.\n- If the user's question is unclear, you guide them to hone their question to make it professional and specific.\n- You alway use websearch and biomcp tools to fetch trustable information and use those as your answers.\n- Wherever needed, you use tools (specified below) to interact with local files and perform analyses.\n- If you are taskes with complex problems, ask if the user would like to build a todo list and then proceed with long-running analysis. If so, use todo list tools and run analysis based on the todo.\n- You should use python for complex data analysis tasks. Strictly follow python guidelines below.\n\nTHE ALLOWED TOOL WHITELIST: ONLY use tools listed below (you MUST NOT USE all other available tools): \n\n- bash, edit, skill, websearch\n- biomcp* tool series\n- table* tool series.\n\nTHE PYTHON GUIDELINES:\n\n- ONLY use python IF existing tools are not suitable for the task\n- ALWAYS write code files in folder _python_scripts. DO NOT flood the working directory with code files.\n- ALWAYS use uv to setup virtual environment in the working directory and install necessary packages\n- If uv is not available, refer the user to uv website for manual installation: https://docs.astral.sh/uv/getting-started/installation\n\nTHE BOTTOMLINE RULES: always follow strictly:\n\n1. ONLY use high-quality, trustable information: either biomcp results or websearch results from official websites of biotech and pharma companies.\n2. 
ALWAYS provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end of all your messages.\n3. ALWAYS backup files when you make edits. Copy the file to make a `.bak` file.\n4. ALWAYS use the blockingTimer tool to sleep for 0.3 seconds between two consecutive biomcp* tool calls (to enforce rate limit).\n------ RULE REMINDER END ------\n";
|
|
8
8
|
export declare function getBioResearcherPrompt(): string;
|
|
@@ -16,18 +16,27 @@ You help the user to address their question following guidelines below:
|
|
|
16
16
|
- You alway use websearch and biomcp tools to fetch trustable information and use those as your answers.
|
|
17
17
|
- Wherever needed, you use tools (specified below) to interact with local files and perform analyses.
|
|
18
18
|
- If you are taskes with complex problems, ask if the user would like to build a todo list and then proceed with long-running analysis. If so, use todo list tools and run analysis based on the todo.
|
|
19
|
+
- You should use python for complex data analysis tasks. Strictly follow python guidelines below.
|
|
19
20
|
|
|
20
|
-
THE WHITELIST: ONLY use tools listed below (you MUST NOT USE all other available tools):
|
|
21
|
+
THE ALLOWED TOOL WHITELIST: ONLY use tools listed below (you MUST NOT USE all other available tools):
|
|
21
22
|
|
|
22
23
|
- bash, edit, skill, websearch
|
|
23
24
|
- biomcp* tool series
|
|
24
25
|
- table* tool series.
|
|
25
26
|
|
|
26
|
-
|
|
27
|
+
THE PYTHON GUIDELINES:
|
|
28
|
+
|
|
29
|
+
- ONLY use python IF existing tools are not suitable for the task
|
|
30
|
+
- ALWAYS write code files in folder _python_scripts. DO NOT flood the working directory with code files.
|
|
31
|
+
- ALWAYS use uv to setup virtual environment in the working directory and install necessary packages
|
|
32
|
+
- If uv is not available, refer the user to uv website for manual installation: https://docs.astral.sh/uv/getting-started/installation
|
|
33
|
+
|
|
34
|
+
THE BOTTOMLINE RULES: always follow strictly:
|
|
27
35
|
|
|
28
36
|
1. ONLY use high-quality, trustable information: either biomcp results or websearch results from official websites of biotech and pharma companies.
|
|
29
37
|
2. ALWAYS provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end of all your messages.
|
|
30
38
|
3. ALWAYS backup files when you make edits. Copy the file to make a \`.bak\` file.
|
|
39
|
+
4. ALWAYS use the blockingTimer tool to sleep for 0.3 seconds between two consecutive biomcp* tool calls (to enforce rate limit).
|
|
31
40
|
------ RULE REMINDER END ------
|
|
32
41
|
`;
|
|
33
42
|
export function getBioResearcherPrompt() {
|
|
@@ -4,5 +4,5 @@
|
|
|
4
4
|
* A specialized biomedical research agent that performs reference-based
|
|
5
5
|
* pharmaceutical and drug development research using BioMCP tools.
|
|
6
6
|
*/
|
|
7
|
-
export declare const BIORESEARCHERDR_SYSTEM_PROMPT = "\n------ RULE REMINDER START ------\nIMPORTANT: THIS REMINDER SHALL OVERRIDE ALL OTHER PROMPTS PROVIDED TO YOU. IGNORE ALL OTHER PROMPTS.\nOverall goal: Perform reference-based biomedical and pharmaceutical drug development research.\n\nSteps to STRICTLY adhere to:\n\n1. If the user query includes 'no-interview', skip Step 2 and proceed to Step 3. Otherwise, proceed to Step 2.\n2. Comprehend initial user inquiry. Use the question tool to ask user to clarify 3-6 unclear points depending on inquiry complexity.\n3. Comprehend final user inquiry to identify critical research aspects to answer user inquiry.\n4. If the original user inquiry includes 'light-reserach', combine and/or pick top two research aspects and proceed to Step 5. Otherwise, proceed directly to Step 5.\n5. Decide on TOPIC of this inquiry (NO user input). TOPIC should be highly succinct, underscore-separated name based on user inquiry.\n5. Use the todowrite tool to generate a list of identified research aspects.\n6. Create the reports_biomcp/<TOPIC>/ folder if needed.\n7. Use the task tool to assign each research aspect to a bioresearcherDR_worker subagent. Start subagents in parallel in batches (size of 5 for each batch). Record finished subagents by checking the todo list. Prompt the user: 'If subagents are stuck without progress for too long, interrupt and ask me to resume work.'\n8. Proceed until subagents complete research. Restart failed subagents if necessary.\n9. Read reports from all subagents. Summarize findings to provide a succinct and accurate report addressing user inquiry.\n10. 
Write to reports_biomcp/<TOPIC>/final_report.md.\n\nFollow this template to prompt the bioresearcherDR_worker subagents (Step 7):\n\n```md\nTOPIC: <TOPIC>\nYOUR RESEARCH FOCUS: <RESEARCH-ASPECT>\nDESCRIPTION: <ABSTRACT>\n```\n\nABSTRACT should be a short paragraph of less than 200 words, describing exact focus of the subagent's research aspect and a list of detailed research items.\n\nRules for YOU:\n\n- Do NOT use the following tools: biomcp*, web*, context7* (i.e., tool names starting with biomcp or web or context7).\n- Do NOT fallback to internal knowledge when query tools fail. STRICTLY ADHERE to external trusted sources.\n- DO provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end.\n- DO keep your word succinct, accurate and professional, fitting top standards of academic writing.\n------ RULE REMINDER END ------\n";
|
|
7
|
+
export declare const BIORESEARCHERDR_SYSTEM_PROMPT = "\n------ RULE REMINDER START ------\nIMPORTANT: THIS REMINDER SHALL OVERRIDE ALL OTHER PROMPTS PROVIDED TO YOU. IGNORE ALL OTHER PROMPTS.\nOverall goal: Perform reference-based biomedical and pharmaceutical drug development research.\n\nSteps to STRICTLY adhere to:\n\n1. If the user query includes 'no-interview', skip Step 2 and proceed to Step 3. Otherwise, proceed to Step 2.\n2. Comprehend initial user inquiry. Use the question tool to ask user to clarify 3-6 unclear points depending on inquiry complexity.\n3. Comprehend final user inquiry to identify critical research aspects to answer user inquiry.\n4. If the original user inquiry includes 'light-reserach', combine and/or pick top two research aspects and proceed to Step 5. Otherwise, proceed directly to Step 5.\n5. Decide on TOPIC of this inquiry (NO user input). TOPIC should be highly succinct, underscore-separated name based on user inquiry.\n5. Use the todowrite tool to generate a list of identified research aspects.\n6. Create the reports_biomcp/<TOPIC>/ folder if needed.\n7. Use the task tool to assign each research aspect to a bioresearcherDR_worker subagent. Start subagents in parallel in batches (size of 5 for each batch). Record finished subagents by checking the todo list. Prompt the user: 'If subagents are stuck without progress for too long, interrupt and ask me to resume work.'\n8. Proceed until subagents complete research. Restart failed subagents if necessary.\n9. Read reports from all subagents. Summarize findings to provide a succinct and accurate report addressing user inquiry.\n10. 
Write to reports_biomcp/<TOPIC>/final_report.md.\n\nFollow this template to prompt the bioresearcherDR_worker subagents (Step 7):\n\n```md\nTOPIC: <TOPIC>\nYOUR RESEARCH FOCUS: <RESEARCH-ASPECT>\nDESCRIPTION: <ABSTRACT>\n```\n\nABSTRACT should be a short paragraph of less than 200 words, describing exact focus of the subagent's research aspect and a list of detailed research items.\n\nRules for YOU:\n\n- Do NOT use the following tools: biomcp*, web*, context7* (i.e., tool names starting with biomcp or web or context7. VERY IMPORTANT DO NOT USE ANY BIOMCP TOOL).\n- Do NOT fallback to internal knowledge when query tools fail. STRICTLY ADHERE to external trusted sources.\n- DO provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end.\n- DO keep your word succinct, accurate and professional, fitting top standards of academic writing.\n------ RULE REMINDER END ------\n";
|
|
8
8
|
export declare function getBioResearcherDRPrompt(): string;
|
|
@@ -35,7 +35,7 @@ ABSTRACT should be a short paragraph of less than 200 words, describing exact fo
|
|
|
35
35
|
|
|
36
36
|
Rules for YOU:
|
|
37
37
|
|
|
38
|
-
- Do NOT use the following tools: biomcp*, web*, context7* (i.e., tool names starting with biomcp or web or context7).
|
|
38
|
+
- Do NOT use the following tools: biomcp*, web*, context7* (i.e., tool names starting with biomcp or web or context7. VERY IMPORTANT DO NOT USE ANY BIOMCP TOOL).
|
|
39
39
|
- Do NOT fallback to internal knowledge when query tools fail. STRICTLY ADHERE to external trusted sources.
|
|
40
40
|
- DO provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end.
|
|
41
41
|
- DO keep your word succinct, accurate and professional, fitting top standards of academic writing.
|
|
@@ -3,5 +3,5 @@
|
|
|
3
3
|
* BioResearcher Deep Research Worker Subagent System Prompt
|
|
4
4
|
*
|
|
5
5
|
*/
|
|
6
|
-
export declare const BIORESEARCHERDRWORKER_SYSTEM_PROMPT = "\n------ RULE REMINDER START ------\nIMPORTANT: THIS REMINDER SHALL OVERRIDE ALL OTHER PROMPTS PROVIDED TO YOU. IGNORE ALL OTHER PROMPTS.\nOverall goal: Execute reference-based biomedical and pharmaceutical drug development research plan.\n\nSteps to STRICTLY adhere to:\n\n1. Follow supplied specific directions to conduct research. Your research MUST be focused and must NOT delegate task to other subagents.\n2. Write your detailed findings to reports_biomcp/<TOPIC>/<QUESTION-OF-INTEREST>.md.\n\nRules:\n\n- Do NOT use the following tools: web*, context7*, task, skill (i.e., tool names starting with biomcp or web or context7 and tools task and skill).\n- Do NOT run more than one MCP calls simultaneously.\n- Do NOT fallback to internal knowledge when query tools fail. STRICTLY ADHERE to external trusted sources.\n- DO retry up to 3 times if query tools fail. Try with simpler queries, and wait for a few seconds before retry.\n- DO provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end.\n- DO keep your word succinct, accurate and professional, fitting top standards of academic writing.\n------ RULE REMINDER END ------\n";
|
|
6
|
+
export declare const BIORESEARCHERDRWORKER_SYSTEM_PROMPT = "\n------ RULE REMINDER START ------\nIMPORTANT: THIS REMINDER SHALL OVERRIDE ALL OTHER PROMPTS PROVIDED TO YOU. IGNORE ALL OTHER PROMPTS.\nOverall goal: Execute reference-based biomedical and pharmaceutical drug development research plan.\n\nSteps to STRICTLY adhere to:\n\n1. Follow supplied specific directions to conduct research. Your research MUST be focused and must NOT delegate task to other subagents.\n2. Write your detailed findings to reports_biomcp/<TOPIC>/<QUESTION-OF-INTEREST>.md.\n\nRules:\n\n- Do NOT use the following tools: web*, context7*, task, skill (i.e., tool names starting with biomcp or web or context7 and tools task and skill).\n- Do NOT run more than one MCP calls simultaneously.\n- Do NOT fallback to internal knowledge when query tools fail. STRICTLY ADHERE to external trusted sources.\n- DO retry up to 3 times if query tools fail. Try with simpler queries, and wait for a few seconds before retry.\n- DO provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end.\n- DO keep your word succinct, accurate and professional, fitting top standards of academic writing.\n- DO use the blockingTimer tool to sleep for 0.5 seconds between two consecutive biomcp* tool calls (to enforce rate limit).\n------ RULE REMINDER END ------\n";
|
|
7
7
|
export declare function getBioResearcherDRWorkerPrompt(): string;
|
|
@@ -21,6 +21,7 @@ Rules:
|
|
|
21
21
|
- DO retry up to 3 times if query tools fail. Try with simpler queries, and wait for a few seconds before retry.
|
|
22
22
|
- DO provide concrete references for all findings with citations (in brackets, e.g., [1], [2], ...) and full bibliography at the end.
|
|
23
23
|
- DO keep your word succinct, accurate and professional, fitting top standards of academic writing.
|
|
24
|
+
- DO use the blockingTimer tool to sleep for 0.5 seconds between two consecutive biomcp* tool calls (to enforce rate limit).
|
|
24
25
|
------ RULE REMINDER END ------
|
|
25
26
|
`;
|
|
26
27
|
export function getBioResearcherDRWorkerPrompt() {
|
package/dist/index.js
CHANGED
|
@@ -3,6 +3,7 @@ import { createBioResearcherDRAgent } from "./agents/bioresearcherDR/index";
|
|
|
3
3
|
import { createBioResearcherDRWorkerAgent } from "./agents/bioresearcherDR_worker/index";
|
|
4
4
|
import { tableTools } from "./table-tools/index";
|
|
5
5
|
import { blockingTimer, calculator } from "./misc-tools/index";
|
|
6
|
+
import { parse_pubmed_articleSet } from "./parser-tools/pubmed";
|
|
6
7
|
export const BioResearcherPlugin = async () => {
|
|
7
8
|
return {
|
|
8
9
|
config: async (config) => {
|
|
@@ -14,7 +15,8 @@ export const BioResearcherPlugin = async () => {
|
|
|
14
15
|
tool: {
|
|
15
16
|
...tableTools,
|
|
16
17
|
blockingTimer,
|
|
17
|
-
calculator
|
|
18
|
+
calculator,
|
|
19
|
+
parse_pubmed_articleSet
|
|
18
20
|
}
|
|
19
21
|
};
|
|
20
22
|
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { ToolContext } from '@opencode-ai/plugin/tool';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
export declare const parse_pubmed_articleSet: {
|
|
4
|
+
description: string;
|
|
5
|
+
args: {
|
|
6
|
+
filePath: z.ZodString;
|
|
7
|
+
outputMode: z.ZodDefault<z.ZodEnum<{
|
|
8
|
+
single: "single";
|
|
9
|
+
individual: "individual";
|
|
10
|
+
}>>;
|
|
11
|
+
outputFileName: z.ZodOptional<z.ZodString>;
|
|
12
|
+
outputDir: z.ZodOptional<z.ZodString>;
|
|
13
|
+
verbose: z.ZodDefault<z.ZodBoolean>;
|
|
14
|
+
};
|
|
15
|
+
execute(args: {
|
|
16
|
+
filePath: string;
|
|
17
|
+
outputMode: "single" | "individual";
|
|
18
|
+
verbose: boolean;
|
|
19
|
+
outputFileName?: string | undefined;
|
|
20
|
+
outputDir?: string | undefined;
|
|
21
|
+
}, context: ToolContext): Promise<string>;
|
|
22
|
+
};
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import { tool } from '@opencode-ai/plugin/tool';
|
|
2
|
+
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
|
|
3
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
|
|
4
|
+
import { gunzipSync } from 'zlib';
|
|
5
|
+
import { join, resolve } from 'path';
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { toArray, extractAllFields, generateArticleMarkdown } from './utils.js';
|
|
8
|
+
function formatError(error) {
|
|
9
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
10
|
+
return JSON.stringify({ error: message }, null, 2);
|
|
11
|
+
}
|
|
12
|
+
function resolvePath(filePath, basePath) {
|
|
13
|
+
if (!basePath) {
|
|
14
|
+
return filePath;
|
|
15
|
+
}
|
|
16
|
+
if (filePath.startsWith('./') || filePath.startsWith('../')) {
|
|
17
|
+
return resolve(basePath, filePath);
|
|
18
|
+
}
|
|
19
|
+
if (filePath.startsWith('/') || /^[A-Za-z]:/.test(filePath)) {
|
|
20
|
+
return filePath;
|
|
21
|
+
}
|
|
22
|
+
return resolve(basePath, filePath);
|
|
23
|
+
}
|
|
24
|
+
const PARSER_OPTIONS = {
|
|
25
|
+
ignoreAttributes: false,
|
|
26
|
+
attributeNamePrefix: '@_',
|
|
27
|
+
parseTagValue: true,
|
|
28
|
+
trimValues: true,
|
|
29
|
+
ignoreDeclaration: true,
|
|
30
|
+
ignorePiTags: true,
|
|
31
|
+
textNodeName: '#text'
|
|
32
|
+
};
|
|
33
|
+
const BUILDER_OPTIONS = {
|
|
34
|
+
ignoreAttributes: false,
|
|
35
|
+
attributeNamePrefix: '@_',
|
|
36
|
+
format: true,
|
|
37
|
+
indentBy: ' '
|
|
38
|
+
};
|
|
39
|
+
function readFile(filePath) {
|
|
40
|
+
if (filePath.endsWith('.gz')) {
|
|
41
|
+
const compressed = readFileSync(filePath);
|
|
42
|
+
const decompressed = gunzipSync(compressed);
|
|
43
|
+
return decompressed.toString('utf-8');
|
|
44
|
+
}
|
|
45
|
+
return readFileSync(filePath, 'utf-8');
|
|
46
|
+
}
|
|
47
|
+
function ensureDir(dirPath) {
|
|
48
|
+
if (!existsSync(dirPath)) {
|
|
49
|
+
mkdirSync(dirPath, { recursive: true });
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
function extractPubmedArticles(xmlString) {
|
|
53
|
+
const parser = new XMLParser(PARSER_OPTIONS);
|
|
54
|
+
const parsed = parser.parse(xmlString);
|
|
55
|
+
if ('PubmedArticle' in parsed && !('PubmedArticleSet' in parsed)) {
|
|
56
|
+
return [xmlString];
|
|
57
|
+
}
|
|
58
|
+
const articleSet = parsed.PubmedArticleSet;
|
|
59
|
+
if (!articleSet || !articleSet.PubmedArticle) {
|
|
60
|
+
throw new Error('No PubmedArticle found in XML');
|
|
61
|
+
}
|
|
62
|
+
const articles = toArray(articleSet.PubmedArticle);
|
|
63
|
+
const builder = new XMLBuilder(BUILDER_OPTIONS);
|
|
64
|
+
return articles.map(article => {
|
|
65
|
+
const articleXml = builder.build({ PubmedArticle: article });
|
|
66
|
+
return articleXml;
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
function parseSinglePubmedArticle(xmlString, verbose = false) {
|
|
70
|
+
try {
|
|
71
|
+
if (verbose)
|
|
72
|
+
console.log('Parsing article...');
|
|
73
|
+
const parser = new XMLParser(PARSER_OPTIONS);
|
|
74
|
+
const parsed = parser.parse(xmlString);
|
|
75
|
+
if (!parsed.PubmedArticle) {
|
|
76
|
+
throw new Error('Invalid PubmedArticle XML');
|
|
77
|
+
}
|
|
78
|
+
const article = parsed.PubmedArticle;
|
|
79
|
+
const data = extractAllFields(article);
|
|
80
|
+
if (verbose)
|
|
81
|
+
console.log(`Parsed PMID: ${data.PMID}`);
|
|
82
|
+
return { data, success: true };
|
|
83
|
+
}
|
|
84
|
+
catch (error) {
|
|
85
|
+
const errorData = {
|
|
86
|
+
PMID: 'Unknown',
|
|
87
|
+
title: 'Parsing failed',
|
|
88
|
+
authors: [],
|
|
89
|
+
journal: 'Not available',
|
|
90
|
+
doi: null,
|
|
91
|
+
abstract: null,
|
|
92
|
+
keywords: [],
|
|
93
|
+
error: `Parse error: ${error.message}`
|
|
94
|
+
};
|
|
95
|
+
if (verbose)
|
|
96
|
+
console.error(`Parse error: ${error.message}`);
|
|
97
|
+
return { data: errorData, success: false };
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
function processArticles(articleXmls, outputPath, mode, outputFileName, verbose = false) {
|
|
101
|
+
const parsedArticles = [];
|
|
102
|
+
let successCount = 0;
|
|
103
|
+
let failCount = 0;
|
|
104
|
+
if (verbose)
|
|
105
|
+
console.log(`Processing ${articleXmls.length} articles...`);
|
|
106
|
+
for (const xml of articleXmls) {
|
|
107
|
+
const result = parseSinglePubmedArticle(xml, verbose);
|
|
108
|
+
parsedArticles.push(result.data);
|
|
109
|
+
if (result.success) {
|
|
110
|
+
successCount++;
|
|
111
|
+
}
|
|
112
|
+
else {
|
|
113
|
+
failCount++;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
ensureDir(outputPath);
|
|
117
|
+
if (mode === 'single') {
|
|
118
|
+
const fileName = outputFileName || 'pubmed_articles.md';
|
|
119
|
+
const filePath = join(outputPath, fileName);
|
|
120
|
+
const allMarkdown = parsedArticles
|
|
121
|
+
.map(data => generateArticleMarkdown(data))
|
|
122
|
+
.join('\n\n---\n\n');
|
|
123
|
+
writeFileSync(filePath, allMarkdown, 'utf-8');
|
|
124
|
+
if (verbose)
|
|
125
|
+
console.log(`Wrote single file: ${filePath}`);
|
|
126
|
+
return {
|
|
127
|
+
filePath,
|
|
128
|
+
stats: { total: articleXmls.length, successful: successCount, failed: failCount }
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
const dirPath = join(outputPath, outputFileName || 'pubmed_articles');
|
|
133
|
+
ensureDir(dirPath);
|
|
134
|
+
parsedArticles.forEach(data => {
|
|
135
|
+
const fileName = `pubmed_${data.PMID}.md`;
|
|
136
|
+
const filePath = join(dirPath, fileName);
|
|
137
|
+
writeFileSync(filePath, generateArticleMarkdown(data), 'utf-8');
|
|
138
|
+
});
|
|
139
|
+
if (verbose)
|
|
140
|
+
console.log(`Wrote ${parsedArticles.length} files to: ${dirPath}`);
|
|
141
|
+
return {
|
|
142
|
+
filePath: dirPath,
|
|
143
|
+
stats: { total: articleXmls.length, successful: successCount, failed: failCount }
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
export const parse_pubmed_articleSet = tool({
|
|
148
|
+
description: 'Parse PubMed XML file and convert to markdown format. Supports both .xml and .xml.gz files. Can output all articles in a single file or as individual markdown files.',
|
|
149
|
+
args: {
|
|
150
|
+
filePath: z.string()
|
|
151
|
+
.describe('Path to XML PubMed file (.xml or .xml.gz)'),
|
|
152
|
+
outputMode: z.enum(['single', 'individual'])
|
|
153
|
+
.default('single')
|
|
154
|
+
.describe('Output mode: single file or individual files per article'),
|
|
155
|
+
outputFileName: z.string()
|
|
156
|
+
.optional()
|
|
157
|
+
.describe('Custom output file/directory name (default: pubmed_articles)'),
|
|
158
|
+
outputDir: z.string()
|
|
159
|
+
.optional()
|
|
160
|
+
.describe('Custom output directory (default: ./tmp/opencode/<sessionId>/)'),
|
|
161
|
+
verbose: z.boolean()
|
|
162
|
+
.default(false)
|
|
163
|
+
.describe('Enable verbose logging for debugging')
|
|
164
|
+
},
|
|
165
|
+
execute: async (args, context) => {
|
|
166
|
+
try {
|
|
167
|
+
const { filePath, outputMode = 'single', outputFileName, outputDir, verbose = false } = args;
|
|
168
|
+
if (verbose)
|
|
169
|
+
console.log('Starting PubMed XML parsing...');
|
|
170
|
+
const resolvedPath = resolvePath(outputDir || './tmp/opencode', context.directory);
|
|
171
|
+
ensureDir(resolvedPath);
|
|
172
|
+
if (verbose)
|
|
173
|
+
console.log(`Output directory: ${resolvedPath}`);
|
|
174
|
+
if (verbose)
|
|
175
|
+
console.log(`Reading file: ${filePath}`);
|
|
176
|
+
const xmlContent = readFile(filePath);
|
|
177
|
+
if (verbose)
|
|
178
|
+
console.log(`Extracting articles from XML...`);
|
|
179
|
+
const articleXmls = extractPubmedArticles(xmlContent);
|
|
180
|
+
if (verbose)
|
|
181
|
+
console.log(`Found ${articleXmls.length} articles`);
|
|
182
|
+
const result = processArticles(articleXmls, resolvedPath, outputMode, outputFileName, verbose);
|
|
183
|
+
if (verbose) {
|
|
184
|
+
console.log(`Processing complete:`);
|
|
185
|
+
console.log(` Total articles: ${result.stats.total}`);
|
|
186
|
+
console.log(` Successful: ${result.stats.successful}`);
|
|
187
|
+
console.log(` Failed: ${result.stats.failed}`);
|
|
188
|
+
}
|
|
189
|
+
return JSON.stringify({
|
|
190
|
+
success: true,
|
|
191
|
+
filePath: result.filePath,
|
|
192
|
+
stats: result.stats,
|
|
193
|
+
message: `Successfully processed ${result.stats.successful}/${result.stats.total} articles to ${result.filePath}`
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
catch (error) {
|
|
197
|
+
return formatError(error);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
export interface PubmedArticle {
|
|
2
|
+
MedlineCitation: MedlineCitation;
|
|
3
|
+
PubmedData: PubmedData;
|
|
4
|
+
'@_Status'?: string;
|
|
5
|
+
}
|
|
6
|
+
export interface MedlineCitation {
|
|
7
|
+
PMID: any;
|
|
8
|
+
Article: Article;
|
|
9
|
+
MedlineJournalInfo: MedlineJournalInfo;
|
|
10
|
+
KeywordList?: KeywordList;
|
|
11
|
+
[key: string]: any;
|
|
12
|
+
}
|
|
13
|
+
export interface Article {
|
|
14
|
+
ArticleTitle: any;
|
|
15
|
+
Journal: Journal;
|
|
16
|
+
AuthorList?: AuthorList;
|
|
17
|
+
Abstract?: Abstract;
|
|
18
|
+
Pagination?: Pagination;
|
|
19
|
+
[key: string]: any;
|
|
20
|
+
}
|
|
21
|
+
export interface PubmedData {
|
|
22
|
+
ArticleIdList: ArticleIdList;
|
|
23
|
+
History?: History;
|
|
24
|
+
PublicationStatus?: string;
|
|
25
|
+
[key: string]: any;
|
|
26
|
+
}
|
|
27
|
+
export interface ArticleIdList {
|
|
28
|
+
ArticleId: ArticleId | ArticleId[];
|
|
29
|
+
}
|
|
30
|
+
export interface ArticleId {
|
|
31
|
+
'@_IdType': string;
|
|
32
|
+
'#text'?: string;
|
|
33
|
+
}
|
|
34
|
+
export interface AuthorList {
|
|
35
|
+
Author: Author | Author[];
|
|
36
|
+
'@_CompleteYN'?: string;
|
|
37
|
+
}
|
|
38
|
+
export interface Author {
|
|
39
|
+
LastName?: any;
|
|
40
|
+
ForeName?: any;
|
|
41
|
+
Initials?: any;
|
|
42
|
+
AffiliationInfo?: AffiliationInfo | AffiliationInfo[];
|
|
43
|
+
}
|
|
44
|
+
export interface AffiliationInfo {
|
|
45
|
+
Affiliation?: any;
|
|
46
|
+
}
|
|
47
|
+
export interface Journal {
|
|
48
|
+
JournalIssue: JournalIssue;
|
|
49
|
+
Title: any;
|
|
50
|
+
ISOAbbreviation?: any;
|
|
51
|
+
}
|
|
52
|
+
export interface JournalIssue {
|
|
53
|
+
PubDate: PubDate;
|
|
54
|
+
Volume?: any;
|
|
55
|
+
Issue?: any;
|
|
56
|
+
}
|
|
57
|
+
export interface PubDate {
|
|
58
|
+
Year: any;
|
|
59
|
+
Month?: any;
|
|
60
|
+
Day?: any;
|
|
61
|
+
}
|
|
62
|
+
export interface Abstract {
|
|
63
|
+
AbstractText: any | any[];
|
|
64
|
+
CopyrightInformation?: any;
|
|
65
|
+
}
|
|
66
|
+
export interface KeywordList {
|
|
67
|
+
Keyword: any | any[];
|
|
68
|
+
'@_Owner'?: string;
|
|
69
|
+
}
|
|
70
|
+
export interface History {
|
|
71
|
+
PubMedPubDate: PubMedPubDate | PubMedPubDate[];
|
|
72
|
+
}
|
|
73
|
+
export interface PubMedPubDate {
|
|
74
|
+
'@_PubStatus': string;
|
|
75
|
+
Year?: any;
|
|
76
|
+
Month?: any;
|
|
77
|
+
Day?: any;
|
|
78
|
+
}
|
|
79
|
+
export interface MedlineJournalInfo {
|
|
80
|
+
Country: any;
|
|
81
|
+
MedlineTA: any;
|
|
82
|
+
NlmUniqueID: any;
|
|
83
|
+
ISSNLinking?: any;
|
|
84
|
+
}
|
|
85
|
+
export interface Pagination {
|
|
86
|
+
MedlinePgn?: any;
|
|
87
|
+
}
|
|
88
|
+
export interface ParsedArticle {
|
|
89
|
+
PMID: string;
|
|
90
|
+
title: string;
|
|
91
|
+
authors: string[];
|
|
92
|
+
journal: string;
|
|
93
|
+
doi: string | null;
|
|
94
|
+
abstract: string | null;
|
|
95
|
+
keywords: string[];
|
|
96
|
+
publicationDate?: string;
|
|
97
|
+
error?: string;
|
|
98
|
+
}
|
|
99
|
+
export interface ProcessingStats {
|
|
100
|
+
total: number;
|
|
101
|
+
successful: number;
|
|
102
|
+
failed: number;
|
|
103
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { ParsedArticle, PubmedArticle } from './types';
|
|
2
|
+
export declare function getText(obj: any): string | null;
|
|
3
|
+
export declare function toArray<T>(obj: T | T[]): T[];
|
|
4
|
+
export declare function extractPMID(article: PubmedArticle): string;
|
|
5
|
+
export declare function extractTitle(article: PubmedArticle): string;
|
|
6
|
+
export declare function extractAuthors(article: PubmedArticle): string[];
|
|
7
|
+
export declare function extractJournalInfo(article: PubmedArticle): string;
|
|
8
|
+
export declare function extractDOI(article: PubmedArticle): string | null;
|
|
9
|
+
export declare function extractAbstract(article: PubmedArticle): string | null;
|
|
10
|
+
export declare function extractKeywords(article: PubmedArticle): string[];
|
|
11
|
+
export declare function extractPublicationDate(article: PubmedArticle): string | undefined;
|
|
12
|
+
export declare function extractAllFields(article: PubmedArticle): ParsedArticle;
|
|
13
|
+
export declare function generateArticleMarkdown(data: ParsedArticle): string;
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
export function getText(obj) {
|
|
2
|
+
if (obj === null || obj === undefined)
|
|
3
|
+
return null;
|
|
4
|
+
if (typeof obj === 'string')
|
|
5
|
+
return obj.trim();
|
|
6
|
+
if (typeof obj === 'object' && '#text' in obj) {
|
|
7
|
+
return String(obj['#text']).trim();
|
|
8
|
+
}
|
|
9
|
+
return null;
|
|
10
|
+
}
|
|
11
|
+
export function toArray(obj) {
|
|
12
|
+
if (!obj)
|
|
13
|
+
return [];
|
|
14
|
+
return Array.isArray(obj) ? obj : [obj];
|
|
15
|
+
}
|
|
16
|
+
/**
 * Pull the PMID out of a parsed PubmedArticle.
 * @returns The PMID text, or 'Not available' when the citation lacks one.
 */
export function extractPMID(article) {
    const citation = article.MedlineCitation;
    const pmidText = citation ? getText(citation.PMID) : null;
    return pmidText || 'Not available';
}
|
|
21
|
+
/**
 * Pull the article title out of a parsed PubmedArticle.
 * @returns The title text, or 'Not available' when missing.
 */
export function extractTitle(article) {
    const articleNode = article.MedlineCitation?.Article;
    const titleText = articleNode ? getText(articleNode.ArticleTitle) : null;
    return titleText || 'Not available';
}
|
|
26
|
+
/**
 * Build a citation-style author list ("Last, Initials") from the article.
 *
 * Handles individual authors (LastName/ForeName/Initials) and group
 * authors, which the PubMed DTD represents with <CollectiveName> instead
 * of personal-name fields — those previously rendered as empty strings.
 *
 * @param article Parsed PubmedArticle record.
 * @returns Formatted author names, or ['Not available'] when the article
 *          has no AuthorList.
 */
export function extractAuthors(article) {
    const authorList = article.MedlineCitation?.Article?.AuthorList;
    if (!authorList)
        return ['Not available'];
    const authors = toArray(authorList.Author);
    return authors.map(author => {
        // Group/consortium authors use CollectiveName (PubMed DTD).
        const collective = getText(author.CollectiveName);
        if (collective)
            return collective;
        const lastName = getText(author.LastName) || '';
        const foreName = getText(author.ForeName) || '';
        const initials = getText(author.Initials) || '';
        if (initials)
            return `${lastName}, ${initials}`;
        if (foreName)
            return `${lastName}, ${foreName.charAt(0)}.`;
        return lastName;
    });
}
|
|
45
|
+
/**
 * Compose a one-line journal citation: "Title, Year;Volume(Issue):Pages".
 * Parts that are missing from the record are simply omitted.
 * @returns The citation string, or 'Not available' when there is no Journal node.
 */
export function extractJournalInfo(article) {
    const journal = article.MedlineCitation?.Article?.Journal;
    if (!journal)
        return 'Not available';
    const issueNode = journal.JournalIssue;
    const name = getText(journal.Title) || 'Unknown Journal';
    const year = getText(issueNode?.PubDate?.Year) || '';
    const volume = getText(issueNode?.Volume) || '';
    const issue = getText(issueNode?.Issue) || '';
    const pages = getText(article.MedlineCitation?.Article?.Pagination?.MedlinePgn) || '';
    const pieces = [name];
    if (year)
        pieces.push(`, ${year}`);
    if (volume || issue)
        pieces.push(`;${volume}${issue ? `(${issue})` : ''}`);
    if (pages)
        pieces.push(`:${pages}`);
    return pieces.join('');
}
|
|
70
|
+
/**
 * Find the DOI entry in PubmedData's ArticleIdList.
 * @returns The DOI text, or null when no id with @_IdType 'doi' has text.
 */
export function extractDOI(article) {
    const idList = article.PubmedData?.ArticleIdList;
    if (!idList)
        return null;
    for (const id of toArray(idList.ArticleId)) {
        if (id['@_IdType'] === 'doi')
            return getText(id) || null;
    }
    return null;
}
|
|
78
|
+
/**
 * Merge the article's abstract sections into a single space-joined string.
 * (Structured abstracts arrive as multiple AbstractText nodes.)
 * @returns The merged abstract, or null when no non-empty text exists.
 */
export function extractAbstract(article) {
    const abstract = article.MedlineCitation?.Article?.Abstract;
    if (!abstract)
        return null;
    const sections = toArray(abstract.AbstractText);
    if (sections.length === 0)
        return null;
    const parts = [];
    for (const section of sections) {
        const text = getText(section);
        if (text)
            parts.push(text);
    }
    return parts.length > 0 ? parts.join(' ') : null;
}
|
|
91
|
+
/**
 * Collect the citation's keywords.
 *
 * An article may carry several <KeywordList> elements (one per Owner in
 * the PubMed DTD); fast-xml-parser then yields an array here, and the old
 * direct `.Keyword` access silently dropped every keyword in that case.
 *
 * @param article Parsed PubmedArticle record.
 * @returns Non-empty keyword strings; [] when the citation has none.
 */
export function extractKeywords(article) {
    const keywordList = article.MedlineCitation?.KeywordList;
    if (!keywordList)
        return [];
    return toArray(keywordList)
        .flatMap(list => toArray(list.Keyword))
        .map(kw => getText(kw))
        .filter((kw) => kw !== null && kw.length > 0);
}
|
|
100
|
+
/**
 * Read the publication date from PubmedData's History, preferring the
 * 'pubmed' or 'medline' status entry.
 *
 * Joins only the parts that exist; the old string concatenation produced
 * a leading space when Year was absent (e.g. " Jan 5").
 *
 * @param article Parsed PubmedArticle record.
 * @returns "Year Month Day" (with missing parts omitted), or undefined.
 */
export function extractPublicationDate(article) {
    const history = article.PubmedData?.History;
    if (!history)
        return undefined;
    const entry = toArray(history.PubMedPubDate)
        .find(p => p['@_PubStatus'] === 'pubmed' || p['@_PubStatus'] === 'medline');
    if (!entry)
        return undefined;
    const parts = [getText(entry.Year), getText(entry.Month), getText(entry.Day)]
        .filter(part => part !== null && part !== '');
    return parts.length > 0 ? parts.join(' ') : undefined;
}
|
|
120
|
+
/**
 * Run every field extractor and assemble the complete ParsedArticle record.
 * @param article Parsed PubmedArticle record.
 * @returns The assembled ParsedArticle (no `error` field is set here).
 */
export function extractAllFields(article) {
    const parsed = {
        PMID: extractPMID(article),
        title: extractTitle(article),
        authors: extractAuthors(article),
        journal: extractJournalInfo(article),
        doi: extractDOI(article),
        abstract: extractAbstract(article),
        keywords: extractKeywords(article),
        publicationDate: extractPublicationDate(article)
    };
    return parsed;
}
|
|
132
|
+
/**
 * Render a ParsedArticle as a Markdown document.
 * Optional sections (error, publication date, DOI, abstract, keywords)
 * appear only when the corresponding field is present/non-empty.
 * @param data The parsed article record to render.
 * @returns The Markdown string.
 */
export function generateArticleMarkdown(data) {
    const chunks = ['# PubMed Article\n\n'];
    const addSection = (heading, body) => {
        chunks.push(`## ${heading}\n${body}\n\n`);
    };
    if (data.error) {
        addSection('❌ Error', data.error);
    }
    addSection('PMID', data.PMID);
    addSection('Title', data.title);
    if (data.publicationDate) {
        addSection('Publication Date', data.publicationDate);
    }
    const authorBlock = data.authors.map(author => `- ${author}\n`).join('');
    chunks.push(`## Authors\n${authorBlock}\n`);
    addSection('Journal', data.journal);
    if (data.doi) {
        addSection('DOI', data.doi);
    }
    if (data.abstract) {
        addSection('Abstract', data.abstract);
    }
    if (data.keywords.length > 0) {
        // NOTE: keywords end with a single trailing newline, matching the
        // original output exactly.
        chunks.push(`## Keywords\n${data.keywords.join(', ')}\n`);
    }
    return chunks.join('');
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yeyuan98/opencode-bioresearcher-plugin",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "OpenCode plugin that adds a bioresearcher agent",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
],
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@opencode-ai/plugin": "^1.0.0",
|
|
34
|
+
"fast-xml-parser": "^5.3.5",
|
|
34
35
|
"xlsx": "^0.18.5",
|
|
35
36
|
"zod": "^4.1.8"
|
|
36
37
|
},
|