@yeyuan98/opencode-bioresearcher-plugin 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -7,6 +7,7 @@ export declare const parse_pubmed_articleSet: {
|
|
|
7
7
|
outputMode: z.ZodDefault<z.ZodEnum<{
|
|
8
8
|
single: "single";
|
|
9
9
|
individual: "individual";
|
|
10
|
+
excel: "excel";
|
|
10
11
|
}>>;
|
|
11
12
|
outputFileName: z.ZodOptional<z.ZodString>;
|
|
12
13
|
outputDir: z.ZodOptional<z.ZodString>;
|
|
@@ -14,7 +15,7 @@ export declare const parse_pubmed_articleSet: {
|
|
|
14
15
|
};
|
|
15
16
|
execute(args: {
|
|
16
17
|
filePath: string;
|
|
17
|
-
outputMode: "single" | "individual";
|
|
18
|
+
outputMode: "single" | "individual" | "excel";
|
|
18
19
|
verbose: boolean;
|
|
19
20
|
outputFileName?: string | undefined;
|
|
20
21
|
outputDir?: string | undefined;
|
|
@@ -4,7 +4,7 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
|
|
|
4
4
|
import { gunzipSync } from 'zlib';
|
|
5
5
|
import { join, resolve } from 'path';
|
|
6
6
|
import { z } from 'zod';
|
|
7
|
-
import { toArray, extractAllFields, generateArticleMarkdown } from './utils.js';
|
|
7
|
+
import { toArray, extractAllFields, generateArticleMarkdown, generateExcelFile } from './utils.js';
|
|
8
8
|
function formatError(error) {
|
|
9
9
|
const message = error instanceof Error ? error.message : String(error);
|
|
10
10
|
return JSON.stringify({ error: message }, null, 2);
|
|
@@ -114,7 +114,18 @@ function processArticles(articleXmls, outputPath, mode, outputFileName, verbose
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
ensureDir(outputPath);
|
|
117
|
-
if (mode === 'single') {
|
|
117
|
+
if (mode === 'excel') {
|
|
118
|
+
const fileName = outputFileName || 'pubmed_articles.xlsx';
|
|
119
|
+
const filePath = join(outputPath, fileName);
|
|
120
|
+
generateExcelFile(parsedArticles, filePath);
|
|
121
|
+
if (verbose)
|
|
122
|
+
console.log(`Wrote Excel file: ${filePath}`);
|
|
123
|
+
return {
|
|
124
|
+
filePath,
|
|
125
|
+
stats: { total: articleXmls.length, successful: successCount, failed: failCount }
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
else if (mode === 'single') {
|
|
118
129
|
const fileName = outputFileName || 'pubmed_articles.md';
|
|
119
130
|
const filePath = join(outputPath, fileName);
|
|
120
131
|
const allMarkdown = parsedArticles
|
|
@@ -145,13 +156,13 @@ function processArticles(articleXmls, outputPath, mode, outputFileName, verbose
|
|
|
145
156
|
}
|
|
146
157
|
}
|
|
147
158
|
export const parse_pubmed_articleSet = tool({
|
|
148
|
-
description: 'Parse PubMed XML file and convert to markdown format. Supports both .xml and .xml.gz files.
|
|
159
|
+
description: 'Parse PubMed XML file and convert to markdown or Excel format. Supports both .xml and .xml.gz files. Output modes: single markdown file, individual markdown files, or single Excel file.',
|
|
149
160
|
args: {
|
|
150
161
|
filePath: z.string()
|
|
151
162
|
.describe('Path to XML PubMed file (.xml or .xml.gz)'),
|
|
152
|
-
outputMode: z.enum(['single', 'individual'])
|
|
163
|
+
outputMode: z.enum(['single', 'individual', 'excel'])
|
|
153
164
|
.default('single')
|
|
154
|
-
.describe('Output mode: single file
|
|
165
|
+
.describe('Output mode: single markdown file, individual markdown files, or single Excel file'),
|
|
155
166
|
outputFileName: z.string()
|
|
156
167
|
.optional()
|
|
157
168
|
.describe('Custom output file/directory name (default: pubmed_articles)'),
|
|
@@ -93,7 +93,6 @@ export interface ParsedArticle {
|
|
|
93
93
|
doi: string | null;
|
|
94
94
|
abstract: string | null;
|
|
95
95
|
keywords: string[];
|
|
96
|
-
publicationDate?: string;
|
|
97
96
|
error?: string;
|
|
98
97
|
}
|
|
99
98
|
export interface ProcessingStats {
|
|
@@ -101,3 +100,13 @@ export interface ProcessingStats {
|
|
|
101
100
|
successful: number;
|
|
102
101
|
failed: number;
|
|
103
102
|
}
|
|
103
|
+
export interface ExcelRow {
|
|
104
|
+
PMID: string;
|
|
105
|
+
Title: string;
|
|
106
|
+
Authors: string;
|
|
107
|
+
Journal: string;
|
|
108
|
+
DOI: string;
|
|
109
|
+
Abstract: string;
|
|
110
|
+
Keywords: string;
|
|
111
|
+
Error: string;
|
|
112
|
+
}
|
|
@@ -8,6 +8,6 @@ export declare function extractJournalInfo(article: PubmedArticle): string;
|
|
|
8
8
|
export declare function extractDOI(article: PubmedArticle): string | null;
|
|
9
9
|
export declare function extractAbstract(article: PubmedArticle): string | null;
|
|
10
10
|
export declare function extractKeywords(article: PubmedArticle): string[];
|
|
11
|
-
export declare function extractPublicationDate(article: PubmedArticle): string | undefined;
|
|
12
11
|
export declare function extractAllFields(article: PubmedArticle): ParsedArticle;
|
|
13
12
|
export declare function generateArticleMarkdown(data: ParsedArticle): string;
|
|
13
|
+
export declare function generateExcelFile(articles: ParsedArticle[], outputFilePath: string): void;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import XLSX from 'xlsx';
|
|
1
2
|
export function getText(obj) {
|
|
2
3
|
if (obj === null || obj === undefined)
|
|
3
4
|
return null;
|
|
@@ -52,7 +53,6 @@ export function extractJournalInfo(article) {
|
|
|
52
53
|
const year = getText(pubDate?.Year) || '';
|
|
53
54
|
const volume = getText(journalIssue?.Volume) || '';
|
|
54
55
|
const issue = getText(journalIssue?.Issue) || '';
|
|
55
|
-
const pages = getText(article.MedlineCitation?.Article?.Pagination?.MedlinePgn) || '';
|
|
56
56
|
let info = title;
|
|
57
57
|
if (year)
|
|
58
58
|
info += `, ${year}`;
|
|
@@ -63,8 +63,6 @@ export function extractJournalInfo(article) {
|
|
|
63
63
|
if (issue)
|
|
64
64
|
info += `(${issue})`;
|
|
65
65
|
}
|
|
66
|
-
if (pages)
|
|
67
|
-
info += `:${pages}`;
|
|
68
66
|
return info;
|
|
69
67
|
}
|
|
70
68
|
export function extractDOI(article) {
|
|
@@ -97,26 +95,6 @@ export function extractKeywords(article) {
|
|
|
97
95
|
.map(kw => getText(kw))
|
|
98
96
|
.filter((kw) => kw !== null && kw.length > 0);
|
|
99
97
|
}
|
|
100
|
-
export function extractPublicationDate(article) {
|
|
101
|
-
const history = article.PubmedData?.History;
|
|
102
|
-
if (!history)
|
|
103
|
-
return undefined;
|
|
104
|
-
const pubDates = toArray(history.PubMedPubDate);
|
|
105
|
-
const pubDate = pubDates.find(p => p['@_PubStatus'] === 'pubmed' || p['@_PubStatus'] === 'medline');
|
|
106
|
-
if (!pubDate)
|
|
107
|
-
return undefined;
|
|
108
|
-
const year = getText(pubDate.Year) || '';
|
|
109
|
-
const month = getText(pubDate.Month) || '';
|
|
110
|
-
const day = getText(pubDate.Day) || '';
|
|
111
|
-
let date;
|
|
112
|
-
if (year)
|
|
113
|
-
date = year;
|
|
114
|
-
if (month)
|
|
115
|
-
date = `${date || ''} ${month}`;
|
|
116
|
-
if (day)
|
|
117
|
-
date = `${date || ''} ${day}`;
|
|
118
|
-
return date || undefined;
|
|
119
|
-
}
|
|
120
98
|
export function extractAllFields(article) {
|
|
121
99
|
return {
|
|
122
100
|
PMID: extractPMID(article),
|
|
@@ -125,8 +103,7 @@ export function extractAllFields(article) {
|
|
|
125
103
|
journal: extractJournalInfo(article),
|
|
126
104
|
doi: extractDOI(article),
|
|
127
105
|
abstract: extractAbstract(article),
|
|
128
|
-
keywords: extractKeywords(article),
|
|
129
|
-
publicationDate: extractPublicationDate(article)
|
|
106
|
+
keywords: extractKeywords(article)
|
|
130
107
|
};
|
|
131
108
|
}
|
|
132
109
|
export function generateArticleMarkdown(data) {
|
|
@@ -136,9 +113,6 @@ export function generateArticleMarkdown(data) {
|
|
|
136
113
|
}
|
|
137
114
|
markdown += `## PMID\n${data.PMID}\n\n`;
|
|
138
115
|
markdown += `## Title\n${data.title}\n\n`;
|
|
139
|
-
if (data.publicationDate) {
|
|
140
|
-
markdown += `## Publication Date\n${data.publicationDate}\n\n`;
|
|
141
|
-
}
|
|
142
116
|
markdown += `## Authors\n`;
|
|
143
117
|
data.authors.forEach(author => {
|
|
144
118
|
markdown += `- ${author}\n`;
|
|
@@ -156,3 +130,19 @@ export function generateArticleMarkdown(data) {
|
|
|
156
130
|
}
|
|
157
131
|
return markdown;
|
|
158
132
|
}
|
|
133
|
+
export function generateExcelFile(articles, outputFilePath) {
|
|
134
|
+
const excelRows = articles.map(article => ({
|
|
135
|
+
PMID: article.error ? `[ERROR] ${article.PMID}` : article.PMID,
|
|
136
|
+
Title: article.title,
|
|
137
|
+
Authors: article.authors.join(', '),
|
|
138
|
+
Journal: article.journal,
|
|
139
|
+
DOI: article.doi || '',
|
|
140
|
+
Abstract: article.abstract || '',
|
|
141
|
+
Keywords: article.keywords.join(', '),
|
|
142
|
+
Error: article.error || ''
|
|
143
|
+
}));
|
|
144
|
+
const workbook = XLSX.utils.book_new();
|
|
145
|
+
const worksheet = XLSX.utils.json_to_sheet(excelRows);
|
|
146
|
+
XLSX.utils.book_append_sheet(workbook, worksheet, 'PubMed Articles');
|
|
147
|
+
XLSX.writeFile(workbook, outputFilePath);
|
|
148
|
+
}
|