@yeyuan98/opencode-bioresearcher-plugin 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +14 -0
  2. package/dist/index.js +4 -1
  3. package/dist/misc-tools/index.d.ts +3 -0
  4. package/dist/misc-tools/index.js +3 -0
  5. package/dist/misc-tools/json-extract.d.ts +13 -0
  6. package/dist/misc-tools/json-extract.js +394 -0
  7. package/dist/misc-tools/json-infer.d.ts +13 -0
  8. package/dist/misc-tools/json-infer.js +199 -0
  9. package/dist/misc-tools/json-tools.d.ts +33 -0
  10. package/dist/misc-tools/json-tools.js +187 -0
  11. package/dist/misc-tools/json-validate.d.ts +13 -0
  12. package/dist/misc-tools/json-validate.js +228 -0
  13. package/dist/skills/bioresearcher-core/README.md +210 -0
  14. package/dist/skills/bioresearcher-core/SKILL.md +128 -0
  15. package/dist/skills/bioresearcher-core/examples/contexts.json +29 -0
  16. package/dist/skills/bioresearcher-core/examples/data-exchange-example.md +303 -0
  17. package/dist/skills/bioresearcher-core/examples/template.md +49 -0
  18. package/dist/skills/bioresearcher-core/patterns/calculator.md +215 -0
  19. package/dist/skills/bioresearcher-core/patterns/data-exchange.md +406 -0
  20. package/dist/skills/bioresearcher-core/patterns/json-tools.md +263 -0
  21. package/dist/skills/bioresearcher-core/patterns/progress.md +127 -0
  22. package/dist/skills/bioresearcher-core/patterns/retry.md +110 -0
  23. package/dist/skills/bioresearcher-core/patterns/shell-commands.md +79 -0
  24. package/dist/skills/bioresearcher-core/patterns/subagent-waves.md +186 -0
  25. package/dist/skills/bioresearcher-core/patterns/table-tools.md +260 -0
  26. package/dist/skills/bioresearcher-core/patterns/user-confirmation.md +187 -0
  27. package/dist/skills/bioresearcher-core/python/template.md +273 -0
  28. package/dist/skills/bioresearcher-core/python/template.py +323 -0
  29. package/dist/skills/long-table-summary/SKILL.md +437 -0
  30. package/dist/skills/long-table-summary/combine_outputs.py +336 -0
  31. package/dist/skills/long-table-summary/generate_prompts.py +211 -0
  32. package/dist/skills/long-table-summary/pyproject.toml +8 -0
  33. package/dist/skills/pubmed-weekly/SKILL.md +329 -329
  34. package/dist/skills/pubmed-weekly/pubmed_weekly.py +411 -411
  35. package/dist/skills/pubmed-weekly/pyproject.toml +8 -8
  36. package/package.json +7 -2
@@ -0,0 +1,336 @@
1
+ #!/usr/bin/env python3
2
+ """Combine subagent JSON outputs into a single Excel table."""
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import List, Dict, Any, Tuple
10
+
11
+
12
def read_json_outputs(input_dir: str, verbose: bool = False) -> Dict[str, Any]:
    """Collect the JSON payload embedded in every batch*.md file.

    Args:
        input_dir: Directory containing batch output files
        verbose: Print per-file progress and warnings when True

    Returns:
        ``{"success": True, "summaries": [...]}`` on success, otherwise
        ``{"success": False, "error": "..."}``
    """
    directory = Path(input_dir)

    if not directory.exists():
        return {"success": False, "error": f"Directory not found: {input_dir}"}

    # Batch files are processed in sorted (i.e. batch-number) order.
    files = sorted(directory.glob("batch*.md"))

    if not files:
        return {"success": False, "error": f"No batch files found in {input_dir}"}

    parsed_batches: List[Dict[str, Any]] = []

    for path in files:
        try:
            text = path.read_text(encoding="utf-8").strip()

            # The JSON object is assumed to span from the first '{' to the
            # last '}' in the (markdown) file body.
            start = text.find("{")
            end = text.rfind("}") + 1

            if start == -1 or end == 0:
                if verbose:
                    print(f"Warning: No JSON found in {path.name}")
                continue

            payload = json.loads(text[start:end])
            parsed_batches.append(payload)

            if verbose:
                count = len(payload.get("summaries", []))
                print(f"Parsed: {path.name} - {count} summaries")

        except json.JSONDecodeError as exc:
            # Unparseable file: skip it (best-effort aggregation).
            if verbose:
                print(f"Warning: Failed to parse {path.name}: {exc}")
            continue
        except Exception as exc:
            if verbose:
                print(f"Warning: Error reading {path.name}: {exc}", file=sys.stderr)
            continue

    return {"success": True, "summaries": parsed_batches}
68
+
69
+
70
def merge_summaries(
    summaries: List[Dict[str, Any]],
    deduplicate: bool = False,
    column_order: str = "preserve",
    verbose: bool = False,
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Merge all batch summaries into a unified table structure.

    Args:
        summaries: List of batch JSON objects (each with a "summaries" list)
        deduplicate: Remove duplicate row numbers (keep first occurrence)
        column_order: 'preserve' (order of the first populated row, with any
            remaining columns appended alphabetically) or 'alphabetical'
        verbose: Enable verbose output

    Returns:
        Tuple of (row summaries sorted by row_number, list of column names)
    """
    # Keys that are batch bookkeeping rather than table data.
    meta_keys = {"batch_number", "row_count", "row_number"}

    # Collect the union of data columns across every row of every batch.
    all_columns = set()
    for batch in summaries:
        for row_summary in batch.get("summaries", []):
            all_columns.update(k for k in row_summary if k not in meta_keys)

    # Sort columns based on strategy
    if column_order == "alphabetical":
        columns = sorted(all_columns)
        if verbose:
            print(f"Column order: alphabetical - {columns}")
    else:  # preserve
        # Take the order from the first row that has any data columns.
        # BUG FIX: previously `columns` was left unbound (UnboundLocalError)
        # when no batch had a usable first row, e.g. empty `summaries`.
        columns = []
        for batch in summaries:
            batch_summaries = batch.get("summaries", [])
            if batch_summaries:
                first_row_columns = [
                    k for k in batch_summaries[0] if k not in meta_keys
                ]
                if first_row_columns:
                    columns = first_row_columns
                    break
        if not columns:
            columns = sorted(all_columns)
        # BUG FIX: include columns that appear only in later rows so their
        # data is not silently dropped from the output table.
        columns += sorted(all_columns - set(columns))
        if verbose:
            print(f"Column order: preserve - {columns}")

    # Track duplicate row numbers across batches. NOTE: `seen_rows` records
    # the most recent batch for a row, so "first_batch" reports the previous
    # occurrence (original behavior, kept).
    seen_rows: Dict[Any, Any] = {}
    duplicates = []

    for batch in summaries:
        for row_summary in batch.get("summaries", []):
            row_num = row_summary.get("row_number")

            if row_num in seen_rows:
                duplicates.append(
                    {
                        "row_number": row_num,
                        "first_batch": seen_rows[row_num],
                        "duplicate_batch": batch.get("batch_number"),
                    }
                )
                if verbose:
                    print(
                        f"Duplicate row {row_num}: batch {seen_rows[row_num]} vs {batch.get('batch_number')}"
                    )

            seen_rows[row_num] = batch.get("batch_number")

    if duplicates:
        if verbose:
            print(f"Found {len(duplicates)} duplicate rows")
            for dup in duplicates[:5]:  # Show first 5
                print(
                    f"  Row {dup['row_number']}: batch {dup['first_batch']} vs {dup['duplicate_batch']}"
                )

    # Remove duplicates if requested (first occurrence wins).
    if deduplicate:
        merged_dict: Dict[Any, Dict[str, Any]] = {}
        for batch in summaries:
            for row_summary in batch.get("summaries", []):
                row_num = row_summary.get("row_number")
                if row_num not in merged_dict:
                    merged_dict[row_num] = row_summary
        merged = list(merged_dict.values())
        if verbose:
            print(f"Deduplicated to {len(merged)} rows")
    else:
        # Keep all rows (including duplicates).
        merged = [row for batch in summaries for row in batch.get("summaries", [])]
        if verbose:
            print(f"Keeping all rows (including duplicates): {len(merged)}")

    # Sort by row_number; rows missing it sort first via the 0 default.
    merged.sort(key=lambda x: x.get("row_number", 0))

    return merged, columns
181
+
182
+
183
def write_combined_excel(
    merged: List[Dict[str, Any]],
    columns: List[str],
    output_file: str,
    verbose: bool = False,
) -> Dict[str, Any]:
    """Write merged summaries to an Excel workbook.

    Args:
        merged: List of row summaries
        columns: Ordered column names, emitted after the row_number column
        output_file: Path for output Excel file
        verbose: Enable verbose output

    Returns:
        Success/error result dict. Exits the process when openpyxl is missing.
    """
    try:
        import openpyxl
    except ImportError:
        result = {
            "success": False,
            "error": "openpyxl package not installed. Install with: uv add openpyxl",
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)

    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Build the workbook with a single sheet.
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "Combined Summary"

    # Header: explicit row_number column followed by the data columns.
    sheet.append(["row_number"] + columns)

    if verbose:
        print(f"Writing {len(merged)} rows with {len(columns)} columns")

    # Data rows: row_number first, then each column's value (blank if absent).
    for row_data in merged:
        values = [row_data.get("row_number", "")]
        values.extend(row_data.get(col, "") for col in columns)
        sheet.append(values)

    # Save workbook with error handling.
    try:
        workbook.save(destination)
    except Exception as exc:
        result = {"success": False, "error": f"Failed to save Excel file: {exc}"}
        print(json.dumps(result, indent=2))
        return result

    if verbose:
        print(f"Successfully saved to {destination}")

    return {
        "success": True,
        "output_file": str(destination),
        "total_rows": len(merged),
        "columns": columns,
    }
251
+
252
+
253
def main():
    """Main entry point: parse arguments, merge batch outputs, write Excel."""
    parser = argparse.ArgumentParser(
        description="Combine subagent JSON outputs into Excel table"
    )
    parser.add_argument(
        "--input-dir",
        required=True,
        help="Directory containing batch output JSON files",
    )
    parser.add_argument(
        "--output-file", required=True, help="Path for combined Excel output file"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate inputs without writing output file (testing only)",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Enable verbose output for debugging"
    )
    parser.add_argument(
        "--deduplicate",
        action="store_true",
        help="Remove duplicate row numbers (keep first occurrence)",
    )
    parser.add_argument(
        "--column-order",
        default="preserve",
        choices=["preserve", "alphabetical"],
        # BUG FIX: the help text previously marked 'alphabetical' as the
        # default, but the actual default is 'preserve'.
        help="Column order: 'preserve' (default, from first batch) or 'alphabetical'",
    )

    args = parser.parse_args()

    # Read all batch outputs
    result = read_json_outputs(args.input_dir, args.verbose)
    if not result.get("success"):
        print(json.dumps(result, indent=2))
        sys.exit(1)

    summaries = result["summaries"]

    # Batch files existed, but none yielded a parseable JSON object.
    if not summaries:
        result = {
            "success": False,
            "error": "Found batch files but none contained valid JSON",
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Merge into unified structure
    merged, columns = merge_summaries(
        summaries, args.deduplicate, args.column_order, args.verbose
    )

    # Check for empty merge
    if not merged:
        result = {"success": False, "error": "No valid summaries found in batch files"}
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Dry run mode - report what would be written without touching disk.
    if args.dry_run:
        result = {
            "success": True,
            "dry_run": True,
            "total_rows": len(merged),
            "columns": columns,
            "message": "Dry run completed - no files written",
        }
        print(json.dumps(result, indent=2))
        sys.exit(0)

    # Write to Excel
    result = write_combined_excel(merged, columns, args.output_file, args.verbose)

    if not result.get("success"):
        sys.exit(1)
333
+
334
+
335
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env python3
2
+ """Generate subagent prompts from template for batched table processing."""
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def generate_prompts(
    template_path,
    output_dir,
    num_batches,
    sheet_name,
    start_row,
    batch_size,
    file_path,
    instructions,
    dry_run=False,
    verbose=False,
):
    """Generate individual prompt files from template.

    Args:
        template_path: Path to subagent_template.md
        output_dir: Directory for generated prompts
        num_batches: Total number of batches
        sheet_name: Sheet name from Excel file
        start_row: Starting data row (usually 2 to skip header)
        batch_size: Rows per batch
        file_path: Full path to input table file
        instructions: User-provided summarization instructions (JSON string)
        dry_run: Validate without creating files
        verbose: Enable verbose output

    Returns:
        Success result dict; prints an error JSON and exits on failure.
    """
    # Validate template exists
    if not os.path.exists(template_path):
        result = {
            "success": False,
            "error": f"Template file not found: {template_path}",
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Validate input file exists
    if not os.path.exists(file_path):
        result = {"success": False, "error": f"Input file not found: {file_path}"}
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Read template
    with open(template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Absolute path to input file
    file_path_abs = os.path.abspath(file_path)

    # Escape backticks and dollar signs so the instructions JSON can sit
    # inside a markdown code block without being interpreted.
    instructions_escaped = instructions.replace("`", "\\`").replace("$", "\\$")

    # Topic name comes from the template's parent directory; it anchors the
    # path where subagents are told to write their outputs. Hoisted out of
    # the loop since it is invariant across batches.
    topic = os.path.basename(os.path.dirname(template_path))

    # Generate prompts for each batch
    for batch_num in range(1, num_batches + 1):
        # Inclusive row range covered by this batch.
        row_start = start_row + (batch_num - 1) * batch_size
        row_end = row_start + batch_size - 1

        batch_str = f"{batch_num:03d}"
        output_file = f"./.long-table-summary/{topic}/outputs/batch{batch_str}.md"
        prompt_file = output_path / f"batch{batch_str}.md"

        # Substitute template placeholders.
        content = template.replace("{file_path}", file_path_abs)
        content = content.replace("{sheet_name}", sheet_name)
        content = content.replace("{batch_number}", str(batch_num))
        content = content.replace("{row_start}", str(row_start))
        content = content.replace("{row_end}", str(row_end))
        content = content.replace("{output_file}", output_file)
        content = content.replace("{instructions_json}", instructions_escaped)

        # Dry run mode - skip actual file writes
        if dry_run:
            if verbose:
                # BUG FIX: previously printed the output directory instead of
                # the specific prompt file that would be written.
                print(f"Would write: {prompt_file}")
            continue

        # Write prompt file
        try:
            with open(prompt_file, "w", encoding="utf-8") as f:
                f.write(content)

            if verbose:
                print(f"Created: {prompt_file}")

        except IOError as e:
            result = {
                "success": False,
                "error": f"Failed to write prompt file {batch_str}: {e}",
            }
            print(json.dumps(result, indent=2))
            sys.exit(1)

    return {
        "success": True,
        "num_prompts": num_batches,
        "output_dir": str(output_path),
        "batches": list(range(1, num_batches + 1)),
    }
+ }
116
+
117
+
118
def main():
    """Main entry point: parse CLI arguments, validate them, generate prompts."""
    parser = argparse.ArgumentParser(
        description="Generate subagent prompts from template"
    )
    parser.add_argument(
        "--template", required=True, help="Path to subagent_template.md"
    )
    parser.add_argument(
        "--output-dir", default="./prompts", help="Directory for generated prompts"
    )
    parser.add_argument(
        "--num-batches", type=int, required=True, help="Total number of batches"
    )
    parser.add_argument(
        "--sheet-name", required=True, help="Sheet name from Excel file"
    )
    parser.add_argument(
        "--file-path", required=True, help="Full path to input table file"
    )
    parser.add_argument(
        "--start-row",
        type=int,
        default=2,
        help="Starting data row (default: 2 to skip header)",
    )
    parser.add_argument("--batch-size", type=int, required=True, help="Rows per batch")
    parser.add_argument(
        "--instructions",
        required=True,
        help="User-provided summarization instructions (JSON string)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate without creating files (testing only)",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Enable verbose output for debugging"
    )

    args = parser.parse_args()

    def fail(message):
        # Emit a JSON error result and stop with a non-zero exit code.
        print(json.dumps({"success": False, "error": message}, indent=2))
        sys.exit(1)

    # Instructions must be a parseable JSON document.
    try:
        json.loads(args.instructions)
    except json.JSONDecodeError as e:
        fail(f"Invalid JSON instructions: {e}")

    # Numeric sanity checks.
    if args.num_batches <= 0:
        fail(f"num_batches must be positive (got: {args.num_batches})")
    if args.batch_size <= 0:
        fail(f"batch_size must be positive (got: {args.batch_size})")
    if args.start_row < 1:
        fail(f"start_row must be >= 1 (got: {args.start_row})")

    outcome = generate_prompts(
        template_path=args.template,
        output_dir=args.output_dir,
        num_batches=args.num_batches,
        sheet_name=args.sheet_name,
        start_row=args.start_row,
        batch_size=args.batch_size,
        file_path=args.file_path,
        instructions=args.instructions,
        dry_run=args.dry_run,
        verbose=args.verbose,
    )

    print(json.dumps(outcome, indent=2))
208
+
209
+
210
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,8 @@
1
# Packaging metadata for the long-table-summary skill scripts.
[project]
name = "long-table-summary"
version = "1.0.0"
description = "Batched table processing with parallel subagents"
requires-python = ">=3.10"
dependencies = [
    # Excel output support (imported lazily by combine_outputs.py).
    "openpyxl>=3.1.0",
]