@yeyuan98/opencode-bioresearcher-plugin 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +14 -0
  2. package/dist/index.js +4 -1
  3. package/dist/misc-tools/index.d.ts +3 -0
  4. package/dist/misc-tools/index.js +3 -0
  5. package/dist/misc-tools/json-extract.d.ts +13 -0
  6. package/dist/misc-tools/json-extract.js +394 -0
  7. package/dist/misc-tools/json-infer.d.ts +13 -0
  8. package/dist/misc-tools/json-infer.js +199 -0
  9. package/dist/misc-tools/json-tools.d.ts +33 -0
  10. package/dist/misc-tools/json-tools.js +187 -0
  11. package/dist/misc-tools/json-validate.d.ts +13 -0
  12. package/dist/misc-tools/json-validate.js +228 -0
  13. package/dist/skills/bioresearcher-core/README.md +210 -0
  14. package/dist/skills/bioresearcher-core/SKILL.md +128 -0
  15. package/dist/skills/bioresearcher-core/examples/contexts.json +29 -0
  16. package/dist/skills/bioresearcher-core/examples/data-exchange-example.md +303 -0
  17. package/dist/skills/bioresearcher-core/examples/template.md +49 -0
  18. package/dist/skills/bioresearcher-core/patterns/calculator.md +215 -0
  19. package/dist/skills/bioresearcher-core/patterns/data-exchange.md +406 -0
  20. package/dist/skills/bioresearcher-core/patterns/json-tools.md +263 -0
  21. package/dist/skills/bioresearcher-core/patterns/progress.md +127 -0
  22. package/dist/skills/bioresearcher-core/patterns/retry.md +110 -0
  23. package/dist/skills/bioresearcher-core/patterns/shell-commands.md +79 -0
  24. package/dist/skills/bioresearcher-core/patterns/subagent-waves.md +186 -0
  25. package/dist/skills/bioresearcher-core/patterns/table-tools.md +260 -0
  26. package/dist/skills/bioresearcher-core/patterns/user-confirmation.md +187 -0
  27. package/dist/skills/bioresearcher-core/python/template.md +273 -0
  28. package/dist/skills/bioresearcher-core/python/template.py +323 -0
  29. package/dist/skills/long-table-summary/SKILL.md +437 -0
  30. package/dist/skills/long-table-summary/combine_outputs.py +336 -0
  31. package/dist/skills/long-table-summary/generate_prompts.py +211 -0
  32. package/dist/skills/long-table-summary/pyproject.toml +8 -0
  33. package/dist/skills/pubmed-weekly/SKILL.md +329 -329
  34. package/dist/skills/pubmed-weekly/pubmed_weekly.py +411 -411
  35. package/dist/skills/pubmed-weekly/pyproject.toml +8 -8
  36. package/package.json +7 -2
@@ -0,0 +1,336 @@
1
+ #!/usr/bin/env python3
2
+ """Combine subagent JSON outputs into a single Excel table."""
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import List, Dict, Any, Tuple
10
+
11
+
12
def read_json_outputs(input_dir: str, verbose: bool = False) -> Dict[str, Any]:
    """Collect the JSON payload embedded in every batch*.md file.

    Args:
        input_dir: Directory containing batch output files
        verbose: Print per-file progress and warnings when True

    Returns:
        ``{"success": True, "summaries": [...]}`` on success, otherwise
        ``{"success": False, "error": "..."}``
    """
    directory = Path(input_dir)

    if not directory.exists():
        return {"success": False, "error": f"Directory not found: {input_dir}"}

    # Batch files are processed in sorted (i.e. batch-number) order.
    files = sorted(directory.glob("batch*.md"))

    if not files:
        return {"success": False, "error": f"No batch files found in {input_dir}"}

    parsed_batches: List[Dict[str, Any]] = []

    for path in files:
        try:
            text = path.read_text(encoding="utf-8").strip()

            # The JSON object is assumed to span from the first '{' to the
            # last '}' in the (markdown) file body.
            start = text.find("{")
            end = text.rfind("}") + 1

            if start == -1 or end == 0:
                if verbose:
                    print(f"Warning: No JSON found in {path.name}")
                continue

            payload = json.loads(text[start:end])
            parsed_batches.append(payload)

            if verbose:
                count = len(payload.get("summaries", []))
                print(f"Parsed: {path.name} - {count} summaries")

        except json.JSONDecodeError as exc:
            # Unparseable file: skip it (best-effort aggregation).
            if verbose:
                print(f"Warning: Failed to parse {path.name}: {exc}")
            continue
        except Exception as exc:
            if verbose:
                print(f"Warning: Error reading {path.name}: {exc}", file=sys.stderr)
            continue

    return {"success": True, "summaries": parsed_batches}
68
+
69
+
70
def merge_summaries(
    summaries: List[Dict[str, Any]],
    deduplicate: bool = False,
    column_order: str = "preserve",
    verbose: bool = False,
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Merge all batch summaries into a unified table structure.

    Args:
        summaries: List of batch JSON objects (each with a "summaries" list)
        deduplicate: Remove duplicate row numbers (keep first occurrence)
        column_order: 'preserve' (order of the first populated row, with any
            remaining columns appended alphabetically) or 'alphabetical'
        verbose: Enable verbose output

    Returns:
        Tuple of (row summaries sorted by row_number, list of column names)
    """
    # Keys that are batch bookkeeping rather than table data.
    meta_keys = {"batch_number", "row_count", "row_number"}

    # Collect the union of data columns across every row of every batch.
    all_columns = set()
    for batch in summaries:
        for row_summary in batch.get("summaries", []):
            all_columns.update(k for k in row_summary if k not in meta_keys)

    # Sort columns based on strategy
    if column_order == "alphabetical":
        columns = sorted(all_columns)
        if verbose:
            print(f"Column order: alphabetical - {columns}")
    else:  # preserve
        # Take the order from the first row that has any data columns.
        # BUG FIX: previously `columns` was left unbound (UnboundLocalError)
        # when no batch had a usable first row, e.g. empty `summaries`.
        columns = []
        for batch in summaries:
            batch_summaries = batch.get("summaries", [])
            if batch_summaries:
                first_row_columns = [
                    k for k in batch_summaries[0] if k not in meta_keys
                ]
                if first_row_columns:
                    columns = first_row_columns
                    break
        if not columns:
            columns = sorted(all_columns)
        # BUG FIX: include columns that appear only in later rows so their
        # data is not silently dropped from the output table.
        columns += sorted(all_columns - set(columns))
        if verbose:
            print(f"Column order: preserve - {columns}")

    # Track duplicate row numbers across batches. NOTE: `seen_rows` records
    # the most recent batch for a row, so "first_batch" reports the previous
    # occurrence (original behavior, kept).
    seen_rows: Dict[Any, Any] = {}
    duplicates = []

    for batch in summaries:
        for row_summary in batch.get("summaries", []):
            row_num = row_summary.get("row_number")

            if row_num in seen_rows:
                duplicates.append(
                    {
                        "row_number": row_num,
                        "first_batch": seen_rows[row_num],
                        "duplicate_batch": batch.get("batch_number"),
                    }
                )
                if verbose:
                    print(
                        f"Duplicate row {row_num}: batch {seen_rows[row_num]} vs {batch.get('batch_number')}"
                    )

            seen_rows[row_num] = batch.get("batch_number")

    if duplicates:
        if verbose:
            print(f"Found {len(duplicates)} duplicate rows")
            for dup in duplicates[:5]:  # Show first 5
                print(
                    f"  Row {dup['row_number']}: batch {dup['first_batch']} vs {dup['duplicate_batch']}"
                )

    # Remove duplicates if requested (first occurrence wins).
    if deduplicate:
        merged_dict: Dict[Any, Dict[str, Any]] = {}
        for batch in summaries:
            for row_summary in batch.get("summaries", []):
                row_num = row_summary.get("row_number")
                if row_num not in merged_dict:
                    merged_dict[row_num] = row_summary
        merged = list(merged_dict.values())
        if verbose:
            print(f"Deduplicated to {len(merged)} rows")
    else:
        # Keep all rows (including duplicates).
        merged = [row for batch in summaries for row in batch.get("summaries", [])]
        if verbose:
            print(f"Keeping all rows (including duplicates): {len(merged)}")

    # Sort by row_number; rows missing it sort first via the 0 default.
    merged.sort(key=lambda x: x.get("row_number", 0))

    return merged, columns
181
+
182
+
183
def write_combined_excel(
    merged: List[Dict[str, Any]],
    columns: List[str],
    output_file: str,
    verbose: bool = False,
) -> Dict[str, Any]:
    """Write merged summaries to an Excel workbook.

    Args:
        merged: List of row summaries
        columns: Ordered column names, emitted after the row_number column
        output_file: Path for output Excel file
        verbose: Enable verbose output

    Returns:
        Success/error result dict. Exits the process when openpyxl is missing.
    """
    try:
        import openpyxl
    except ImportError:
        result = {
            "success": False,
            "error": "openpyxl package not installed. Install with: uv add openpyxl",
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)

    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Build the workbook with a single sheet.
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "Combined Summary"

    # Header: explicit row_number column followed by the data columns.
    sheet.append(["row_number"] + columns)

    if verbose:
        print(f"Writing {len(merged)} rows with {len(columns)} columns")

    # Data rows: row_number first, then each column's value (blank if absent).
    for row_data in merged:
        values = [row_data.get("row_number", "")]
        values.extend(row_data.get(col, "") for col in columns)
        sheet.append(values)

    # Save workbook with error handling.
    try:
        workbook.save(destination)
    except Exception as exc:
        result = {"success": False, "error": f"Failed to save Excel file: {exc}"}
        print(json.dumps(result, indent=2))
        return result

    if verbose:
        print(f"Successfully saved to {destination}")

    return {
        "success": True,
        "output_file": str(destination),
        "total_rows": len(merged),
        "columns": columns,
    }
251
+
252
+
253
def main():
    """Main entry point: parse arguments, merge batch outputs, write Excel."""
    parser = argparse.ArgumentParser(
        description="Combine subagent JSON outputs into Excel table"
    )
    parser.add_argument(
        "--input-dir",
        required=True,
        help="Directory containing batch output JSON files",
    )
    parser.add_argument(
        "--output-file", required=True, help="Path for combined Excel output file"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate inputs without writing output file (testing only)",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Enable verbose output for debugging"
    )
    parser.add_argument(
        "--deduplicate",
        action="store_true",
        help="Remove duplicate row numbers (keep first occurrence)",
    )
    parser.add_argument(
        "--column-order",
        default="preserve",
        choices=["preserve", "alphabetical"],
        # BUG FIX: the help text previously marked 'alphabetical' as the
        # default, but the actual default is 'preserve'.
        help="Column order: 'preserve' (default, from first batch) or 'alphabetical'",
    )

    args = parser.parse_args()

    # Read all batch outputs
    result = read_json_outputs(args.input_dir, args.verbose)
    if not result.get("success"):
        print(json.dumps(result, indent=2))
        sys.exit(1)

    summaries = result["summaries"]

    # Batch files existed, but none yielded a parseable JSON object.
    if not summaries:
        result = {
            "success": False,
            "error": "Found batch files but none contained valid JSON",
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Merge into unified structure
    merged, columns = merge_summaries(
        summaries, args.deduplicate, args.column_order, args.verbose
    )

    # Check for empty merge
    if not merged:
        result = {"success": False, "error": "No valid summaries found in batch files"}
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Dry run mode - report what would be written without touching disk.
    if args.dry_run:
        result = {
            "success": True,
            "dry_run": True,
            "total_rows": len(merged),
            "columns": columns,
            "message": "Dry run completed - no files written",
        }
        print(json.dumps(result, indent=2))
        sys.exit(0)

    # Write to Excel
    result = write_combined_excel(merged, columns, args.output_file, args.verbose)

    if not result.get("success"):
        sys.exit(1)
333
+
334
+
335
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env python3
2
+ """Generate subagent prompts from template for batched table processing."""
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def generate_prompts(
    template_path,
    output_dir,
    num_batches,
    sheet_name,
    start_row,
    batch_size,
    file_path,
    instructions,
    dry_run=False,
    verbose=False,
):
    """Generate individual prompt files from template.

    Args:
        template_path: Path to subagent_template.md
        output_dir: Directory for generated prompts
        num_batches: Total number of batches
        sheet_name: Sheet name from Excel file
        start_row: Starting data row (usually 2 to skip header)
        batch_size: Rows per batch
        file_path: Full path to input table file
        instructions: User-provided summarization instructions (JSON string)
        dry_run: Validate without creating files
        verbose: Enable verbose output

    Returns:
        Success result dict; prints an error JSON and exits on failure.
    """
    # Validate template exists
    if not os.path.exists(template_path):
        result = {
            "success": False,
            "error": f"Template file not found: {template_path}",
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Validate input file exists
    if not os.path.exists(file_path):
        result = {"success": False, "error": f"Input file not found: {file_path}"}
        print(json.dumps(result, indent=2))
        sys.exit(1)

    # Read template
    with open(template_path, "r", encoding="utf-8") as f:
        template = f.read()

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Absolute path to input file
    file_path_abs = os.path.abspath(file_path)

    # Escape backticks and dollar signs so the instructions JSON can sit
    # inside a markdown code block without being interpreted.
    instructions_escaped = instructions.replace("`", "\\`").replace("$", "\\$")

    # Topic name comes from the template's parent directory; it anchors the
    # path where subagents are told to write their outputs. Hoisted out of
    # the loop since it is invariant across batches.
    topic = os.path.basename(os.path.dirname(template_path))

    # Generate prompts for each batch
    for batch_num in range(1, num_batches + 1):
        # Inclusive row range covered by this batch.
        row_start = start_row + (batch_num - 1) * batch_size
        row_end = row_start + batch_size - 1

        batch_str = f"{batch_num:03d}"
        output_file = f"./.long-table-summary/{topic}/outputs/batch{batch_str}.md"
        prompt_file = output_path / f"batch{batch_str}.md"

        # Substitute template placeholders.
        content = template.replace("{file_path}", file_path_abs)
        content = content.replace("{sheet_name}", sheet_name)
        content = content.replace("{batch_number}", str(batch_num))
        content = content.replace("{row_start}", str(row_start))
        content = content.replace("{row_end}", str(row_end))
        content = content.replace("{output_file}", output_file)
        content = content.replace("{instructions_json}", instructions_escaped)

        # Dry run mode - skip actual file writes
        if dry_run:
            if verbose:
                # BUG FIX: previously printed the output directory instead of
                # the specific prompt file that would be written.
                print(f"Would write: {prompt_file}")
            continue

        # Write prompt file
        try:
            with open(prompt_file, "w", encoding="utf-8") as f:
                f.write(content)

            if verbose:
                print(f"Created: {prompt_file}")

        except IOError as e:
            result = {
                "success": False,
                "error": f"Failed to write prompt file {batch_str}: {e}",
            }
            print(json.dumps(result, indent=2))
            sys.exit(1)

    return {
        "success": True,
        "num_prompts": num_batches,
        "output_dir": str(output_path),
        "batches": list(range(1, num_batches + 1)),
    }
+ }
116
+
117
+
118
def main():
    """Main entry point: parse CLI arguments, validate them, generate prompts."""
    parser = argparse.ArgumentParser(
        description="Generate subagent prompts from template"
    )
    parser.add_argument(
        "--template", required=True, help="Path to subagent_template.md"
    )
    parser.add_argument(
        "--output-dir", default="./prompts", help="Directory for generated prompts"
    )
    parser.add_argument(
        "--num-batches", type=int, required=True, help="Total number of batches"
    )
    parser.add_argument(
        "--sheet-name", required=True, help="Sheet name from Excel file"
    )
    parser.add_argument(
        "--file-path", required=True, help="Full path to input table file"
    )
    parser.add_argument(
        "--start-row",
        type=int,
        default=2,
        help="Starting data row (default: 2 to skip header)",
    )
    parser.add_argument("--batch-size", type=int, required=True, help="Rows per batch")
    parser.add_argument(
        "--instructions",
        required=True,
        help="User-provided summarization instructions (JSON string)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate without creating files (testing only)",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Enable verbose output for debugging"
    )

    args = parser.parse_args()

    def fail(message):
        # Emit a JSON error result and stop with a non-zero exit code.
        print(json.dumps({"success": False, "error": message}, indent=2))
        sys.exit(1)

    # Instructions must be a parseable JSON document.
    try:
        json.loads(args.instructions)
    except json.JSONDecodeError as e:
        fail(f"Invalid JSON instructions: {e}")

    # Numeric sanity checks.
    if args.num_batches <= 0:
        fail(f"num_batches must be positive (got: {args.num_batches})")
    if args.batch_size <= 0:
        fail(f"batch_size must be positive (got: {args.batch_size})")
    if args.start_row < 1:
        fail(f"start_row must be >= 1 (got: {args.start_row})")

    outcome = generate_prompts(
        template_path=args.template,
        output_dir=args.output_dir,
        num_batches=args.num_batches,
        sheet_name=args.sheet_name,
        start_row=args.start_row,
        batch_size=args.batch_size,
        file_path=args.file_path,
        instructions=args.instructions,
        dry_run=args.dry_run,
        verbose=args.verbose,
    )

    print(json.dumps(outcome, indent=2))
208
+
209
+
210
# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,8 @@
1
# Packaging metadata for the long-table-summary skill scripts.
[project]
name = "long-table-summary"
version = "1.0.0"
description = "Batched table processing with parallel subagents"
requires-python = ">=3.10"
dependencies = [
    # Excel output support (imported lazily by combine_outputs.py).
    "openpyxl>=3.1.0",
]