@synsci/cli-darwin-arm64 1.1.71 → 1.1.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/citation-management/SKILL.md +1109 -0
- package/bin/skills/citation-management/assets/bibtex_template.bib +264 -0
- package/bin/skills/citation-management/assets/citation_checklist.md +386 -0
- package/bin/skills/citation-management/references/bibtex_formatting.md +908 -0
- package/bin/skills/citation-management/references/citation_validation.md +794 -0
- package/bin/skills/citation-management/references/google_scholar_search.md +725 -0
- package/bin/skills/citation-management/references/metadata_extraction.md +870 -0
- package/bin/skills/citation-management/references/pubmed_search.md +839 -0
- package/bin/skills/citation-management/scripts/doi_to_bibtex.py +182 -0
- package/bin/skills/citation-management/scripts/extract_metadata.py +570 -0
- package/bin/skills/citation-management/scripts/format_bibtex.py +349 -0
- package/bin/skills/citation-management/scripts/search_google_scholar.py +251 -0
- package/bin/skills/citation-management/scripts/search_pubmed.py +348 -0
- package/bin/skills/citation-management/scripts/validate_citations.py +494 -0
- package/bin/skills/clinical-decision-support/README.md +129 -0
- package/bin/skills/clinical-decision-support/SKILL.md +506 -0
- package/bin/skills/clinical-decision-support/assets/biomarker_report_template.tex +380 -0
- package/bin/skills/clinical-decision-support/assets/clinical_pathway_template.tex +222 -0
- package/bin/skills/clinical-decision-support/assets/cohort_analysis_template.tex +359 -0
- package/bin/skills/clinical-decision-support/assets/color_schemes.tex +149 -0
- package/bin/skills/clinical-decision-support/assets/example_gbm_cohort.md +208 -0
- package/bin/skills/clinical-decision-support/assets/recommendation_strength_guide.md +328 -0
- package/bin/skills/clinical-decision-support/assets/treatment_recommendation_template.tex +529 -0
- package/bin/skills/clinical-decision-support/references/biomarker_classification.md +719 -0
- package/bin/skills/clinical-decision-support/references/clinical_decision_algorithms.md +604 -0
- package/bin/skills/clinical-decision-support/references/evidence_synthesis.md +840 -0
- package/bin/skills/clinical-decision-support/references/outcome_analysis.md +640 -0
- package/bin/skills/clinical-decision-support/references/patient_cohort_analysis.md +427 -0
- package/bin/skills/clinical-decision-support/references/treatment_recommendations.md +521 -0
- package/bin/skills/clinical-decision-support/scripts/biomarker_classifier.py +383 -0
- package/bin/skills/clinical-decision-support/scripts/build_decision_tree.py +417 -0
- package/bin/skills/clinical-decision-support/scripts/create_cohort_tables.py +509 -0
- package/bin/skills/clinical-decision-support/scripts/generate_survival_analysis.py +441 -0
- package/bin/skills/clinical-decision-support/scripts/validate_cds_document.py +326 -0
- package/bin/skills/clinical-reports/IMPLEMENTATION_SUMMARY.md +641 -0
- package/bin/skills/clinical-reports/README.md +236 -0
- package/bin/skills/clinical-reports/SKILL.md +1127 -0
- package/bin/skills/clinical-reports/assets/case_report_template.md +352 -0
- package/bin/skills/clinical-reports/assets/clinical_trial_csr_template.md +353 -0
- package/bin/skills/clinical-reports/assets/clinical_trial_sae_template.md +359 -0
- package/bin/skills/clinical-reports/assets/consult_note_template.md +305 -0
- package/bin/skills/clinical-reports/assets/discharge_summary_template.md +453 -0
- package/bin/skills/clinical-reports/assets/hipaa_compliance_checklist.md +395 -0
- package/bin/skills/clinical-reports/assets/history_physical_template.md +305 -0
- package/bin/skills/clinical-reports/assets/lab_report_template.md +309 -0
- package/bin/skills/clinical-reports/assets/pathology_report_template.md +249 -0
- package/bin/skills/clinical-reports/assets/quality_checklist.md +338 -0
- package/bin/skills/clinical-reports/assets/radiology_report_template.md +318 -0
- package/bin/skills/clinical-reports/assets/soap_note_template.md +253 -0
- package/bin/skills/clinical-reports/references/case_report_guidelines.md +570 -0
- package/bin/skills/clinical-reports/references/clinical_trial_reporting.md +693 -0
- package/bin/skills/clinical-reports/references/data_presentation.md +530 -0
- package/bin/skills/clinical-reports/references/diagnostic_reports_standards.md +629 -0
- package/bin/skills/clinical-reports/references/medical_terminology.md +588 -0
- package/bin/skills/clinical-reports/references/patient_documentation.md +744 -0
- package/bin/skills/clinical-reports/references/peer_review_standards.md +585 -0
- package/bin/skills/clinical-reports/references/regulatory_compliance.md +577 -0
- package/bin/skills/clinical-reports/scripts/check_deidentification.py +332 -0
- package/bin/skills/clinical-reports/scripts/compliance_checker.py +78 -0
- package/bin/skills/clinical-reports/scripts/extract_clinical_data.py +97 -0
- package/bin/skills/clinical-reports/scripts/format_adverse_events.py +97 -0
- package/bin/skills/clinical-reports/scripts/generate_report_template.py +149 -0
- package/bin/skills/clinical-reports/scripts/terminology_validator.py +126 -0
- package/bin/skills/clinical-reports/scripts/validate_case_report.py +323 -0
- package/bin/skills/clinical-reports/scripts/validate_trial_report.py +88 -0
- package/bin/skills/fireworks-ai/SKILL.md +665 -0
- package/bin/skills/generate-image/SKILL.md +178 -0
- package/bin/skills/generate-image/scripts/generate_image.py +254 -0
- package/bin/skills/groq/SKILL.md +347 -0
- package/bin/skills/hypothesis-generation/SKILL.md +293 -0
- package/bin/skills/hypothesis-generation/assets/FORMATTING_GUIDE.md +672 -0
- package/bin/skills/hypothesis-generation/assets/hypothesis_generation.sty +307 -0
- package/bin/skills/hypothesis-generation/assets/hypothesis_report_template.tex +572 -0
- package/bin/skills/hypothesis-generation/references/experimental_design_patterns.md +329 -0
- package/bin/skills/hypothesis-generation/references/hypothesis_quality_criteria.md +198 -0
- package/bin/skills/hypothesis-generation/references/literature_search_strategies.md +622 -0
- package/bin/skills/latex-posters/README.md +417 -0
- package/bin/skills/latex-posters/SKILL.md +1602 -0
- package/bin/skills/latex-posters/assets/baposter_template.tex +257 -0
- package/bin/skills/latex-posters/assets/beamerposter_template.tex +244 -0
- package/bin/skills/latex-posters/assets/poster_quality_checklist.md +358 -0
- package/bin/skills/latex-posters/assets/tikzposter_template.tex +251 -0
- package/bin/skills/latex-posters/references/latex_poster_packages.md +745 -0
- package/bin/skills/latex-posters/references/poster_content_guide.md +748 -0
- package/bin/skills/latex-posters/references/poster_design_principles.md +806 -0
- package/bin/skills/latex-posters/references/poster_layout_design.md +900 -0
- package/bin/skills/latex-posters/scripts/review_poster.sh +214 -0
- package/bin/skills/literature-review/SKILL.md +641 -0
- package/bin/skills/literature-review/assets/review_template.md +412 -0
- package/bin/skills/literature-review/references/citation_styles.md +166 -0
- package/bin/skills/literature-review/references/database_strategies.md +455 -0
- package/bin/skills/literature-review/scripts/generate_pdf.py +184 -0
- package/bin/skills/literature-review/scripts/search_databases.py +310 -0
- package/bin/skills/literature-review/scripts/verify_citations.py +218 -0
- package/bin/skills/market-research-reports/SKILL.md +904 -0
- package/bin/skills/market-research-reports/assets/FORMATTING_GUIDE.md +428 -0
- package/bin/skills/market-research-reports/assets/market_report_template.tex +1380 -0
- package/bin/skills/market-research-reports/assets/market_research.sty +564 -0
- package/bin/skills/market-research-reports/references/data_analysis_patterns.md +548 -0
- package/bin/skills/market-research-reports/references/report_structure_guide.md +999 -0
- package/bin/skills/market-research-reports/references/visual_generation_guide.md +1077 -0
- package/bin/skills/market-research-reports/scripts/generate_market_visuals.py +472 -0
- package/bin/skills/markitdown/INSTALLATION_GUIDE.md +318 -0
- package/bin/skills/markitdown/LICENSE.txt +22 -0
- package/bin/skills/markitdown/OPENROUTER_INTEGRATION.md +359 -0
- package/bin/skills/markitdown/QUICK_REFERENCE.md +309 -0
- package/bin/skills/markitdown/README.md +184 -0
- package/bin/skills/markitdown/SKILL.md +486 -0
- package/bin/skills/markitdown/SKILL_SUMMARY.md +307 -0
- package/bin/skills/markitdown/assets/example_usage.md +463 -0
- package/bin/skills/markitdown/references/api_reference.md +399 -0
- package/bin/skills/markitdown/references/file_formats.md +542 -0
- package/bin/skills/markitdown/scripts/batch_convert.py +195 -0
- package/bin/skills/markitdown/scripts/convert_literature.py +262 -0
- package/bin/skills/markitdown/scripts/convert_with_ai.py +224 -0
- package/bin/skills/ml-paper-writing/SKILL.md +937 -0
- package/bin/skills/ml-paper-writing/references/checklists.md +361 -0
- package/bin/skills/ml-paper-writing/references/citation-workflow.md +562 -0
- package/bin/skills/ml-paper-writing/references/reviewer-guidelines.md +367 -0
- package/bin/skills/ml-paper-writing/references/sources.md +159 -0
- package/bin/skills/ml-paper-writing/references/writing-guide.md +476 -0
- package/bin/skills/ml-paper-writing/templates/README.md +251 -0
- package/bin/skills/ml-paper-writing/templates/aaai2026/README.md +534 -0
- package/bin/skills/ml-paper-writing/templates/aaai2026/aaai2026-unified-supp.tex +144 -0
- package/bin/skills/ml-paper-writing/templates/aaai2026/aaai2026-unified-template.tex +952 -0
- package/bin/skills/ml-paper-writing/templates/aaai2026/aaai2026.bib +111 -0
- package/bin/skills/ml-paper-writing/templates/aaai2026/aaai2026.bst +1493 -0
- package/bin/skills/ml-paper-writing/templates/aaai2026/aaai2026.sty +315 -0
- package/bin/skills/ml-paper-writing/templates/acl/README.md +50 -0
- package/bin/skills/ml-paper-writing/templates/acl/acl.sty +312 -0
- package/bin/skills/ml-paper-writing/templates/acl/acl_latex.tex +377 -0
- package/bin/skills/ml-paper-writing/templates/acl/acl_lualatex.tex +101 -0
- package/bin/skills/ml-paper-writing/templates/acl/acl_natbib.bst +1940 -0
- package/bin/skills/ml-paper-writing/templates/acl/anthology.bib.txt +26 -0
- package/bin/skills/ml-paper-writing/templates/acl/custom.bib +70 -0
- package/bin/skills/ml-paper-writing/templates/acl/formatting.md +326 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/README.md +3 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/colm2025_conference.bib +11 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/colm2025_conference.bst +1440 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/colm2025_conference.pdf +0 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/colm2025_conference.sty +218 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/colm2025_conference.tex +305 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/fancyhdr.sty +485 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/math_commands.tex +508 -0
- package/bin/skills/ml-paper-writing/templates/colm2025/natbib.sty +1246 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/fancyhdr.sty +485 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.bib +24 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.bst +1440 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.pdf +0 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.sty +246 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/iclr2026_conference.tex +414 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/math_commands.tex +508 -0
- package/bin/skills/ml-paper-writing/templates/iclr2026/natbib.sty +1246 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/algorithm.sty +79 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/algorithmic.sty +201 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/example_paper.bib +75 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/example_paper.pdf +0 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/example_paper.tex +662 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/fancyhdr.sty +864 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/icml2026.bst +1443 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/icml2026.sty +767 -0
- package/bin/skills/ml-paper-writing/templates/icml2026/icml_numpapers.pdf +0 -0
- package/bin/skills/ml-paper-writing/templates/neurips2025/Makefile +36 -0
- package/bin/skills/ml-paper-writing/templates/neurips2025/extra_pkgs.tex +53 -0
- package/bin/skills/ml-paper-writing/templates/neurips2025/main.tex +38 -0
- package/bin/skills/ml-paper-writing/templates/neurips2025/neurips.sty +382 -0
- package/bin/skills/paper-2-web/SKILL.md +491 -0
- package/bin/skills/paper-2-web/references/installation.md +141 -0
- package/bin/skills/paper-2-web/references/paper2poster.md +346 -0
- package/bin/skills/paper-2-web/references/paper2video.md +305 -0
- package/bin/skills/paper-2-web/references/paper2web.md +187 -0
- package/bin/skills/paper-2-web/references/usage_examples.md +436 -0
- package/bin/skills/peer-review/SKILL.md +702 -0
- package/bin/skills/peer-review/references/calibration_guidelines.md +196 -0
- package/bin/skills/peer-review/references/common_issues.md +552 -0
- package/bin/skills/peer-review/references/paper_mechanics.md +269 -0
- package/bin/skills/peer-review/references/reporting_standards.md +290 -0
- package/bin/skills/peer-review/references/scoring_rubric.md +239 -0
- package/bin/skills/pptx-posters/SKILL.md +410 -0
- package/bin/skills/pptx-posters/assets/poster_html_template.html +257 -0
- package/bin/skills/pptx-posters/assets/poster_quality_checklist.md +358 -0
- package/bin/skills/pptx-posters/references/poster_content_guide.md +748 -0
- package/bin/skills/pptx-posters/references/poster_design_principles.md +806 -0
- package/bin/skills/pptx-posters/references/poster_layout_design.md +900 -0
- package/bin/skills/research-grants/README.md +285 -0
- package/bin/skills/research-grants/SKILL.md +938 -0
- package/bin/skills/research-grants/assets/budget_justification_template.md +453 -0
- package/bin/skills/research-grants/assets/nih_specific_aims_template.md +166 -0
- package/bin/skills/research-grants/assets/nsf_project_summary_template.md +92 -0
- package/bin/skills/research-grants/references/broader_impacts.md +392 -0
- package/bin/skills/research-grants/references/darpa_guidelines.md +636 -0
- package/bin/skills/research-grants/references/doe_guidelines.md +586 -0
- package/bin/skills/research-grants/references/nih_guidelines.md +851 -0
- package/bin/skills/research-grants/references/nsf_guidelines.md +570 -0
- package/bin/skills/research-grants/references/specific_aims_guide.md +458 -0
- package/bin/skills/research-lookup/README.md +156 -0
- package/bin/skills/research-lookup/SKILL.md +606 -0
- package/bin/skills/research-lookup/examples.py +174 -0
- package/bin/skills/research-lookup/lookup.py +187 -0
- package/bin/skills/research-lookup/research_lookup.py +483 -0
- package/bin/skills/research-lookup/scripts/research_lookup.py +483 -0
- package/bin/skills/scholar-evaluation/SKILL.md +289 -0
- package/bin/skills/scholar-evaluation/references/evaluation_framework.md +663 -0
- package/bin/skills/scholar-evaluation/scripts/calculate_scores.py +366 -0
- package/bin/skills/scientific-critical-thinking/SKILL.md +566 -0
- package/bin/skills/scientific-critical-thinking/references/common_biases.md +364 -0
- package/bin/skills/scientific-critical-thinking/references/evidence_hierarchy.md +484 -0
- package/bin/skills/scientific-critical-thinking/references/experimental_design.md +496 -0
- package/bin/skills/scientific-critical-thinking/references/logical_fallacies.md +478 -0
- package/bin/skills/scientific-critical-thinking/references/scientific_method.md +169 -0
- package/bin/skills/scientific-critical-thinking/references/statistical_pitfalls.md +506 -0
- package/bin/skills/scientific-schematics/QUICK_REFERENCE.md +207 -0
- package/bin/skills/scientific-schematics/README.md +327 -0
- package/bin/skills/scientific-schematics/SKILL.md +615 -0
- package/bin/skills/scientific-schematics/example_usage.sh +89 -0
- package/bin/skills/scientific-schematics/references/best_practices.md +559 -0
- package/bin/skills/scientific-schematics/scripts/generate_schematic.py +135 -0
- package/bin/skills/scientific-schematics/scripts/generate_schematic_ai.py +807 -0
- package/bin/skills/scientific-schematics/test_ai_generation.py +243 -0
- package/bin/skills/scientific-slides/SKILL.md +942 -0
- package/bin/skills/scientific-slides/assets/timing_guidelines.md +597 -0
- package/bin/skills/scientific-slides/references/data_visualization_slides.md +708 -0
- package/bin/skills/scientific-slides/references/presentation_structure.md +642 -0
- package/bin/skills/scientific-slides/references/slide_design_principles.md +849 -0
- package/bin/skills/scientific-slides/references/talk_types_guide.md +687 -0
- package/bin/skills/scientific-slides/references/visual_review_workflow.md +775 -0
- package/bin/skills/scientific-slides/scripts/generate_slide_image.py +143 -0
- package/bin/skills/scientific-slides/scripts/generate_slide_image_ai.py +748 -0
- package/bin/skills/scientific-slides/scripts/pdf_to_images.py +201 -0
- package/bin/skills/scientific-slides/scripts/slides_to_pdf.py +220 -0
- package/bin/skills/scientific-slides/scripts/validate_presentation.py +367 -0
- package/bin/skills/scientific-writing/SKILL.md +714 -0
- package/bin/skills/scientific-writing/assets/REPORT_FORMATTING_GUIDE.md +574 -0
- package/bin/skills/scientific-writing/assets/scientific_report.sty +606 -0
- package/bin/skills/scientific-writing/assets/scientific_report_template.tex +449 -0
- package/bin/skills/scientific-writing/references/citation_styles.md +720 -0
- package/bin/skills/scientific-writing/references/figures_tables.md +806 -0
- package/bin/skills/scientific-writing/references/imrad_structure.md +686 -0
- package/bin/skills/scientific-writing/references/professional_report_formatting.md +664 -0
- package/bin/skills/scientific-writing/references/reporting_guidelines.md +748 -0
- package/bin/skills/scientific-writing/references/writing_principles.md +824 -0
- package/bin/skills/tinker/SKILL.md +2 -3
- package/bin/skills/together-ai/SKILL.md +722 -0
- package/bin/skills/treatment-plans/README.md +488 -0
- package/bin/skills/treatment-plans/SKILL.md +1579 -0
- package/bin/skills/treatment-plans/assets/STYLING_QUICK_REFERENCE.md +185 -0
- package/bin/skills/treatment-plans/assets/chronic_disease_management_plan.tex +665 -0
- package/bin/skills/treatment-plans/assets/general_medical_treatment_plan.tex +547 -0
- package/bin/skills/treatment-plans/assets/medical_treatment_plan.sty +222 -0
- package/bin/skills/treatment-plans/assets/mental_health_treatment_plan.tex +774 -0
- package/bin/skills/treatment-plans/assets/one_page_treatment_plan.tex +193 -0
- package/bin/skills/treatment-plans/assets/pain_management_plan.tex +799 -0
- package/bin/skills/treatment-plans/assets/perioperative_care_plan.tex +753 -0
- package/bin/skills/treatment-plans/assets/quality_checklist.md +471 -0
- package/bin/skills/treatment-plans/assets/rehabilitation_treatment_plan.tex +756 -0
- package/bin/skills/treatment-plans/references/goal_setting_frameworks.md +411 -0
- package/bin/skills/treatment-plans/references/intervention_guidelines.md +507 -0
- package/bin/skills/treatment-plans/references/regulatory_compliance.md +476 -0
- package/bin/skills/treatment-plans/references/specialty_specific_guidelines.md +655 -0
- package/bin/skills/treatment-plans/references/treatment_plan_standards.md +485 -0
- package/bin/skills/treatment-plans/scripts/check_completeness.py +322 -0
- package/bin/skills/treatment-plans/scripts/generate_template.py +233 -0
- package/bin/skills/treatment-plans/scripts/timeline_generator.py +385 -0
- package/bin/skills/treatment-plans/scripts/validate_treatment_plan.py +369 -0
- package/bin/skills/unsloth/SKILL.md +565 -47
- package/bin/skills/unsloth/docs/advanced-rl.md +222 -0
- package/bin/skills/unsloth/docs/chat-templates.md +141 -0
- package/bin/skills/unsloth/docs/datasets.md +489 -0
- package/bin/skills/unsloth/docs/docker-extended.md +99 -0
- package/bin/skills/unsloth/docs/dynamic-ggufs-2.0.md +116 -0
- package/bin/skills/unsloth/docs/dynamic-ggufs-aider.md +118 -0
- package/bin/skills/unsloth/docs/faq.md +91 -0
- package/bin/skills/unsloth/docs/fp16-vs-bf16.md +61 -0
- package/bin/skills/unsloth/docs/fp8-rl.md +224 -0
- package/bin/skills/unsloth/docs/glm-4.7-flash.md +997 -0
- package/bin/skills/unsloth/docs/inference-deployment-overview.md +17 -0
- package/bin/skills/unsloth/docs/inference.md +27 -0
- package/bin/skills/unsloth/docs/installation-docker.md +155 -0
- package/bin/skills/unsloth/docs/installation-pip.md +148 -0
- package/bin/skills/unsloth/docs/kernels-packing.md +190 -0
- package/bin/skills/unsloth/docs/kimi-k2.5.md +634 -0
- package/bin/skills/unsloth/docs/lm-studio.md +235 -0
- package/bin/skills/unsloth/docs/lora-hot-swapping.md +75 -0
- package/bin/skills/unsloth/docs/lora-hyperparameters.md +363 -0
- package/bin/skills/unsloth/docs/memory-efficient-rl.md +267 -0
- package/bin/skills/unsloth/docs/model-selection.md +70 -0
- package/bin/skills/unsloth/docs/models.md +532 -0
- package/bin/skills/unsloth/docs/multi-gpu-ddp.md +90 -0
- package/bin/skills/unsloth/docs/notebooks.md +223 -0
- package/bin/skills/unsloth/docs/overview.md +110 -0
- package/bin/skills/unsloth/docs/qwen3-coder-next-extended.md +900 -0
- package/bin/skills/unsloth/docs/qwen3-coder-next.md +900 -0
- package/bin/skills/unsloth/docs/requirements.md +45 -0
- package/bin/skills/unsloth/docs/reward-hacking.md +25 -0
- package/bin/skills/unsloth/docs/saving-to-gguf.md +138 -0
- package/bin/skills/unsloth/docs/saving-to-ollama.md +46 -0
- package/bin/skills/unsloth/docs/sglang-guide.md +278 -0
- package/bin/skills/unsloth/docs/speculative-decoding.md +70 -0
- package/bin/skills/unsloth/docs/tool-calling.md +334 -0
- package/bin/skills/unsloth/docs/troubleshooting-faq.md +204 -0
- package/bin/skills/unsloth/docs/troubleshooting-inference.md +26 -0
- package/bin/skills/unsloth/docs/tts-fine-tuning.md +149 -0
- package/bin/skills/unsloth/docs/tutorial-grpo.md +273 -0
- package/bin/skills/unsloth/docs/tutorial-llama3-ollama.md +356 -0
- package/bin/skills/unsloth/docs/vision-fine-tuning.md +135 -0
- package/bin/skills/unsloth/docs/vision-rl.md +170 -0
- package/bin/skills/unsloth/docs/vllm-engine-arguments.md +43 -0
- package/bin/skills/unsloth/docs/vllm-guide.md +98 -0
- package/bin/skills/venue-templates/SKILL.md +686 -0
- package/bin/skills/venue-templates/assets/examples/cell_summary_example.md +247 -0
- package/bin/skills/venue-templates/assets/examples/medical_structured_abstract.md +313 -0
- package/bin/skills/venue-templates/assets/examples/nature_abstract_examples.md +213 -0
- package/bin/skills/venue-templates/assets/examples/neurips_introduction_example.md +245 -0
- package/bin/skills/venue-templates/assets/grants/nih_specific_aims.tex +235 -0
- package/bin/skills/venue-templates/assets/grants/nsf_proposal_template.tex +375 -0
- package/bin/skills/venue-templates/assets/journals/nature_article.tex +171 -0
- package/bin/skills/venue-templates/assets/journals/neurips_article.tex +283 -0
- package/bin/skills/venue-templates/assets/journals/plos_one.tex +317 -0
- package/bin/skills/venue-templates/assets/posters/beamerposter_academic.tex +311 -0
- package/bin/skills/venue-templates/references/cell_press_style.md +483 -0
- package/bin/skills/venue-templates/references/conferences_formatting.md +564 -0
- package/bin/skills/venue-templates/references/cs_conference_style.md +463 -0
- package/bin/skills/venue-templates/references/grants_requirements.md +787 -0
- package/bin/skills/venue-templates/references/journals_formatting.md +486 -0
- package/bin/skills/venue-templates/references/medical_journal_styles.md +535 -0
- package/bin/skills/venue-templates/references/ml_conference_style.md +556 -0
- package/bin/skills/venue-templates/references/nature_science_style.md +405 -0
- package/bin/skills/venue-templates/references/posters_guidelines.md +628 -0
- package/bin/skills/venue-templates/references/reviewer_expectations.md +417 -0
- package/bin/skills/venue-templates/references/venue_writing_styles.md +321 -0
- package/bin/skills/venue-templates/scripts/customize_template.py +195 -0
- package/bin/skills/venue-templates/scripts/query_template.py +266 -0
- package/bin/skills/venue-templates/scripts/validate_format.py +250 -0
- package/bin/synsc +0 -0
- package/package.json +1 -1
- package/bin/skills/unsloth/references/index.md +0 -7
- package/bin/skills/unsloth/references/llms-full.md +0 -16799
- package/bin/skills/unsloth/references/llms-txt.md +0 -12044
- package/bin/skills/unsloth/references/llms.md +0 -82
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
# File Format Support
|
|
2
|
+
|
|
3
|
+
This document provides detailed information about each file format supported by MarkItDown.
|
|
4
|
+
|
|
5
|
+
## Document Formats
|
|
6
|
+
|
|
7
|
+
### PDF (.pdf)
|
|
8
|
+
|
|
9
|
+
**Capabilities**:
|
|
10
|
+
- Text extraction
|
|
11
|
+
- Table detection
|
|
12
|
+
- Metadata extraction
|
|
13
|
+
- OCR for scanned documents (requires separate OCR setup)
|
|
14
|
+
|
|
15
|
+
**Dependencies**:
|
|
16
|
+
```bash
|
|
17
|
+
pip install 'markitdown[pdf]'
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
**Best For**:
|
|
21
|
+
- Scientific papers
|
|
22
|
+
- Reports
|
|
23
|
+
- Books
|
|
24
|
+
- Forms
|
|
25
|
+
|
|
26
|
+
**Limitations**:
|
|
27
|
+
- Formatting may not be preserved perfectly for complex layouts
|
|
28
|
+
- Scanned PDFs require OCR setup
|
|
29
|
+
- Some PDF features (annotations, forms) may not convert
|
|
30
|
+
|
|
31
|
+
**Example**:
|
|
32
|
+
```python
|
|
33
|
+
from markitdown import MarkItDown
|
|
34
|
+
|
|
35
|
+
md = MarkItDown()
|
|
36
|
+
result = md.convert("research_paper.pdf")
|
|
37
|
+
print(result.text_content)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**Enhanced with Azure Document Intelligence**:
|
|
41
|
+
```python
|
|
42
|
+
md = MarkItDown(docintel_endpoint="https://YOUR-ENDPOINT.cognitiveservices.azure.com/")
|
|
43
|
+
result = md.convert("complex_layout.pdf")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
### Microsoft Word (.docx)
|
|
49
|
+
|
|
50
|
+
**Capabilities**:
|
|
51
|
+
- Text extraction
|
|
52
|
+
- Table conversion
|
|
53
|
+
- Heading hierarchy
|
|
54
|
+
- List formatting
|
|
55
|
+
- Basic text formatting (bold, italic)
|
|
56
|
+
|
|
57
|
+
**Dependencies**:
|
|
58
|
+
```bash
|
|
59
|
+
pip install 'markitdown[docx]'
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Best For**:
|
|
63
|
+
- Research papers
|
|
64
|
+
- Reports
|
|
65
|
+
- Documentation
|
|
66
|
+
- Manuscripts
|
|
67
|
+
|
|
68
|
+
**Preserved Elements**:
|
|
69
|
+
- Headings (converted to Markdown headers)
|
|
70
|
+
- Tables (converted to Markdown tables)
|
|
71
|
+
- Lists (bulleted and numbered)
|
|
72
|
+
- Basic formatting (bold, italic)
|
|
73
|
+
- Paragraphs
|
|
74
|
+
|
|
75
|
+
**Example**:
|
|
76
|
+
```python
|
|
77
|
+
result = md.convert("manuscript.docx")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
### PowerPoint (.pptx)
|
|
83
|
+
|
|
84
|
+
**Capabilities**:
|
|
85
|
+
- Slide content extraction
|
|
86
|
+
- Speaker notes
|
|
87
|
+
- Table extraction
|
|
88
|
+
- Image descriptions (with AI)
|
|
89
|
+
|
|
90
|
+
**Dependencies**:
|
|
91
|
+
```bash
|
|
92
|
+
pip install 'markitdown[pptx]'
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Best For**:
|
|
96
|
+
- Presentations
|
|
97
|
+
- Lecture slides
|
|
98
|
+
- Conference talks
|
|
99
|
+
|
|
100
|
+
**Output Format**:
|
|
101
|
+
```markdown
|
|
102
|
+
# Slide 1: Title
|
|
103
|
+
|
|
104
|
+
Content from slide 1...
|
|
105
|
+
|
|
106
|
+
**Notes**: Speaker notes appear here
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
# Slide 2: Next Topic
|
|
111
|
+
|
|
112
|
+
...
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**With AI Image Descriptions**:
|
|
116
|
+
```python
|
|
117
|
+
from openai import OpenAI
|
|
118
|
+
|
|
119
|
+
client = OpenAI()
|
|
120
|
+
md = MarkItDown(llm_client=client, llm_model="gpt-4o")
|
|
121
|
+
result = md.convert("presentation.pptx")
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
### Excel (.xlsx, .xls)
|
|
127
|
+
|
|
128
|
+
**Capabilities**:
|
|
129
|
+
- Sheet extraction
|
|
130
|
+
- Table formatting
|
|
131
|
+
- Data preservation
|
|
132
|
+
- Formula values (calculated)
|
|
133
|
+
|
|
134
|
+
**Dependencies**:
|
|
135
|
+
```bash
|
|
136
|
+
pip install 'markitdown[xlsx]' # Modern Excel
|
|
137
|
+
pip install 'markitdown[xls]' # Legacy Excel
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Best For**:
|
|
141
|
+
- Data tables
|
|
142
|
+
- Research data
|
|
143
|
+
- Statistical results
|
|
144
|
+
- Experimental data
|
|
145
|
+
|
|
146
|
+
**Output Format**:
|
|
147
|
+
```markdown
|
|
148
|
+
# Sheet: Results
|
|
149
|
+
|
|
150
|
+
| Sample | Control | Treatment | P-value |
|
|
151
|
+
|--------|---------|-----------|---------|
|
|
152
|
+
| 1 | 10.2 | 12.5 | 0.023 |
|
|
153
|
+
| 2 | 9.8 | 11.9 | 0.031 |
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
**Example**:
|
|
157
|
+
```python
|
|
158
|
+
result = md.convert("experimental_data.xlsx")
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Image Formats
|
|
164
|
+
|
|
165
|
+
### Images (.jpg, .jpeg, .png, .gif, .webp)
|
|
166
|
+
|
|
167
|
+
**Capabilities**:
|
|
168
|
+
- EXIF metadata extraction
|
|
169
|
+
- OCR text extraction
|
|
170
|
+
- AI-powered image descriptions
|
|
171
|
+
|
|
172
|
+
**Dependencies**:
|
|
173
|
+
```bash
|
|
174
|
+
pip install 'markitdown[all]' # Includes image support
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
**Best For**:
|
|
178
|
+
- Scanned documents
|
|
179
|
+
- Charts and graphs
|
|
180
|
+
- Scientific diagrams
|
|
181
|
+
- Photographs with text
|
|
182
|
+
|
|
183
|
+
**Output Without AI**:
|
|
184
|
+
```markdown
|
|
185
|
+
![Image](image.jpg)
|
|
186
|
+
|
|
187
|
+
**EXIF Data**:
|
|
188
|
+
- Camera: Canon EOS 5D
|
|
189
|
+
- Date: 2024-01-15
|
|
190
|
+
- Resolution: 4000x3000
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
**With AI Image Descriptions**:
|
|
194
|
+
```python
|
|
195
|
+
from openai import OpenAI
|
|
196
|
+
|
|
197
|
+
client = OpenAI()
|
|
198
|
+
md = MarkItDown(
|
|
199
|
+
llm_client=client,
|
|
200
|
+
llm_model="gpt-4o",
|
|
201
|
+
llm_prompt="Describe this scientific diagram in detail"
|
|
202
|
+
)
|
|
203
|
+
result = md.convert("graph.png")
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
**OCR for Text Extraction**:
|
|
207
|
+
Requires Tesseract OCR:
|
|
208
|
+
```bash
|
|
209
|
+
# macOS
|
|
210
|
+
brew install tesseract
|
|
211
|
+
|
|
212
|
+
# Ubuntu
|
|
213
|
+
sudo apt-get install tesseract-ocr
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Audio Formats
|
|
219
|
+
|
|
220
|
+
### Audio (.wav, .mp3)
|
|
221
|
+
|
|
222
|
+
**Capabilities**:
|
|
223
|
+
- Metadata extraction
|
|
224
|
+
- Speech-to-text transcription
|
|
225
|
+
- Duration and technical info
|
|
226
|
+
|
|
227
|
+
**Dependencies**:
|
|
228
|
+
```bash
|
|
229
|
+
pip install 'markitdown[audio-transcription]'
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
**Best For**:
|
|
233
|
+
- Lecture recordings
|
|
234
|
+
- Interviews
|
|
235
|
+
- Podcasts
|
|
236
|
+
- Meeting recordings
|
|
237
|
+
|
|
238
|
+
**Output Format**:
|
|
239
|
+
```markdown
|
|
240
|
+
# Audio: interview.mp3
|
|
241
|
+
|
|
242
|
+
**Metadata**:
|
|
243
|
+
- Duration: 45:32
|
|
244
|
+
- Bitrate: 320kbps
|
|
245
|
+
- Sample Rate: 44100Hz
|
|
246
|
+
|
|
247
|
+
**Transcription**:
|
|
248
|
+
[Transcribed text appears here...]
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
**Example**:
|
|
252
|
+
```python
|
|
253
|
+
result = md.convert("lecture.mp3")
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Web Formats
|
|
259
|
+
|
|
260
|
+
### HTML (.html, .htm)
|
|
261
|
+
|
|
262
|
+
**Capabilities**:
|
|
263
|
+
- Clean HTML to Markdown conversion
|
|
264
|
+
- Link preservation
|
|
265
|
+
- Table conversion
|
|
266
|
+
- List formatting
|
|
267
|
+
|
|
268
|
+
**Best For**:
|
|
269
|
+
- Web pages
|
|
270
|
+
- Documentation
|
|
271
|
+
- Blog posts
|
|
272
|
+
- Online articles
|
|
273
|
+
|
|
274
|
+
**Output Format**: Clean Markdown with preserved links and structure
|
|
275
|
+
|
|
276
|
+
**Example**:
|
|
277
|
+
```python
|
|
278
|
+
result = md.convert("webpage.html")
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
### YouTube URLs
|
|
284
|
+
|
|
285
|
+
**Capabilities**:
|
|
286
|
+
- Fetch video transcriptions
|
|
287
|
+
- Extract video metadata
|
|
288
|
+
- Caption download
|
|
289
|
+
|
|
290
|
+
**Dependencies**:
|
|
291
|
+
```bash
|
|
292
|
+
pip install 'markitdown[youtube-transcription]'
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
**Best For**:
|
|
296
|
+
- Educational videos
|
|
297
|
+
- Lectures
|
|
298
|
+
- Talks
|
|
299
|
+
- Tutorials
|
|
300
|
+
|
|
301
|
+
**Example**:
|
|
302
|
+
```python
|
|
303
|
+
result = md.convert("https://www.youtube.com/watch?v=VIDEO_ID")
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
## Data Formats
|
|
309
|
+
|
|
310
|
+
### CSV (.csv)
|
|
311
|
+
|
|
312
|
+
**Capabilities**:
|
|
313
|
+
- Automatic table conversion
|
|
314
|
+
- Delimiter detection
|
|
315
|
+
- Header preservation
|
|
316
|
+
|
|
317
|
+
**Output Format**: Markdown tables
|
|
318
|
+
|
|
319
|
+
**Example**:
|
|
320
|
+
```python
|
|
321
|
+
result = md.convert("data.csv")
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
**Output**:
|
|
325
|
+
```markdown
|
|
326
|
+
| Column1 | Column2 | Column3 |
|
|
327
|
+
|---------|---------|---------|
|
|
328
|
+
| Value1 | Value2 | Value3 |
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
---
|
|
332
|
+
|
|
333
|
+
### JSON (.json)
|
|
334
|
+
|
|
335
|
+
**Capabilities**:
|
|
336
|
+
- Structured representation
|
|
337
|
+
- Pretty formatting
|
|
338
|
+
- Nested data visualization
|
|
339
|
+
|
|
340
|
+
**Best For**:
|
|
341
|
+
- API responses
|
|
342
|
+
- Configuration files
|
|
343
|
+
- Data exports
|
|
344
|
+
|
|
345
|
+
**Example**:
|
|
346
|
+
```python
|
|
347
|
+
result = md.convert("data.json")
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
---
|
|
351
|
+
|
|
352
|
+
### XML (.xml)
|
|
353
|
+
|
|
354
|
+
**Capabilities**:
|
|
355
|
+
- Structure preservation
|
|
356
|
+
- Attribute extraction
|
|
357
|
+
- Formatted output
|
|
358
|
+
|
|
359
|
+
**Best For**:
|
|
360
|
+
- Configuration files
|
|
361
|
+
- Data interchange
|
|
362
|
+
- Structured documents
|
|
363
|
+
|
|
364
|
+
**Example**:
|
|
365
|
+
```python
|
|
366
|
+
result = md.convert("config.xml")
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
---
|
|
370
|
+
|
|
371
|
+
## Archive Formats
|
|
372
|
+
|
|
373
|
+
### ZIP (.zip)
|
|
374
|
+
|
|
375
|
+
**Capabilities**:
|
|
376
|
+
- Iterates through archive contents
|
|
377
|
+
- Converts each file individually
|
|
378
|
+
- Maintains directory structure in output
|
|
379
|
+
|
|
380
|
+
**Best For**:
|
|
381
|
+
- Document collections
|
|
382
|
+
- Project archives
|
|
383
|
+
- Batch conversions
|
|
384
|
+
|
|
385
|
+
**Output Format**:
|
|
386
|
+
```markdown
|
|
387
|
+
# Archive: documents.zip
|
|
388
|
+
|
|
389
|
+
## File: document1.pdf
|
|
390
|
+
[Content from document1.pdf...]
|
|
391
|
+
|
|
392
|
+
---
|
|
393
|
+
|
|
394
|
+
## File: document2.docx
|
|
395
|
+
[Content from document2.docx...]
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
**Example**:
|
|
399
|
+
```python
|
|
400
|
+
result = md.convert("archive.zip")
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
---
|
|
404
|
+
|
|
405
|
+
## E-book Formats
|
|
406
|
+
|
|
407
|
+
### EPUB (.epub)
|
|
408
|
+
|
|
409
|
+
**Capabilities**:
|
|
410
|
+
- Full text extraction
|
|
411
|
+
- Chapter structure
|
|
412
|
+
- Metadata extraction
|
|
413
|
+
|
|
414
|
+
**Best For**:
|
|
415
|
+
- E-books
|
|
416
|
+
- Digital publications
|
|
417
|
+
- Long-form content
|
|
418
|
+
|
|
419
|
+
**Output Format**: Markdown with preserved chapter structure
|
|
420
|
+
|
|
421
|
+
**Example**:
|
|
422
|
+
```python
|
|
423
|
+
result = md.convert("book.epub")
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
---
|
|
427
|
+
|
|
428
|
+
## Other Formats
|
|
429
|
+
|
|
430
|
+
### Outlook Messages (.msg)
|
|
431
|
+
|
|
432
|
+
**Capabilities**:
|
|
433
|
+
- Email content extraction
|
|
434
|
+
- Attachment listing
|
|
435
|
+
- Metadata (from, to, subject, date)
|
|
436
|
+
|
|
437
|
+
**Dependencies**:
|
|
438
|
+
```bash
|
|
439
|
+
pip install 'markitdown[outlook]'
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
**Best For**:
|
|
443
|
+
- Email archives
|
|
444
|
+
- Communication records
|
|
445
|
+
|
|
446
|
+
**Example**:
|
|
447
|
+
```python
|
|
448
|
+
result = md.convert("message.msg")
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
---
|
|
452
|
+
|
|
453
|
+
## Format-Specific Tips
|
|
454
|
+
|
|
455
|
+
### PDF Best Practices
|
|
456
|
+
|
|
457
|
+
1. **Use Azure Document Intelligence for complex layouts**:
|
|
458
|
+
```python
|
|
459
|
+
md = MarkItDown(docintel_endpoint="endpoint_url")
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
2. **For scanned PDFs, ensure OCR is set up**:
|
|
463
|
+
```bash
|
|
464
|
+
brew install tesseract # macOS
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
3. **Split very large PDFs before conversion** for better performance
|
|
468
|
+
|
|
469
|
+
### PowerPoint Best Practices
|
|
470
|
+
|
|
471
|
+
1. **Use AI for visual content**:
|
|
472
|
+
```python
|
|
473
|
+
md = MarkItDown(llm_client=client, llm_model="gpt-4o")
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
2. **Check speaker notes** - they're included in output
|
|
477
|
+
|
|
478
|
+
3. **Complex animations won't be captured** - static content only
|
|
479
|
+
|
|
480
|
+
### Excel Best Practices
|
|
481
|
+
|
|
482
|
+
1. **Large spreadsheets** may take time to convert
|
|
483
|
+
|
|
484
|
+
2. **Formulas are converted to their calculated values**
|
|
485
|
+
|
|
486
|
+
3. **Multiple sheets** are all included in output
|
|
487
|
+
|
|
488
|
+
4. **Charts become text descriptions** (use AI for better descriptions)
|
|
489
|
+
|
|
490
|
+
### Image Best Practices
|
|
491
|
+
|
|
492
|
+
1. **Use AI for meaningful descriptions**:
|
|
493
|
+
```python
|
|
494
|
+
md = MarkItDown(
|
|
495
|
+
llm_client=client,
|
|
496
|
+
llm_model="gpt-4o",
|
|
497
|
+
llm_prompt="Describe this scientific figure in detail"
|
|
498
|
+
)
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
2. **For text-heavy images, ensure OCR dependencies** are installed
|
|
502
|
+
|
|
503
|
+
3. **High-resolution images** may take longer to process
|
|
504
|
+
|
|
505
|
+
### Audio Best Practices
|
|
506
|
+
|
|
507
|
+
1. **Clear audio** produces better transcriptions
|
|
508
|
+
|
|
509
|
+
2. **Long recordings** may take significant time
|
|
510
|
+
|
|
511
|
+
3. **Consider splitting long audio files** for faster processing
|
|
512
|
+
|
|
513
|
+
---
|
|
514
|
+
|
|
515
|
+
## Unsupported Formats
|
|
516
|
+
|
|
517
|
+
If you need to convert an unsupported format:
|
|
518
|
+
|
|
519
|
+
1. **Create a custom converter** (see `api_reference.md`)
|
|
520
|
+
2. **Look for plugins** on GitHub (search for the `#markitdown-plugin` hashtag)
|
|
521
|
+
3. **Pre-convert to supported format** (e.g., convert .rtf to .docx)
|
|
522
|
+
|
|
523
|
+
---
|
|
524
|
+
|
|
525
|
+
## Format Detection
|
|
526
|
+
|
|
527
|
+
MarkItDown automatically detects format from:
|
|
528
|
+
|
|
529
|
+
1. **File extension** (primary method)
|
|
530
|
+
2. **MIME type** (fallback)
|
|
531
|
+
3. **File signature** (magic bytes, fallback)
|
|
532
|
+
|
|
533
|
+
**Override detection**:
|
|
534
|
+
```python
|
|
535
|
+
# Force specific format
|
|
536
|
+
result = md.convert("file_without_extension", file_extension=".pdf")
|
|
537
|
+
|
|
538
|
+
# With streams
|
|
539
|
+
with open("file", "rb") as f:
|
|
540
|
+
result = md.convert_stream(f, file_extension=".pdf")
|
|
541
|
+
```
|
|
542
|
+
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Batch convert multiple files to Markdown using MarkItDown.
|
|
4
|
+
|
|
5
|
+
This script demonstrates how to efficiently convert multiple files
|
|
6
|
+
in a directory to Markdown format.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import sys
|
|
11
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import List, Optional
|
|
14
|
+
|
|
15
|
+
from markitdown import MarkItDown
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def convert_file(md: "MarkItDown", file_path: Path, output_dir: Path, verbose: bool = False) -> tuple[bool, str, str]:
    """
    Convert a single file to Markdown and write the result into ``output_dir``.

    The output file is named ``<input stem>.md`` and starts with a short
    header (title, source file name, source suffix) followed by the
    converted text.

    NOTE: inputs that share a stem (e.g. ``a.pdf`` and ``a.docx``) map to the
    same output file, so whichever is written last replaces the other.

    Args:
        md: MarkItDown instance
        file_path: Path to input file
        output_dir: Directory for output files (created if it does not exist)
        verbose: Print detailed messages

    Returns:
        Tuple of (success, input_path, message). Any ``Exception`` raised
        during conversion or writing is caught and reported through
        ``success=False`` and ``message`` instead of propagating.
    """
    try:
        if verbose:
            print(f"Converting: {file_path}")

        result = md.convert(str(file_path))

        # Make the function usable on its own, not only after batch_convert()
        # has prepared the directory. exist_ok keeps this safe across threads.
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"{file_path.stem}.md"

        # Metadata header, then the converted document body.
        header = (
            f"# {result.title or file_path.stem}\n\n"
            f"**Source**: {file_path.name}\n"
            f"**Format**: {file_path.suffix}\n\n"
            "---\n\n"
        )
        output_file.write_text(header + result.text_content, encoding='utf-8')

        return True, str(file_path), f"✓ Converted to {output_file.name}"

    except Exception as e:
        # Some exceptions carry no message (str(e) == ""); fall back to the
        # exception class name so the failure report is never blank.
        reason = str(e) or type(e).__name__
        return False, str(file_path), f"✗ Error: {reason}"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def batch_convert(
    input_dir: Path,
    output_dir: Path,
    extensions: Optional[List[str]] = None,
    recursive: bool = False,
    workers: int = 4,
    verbose: bool = False,
    enable_plugins: bool = False,
) -> dict:
    """
    Batch convert files in a directory.

    Args:
        input_dir: Input directory
        output_dir: Output directory (created if missing)
        extensions: List of file extensions to convert (e.g., ['.pdf', '.docx']).
            Matching is case-insensitive, so '.pdf' also selects 'REPORT.PDF'.
        recursive: Search subdirectories
        workers: Number of parallel workers
        verbose: Print detailed messages
        enable_plugins: Enable MarkItDown plugins

    Returns:
        Dictionary with conversion statistics: 'total', 'success', 'failed'
        and 'details' (one {'file', 'success', 'message'} entry per file).
        The same keys are present when no files are found.
    """
    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    # Default extensions if not specified
    if extensions is None:
        extensions = ['.pdf', '.docx', '.pptx', '.xlsx', '.html', '.jpg', '.png']

    files = _find_files(input_dir, extensions, recursive)

    # Build the result up front so every return path has the same shape.
    results = {'total': len(files), 'success': 0, 'failed': 0, 'details': []}

    if not files:
        print(f"No files found with extensions: {', '.join(extensions)}")
        return results

    print(f"Found {len(files)} file(s) to convert")

    # One converter instance is shared by all worker threads.
    md = MarkItDown(enable_plugins=enable_plugins)

    # Convert files in parallel, reporting each one as soon as it finishes.
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(convert_file, md, file_path, output_dir, verbose) for file_path in files]

        for future in as_completed(futures):
            success, path, message = future.result()

            results['success' if success else 'failed'] += 1
            results['details'].append({'file': path, 'success': success, 'message': message})

            print(message)

    return results


def _find_files(input_dir: Path, extensions: List[str], recursive: bool) -> List[Path]:
    """Return the sorted regular files under input_dir whose name matches ``*<ext>``, ignoring case."""
    from fnmatch import fnmatchcase  # function-scope import: no file-level import change needed

    # Lower-case both sides so '.pdf' also selects 'REPORT.PDF'.
    patterns = [f"*{ext}".lower() for ext in extensions]
    candidates = input_dir.rglob('*') if recursive else input_dir.glob('*')

    # Each path is visited once, so overlapping or repeated extensions cannot
    # yield duplicates; is_file() drops directories such as 'folder.pdf/'.
    return sorted(
        path
        for path in candidates
        if path.is_file() and any(fnmatchcase(path.name.lower(), pattern) for pattern in patterns)
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def main():
    """
    Command-line entry point: parse arguments, run the batch conversion,
    print a summary, and exit.

    Exit status is 0 when no conversion failed, 1 when the input directory is
    invalid or at least one conversion failed, and 2 (argparse usage error)
    when --workers is less than 1.
    """
    parser = argparse.ArgumentParser(
        description="Batch convert files to Markdown using MarkItDown",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert all PDFs in a directory
  python batch_convert.py papers/ output/ --extensions .pdf

  # Convert multiple formats recursively
  python batch_convert.py documents/ markdown/ --extensions .pdf .docx .pptx -r

  # Use 8 parallel workers
  python batch_convert.py input/ output/ --workers 8

  # Enable plugins
  python batch_convert.py input/ output/ --plugins
        """,
    )

    parser.add_argument('input_dir', type=Path, help='Input directory')
    parser.add_argument('output_dir', type=Path, help='Output directory')
    parser.add_argument('--extensions', '-e', nargs='+', help='File extensions to convert (e.g., .pdf .docx)')
    parser.add_argument('--recursive', '-r', action='store_true', help='Search subdirectories recursively')
    parser.add_argument('--workers', '-w', type=int, default=4, help='Number of parallel workers (default: 4)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--plugins', '-p', action='store_true', help='Enable MarkItDown plugins')

    args = parser.parse_args()

    # A thread pool needs at least one worker; report this as a usage error
    # instead of letting it surface later as a traceback.
    if args.workers < 1:
        parser.error("--workers must be at least 1")

    # Validate input directory
    if not args.input_dir.exists():
        print(f"Error: Input directory '{args.input_dir}' does not exist", file=sys.stderr)
        sys.exit(1)

    if not args.input_dir.is_dir():
        print(f"Error: '{args.input_dir}' is not a directory", file=sys.stderr)
        sys.exit(1)

    # Run batch conversion
    results = batch_convert(
        input_dir=args.input_dir,
        output_dir=args.output_dir,
        extensions=args.extensions,
        recursive=args.recursive,
        workers=args.workers,
        verbose=args.verbose,
        enable_plugins=args.plugins,
    )

    # Print summary
    total = results['total']
    # Only the value is conditional, so the label is printed even when
    # nothing was converted.
    rate = f"{results['success'] / total * 100:.1f}%" if total > 0 else "N/A"

    print("\n" + "=" * 50)
    print("CONVERSION SUMMARY")
    print("=" * 50)
    print(f"Total files:     {total}")
    print(f"Successful:      {results['success']}")
    print(f"Failed:          {results['failed']}")
    print(f"Success rate:    {rate}")

    # Show failed files if any
    if results['failed'] > 0:
        print("\nFailed conversions:")
        for detail in results['details']:
            if not detail['success']:
                print(f"  - {detail['file']}: {detail['message']}")

    sys.exit(0 if results['failed'] == 0 else 1)


if __name__ == '__main__':
    main()
|