scientific-writer 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scientific_writer/.claude/WRITER.md +822 -0
- scientific_writer/.claude/settings.local.json +30 -0
- scientific_writer/.claude/skills/citation-management/SKILL.md +1046 -0
- scientific_writer/.claude/skills/citation-management/assets/bibtex_template.bib +264 -0
- scientific_writer/.claude/skills/citation-management/assets/citation_checklist.md +386 -0
- scientific_writer/.claude/skills/citation-management/references/bibtex_formatting.md +908 -0
- scientific_writer/.claude/skills/citation-management/references/citation_validation.md +794 -0
- scientific_writer/.claude/skills/citation-management/references/google_scholar_search.md +725 -0
- scientific_writer/.claude/skills/citation-management/references/metadata_extraction.md +870 -0
- scientific_writer/.claude/skills/citation-management/references/pubmed_search.md +839 -0
- scientific_writer/.claude/skills/citation-management/scripts/doi_to_bibtex.py +204 -0
- scientific_writer/.claude/skills/citation-management/scripts/extract_metadata.py +569 -0
- scientific_writer/.claude/skills/citation-management/scripts/format_bibtex.py +349 -0
- scientific_writer/.claude/skills/citation-management/scripts/search_google_scholar.py +282 -0
- scientific_writer/.claude/skills/citation-management/scripts/search_pubmed.py +398 -0
- scientific_writer/.claude/skills/citation-management/scripts/validate_citations.py +497 -0
- scientific_writer/.claude/skills/clinical-reports/IMPLEMENTATION_SUMMARY.md +641 -0
- scientific_writer/.claude/skills/clinical-reports/README.md +236 -0
- scientific_writer/.claude/skills/clinical-reports/SKILL.md +1088 -0
- scientific_writer/.claude/skills/clinical-reports/assets/case_report_template.md +352 -0
- scientific_writer/.claude/skills/clinical-reports/assets/clinical_trial_csr_template.md +353 -0
- scientific_writer/.claude/skills/clinical-reports/assets/clinical_trial_sae_template.md +359 -0
- scientific_writer/.claude/skills/clinical-reports/assets/consult_note_template.md +305 -0
- scientific_writer/.claude/skills/clinical-reports/assets/discharge_summary_template.md +453 -0
- scientific_writer/.claude/skills/clinical-reports/assets/hipaa_compliance_checklist.md +395 -0
- scientific_writer/.claude/skills/clinical-reports/assets/history_physical_template.md +305 -0
- scientific_writer/.claude/skills/clinical-reports/assets/lab_report_template.md +309 -0
- scientific_writer/.claude/skills/clinical-reports/assets/pathology_report_template.md +249 -0
- scientific_writer/.claude/skills/clinical-reports/assets/quality_checklist.md +338 -0
- scientific_writer/.claude/skills/clinical-reports/assets/radiology_report_template.md +318 -0
- scientific_writer/.claude/skills/clinical-reports/assets/soap_note_template.md +253 -0
- scientific_writer/.claude/skills/clinical-reports/references/case_report_guidelines.md +570 -0
- scientific_writer/.claude/skills/clinical-reports/references/clinical_trial_reporting.md +693 -0
- scientific_writer/.claude/skills/clinical-reports/references/data_presentation.md +530 -0
- scientific_writer/.claude/skills/clinical-reports/references/diagnostic_reports_standards.md +629 -0
- scientific_writer/.claude/skills/clinical-reports/references/medical_terminology.md +588 -0
- scientific_writer/.claude/skills/clinical-reports/references/patient_documentation.md +744 -0
- scientific_writer/.claude/skills/clinical-reports/references/peer_review_standards.md +585 -0
- scientific_writer/.claude/skills/clinical-reports/references/regulatory_compliance.md +577 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/check_deidentification.py +346 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/compliance_checker.py +78 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/extract_clinical_data.py +102 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/format_adverse_events.py +103 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/generate_report_template.py +163 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/terminology_validator.py +133 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/validate_case_report.py +334 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/validate_trial_report.py +89 -0
- scientific_writer/.claude/skills/document-skills/docx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/docx/SKILL.md +197 -0
- scientific_writer/.claude/skills/document-skills/docx/docx-js.md +350 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/pack.py +159 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/unpack.py +29 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validate.py +69 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/__init__.py +15 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/base.py +951 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/docx.py +274 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/pptx.py +315 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/redlining.py +279 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml.md +610 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/__init__.py +1 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/document.py +1276 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/comments.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsIds.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/people.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/utilities.py +374 -0
- scientific_writer/.claude/skills/document-skills/pdf/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/pdf/SKILL.md +294 -0
- scientific_writer/.claude/skills/document-skills/pdf/forms.md +205 -0
- scientific_writer/.claude/skills/document-skills/pdf/reference.md +612 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_bounding_boxes.py +70 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_fillable_fields.py +12 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/create_validation_image.py +41 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/extract_form_field_info.py +152 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/fill_fillable_fields.py +114 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- scientific_writer/.claude/skills/document-skills/pptx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/pptx/SKILL.md +484 -0
- scientific_writer/.claude/skills/document-skills/pptx/html2pptx.md +625 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/pack.py +159 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/unpack.py +29 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validate.py +69 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/base.py +951 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/docx.py +274 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml.md +427 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/html2pptx.js +979 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/inventory.py +1020 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/rearrange.py +231 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/replace.py +385 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/thumbnail.py +450 -0
- scientific_writer/.claude/skills/document-skills/xlsx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/xlsx/SKILL.md +289 -0
- scientific_writer/.claude/skills/document-skills/xlsx/recalc.py +178 -0
- scientific_writer/.claude/skills/hypothesis-generation/SKILL.md +155 -0
- scientific_writer/.claude/skills/hypothesis-generation/assets/hypothesis_output_template.md +302 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/experimental_design_patterns.md +327 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/hypothesis_quality_criteria.md +196 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/literature_search_strategies.md +505 -0
- scientific_writer/.claude/skills/latex-posters/README.md +417 -0
- scientific_writer/.claude/skills/latex-posters/SKILL.md +919 -0
- scientific_writer/.claude/skills/latex-posters/assets/baposter_template.tex +257 -0
- scientific_writer/.claude/skills/latex-posters/assets/beamerposter_template.tex +244 -0
- scientific_writer/.claude/skills/latex-posters/assets/poster_quality_checklist.md +358 -0
- scientific_writer/.claude/skills/latex-posters/assets/tikzposter_template.tex +251 -0
- scientific_writer/.claude/skills/latex-posters/references/latex_poster_packages.md +745 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_content_guide.md +748 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_design_principles.md +806 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_layout_design.md +900 -0
- scientific_writer/.claude/skills/latex-posters/scripts/review_poster.sh +214 -0
- scientific_writer/.claude/skills/literature-review/SKILL.md +546 -0
- scientific_writer/.claude/skills/literature-review/assets/review_template.md +412 -0
- scientific_writer/.claude/skills/literature-review/references/citation_styles.md +166 -0
- scientific_writer/.claude/skills/literature-review/references/database_strategies.md +381 -0
- scientific_writer/.claude/skills/literature-review/scripts/generate_pdf.py +176 -0
- scientific_writer/.claude/skills/literature-review/scripts/search_databases.py +303 -0
- scientific_writer/.claude/skills/literature-review/scripts/verify_citations.py +222 -0
- scientific_writer/.claude/skills/markitdown/INSTALLATION_GUIDE.md +318 -0
- scientific_writer/.claude/skills/markitdown/LICENSE.txt +22 -0
- scientific_writer/.claude/skills/markitdown/OPENROUTER_INTEGRATION.md +359 -0
- scientific_writer/.claude/skills/markitdown/QUICK_REFERENCE.md +309 -0
- scientific_writer/.claude/skills/markitdown/README.md +184 -0
- scientific_writer/.claude/skills/markitdown/SKILL.md +450 -0
- scientific_writer/.claude/skills/markitdown/SKILL_SUMMARY.md +307 -0
- scientific_writer/.claude/skills/markitdown/assets/example_usage.md +463 -0
- scientific_writer/.claude/skills/markitdown/references/api_reference.md +399 -0
- scientific_writer/.claude/skills/markitdown/references/file_formats.md +542 -0
- scientific_writer/.claude/skills/markitdown/scripts/batch_convert.py +228 -0
- scientific_writer/.claude/skills/markitdown/scripts/convert_literature.py +283 -0
- scientific_writer/.claude/skills/markitdown/scripts/convert_with_ai.py +243 -0
- scientific_writer/.claude/skills/paper-2-web/SKILL.md +455 -0
- scientific_writer/.claude/skills/paper-2-web/references/installation.md +141 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2poster.md +346 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2video.md +305 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2web.md +187 -0
- scientific_writer/.claude/skills/paper-2-web/references/usage_examples.md +436 -0
- scientific_writer/.claude/skills/peer-review/SKILL.md +375 -0
- scientific_writer/.claude/skills/peer-review/references/common_issues.md +552 -0
- scientific_writer/.claude/skills/peer-review/references/reporting_standards.md +290 -0
- scientific_writer/.claude/skills/research-grants/README.md +285 -0
- scientific_writer/.claude/skills/research-grants/SKILL.md +896 -0
- scientific_writer/.claude/skills/research-grants/assets/budget_justification_template.md +453 -0
- scientific_writer/.claude/skills/research-grants/assets/nih_specific_aims_template.md +166 -0
- scientific_writer/.claude/skills/research-grants/assets/nsf_project_summary_template.md +92 -0
- scientific_writer/.claude/skills/research-grants/references/broader_impacts.md +392 -0
- scientific_writer/.claude/skills/research-grants/references/darpa_guidelines.md +636 -0
- scientific_writer/.claude/skills/research-grants/references/doe_guidelines.md +586 -0
- scientific_writer/.claude/skills/research-grants/references/nih_guidelines.md +851 -0
- scientific_writer/.claude/skills/research-grants/references/nsf_guidelines.md +570 -0
- scientific_writer/.claude/skills/research-grants/references/specific_aims_guide.md +458 -0
- scientific_writer/.claude/skills/research-lookup/README.md +116 -0
- scientific_writer/.claude/skills/research-lookup/SKILL.md +443 -0
- scientific_writer/.claude/skills/research-lookup/examples.py +174 -0
- scientific_writer/.claude/skills/research-lookup/lookup.py +93 -0
- scientific_writer/.claude/skills/research-lookup/research_lookup.py +335 -0
- scientific_writer/.claude/skills/research-lookup/scripts/research_lookup.py +261 -0
- scientific_writer/.claude/skills/scholar-evaluation/SKILL.md +254 -0
- scientific_writer/.claude/skills/scholar-evaluation/references/evaluation_framework.md +663 -0
- scientific_writer/.claude/skills/scholar-evaluation/scripts/calculate_scores.py +378 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/SKILL.md +530 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/common_biases.md +364 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/evidence_hierarchy.md +484 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/experimental_design.md +496 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/logical_fallacies.md +478 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/scientific_method.md +169 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/statistical_pitfalls.md +506 -0
- scientific_writer/.claude/skills/scientific-schematics/SKILL.md +2035 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/block_diagram_template.tex +199 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/circuit_template.tex +159 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/flowchart_template.tex +161 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/pathway_template.tex +162 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/tikz_styles.tex +422 -0
- scientific_writer/.claude/skills/scientific-schematics/references/best_practices.md +562 -0
- scientific_writer/.claude/skills/scientific-schematics/references/diagram_types.md +637 -0
- scientific_writer/.claude/skills/scientific-schematics/references/python_libraries.md +791 -0
- scientific_writer/.claude/skills/scientific-schematics/references/tikz_guide.md +734 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/circuit_generator.py +307 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/compile_tikz.py +292 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/generate_flowchart.py +281 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/pathway_diagram.py +406 -0
- scientific_writer/.claude/skills/scientific-writing/SKILL.md +443 -0
- scientific_writer/.claude/skills/scientific-writing/references/citation_styles.md +720 -0
- scientific_writer/.claude/skills/scientific-writing/references/figures_tables.md +806 -0
- scientific_writer/.claude/skills/scientific-writing/references/imrad_structure.md +658 -0
- scientific_writer/.claude/skills/scientific-writing/references/reporting_guidelines.md +748 -0
- scientific_writer/.claude/skills/scientific-writing/references/writing_principles.md +824 -0
- scientific_writer/.claude/skills/treatment-plans/README.md +488 -0
- scientific_writer/.claude/skills/treatment-plans/SKILL.md +1536 -0
- scientific_writer/.claude/skills/treatment-plans/assets/STYLING_QUICK_REFERENCE.md +185 -0
- scientific_writer/.claude/skills/treatment-plans/assets/chronic_disease_management_plan.tex +665 -0
- scientific_writer/.claude/skills/treatment-plans/assets/general_medical_treatment_plan.tex +547 -0
- scientific_writer/.claude/skills/treatment-plans/assets/medical_treatment_plan.sty +222 -0
- scientific_writer/.claude/skills/treatment-plans/assets/mental_health_treatment_plan.tex +774 -0
- scientific_writer/.claude/skills/treatment-plans/assets/one_page_treatment_plan.tex +193 -0
- scientific_writer/.claude/skills/treatment-plans/assets/pain_management_plan.tex +799 -0
- scientific_writer/.claude/skills/treatment-plans/assets/perioperative_care_plan.tex +753 -0
- scientific_writer/.claude/skills/treatment-plans/assets/quality_checklist.md +471 -0
- scientific_writer/.claude/skills/treatment-plans/assets/rehabilitation_treatment_plan.tex +756 -0
- scientific_writer/.claude/skills/treatment-plans/references/goal_setting_frameworks.md +411 -0
- scientific_writer/.claude/skills/treatment-plans/references/intervention_guidelines.md +507 -0
- scientific_writer/.claude/skills/treatment-plans/references/regulatory_compliance.md +476 -0
- scientific_writer/.claude/skills/treatment-plans/references/specialty_specific_guidelines.md +655 -0
- scientific_writer/.claude/skills/treatment-plans/references/treatment_plan_standards.md +485 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/check_completeness.py +318 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/generate_template.py +244 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/timeline_generator.py +369 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/validate_treatment_plan.py +367 -0
- scientific_writer/.claude/skills/venue-templates/SKILL.md +590 -0
- scientific_writer/.claude/skills/venue-templates/assets/grants/nih_specific_aims.tex +235 -0
- scientific_writer/.claude/skills/venue-templates/assets/grants/nsf_proposal_template.tex +375 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/nature_article.tex +171 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/neurips_article.tex +283 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/plos_one.tex +317 -0
- scientific_writer/.claude/skills/venue-templates/assets/posters/beamerposter_academic.tex +311 -0
- scientific_writer/.claude/skills/venue-templates/references/conferences_formatting.md +564 -0
- scientific_writer/.claude/skills/venue-templates/references/grants_requirements.md +787 -0
- scientific_writer/.claude/skills/venue-templates/references/journals_formatting.md +486 -0
- scientific_writer/.claude/skills/venue-templates/references/posters_guidelines.md +628 -0
- scientific_writer/.claude/skills/venue-templates/scripts/customize_template.py +206 -0
- scientific_writer/.claude/skills/venue-templates/scripts/query_template.py +260 -0
- scientific_writer/.claude/skills/venue-templates/scripts/validate_format.py +255 -0
- scientific_writer/__init__.py +43 -0
- scientific_writer/api.py +393 -0
- scientific_writer/cli.py +326 -0
- scientific_writer/core.py +275 -0
- scientific_writer/models.py +76 -0
- scientific_writer/utils.py +289 -0
- scientific_writer-2.3.1.dist-info/METADATA +272 -0
- scientific_writer-2.3.1.dist-info/RECORD +315 -0
- scientific_writer-2.3.1.dist-info/WHEEL +4 -0
- scientific_writer-2.3.1.dist-info/entry_points.txt +2 -0
- scientific_writer-2.3.1.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
# MarkItDown Example Usage
|
|
2
|
+
|
|
3
|
+
This document provides practical examples of using MarkItDown in various scenarios.
|
|
4
|
+
|
|
5
|
+
## Basic Examples
|
|
6
|
+
|
|
7
|
+
### 1. Simple File Conversion
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from markitdown import MarkItDown
|
|
11
|
+
|
|
12
|
+
md = MarkItDown()
|
|
13
|
+
|
|
14
|
+
# Convert a PDF
|
|
15
|
+
result = md.convert("research_paper.pdf")
|
|
16
|
+
print(result.text_content)
|
|
17
|
+
|
|
18
|
+
# Convert a Word document
|
|
19
|
+
result = md.convert("manuscript.docx")
|
|
20
|
+
print(result.text_content)
|
|
21
|
+
|
|
22
|
+
# Convert a PowerPoint
|
|
23
|
+
result = md.convert("presentation.pptx")
|
|
24
|
+
print(result.text_content)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### 2. Save to File
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from markitdown import MarkItDown
|
|
31
|
+
|
|
32
|
+
md = MarkItDown()
|
|
33
|
+
result = md.convert("document.pdf")
|
|
34
|
+
|
|
35
|
+
with open("output.md", "w", encoding="utf-8") as f:
|
|
36
|
+
    f.write(result.text_content)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 3. Convert from Stream
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from markitdown import MarkItDown
|
|
43
|
+
|
|
44
|
+
md = MarkItDown()
|
|
45
|
+
|
|
46
|
+
with open("document.pdf", "rb") as f:
|
|
47
|
+
    result = md.convert_stream(f, file_extension=".pdf")
|
|
48
|
+
print(result.text_content)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Scientific Workflows
|
|
52
|
+
|
|
53
|
+
### Convert Research Papers
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from markitdown import MarkItDown
|
|
57
|
+
from pathlib import Path
|
|
58
|
+
|
|
59
|
+
md = MarkItDown()
|
|
60
|
+
|
|
61
|
+
# Convert all papers in a directory
|
|
62
|
+
papers_dir = Path("research_papers/")
|
|
63
|
+
output_dir = Path("markdown_papers/")
|
|
64
|
+
output_dir.mkdir(exist_ok=True)
|
|
65
|
+
|
|
66
|
+
for paper in papers_dir.glob("*.pdf"):
|
|
67
|
+
    result = md.convert(str(paper))
|
|
68
|
+
|
|
69
|
+
    # Save with original filename
|
|
70
|
+
    output_file = output_dir / f"{paper.stem}.md"
|
|
71
|
+
    output_file.write_text(result.text_content)
|
|
72
|
+
|
|
73
|
+
    print(f"Converted: {paper.name}")
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Extract Tables from Excel
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from markitdown import MarkItDown
|
|
80
|
+
|
|
81
|
+
md = MarkItDown()
|
|
82
|
+
|
|
83
|
+
# Convert Excel to Markdown tables
|
|
84
|
+
result = md.convert("experimental_data.xlsx")
|
|
85
|
+
|
|
86
|
+
# The result contains Markdown-formatted tables
|
|
87
|
+
print(result.text_content)
|
|
88
|
+
|
|
89
|
+
# Save for further processing
|
|
90
|
+
with open("data_tables.md", "w") as f:
|
|
91
|
+
f.write(result.text_content)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Process Presentation Slides
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from markitdown import MarkItDown
|
|
98
|
+
from openai import OpenAI
|
|
99
|
+
|
|
100
|
+
# With AI descriptions for images (configure the client for OpenRouter to use this model ID)
|
|
101
|
+
client = OpenAI()
|
|
102
|
+
md = MarkItDown(
|
|
103
|
+
llm_client=client,
|
|
104
|
+
llm_model="anthropic/claude-sonnet-4.5",
|
|
105
|
+
llm_prompt="Describe this scientific slide, focusing on data and key findings"
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
result = md.convert("conference_talk.pptx")
|
|
109
|
+
|
|
110
|
+
# Save with a title heading
|
|
111
|
+
output = f"""# Conference Talk
|
|
112
|
+
|
|
113
|
+
{result.text_content}
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
with open("talk_notes.md", "w") as f:
|
|
117
|
+
f.write(output)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## AI-Enhanced Conversions
|
|
121
|
+
|
|
122
|
+
### Detailed Image Descriptions
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from markitdown import MarkItDown
|
|
126
|
+
from openai import OpenAI
|
|
127
|
+
|
|
128
|
+
# Initialize OpenRouter client
|
|
129
|
+
client = OpenAI(
|
|
130
|
+
api_key="your-openrouter-api-key",
|
|
131
|
+
base_url="https://openrouter.ai/api/v1"
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
# Scientific diagram analysis
|
|
135
|
+
scientific_prompt = """
|
|
136
|
+
Analyze this scientific figure. Describe:
|
|
137
|
+
- Type of visualization (graph, microscopy, diagram, etc.)
|
|
138
|
+
- Key data points and trends
|
|
139
|
+
- Axes, labels, and legends
|
|
140
|
+
- Scientific significance
|
|
141
|
+
Be technical and precise.
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
md = MarkItDown(
|
|
145
|
+
llm_client=client,
|
|
146
|
+
llm_model="anthropic/claude-sonnet-4.5", # recommended for scientific vision
|
|
147
|
+
llm_prompt=scientific_prompt
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# Convert paper with figures
|
|
151
|
+
result = md.convert("paper_with_figures.pdf")
|
|
152
|
+
print(result.text_content)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Different Prompts for Different Files
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from markitdown import MarkItDown
|
|
159
|
+
from openai import OpenAI
|
|
160
|
+
|
|
161
|
+
# Initialize OpenRouter client
|
|
162
|
+
client = OpenAI(
|
|
163
|
+
api_key="your-openrouter-api-key",
|
|
164
|
+
base_url="https://openrouter.ai/api/v1"
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
# Scientific papers - use Claude for technical analysis
|
|
168
|
+
scientific_md = MarkItDown(
|
|
169
|
+
llm_client=client,
|
|
170
|
+
llm_model="anthropic/claude-sonnet-4.5",
|
|
171
|
+
llm_prompt="Describe scientific figures with technical precision"
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Presentations - use Claude with a slide-summary prompt
|
|
175
|
+
presentation_md = MarkItDown(
|
|
176
|
+
llm_client=client,
|
|
177
|
+
llm_model="anthropic/claude-sonnet-4.5",
|
|
178
|
+
llm_prompt="Summarize slide content and key visual elements"
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
# Use appropriate instance for each file
|
|
182
|
+
paper_result = scientific_md.convert("research.pdf")
|
|
183
|
+
slides_result = presentation_md.convert("talk.pptx")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Batch Processing
|
|
187
|
+
|
|
188
|
+
### Process Multiple Files
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from markitdown import MarkItDown
|
|
192
|
+
from pathlib import Path
|
|
193
|
+
|
|
194
|
+
md = MarkItDown()
|
|
195
|
+
|
|
196
|
+
files_to_convert = [
|
|
197
|
+
"paper1.pdf",
|
|
198
|
+
"data.xlsx",
|
|
199
|
+
"presentation.pptx",
|
|
200
|
+
"notes.docx"
|
|
201
|
+
]
|
|
202
|
+
|
|
203
|
+
for file in files_to_convert:
|
|
204
|
+
try:
|
|
205
|
+
result = md.convert(file)
|
|
206
|
+
output = Path(file).stem + ".md"
|
|
207
|
+
|
|
208
|
+
with open(output, "w") as f:
|
|
209
|
+
f.write(result.text_content)
|
|
210
|
+
|
|
211
|
+
print(f"✓ {file} -> {output}")
|
|
212
|
+
except Exception as e:
|
|
213
|
+
print(f"✗ Error converting {file}: {e}")
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Parallel Processing
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
from markitdown import MarkItDown
|
|
220
|
+
from pathlib import Path
|
|
221
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
222
|
+
|
|
223
|
+
def convert_file(filepath):
|
|
224
|
+
md = MarkItDown()
|
|
225
|
+
result = md.convert(filepath)
|
|
226
|
+
|
|
227
|
+
output = Path(filepath).stem + ".md"
|
|
228
|
+
with open(output, "w") as f:
|
|
229
|
+
f.write(result.text_content)
|
|
230
|
+
|
|
231
|
+
return filepath, output
|
|
232
|
+
|
|
233
|
+
files = list(Path("documents/").glob("*.pdf"))
|
|
234
|
+
|
|
235
|
+
with ThreadPoolExecutor(max_workers=4) as executor:
|
|
236
|
+
results = executor.map(convert_file, [str(f) for f in files])
|
|
237
|
+
|
|
238
|
+
for input_file, output_file in results:
|
|
239
|
+
print(f"Converted: {input_file} -> {output_file}")
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Integration Examples
|
|
243
|
+
|
|
244
|
+
### Literature Review Pipeline
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
from markitdown import MarkItDown
|
|
248
|
+
from pathlib import Path
|
|
249
|
+
import json
|
|
250
|
+
|
|
251
|
+
md = MarkItDown()
|
|
252
|
+
|
|
253
|
+
# Convert papers and create metadata
|
|
254
|
+
papers_dir = Path("literature/")
|
|
255
|
+
output_dir = Path("literature_markdown/")
|
|
256
|
+
output_dir.mkdir(exist_ok=True)
|
|
257
|
+
|
|
258
|
+
catalog = []
|
|
259
|
+
|
|
260
|
+
for paper in papers_dir.glob("*.pdf"):
|
|
261
|
+
result = md.convert(str(paper))
|
|
262
|
+
|
|
263
|
+
# Save Markdown
|
|
264
|
+
md_file = output_dir / f"{paper.stem}.md"
|
|
265
|
+
md_file.write_text(result.text_content)
|
|
266
|
+
|
|
267
|
+
# Store metadata
|
|
268
|
+
catalog.append({
|
|
269
|
+
"title": result.title or paper.stem,
|
|
270
|
+
"source": paper.name,
|
|
271
|
+
"markdown": str(md_file),
|
|
272
|
+
"word_count": len(result.text_content.split())
|
|
273
|
+
})
|
|
274
|
+
|
|
275
|
+
# Save catalog
|
|
276
|
+
with open(output_dir / "catalog.json", "w") as f:
|
|
277
|
+
json.dump(catalog, f, indent=2)
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Data Extraction Pipeline
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
from markitdown import MarkItDown
|
|
284
|
+
import re
|
|
285
|
+
|
|
286
|
+
md = MarkItDown()
|
|
287
|
+
|
|
288
|
+
# Convert Excel data to Markdown
|
|
289
|
+
result = md.convert("experimental_results.xlsx")
|
|
290
|
+
|
|
291
|
+
# Extract tables (Markdown tables start with |)
|
|
292
|
+
tables = []
|
|
293
|
+
current_table = []
|
|
294
|
+
in_table = False
|
|
295
|
+
|
|
296
|
+
for line in result.text_content.split('\n'):
|
|
297
|
+
if line.strip().startswith('|'):
|
|
298
|
+
in_table = True
|
|
299
|
+
current_table.append(line)
|
|
300
|
+
elif in_table:
|
|
301
|
+
if current_table:
|
|
302
|
+
tables.append('\n'.join(current_table))
|
|
303
|
+
current_table = []
|
|
304
|
+
in_table = False
|
|
305
|
+
|
|
306
|
+
# Process each table
|
|
307
|
+
for i, table in enumerate(tables):
|
|
308
|
+
print(f"Table {i+1}:")
|
|
309
|
+
print(table)
|
|
310
|
+
print("\n" + "="*50 + "\n")
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
### YouTube Transcript Analysis
|
|
314
|
+
|
|
315
|
+
```python
|
|
316
|
+
from markitdown import MarkItDown
|
|
317
|
+
|
|
318
|
+
md = MarkItDown()
|
|
319
|
+
|
|
320
|
+
# Get transcript
|
|
321
|
+
video_url = "https://www.youtube.com/watch?v=VIDEO_ID"
|
|
322
|
+
result = md.convert(video_url)
|
|
323
|
+
|
|
324
|
+
# Save transcript
|
|
325
|
+
with open("lecture_transcript.md", "w") as f:
|
|
326
|
+
f.write(f"# Lecture Transcript\n\n")
|
|
327
|
+
f.write(f"**Source**: {video_url}\n\n")
|
|
328
|
+
f.write(result.text_content)
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
## Error Handling
|
|
332
|
+
|
|
333
|
+
### Robust Conversion
|
|
334
|
+
|
|
335
|
+
```python
|
|
336
|
+
from markitdown import MarkItDown
|
|
337
|
+
from pathlib import Path
|
|
338
|
+
import logging
|
|
339
|
+
|
|
340
|
+
logging.basicConfig(level=logging.INFO)
|
|
341
|
+
logger = logging.getLogger(__name__)
|
|
342
|
+
|
|
343
|
+
md = MarkItDown()
|
|
344
|
+
|
|
345
|
+
def safe_convert(filepath):
|
|
346
|
+
"""Convert file with error handling."""
|
|
347
|
+
try:
|
|
348
|
+
result = md.convert(filepath)
|
|
349
|
+
output = Path(filepath).stem + ".md"
|
|
350
|
+
|
|
351
|
+
with open(output, "w") as f:
|
|
352
|
+
f.write(result.text_content)
|
|
353
|
+
|
|
354
|
+
logger.info(f"Successfully converted {filepath}")
|
|
355
|
+
return True
|
|
356
|
+
|
|
357
|
+
except FileNotFoundError:
|
|
358
|
+
logger.error(f"File not found: {filepath}")
|
|
359
|
+
return False
|
|
360
|
+
|
|
361
|
+
except ValueError as e:
|
|
362
|
+
logger.error(f"Invalid file format for {filepath}: {e}")
|
|
363
|
+
return False
|
|
364
|
+
|
|
365
|
+
except Exception as e:
|
|
366
|
+
logger.error(f"Unexpected error converting {filepath}: {e}")
|
|
367
|
+
return False
|
|
368
|
+
|
|
369
|
+
# Use it
|
|
370
|
+
files = ["paper.pdf", "data.xlsx", "slides.pptx"]
|
|
371
|
+
results = [safe_convert(f) for f in files]
|
|
372
|
+
|
|
373
|
+
print(f"Successfully converted {sum(results)}/{len(files)} files")
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
## Advanced Use Cases
|
|
377
|
+
|
|
378
|
+
### Custom Metadata Extraction
|
|
379
|
+
|
|
380
|
+
```python
|
|
381
|
+
from markitdown import MarkItDown
|
|
382
|
+
import re
|
|
383
|
+
from datetime import datetime
|
|
384
|
+
|
|
385
|
+
md = MarkItDown()
|
|
386
|
+
|
|
387
|
+
def convert_with_metadata(filepath):
|
|
388
|
+
result = md.convert(filepath)
|
|
389
|
+
|
|
390
|
+
# Extract metadata from content
|
|
391
|
+
metadata = {
|
|
392
|
+
"file": filepath,
|
|
393
|
+
"title": result.title,
|
|
394
|
+
"converted_at": datetime.now().isoformat(),
|
|
395
|
+
"word_count": len(result.text_content.split()),
|
|
396
|
+
"char_count": len(result.text_content)
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
# Try to find author
|
|
400
|
+
author_match = re.search(r'(?:Author|By):\s*(.+?)(?:\n|$)', result.text_content)
|
|
401
|
+
if author_match:
|
|
402
|
+
metadata["author"] = author_match.group(1).strip()
|
|
403
|
+
|
|
404
|
+
# Create formatted output
|
|
405
|
+
output = f"""---
|
|
406
|
+
title: {metadata['title']}
|
|
407
|
+
author: {metadata.get('author', 'Unknown')}
|
|
408
|
+
source: {metadata['file']}
|
|
409
|
+
converted: {metadata['converted_at']}
|
|
410
|
+
words: {metadata['word_count']}
|
|
411
|
+
---
|
|
412
|
+
|
|
413
|
+
{result.text_content}
|
|
414
|
+
"""
|
|
415
|
+
|
|
416
|
+
return output, metadata
|
|
417
|
+
|
|
418
|
+
# Use it
|
|
419
|
+
content, meta = convert_with_metadata("paper.pdf")
|
|
420
|
+
print(meta)
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
### Format-Specific Processing
|
|
424
|
+
|
|
425
|
+
```python
|
|
426
|
+
from markitdown import MarkItDown
|
|
427
|
+
from pathlib import Path
|
|
428
|
+
|
|
429
|
+
md = MarkItDown()
|
|
430
|
+
|
|
431
|
+
def process_by_format(filepath):
|
|
432
|
+
path = Path(filepath)
|
|
433
|
+
result = md.convert(filepath)
|
|
434
|
+
|
|
435
|
+
if path.suffix == '.pdf':
|
|
436
|
+
# Add a PDF-specific heading
|
|
437
|
+
output = f"# PDF Document: {path.stem}\n\n"
|
|
438
|
+
output += result.text_content
|
|
439
|
+
|
|
440
|
+
elif path.suffix == '.xlsx':
|
|
441
|
+
# Add table count
|
|
442
|
+
table_count = result.text_content.count('|---')
|
|
443
|
+
output = f"# Excel Data: {path.stem}\n\n"
|
|
444
|
+
output += f"**Tables**: {table_count}\n\n"
|
|
445
|
+
output += result.text_content
|
|
446
|
+
|
|
447
|
+
elif path.suffix == '.pptx':
|
|
448
|
+
# Add slide count
|
|
449
|
+
slide_count = result.text_content.count('## Slide')
|
|
450
|
+
output = f"# Presentation: {path.stem}\n\n"
|
|
451
|
+
output += f"**Slides**: {slide_count}\n\n"
|
|
452
|
+
output += result.text_content
|
|
453
|
+
|
|
454
|
+
else:
|
|
455
|
+
output = result.text_content
|
|
456
|
+
|
|
457
|
+
return output
|
|
458
|
+
|
|
459
|
+
# Use it
|
|
460
|
+
content = process_by_format("presentation.pptx")
|
|
461
|
+
print(content)
|
|
462
|
+
```
|
|
463
|
+
|