scientific-writer 2.2.1__py3-none-any.whl → 2.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scientific-writer might be problematic. Click here for more details.
- scientific_writer/.claude/WRITER.md +748 -0
- scientific_writer/.claude/settings.local.json +30 -0
- scientific_writer/.claude/skills/citation-management/SKILL.md +1046 -0
- scientific_writer/.claude/skills/citation-management/assets/bibtex_template.bib +264 -0
- scientific_writer/.claude/skills/citation-management/assets/citation_checklist.md +386 -0
- scientific_writer/.claude/skills/citation-management/references/bibtex_formatting.md +908 -0
- scientific_writer/.claude/skills/citation-management/references/citation_validation.md +794 -0
- scientific_writer/.claude/skills/citation-management/references/google_scholar_search.md +725 -0
- scientific_writer/.claude/skills/citation-management/references/metadata_extraction.md +870 -0
- scientific_writer/.claude/skills/citation-management/references/pubmed_search.md +839 -0
- scientific_writer/.claude/skills/citation-management/scripts/doi_to_bibtex.py +204 -0
- scientific_writer/.claude/skills/citation-management/scripts/extract_metadata.py +569 -0
- scientific_writer/.claude/skills/citation-management/scripts/format_bibtex.py +349 -0
- scientific_writer/.claude/skills/citation-management/scripts/search_google_scholar.py +282 -0
- scientific_writer/.claude/skills/citation-management/scripts/search_pubmed.py +398 -0
- scientific_writer/.claude/skills/citation-management/scripts/validate_citations.py +497 -0
- scientific_writer/.claude/skills/clinical-reports/IMPLEMENTATION_SUMMARY.md +641 -0
- scientific_writer/.claude/skills/clinical-reports/README.md +236 -0
- scientific_writer/.claude/skills/clinical-reports/SKILL.md +1088 -0
- scientific_writer/.claude/skills/clinical-reports/assets/case_report_template.md +352 -0
- scientific_writer/.claude/skills/clinical-reports/assets/clinical_trial_csr_template.md +353 -0
- scientific_writer/.claude/skills/clinical-reports/assets/clinical_trial_sae_template.md +359 -0
- scientific_writer/.claude/skills/clinical-reports/assets/consult_note_template.md +305 -0
- scientific_writer/.claude/skills/clinical-reports/assets/discharge_summary_template.md +453 -0
- scientific_writer/.claude/skills/clinical-reports/assets/hipaa_compliance_checklist.md +395 -0
- scientific_writer/.claude/skills/clinical-reports/assets/history_physical_template.md +305 -0
- scientific_writer/.claude/skills/clinical-reports/assets/lab_report_template.md +309 -0
- scientific_writer/.claude/skills/clinical-reports/assets/pathology_report_template.md +249 -0
- scientific_writer/.claude/skills/clinical-reports/assets/quality_checklist.md +338 -0
- scientific_writer/.claude/skills/clinical-reports/assets/radiology_report_template.md +318 -0
- scientific_writer/.claude/skills/clinical-reports/assets/soap_note_template.md +253 -0
- scientific_writer/.claude/skills/clinical-reports/references/case_report_guidelines.md +570 -0
- scientific_writer/.claude/skills/clinical-reports/references/clinical_trial_reporting.md +693 -0
- scientific_writer/.claude/skills/clinical-reports/references/data_presentation.md +530 -0
- scientific_writer/.claude/skills/clinical-reports/references/diagnostic_reports_standards.md +629 -0
- scientific_writer/.claude/skills/clinical-reports/references/medical_terminology.md +588 -0
- scientific_writer/.claude/skills/clinical-reports/references/patient_documentation.md +744 -0
- scientific_writer/.claude/skills/clinical-reports/references/peer_review_standards.md +585 -0
- scientific_writer/.claude/skills/clinical-reports/references/regulatory_compliance.md +577 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/check_deidentification.py +346 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/compliance_checker.py +78 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/extract_clinical_data.py +102 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/format_adverse_events.py +103 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/generate_report_template.py +163 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/terminology_validator.py +133 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/validate_case_report.py +334 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/validate_trial_report.py +89 -0
- scientific_writer/.claude/skills/document-skills/docx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/docx/SKILL.md +197 -0
- scientific_writer/.claude/skills/document-skills/docx/docx-js.md +350 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/pack.py +159 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/unpack.py +29 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validate.py +69 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/__init__.py +15 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/base.py +951 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/docx.py +274 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/pptx.py +315 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/redlining.py +279 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml.md +610 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/__init__.py +1 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/document.py +1276 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/comments.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsIds.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/people.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/utilities.py +374 -0
- scientific_writer/.claude/skills/document-skills/pdf/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/pdf/SKILL.md +294 -0
- scientific_writer/.claude/skills/document-skills/pdf/forms.md +205 -0
- scientific_writer/.claude/skills/document-skills/pdf/reference.md +612 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_bounding_boxes.py +70 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_fillable_fields.py +12 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/create_validation_image.py +41 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/extract_form_field_info.py +152 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/fill_fillable_fields.py +114 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- scientific_writer/.claude/skills/document-skills/pptx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/pptx/SKILL.md +484 -0
- scientific_writer/.claude/skills/document-skills/pptx/html2pptx.md +625 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/pack.py +159 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/unpack.py +29 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validate.py +69 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/base.py +951 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/docx.py +274 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml.md +427 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/html2pptx.js +979 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/inventory.py +1020 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/rearrange.py +231 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/replace.py +385 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/thumbnail.py +450 -0
- scientific_writer/.claude/skills/document-skills/xlsx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/xlsx/SKILL.md +289 -0
- scientific_writer/.claude/skills/document-skills/xlsx/recalc.py +178 -0
- scientific_writer/.claude/skills/hypothesis-generation/SKILL.md +155 -0
- scientific_writer/.claude/skills/hypothesis-generation/assets/hypothesis_output_template.md +302 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/experimental_design_patterns.md +327 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/hypothesis_quality_criteria.md +196 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/literature_search_strategies.md +505 -0
- scientific_writer/.claude/skills/latex-posters/README.md +417 -0
- scientific_writer/.claude/skills/latex-posters/SKILL.md +919 -0
- scientific_writer/.claude/skills/latex-posters/assets/baposter_template.tex +257 -0
- scientific_writer/.claude/skills/latex-posters/assets/beamerposter_template.tex +244 -0
- scientific_writer/.claude/skills/latex-posters/assets/poster_quality_checklist.md +358 -0
- scientific_writer/.claude/skills/latex-posters/assets/tikzposter_template.tex +251 -0
- scientific_writer/.claude/skills/latex-posters/references/latex_poster_packages.md +745 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_content_guide.md +748 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_design_principles.md +806 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_layout_design.md +900 -0
- scientific_writer/.claude/skills/latex-posters/scripts/review_poster.sh +214 -0
- scientific_writer/.claude/skills/literature-review/SKILL.md +546 -0
- scientific_writer/.claude/skills/literature-review/assets/review_template.md +412 -0
- scientific_writer/.claude/skills/literature-review/references/citation_styles.md +166 -0
- scientific_writer/.claude/skills/literature-review/references/database_strategies.md +381 -0
- scientific_writer/.claude/skills/literature-review/scripts/generate_pdf.py +176 -0
- scientific_writer/.claude/skills/literature-review/scripts/search_databases.py +303 -0
- scientific_writer/.claude/skills/literature-review/scripts/verify_citations.py +222 -0
- scientific_writer/.claude/skills/markitdown/INSTALLATION_GUIDE.md +318 -0
- scientific_writer/.claude/skills/markitdown/LICENSE.txt +22 -0
- scientific_writer/.claude/skills/markitdown/OPENROUTER_INTEGRATION.md +359 -0
- scientific_writer/.claude/skills/markitdown/QUICK_REFERENCE.md +309 -0
- scientific_writer/.claude/skills/markitdown/README.md +184 -0
- scientific_writer/.claude/skills/markitdown/SKILL.md +450 -0
- scientific_writer/.claude/skills/markitdown/SKILL_SUMMARY.md +307 -0
- scientific_writer/.claude/skills/markitdown/assets/example_usage.md +463 -0
- scientific_writer/.claude/skills/markitdown/references/api_reference.md +399 -0
- scientific_writer/.claude/skills/markitdown/references/file_formats.md +542 -0
- scientific_writer/.claude/skills/markitdown/scripts/batch_convert.py +228 -0
- scientific_writer/.claude/skills/markitdown/scripts/convert_literature.py +283 -0
- scientific_writer/.claude/skills/markitdown/scripts/convert_with_ai.py +243 -0
- scientific_writer/.claude/skills/paper-2-web/SKILL.md +455 -0
- scientific_writer/.claude/skills/paper-2-web/references/installation.md +141 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2poster.md +346 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2video.md +305 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2web.md +187 -0
- scientific_writer/.claude/skills/paper-2-web/references/usage_examples.md +436 -0
- scientific_writer/.claude/skills/peer-review/SKILL.md +375 -0
- scientific_writer/.claude/skills/peer-review/references/common_issues.md +552 -0
- scientific_writer/.claude/skills/peer-review/references/reporting_standards.md +290 -0
- scientific_writer/.claude/skills/research-grants/README.md +285 -0
- scientific_writer/.claude/skills/research-grants/SKILL.md +896 -0
- scientific_writer/.claude/skills/research-grants/assets/budget_justification_template.md +453 -0
- scientific_writer/.claude/skills/research-grants/assets/nih_specific_aims_template.md +166 -0
- scientific_writer/.claude/skills/research-grants/assets/nsf_project_summary_template.md +92 -0
- scientific_writer/.claude/skills/research-grants/references/broader_impacts.md +392 -0
- scientific_writer/.claude/skills/research-grants/references/darpa_guidelines.md +636 -0
- scientific_writer/.claude/skills/research-grants/references/doe_guidelines.md +586 -0
- scientific_writer/.claude/skills/research-grants/references/nih_guidelines.md +851 -0
- scientific_writer/.claude/skills/research-grants/references/nsf_guidelines.md +570 -0
- scientific_writer/.claude/skills/research-grants/references/specific_aims_guide.md +458 -0
- scientific_writer/.claude/skills/research-lookup/README.md +116 -0
- scientific_writer/.claude/skills/research-lookup/SKILL.md +443 -0
- scientific_writer/.claude/skills/research-lookup/examples.py +174 -0
- scientific_writer/.claude/skills/research-lookup/lookup.py +93 -0
- scientific_writer/.claude/skills/research-lookup/research_lookup.py +335 -0
- scientific_writer/.claude/skills/research-lookup/scripts/research_lookup.py +261 -0
- scientific_writer/.claude/skills/scholar-evaluation/SKILL.md +254 -0
- scientific_writer/.claude/skills/scholar-evaluation/references/evaluation_framework.md +663 -0
- scientific_writer/.claude/skills/scholar-evaluation/scripts/calculate_scores.py +378 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/SKILL.md +530 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/common_biases.md +364 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/evidence_hierarchy.md +484 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/experimental_design.md +496 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/logical_fallacies.md +478 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/scientific_method.md +169 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/statistical_pitfalls.md +506 -0
- scientific_writer/.claude/skills/scientific-schematics/SKILL.md +2035 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/block_diagram_template.tex +199 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/circuit_template.tex +159 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/flowchart_template.tex +161 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/pathway_template.tex +162 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/tikz_styles.tex +422 -0
- scientific_writer/.claude/skills/scientific-schematics/references/best_practices.md +562 -0
- scientific_writer/.claude/skills/scientific-schematics/references/diagram_types.md +637 -0
- scientific_writer/.claude/skills/scientific-schematics/references/python_libraries.md +791 -0
- scientific_writer/.claude/skills/scientific-schematics/references/tikz_guide.md +734 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/circuit_generator.py +307 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/compile_tikz.py +292 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/generate_flowchart.py +281 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/pathway_diagram.py +406 -0
- scientific_writer/.claude/skills/scientific-writing/SKILL.md +443 -0
- scientific_writer/.claude/skills/scientific-writing/references/citation_styles.md +720 -0
- scientific_writer/.claude/skills/scientific-writing/references/figures_tables.md +806 -0
- scientific_writer/.claude/skills/scientific-writing/references/imrad_structure.md +658 -0
- scientific_writer/.claude/skills/scientific-writing/references/reporting_guidelines.md +748 -0
- scientific_writer/.claude/skills/scientific-writing/references/writing_principles.md +824 -0
- scientific_writer/.claude/skills/treatment-plans/README.md +483 -0
- scientific_writer/.claude/skills/treatment-plans/SKILL.md +817 -0
- scientific_writer/.claude/skills/treatment-plans/assets/chronic_disease_management_plan.tex +636 -0
- scientific_writer/.claude/skills/treatment-plans/assets/general_medical_treatment_plan.tex +616 -0
- scientific_writer/.claude/skills/treatment-plans/assets/mental_health_treatment_plan.tex +745 -0
- scientific_writer/.claude/skills/treatment-plans/assets/pain_management_plan.tex +770 -0
- scientific_writer/.claude/skills/treatment-plans/assets/perioperative_care_plan.tex +724 -0
- scientific_writer/.claude/skills/treatment-plans/assets/quality_checklist.md +471 -0
- scientific_writer/.claude/skills/treatment-plans/assets/rehabilitation_treatment_plan.tex +727 -0
- scientific_writer/.claude/skills/treatment-plans/references/goal_setting_frameworks.md +411 -0
- scientific_writer/.claude/skills/treatment-plans/references/intervention_guidelines.md +507 -0
- scientific_writer/.claude/skills/treatment-plans/references/regulatory_compliance.md +476 -0
- scientific_writer/.claude/skills/treatment-plans/references/specialty_specific_guidelines.md +607 -0
- scientific_writer/.claude/skills/treatment-plans/references/treatment_plan_standards.md +456 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/check_completeness.py +318 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/generate_template.py +244 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/timeline_generator.py +369 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/validate_treatment_plan.py +367 -0
- scientific_writer/.claude/skills/venue-templates/SKILL.md +590 -0
- scientific_writer/.claude/skills/venue-templates/assets/grants/nih_specific_aims.tex +235 -0
- scientific_writer/.claude/skills/venue-templates/assets/grants/nsf_proposal_template.tex +375 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/nature_article.tex +171 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/neurips_article.tex +283 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/plos_one.tex +317 -0
- scientific_writer/.claude/skills/venue-templates/assets/posters/beamerposter_academic.tex +311 -0
- scientific_writer/.claude/skills/venue-templates/references/conferences_formatting.md +564 -0
- scientific_writer/.claude/skills/venue-templates/references/grants_requirements.md +787 -0
- scientific_writer/.claude/skills/venue-templates/references/journals_formatting.md +486 -0
- scientific_writer/.claude/skills/venue-templates/references/posters_guidelines.md +628 -0
- scientific_writer/.claude/skills/venue-templates/scripts/customize_template.py +206 -0
- scientific_writer/.claude/skills/venue-templates/scripts/query_template.py +260 -0
- scientific_writer/.claude/skills/venue-templates/scripts/validate_format.py +255 -0
- scientific_writer/__init__.py +1 -1
- scientific_writer/api.py +9 -5
- scientific_writer/cli.py +9 -5
- scientific_writer/core.py +28 -5
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/METADATA +1 -1
- scientific_writer-2.2.3.dist-info/RECORD +312 -0
- scientific_writer-2.2.1.dist-info/RECORD +0 -11
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/WHEEL +0 -0
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/entry_points.txt +0 -0
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Batch convert multiple files to Markdown using MarkItDown.
|
|
4
|
+
|
|
5
|
+
This script demonstrates how to efficiently convert multiple files
|
|
6
|
+
in a directory to Markdown format.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Optional
|
|
12
|
+
from markitdown import MarkItDown
|
|
13
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def convert_file(md: MarkItDown, file_path: Path, output_dir: Path, verbose: bool = False) -> tuple[bool, str, str]:
    """
    Convert one input file to Markdown and write it into ``output_dir``.

    The written document starts with a small metadata header (a title
    heading, the source file name and its suffix, then a horizontal rule)
    followed by the converted text. The output file is named
    ``<input stem>.md``; because only the stem is used, two inputs that
    share a stem (e.g. ``report.pdf`` and ``report.docx``) map to the same
    output file.

    Any exception raised while converting or writing is caught and reported
    through the returned tuple instead of propagating.

    Args:
        md: Converter object; its ``convert`` method is called with the
            input path as a string.
        file_path: Path to the input file.
        output_dir: Directory the ``.md`` file is written to.
        verbose: When true, print the path before converting it.

    Returns:
        Tuple of (success, input_path, message).
    """
    source = str(file_path)

    try:
        if verbose:
            print(f"Converting: {file_path}")

        converted = md.convert(source)

        # The output name is derived from the input stem only.
        target = output_dir / f"{file_path.stem}.md"

        # Fall back to the file stem when the converter gives no title.
        heading = converted.title or file_path.stem
        header = "".join((
            f"# {heading}\n\n",
            f"**Source**: {file_path.name}\n",
            f"**Format**: {file_path.suffix}\n\n",
            "---\n\n",
        ))

        target.write_text(header + converted.text_content, encoding='utf-8')

        return True, source, f"✓ Converted to {target.name}"

    except Exception as exc:
        # Best-effort per-file conversion: report the failure to the caller
        # rather than raising.
        return False, source, f"✗ Error: {exc}"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def batch_convert(
    input_dir: Path,
    output_dir: Path,
    extensions: Optional[List[str]] = None,
    recursive: bool = False,
    workers: int = 4,
    verbose: bool = False,
    enable_plugins: bool = False
) -> dict:
    """
    Batch convert files in a directory.

    Args:
        input_dir: Input directory
        output_dir: Output directory (created if missing)
        extensions: List of file extensions to convert (e.g., ['.pdf', '.docx']);
            defaults to a built-in list of document and image types
        recursive: Search subdirectories
        workers: Number of parallel workers; values below 1 are treated as 1
        verbose: Print detailed messages
        enable_plugins: Enable MarkItDown plugins

    Returns:
        Dictionary with conversion statistics. It always has the keys
        'total', 'success', 'failed' and 'details'; 'details' is a list of
        {'file', 'success', 'message'} dicts, one per processed file.
    """
    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    # Default extensions if not specified
    if extensions is None:
        extensions = ['.pdf', '.docx', '.pptx', '.xlsx', '.html', '.jpg', '.png']

    # Find files. dict.fromkeys drops duplicates (e.g. the same extension
    # given twice) while keeping discovery order; is_file() skips directories
    # whose names happen to end in one of the extensions.
    finder = input_dir.rglob if recursive else input_dir.glob
    files = list(dict.fromkeys(
        path
        for ext in extensions
        for path in finder(f"*{ext}")
        if path.is_file()
    ))

    if not files:
        print(f"No files found with extensions: {', '.join(extensions)}")
        # Same shape as the normal return so callers can read 'details' safely
        return {'total': 0, 'success': 0, 'failed': 0, 'details': []}

    print(f"Found {len(files)} file(s) to convert")

    # Create MarkItDown instance (shared by all worker threads)
    md = MarkItDown(enable_plugins=enable_plugins)

    results = {
        'total': len(files),
        'success': 0,
        'failed': 0,
        'details': []
    }

    # Convert files in parallel. ThreadPoolExecutor rejects max_workers <= 0,
    # so clamp instead of crashing on e.g. `--workers 0`.
    with ThreadPoolExecutor(max_workers=max(1, workers)) as executor:
        futures = {
            executor.submit(convert_file, md, file_path, output_dir, verbose): file_path
            for file_path in files
        }

        for future in as_completed(futures):
            success, path, message = future.result()

            results['success' if success else 'failed'] += 1
            results['details'].append({
                'file': path,
                'success': success,
                'message': message
            })

            print(message)

    return results
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def main():
    """
    Command-line entry point for batch conversion.

    Parses the arguments, checks that the input directory exists and is a
    directory, runs batch_convert(), prints a summary and exits with
    status 0 when no conversion failed, 1 otherwise (including invalid
    input directories).
    """
    parser = argparse.ArgumentParser(
        description="Batch convert files to Markdown using MarkItDown",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert all PDFs in a directory
  python batch_convert.py papers/ output/ --extensions .pdf

  # Convert multiple formats recursively
  python batch_convert.py documents/ markdown/ --extensions .pdf .docx .pptx -r

  # Use 8 parallel workers
  python batch_convert.py input/ output/ --workers 8

  # Enable plugins
  python batch_convert.py input/ output/ --plugins
"""
    )

    parser.add_argument('input_dir', type=Path, help='Input directory')
    parser.add_argument('output_dir', type=Path, help='Output directory')
    parser.add_argument(
        '--extensions', '-e',
        nargs='+',
        help='File extensions to convert (e.g., .pdf .docx)'
    )
    parser.add_argument(
        '--recursive', '-r',
        action='store_true',
        help='Search subdirectories recursively'
    )
    parser.add_argument(
        '--workers', '-w',
        type=int,
        default=4,
        help='Number of parallel workers (default: 4)'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Verbose output'
    )
    parser.add_argument(
        '--plugins', '-p',
        action='store_true',
        help='Enable MarkItDown plugins'
    )

    args = parser.parse_args()

    # Validate input directory
    if not args.input_dir.exists():
        print(f"Error: Input directory '{args.input_dir}' does not exist")
        sys.exit(1)

    if not args.input_dir.is_dir():
        print(f"Error: '{args.input_dir}' is not a directory")
        sys.exit(1)

    # Run batch conversion
    results = batch_convert(
        input_dir=args.input_dir,
        output_dir=args.output_dir,
        extensions=args.extensions,
        recursive=args.recursive,
        workers=args.workers,
        verbose=args.verbose,
        enable_plugins=args.plugins
    )

    # Compute the rate separately so the "Success rate:" label is printed
    # even when there were no files (division by zero is avoided).
    total = results['total']
    if total > 0:
        rate = f"{results['success'] / total * 100:.1f}%"
    else:
        rate = "N/A"

    # Print summary
    print("\n" + "="*50)
    print("CONVERSION SUMMARY")
    print("="*50)
    print(f"Total files: {total}")
    print(f"Successful: {results['success']}")
    print(f"Failed: {results['failed']}")
    print(f"Success rate: {rate}")

    # Show failed files if any
    if results['failed'] > 0:
        print("\nFailed conversions:")
        for detail in results['details']:
            if not detail['success']:
                print(f" - {detail['file']}: {detail['message']}")

    sys.exit(0 if results['failed'] == 0 else 1)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|
228
|
+
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Convert scientific literature PDFs to Markdown for analysis and review.
|
|
4
|
+
|
|
5
|
+
This script is specifically designed for converting academic papers,
|
|
6
|
+
organizing them, and preparing them for literature review workflows.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Dict, Optional
|
|
15
|
+
from markitdown import MarkItDown
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def extract_metadata_from_filename(filename: str) -> Dict[str, str]:
    """
    Try to extract metadata from filename.
    Supports patterns like: Author_Year_Title.pdf

    The extension is dropped, then the name is split on underscores and
    dashes. The first part becomes the author and the remaining parts,
    joined with spaces, become the title. A four-digit year starting with
    19 or 20 is reported separately and, when it directly follows the
    author, is left out of the title.

    Args:
        filename: File name (with or without directory and extension)

    Returns:
        Dict that always contains 'title' and may contain 'author' and
        'year' (year as a four-character string)
    """
    metadata = {}

    # Remove extension
    name = Path(filename).stem

    # Try to extract year. Digit lookarounds are used instead of \b because
    # "_" counts as a word character, so \b never matches around the year in
    # "Author_2023_Title".
    year_match = re.search(r'(?<!\d)(?:19|20)\d{2}(?!\d)', name)
    if year_match:
        metadata['year'] = year_match.group()

    # Split by underscores or dashes
    parts = re.split(r'[_\-]', name)
    if len(parts) >= 2:
        metadata['author'] = parts[0]
        title_parts = parts[1:]
        # Author_Year_Title: skip the year token, but only when something
        # is left over to serve as the title.
        if len(title_parts) > 1 and title_parts[0] == metadata.get('year'):
            title_parts = title_parts[1:]
        metadata['title'] = ' '.join(title_parts)
    else:
        metadata['title'] = name

    return metadata
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def convert_paper(
    md: MarkItDown,
    input_file: Path,
    output_dir: Path,
    organize_by_year: bool = False
) -> tuple[bool, Dict]:
    """
    Convert a single paper to Markdown with metadata extraction.

    The output file is ``<stem>.md`` and contains YAML front matter, a
    title heading, a "Document Information" section and the converted text.
    Progress and errors are printed; exceptions are caught and reported
    through the return value.

    Args:
        md: MarkItDown instance
        input_file: Path to PDF file
        output_dir: Output directory
        organize_by_year: Organize into year subdirectories (only applies
            when a year could be read from the file name)

    Returns:
        Tuple of (success, metadata_dict). On failure the dict holds only
        'source_file' and 'error'.
    """
    def yaml_quoted(value) -> str:
        # json.dumps emits a double-quoted string with " and \ escaped;
        # plain values come out exactly as "value".
        return json.dumps(value, ensure_ascii=False)

    try:
        print(f"Converting: {input_file.name}")

        # Convert to Markdown
        result = md.convert(str(input_file))

        # Extract metadata from filename
        metadata = extract_metadata_from_filename(input_file.name)
        metadata['source_file'] = input_file.name
        metadata['converted_date'] = datetime.now().isoformat()

        # Fall back to the title detected in the content when the file
        # name did not provide one
        if 'title' not in metadata and result.title:
            metadata['title'] = result.title

        # Create output path
        output_subdir = output_dir
        if organize_by_year and 'year' in metadata:
            output_subdir = output_dir / metadata['year']
        output_subdir.mkdir(parents=True, exist_ok=True)

        output_file = output_subdir / f"{input_file.stem}.md"

        title = metadata.get('title', input_file.stem)

        # Create formatted Markdown with front matter
        content = "---\n"
        content += f"title: {yaml_quoted(title)}\n"
        if 'author' in metadata:
            content += f"author: {yaml_quoted(metadata['author'])}\n"
        if 'year' in metadata:
            content += f"year: {metadata['year']}\n"
        content += f"source: {yaml_quoted(metadata['source_file'])}\n"
        content += f"converted: {yaml_quoted(metadata['converted_date'])}\n"
        content += "---\n\n"

        # Add title
        content += f"# {title}\n\n"

        # Add metadata section
        content += "## Document Information\n\n"
        if 'author' in metadata:
            content += f"**Author**: {metadata['author']}\n"
        if 'year' in metadata:
            content += f"**Year**: {metadata['year']}\n"
        content += f"**Source File**: {metadata['source_file']}\n"
        content += f"**Converted**: {metadata['converted_date']}\n\n"
        content += "---\n\n"

        # Add content
        content += result.text_content

        # Write to file
        output_file.write_text(content, encoding='utf-8')

        print(f"✓ Saved to: {output_file}")

        return True, metadata

    except Exception as e:
        print(f"✗ Error converting {input_file.name}: {str(e)}")
        return False, {'source_file': input_file.name, 'error': str(e)}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def create_index(papers: List[Dict], output_dir: Path):
    """
    Create an index/catalog of all converted papers.

    Writes two files into ``output_dir``:

    * ``INDEX.md`` - papers grouped under one heading per year ("Unknown"
      when a paper has no year), each with author, source file name and a
      link to its Markdown file.
    * ``catalog.json`` - the paper dicts sorted by year, then title.

    Args:
        papers: Metadata dicts; the keys read are 'title', 'author',
            'year' and 'source_file', all optional
        output_dir: Directory that holds the converted Markdown files and
            receives the index files
    """
    # Local import: only the link targets need percent-encoding
    from urllib.parse import quote

    # Sort by year (if available) and title; papers without a year go last
    papers_sorted = sorted(
        papers,
        key=lambda x: (x.get('year', '9999'), x.get('title', ''))
    )

    # Create Markdown index
    index_content = "# Literature Review Index\n\n"
    index_content += f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
    index_content += f"**Total Papers**: {len(papers)}\n\n"
    index_content += "---\n\n"

    # Group by year
    by_year = {}
    for paper in papers_sorted:
        by_year.setdefault(paper.get('year', 'Unknown'), []).append(paper)

    # Write by year
    for year in sorted(by_year):
        index_content += f"## {year}\n\n"
        for paper in by_year[year]:
            title = paper.get('title', paper.get('source_file', 'Unknown'))
            author = paper.get('author', 'Unknown Author')
            source = paper.get('source_file', '')

            # Create link to markdown file. The file lives either directly
            # in output_dir or in a <year>/ subdirectory; this function is
            # not told which, so look at what is on disk. The <year>/ form
            # is kept unless only the flat file exists.
            md_name = Path(source).stem + ".md"
            md_file = md_name
            paper_year = paper.get('year')
            if paper_year is not None and paper_year != 'Unknown':
                nested = output_dir / str(paper_year) / md_name
                flat = output_dir / md_name
                if nested.exists() or not flat.exists():
                    md_file = f"{paper_year}/{md_name}"

            index_content += f"- **{title}**\n"
            index_content += f"  - Author: {author}\n"
            index_content += f"  - Source: {source}\n"
            # Percent-encode so names with spaces or parentheses still
            # form a valid Markdown link ("/" is left untouched)
            index_content += f"  - [Read Markdown]({quote(md_file)})\n\n"

    # Write index
    index_file = output_dir / "INDEX.md"
    index_file.write_text(index_content, encoding='utf-8')
    print(f"\n✓ Created index: {index_file}")

    # Also create JSON catalog
    catalog_file = output_dir / "catalog.json"
    with open(catalog_file, 'w', encoding='utf-8') as f:
        json.dump(papers_sorted, f, indent=2, ensure_ascii=False)
    print(f"✓ Created catalog: {catalog_file}")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def main():
    """
    Command-line entry point for converting a directory of PDFs.

    Parses the arguments, validates the input directory, converts every
    PDF found with convert_paper(), optionally writes an index via
    create_index(), prints a summary and exits with status 0 only when
    every paper converted successfully. Exits with status 1 when the input
    directory is invalid or contains no PDF files.
    """
    parser = argparse.ArgumentParser(
        description="Convert scientific literature PDFs to Markdown",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert all PDFs in a directory
  python convert_literature.py papers/ output/

  # Organize by year
  python convert_literature.py papers/ output/ --organize-by-year

  # Create index of all papers
  python convert_literature.py papers/ output/ --create-index

Filename Conventions:
  For best results, name your PDFs using this pattern:
    Author_Year_Title.pdf

  Examples:
    Smith_2023_Machine_Learning_Applications.pdf
    Jones_2022_Climate_Change_Analysis.pdf
"""
    )

    parser.add_argument('input_dir', type=Path, help='Directory with PDF files')
    parser.add_argument('output_dir', type=Path, help='Output directory for Markdown files')
    parser.add_argument(
        '--organize-by-year', '-y',
        action='store_true',
        help='Organize output into year subdirectories'
    )
    parser.add_argument(
        '--create-index', '-i',
        action='store_true',
        help='Create an index/catalog of all papers'
    )
    parser.add_argument(
        '--recursive', '-r',
        action='store_true',
        help='Search subdirectories recursively'
    )

    args = parser.parse_args()

    # Validate input
    if not args.input_dir.exists():
        print(f"Error: Input directory '{args.input_dir}' does not exist")
        sys.exit(1)

    if not args.input_dir.is_dir():
        print(f"Error: '{args.input_dir}' is not a directory")
        sys.exit(1)

    # Find PDF files. Matching on the lower-cased suffix also picks up
    # "PAPER.PDF" on case-sensitive filesystems, is_file() skips directories
    # that merely end in ".pdf", and sorting makes the processing order
    # independent of the filesystem.
    walker = args.input_dir.rglob if args.recursive else args.input_dir.glob
    pdf_files = sorted(
        path for path in walker("*")
        if path.is_file() and path.suffix.lower() == ".pdf"
    )

    if not pdf_files:
        print("No PDF files found")
        sys.exit(1)

    print(f"Found {len(pdf_files)} PDF file(s)")

    # Create MarkItDown instance
    md = MarkItDown()

    # Convert all papers; only successful conversions are kept for the
    # index, since a failed paper has no Markdown file to link to
    results = []
    for pdf_file in pdf_files:
        success, metadata = convert_paper(
            md,
            pdf_file,
            args.output_dir,
            args.organize_by_year
        )
        if success:
            results.append(metadata)

    success_count = len(results)

    # Create index if requested
    if args.create_index and results:
        create_index(results, args.output_dir)

    # Print summary (pdf_files is non-empty here, so the division is safe)
    print("\n" + "="*50)
    print("CONVERSION SUMMARY")
    print("="*50)
    print(f"Total papers: {len(pdf_files)}")
    print(f"Successful: {success_count}")
    print(f"Failed: {len(pdf_files) - success_count}")
    print(f"Success rate: {success_count/len(pdf_files)*100:.1f}%")

    sys.exit(0 if success_count == len(pdf_files) else 1)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|
283
|
+
|