scientific-writer 2.2.1__py3-none-any.whl → 2.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scientific-writer might be problematic. Click here for more details.
- scientific_writer/.claude/WRITER.md +748 -0
- scientific_writer/.claude/settings.local.json +30 -0
- scientific_writer/.claude/skills/citation-management/SKILL.md +1046 -0
- scientific_writer/.claude/skills/citation-management/assets/bibtex_template.bib +264 -0
- scientific_writer/.claude/skills/citation-management/assets/citation_checklist.md +386 -0
- scientific_writer/.claude/skills/citation-management/references/bibtex_formatting.md +908 -0
- scientific_writer/.claude/skills/citation-management/references/citation_validation.md +794 -0
- scientific_writer/.claude/skills/citation-management/references/google_scholar_search.md +725 -0
- scientific_writer/.claude/skills/citation-management/references/metadata_extraction.md +870 -0
- scientific_writer/.claude/skills/citation-management/references/pubmed_search.md +839 -0
- scientific_writer/.claude/skills/citation-management/scripts/doi_to_bibtex.py +204 -0
- scientific_writer/.claude/skills/citation-management/scripts/extract_metadata.py +569 -0
- scientific_writer/.claude/skills/citation-management/scripts/format_bibtex.py +349 -0
- scientific_writer/.claude/skills/citation-management/scripts/search_google_scholar.py +282 -0
- scientific_writer/.claude/skills/citation-management/scripts/search_pubmed.py +398 -0
- scientific_writer/.claude/skills/citation-management/scripts/validate_citations.py +497 -0
- scientific_writer/.claude/skills/clinical-reports/IMPLEMENTATION_SUMMARY.md +641 -0
- scientific_writer/.claude/skills/clinical-reports/README.md +236 -0
- scientific_writer/.claude/skills/clinical-reports/SKILL.md +1088 -0
- scientific_writer/.claude/skills/clinical-reports/assets/case_report_template.md +352 -0
- scientific_writer/.claude/skills/clinical-reports/assets/clinical_trial_csr_template.md +353 -0
- scientific_writer/.claude/skills/clinical-reports/assets/clinical_trial_sae_template.md +359 -0
- scientific_writer/.claude/skills/clinical-reports/assets/consult_note_template.md +305 -0
- scientific_writer/.claude/skills/clinical-reports/assets/discharge_summary_template.md +453 -0
- scientific_writer/.claude/skills/clinical-reports/assets/hipaa_compliance_checklist.md +395 -0
- scientific_writer/.claude/skills/clinical-reports/assets/history_physical_template.md +305 -0
- scientific_writer/.claude/skills/clinical-reports/assets/lab_report_template.md +309 -0
- scientific_writer/.claude/skills/clinical-reports/assets/pathology_report_template.md +249 -0
- scientific_writer/.claude/skills/clinical-reports/assets/quality_checklist.md +338 -0
- scientific_writer/.claude/skills/clinical-reports/assets/radiology_report_template.md +318 -0
- scientific_writer/.claude/skills/clinical-reports/assets/soap_note_template.md +253 -0
- scientific_writer/.claude/skills/clinical-reports/references/case_report_guidelines.md +570 -0
- scientific_writer/.claude/skills/clinical-reports/references/clinical_trial_reporting.md +693 -0
- scientific_writer/.claude/skills/clinical-reports/references/data_presentation.md +530 -0
- scientific_writer/.claude/skills/clinical-reports/references/diagnostic_reports_standards.md +629 -0
- scientific_writer/.claude/skills/clinical-reports/references/medical_terminology.md +588 -0
- scientific_writer/.claude/skills/clinical-reports/references/patient_documentation.md +744 -0
- scientific_writer/.claude/skills/clinical-reports/references/peer_review_standards.md +585 -0
- scientific_writer/.claude/skills/clinical-reports/references/regulatory_compliance.md +577 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/check_deidentification.py +346 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/compliance_checker.py +78 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/extract_clinical_data.py +102 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/format_adverse_events.py +103 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/generate_report_template.py +163 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/terminology_validator.py +133 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/validate_case_report.py +334 -0
- scientific_writer/.claude/skills/clinical-reports/scripts/validate_trial_report.py +89 -0
- scientific_writer/.claude/skills/document-skills/docx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/docx/SKILL.md +197 -0
- scientific_writer/.claude/skills/document-skills/docx/docx-js.md +350 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/pack.py +159 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/unpack.py +29 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validate.py +69 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/__init__.py +15 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/base.py +951 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/docx.py +274 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/pptx.py +315 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml/scripts/validation/redlining.py +279 -0
- scientific_writer/.claude/skills/document-skills/docx/ooxml.md +610 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/__init__.py +1 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/document.py +1276 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/comments.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/commentsIds.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/templates/people.xml +3 -0
- scientific_writer/.claude/skills/document-skills/docx/scripts/utilities.py +374 -0
- scientific_writer/.claude/skills/document-skills/pdf/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/pdf/SKILL.md +294 -0
- scientific_writer/.claude/skills/document-skills/pdf/forms.md +205 -0
- scientific_writer/.claude/skills/document-skills/pdf/reference.md +612 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_bounding_boxes.py +70 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/check_fillable_fields.py +12 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/create_validation_image.py +41 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/extract_form_field_info.py +152 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/fill_fillable_fields.py +114 -0
- scientific_writer/.claude/skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- scientific_writer/.claude/skills/document-skills/pptx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/pptx/SKILL.md +484 -0
- scientific_writer/.claude/skills/document-skills/pptx/html2pptx.md +625 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/pack.py +159 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/unpack.py +29 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validate.py +69 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/base.py +951 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/docx.py +274 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
- scientific_writer/.claude/skills/document-skills/pptx/ooxml.md +427 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/html2pptx.js +979 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/inventory.py +1020 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/rearrange.py +231 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/replace.py +385 -0
- scientific_writer/.claude/skills/document-skills/pptx/scripts/thumbnail.py +450 -0
- scientific_writer/.claude/skills/document-skills/xlsx/LICENSE.txt +30 -0
- scientific_writer/.claude/skills/document-skills/xlsx/SKILL.md +289 -0
- scientific_writer/.claude/skills/document-skills/xlsx/recalc.py +178 -0
- scientific_writer/.claude/skills/hypothesis-generation/SKILL.md +155 -0
- scientific_writer/.claude/skills/hypothesis-generation/assets/hypothesis_output_template.md +302 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/experimental_design_patterns.md +327 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/hypothesis_quality_criteria.md +196 -0
- scientific_writer/.claude/skills/hypothesis-generation/references/literature_search_strategies.md +505 -0
- scientific_writer/.claude/skills/latex-posters/README.md +417 -0
- scientific_writer/.claude/skills/latex-posters/SKILL.md +919 -0
- scientific_writer/.claude/skills/latex-posters/assets/baposter_template.tex +257 -0
- scientific_writer/.claude/skills/latex-posters/assets/beamerposter_template.tex +244 -0
- scientific_writer/.claude/skills/latex-posters/assets/poster_quality_checklist.md +358 -0
- scientific_writer/.claude/skills/latex-posters/assets/tikzposter_template.tex +251 -0
- scientific_writer/.claude/skills/latex-posters/references/latex_poster_packages.md +745 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_content_guide.md +748 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_design_principles.md +806 -0
- scientific_writer/.claude/skills/latex-posters/references/poster_layout_design.md +900 -0
- scientific_writer/.claude/skills/latex-posters/scripts/review_poster.sh +214 -0
- scientific_writer/.claude/skills/literature-review/SKILL.md +546 -0
- scientific_writer/.claude/skills/literature-review/assets/review_template.md +412 -0
- scientific_writer/.claude/skills/literature-review/references/citation_styles.md +166 -0
- scientific_writer/.claude/skills/literature-review/references/database_strategies.md +381 -0
- scientific_writer/.claude/skills/literature-review/scripts/generate_pdf.py +176 -0
- scientific_writer/.claude/skills/literature-review/scripts/search_databases.py +303 -0
- scientific_writer/.claude/skills/literature-review/scripts/verify_citations.py +222 -0
- scientific_writer/.claude/skills/markitdown/INSTALLATION_GUIDE.md +318 -0
- scientific_writer/.claude/skills/markitdown/LICENSE.txt +22 -0
- scientific_writer/.claude/skills/markitdown/OPENROUTER_INTEGRATION.md +359 -0
- scientific_writer/.claude/skills/markitdown/QUICK_REFERENCE.md +309 -0
- scientific_writer/.claude/skills/markitdown/README.md +184 -0
- scientific_writer/.claude/skills/markitdown/SKILL.md +450 -0
- scientific_writer/.claude/skills/markitdown/SKILL_SUMMARY.md +307 -0
- scientific_writer/.claude/skills/markitdown/assets/example_usage.md +463 -0
- scientific_writer/.claude/skills/markitdown/references/api_reference.md +399 -0
- scientific_writer/.claude/skills/markitdown/references/file_formats.md +542 -0
- scientific_writer/.claude/skills/markitdown/scripts/batch_convert.py +228 -0
- scientific_writer/.claude/skills/markitdown/scripts/convert_literature.py +283 -0
- scientific_writer/.claude/skills/markitdown/scripts/convert_with_ai.py +243 -0
- scientific_writer/.claude/skills/paper-2-web/SKILL.md +455 -0
- scientific_writer/.claude/skills/paper-2-web/references/installation.md +141 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2poster.md +346 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2video.md +305 -0
- scientific_writer/.claude/skills/paper-2-web/references/paper2web.md +187 -0
- scientific_writer/.claude/skills/paper-2-web/references/usage_examples.md +436 -0
- scientific_writer/.claude/skills/peer-review/SKILL.md +375 -0
- scientific_writer/.claude/skills/peer-review/references/common_issues.md +552 -0
- scientific_writer/.claude/skills/peer-review/references/reporting_standards.md +290 -0
- scientific_writer/.claude/skills/research-grants/README.md +285 -0
- scientific_writer/.claude/skills/research-grants/SKILL.md +896 -0
- scientific_writer/.claude/skills/research-grants/assets/budget_justification_template.md +453 -0
- scientific_writer/.claude/skills/research-grants/assets/nih_specific_aims_template.md +166 -0
- scientific_writer/.claude/skills/research-grants/assets/nsf_project_summary_template.md +92 -0
- scientific_writer/.claude/skills/research-grants/references/broader_impacts.md +392 -0
- scientific_writer/.claude/skills/research-grants/references/darpa_guidelines.md +636 -0
- scientific_writer/.claude/skills/research-grants/references/doe_guidelines.md +586 -0
- scientific_writer/.claude/skills/research-grants/references/nih_guidelines.md +851 -0
- scientific_writer/.claude/skills/research-grants/references/nsf_guidelines.md +570 -0
- scientific_writer/.claude/skills/research-grants/references/specific_aims_guide.md +458 -0
- scientific_writer/.claude/skills/research-lookup/README.md +116 -0
- scientific_writer/.claude/skills/research-lookup/SKILL.md +443 -0
- scientific_writer/.claude/skills/research-lookup/examples.py +174 -0
- scientific_writer/.claude/skills/research-lookup/lookup.py +93 -0
- scientific_writer/.claude/skills/research-lookup/research_lookup.py +335 -0
- scientific_writer/.claude/skills/research-lookup/scripts/research_lookup.py +261 -0
- scientific_writer/.claude/skills/scholar-evaluation/SKILL.md +254 -0
- scientific_writer/.claude/skills/scholar-evaluation/references/evaluation_framework.md +663 -0
- scientific_writer/.claude/skills/scholar-evaluation/scripts/calculate_scores.py +378 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/SKILL.md +530 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/common_biases.md +364 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/evidence_hierarchy.md +484 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/experimental_design.md +496 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/logical_fallacies.md +478 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/scientific_method.md +169 -0
- scientific_writer/.claude/skills/scientific-critical-thinking/references/statistical_pitfalls.md +506 -0
- scientific_writer/.claude/skills/scientific-schematics/SKILL.md +2035 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/block_diagram_template.tex +199 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/circuit_template.tex +159 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/flowchart_template.tex +161 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/pathway_template.tex +162 -0
- scientific_writer/.claude/skills/scientific-schematics/assets/tikz_styles.tex +422 -0
- scientific_writer/.claude/skills/scientific-schematics/references/best_practices.md +562 -0
- scientific_writer/.claude/skills/scientific-schematics/references/diagram_types.md +637 -0
- scientific_writer/.claude/skills/scientific-schematics/references/python_libraries.md +791 -0
- scientific_writer/.claude/skills/scientific-schematics/references/tikz_guide.md +734 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/circuit_generator.py +307 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/compile_tikz.py +292 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/generate_flowchart.py +281 -0
- scientific_writer/.claude/skills/scientific-schematics/scripts/pathway_diagram.py +406 -0
- scientific_writer/.claude/skills/scientific-writing/SKILL.md +443 -0
- scientific_writer/.claude/skills/scientific-writing/references/citation_styles.md +720 -0
- scientific_writer/.claude/skills/scientific-writing/references/figures_tables.md +806 -0
- scientific_writer/.claude/skills/scientific-writing/references/imrad_structure.md +658 -0
- scientific_writer/.claude/skills/scientific-writing/references/reporting_guidelines.md +748 -0
- scientific_writer/.claude/skills/scientific-writing/references/writing_principles.md +824 -0
- scientific_writer/.claude/skills/treatment-plans/README.md +483 -0
- scientific_writer/.claude/skills/treatment-plans/SKILL.md +817 -0
- scientific_writer/.claude/skills/treatment-plans/assets/chronic_disease_management_plan.tex +636 -0
- scientific_writer/.claude/skills/treatment-plans/assets/general_medical_treatment_plan.tex +616 -0
- scientific_writer/.claude/skills/treatment-plans/assets/mental_health_treatment_plan.tex +745 -0
- scientific_writer/.claude/skills/treatment-plans/assets/pain_management_plan.tex +770 -0
- scientific_writer/.claude/skills/treatment-plans/assets/perioperative_care_plan.tex +724 -0
- scientific_writer/.claude/skills/treatment-plans/assets/quality_checklist.md +471 -0
- scientific_writer/.claude/skills/treatment-plans/assets/rehabilitation_treatment_plan.tex +727 -0
- scientific_writer/.claude/skills/treatment-plans/references/goal_setting_frameworks.md +411 -0
- scientific_writer/.claude/skills/treatment-plans/references/intervention_guidelines.md +507 -0
- scientific_writer/.claude/skills/treatment-plans/references/regulatory_compliance.md +476 -0
- scientific_writer/.claude/skills/treatment-plans/references/specialty_specific_guidelines.md +607 -0
- scientific_writer/.claude/skills/treatment-plans/references/treatment_plan_standards.md +456 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/check_completeness.py +318 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/generate_template.py +244 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/timeline_generator.py +369 -0
- scientific_writer/.claude/skills/treatment-plans/scripts/validate_treatment_plan.py +367 -0
- scientific_writer/.claude/skills/venue-templates/SKILL.md +590 -0
- scientific_writer/.claude/skills/venue-templates/assets/grants/nih_specific_aims.tex +235 -0
- scientific_writer/.claude/skills/venue-templates/assets/grants/nsf_proposal_template.tex +375 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/nature_article.tex +171 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/neurips_article.tex +283 -0
- scientific_writer/.claude/skills/venue-templates/assets/journals/plos_one.tex +317 -0
- scientific_writer/.claude/skills/venue-templates/assets/posters/beamerposter_academic.tex +311 -0
- scientific_writer/.claude/skills/venue-templates/references/conferences_formatting.md +564 -0
- scientific_writer/.claude/skills/venue-templates/references/grants_requirements.md +787 -0
- scientific_writer/.claude/skills/venue-templates/references/journals_formatting.md +486 -0
- scientific_writer/.claude/skills/venue-templates/references/posters_guidelines.md +628 -0
- scientific_writer/.claude/skills/venue-templates/scripts/customize_template.py +206 -0
- scientific_writer/.claude/skills/venue-templates/scripts/query_template.py +260 -0
- scientific_writer/.claude/skills/venue-templates/scripts/validate_format.py +255 -0
- scientific_writer/__init__.py +1 -1
- scientific_writer/api.py +9 -5
- scientific_writer/cli.py +9 -5
- scientific_writer/core.py +28 -5
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/METADATA +1 -1
- scientific_writer-2.2.3.dist-info/RECORD +312 -0
- scientific_writer-2.2.1.dist-info/RECORD +0 -11
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/WHEEL +0 -0
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/entry_points.txt +0 -0
- {scientific_writer-2.2.1.dist-info → scientific_writer-2.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Metadata Extraction Tool
|
|
4
|
+
Extract citation metadata from DOI, PMID, arXiv ID, or URL using various APIs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import requests
|
|
10
|
+
import argparse
|
|
11
|
+
import time
|
|
12
|
+
import re
|
|
13
|
+
import json
|
|
14
|
+
import xml.etree.ElementTree as ET
|
|
15
|
+
from typing import Optional, Dict, List, Tuple
|
|
16
|
+
from urllib.parse import urlparse
|
|
17
|
+
|
|
18
|
+
class MetadataExtractor:
|
|
19
|
+
"""Extract metadata from various sources and generate BibTeX."""
|
|
20
|
+
|
|
21
|
+
def __init__(self, email: Optional[str] = None):
    """
    Create the shared HTTP session and resolve the contact email.

    Args:
        email: Email for Entrez API (recommended for PubMed). When it is
            omitted or empty, the NCBI_EMAIL environment variable is used,
            and an empty string if that is unset as well.
    """
    # One session for every request, carrying an identifying User-Agent.
    http = requests.Session()
    http.headers.update(
        {'User-Agent': 'MetadataExtractor/1.0 (Citation Management Tool)'}
    )
    self.session = http

    # An explicit argument wins; otherwise fall back to the environment.
    if email:
        self.email = email
    else:
        self.email = os.getenv('NCBI_EMAIL', '')
|
|
33
|
+
|
|
34
|
+
def identify_type(self, identifier: str) -> Tuple[str, str]:
    """
    Identify the type of identifier.

    Checks are applied in this order: URL, DOI, arXiv ID, PMID, PMCID.
    The optional ``doi:`` and ``arXiv:`` labels are matched
    case-insensitively and surrounding whitespace is removed from the
    value that follows them.

    Args:
        identifier: DOI, PMID, arXiv ID, or URL

    Returns:
        Tuple of (type, cleaned_identifier). For non-URL input the type is
        one of 'doi', 'arxiv', 'pmid', 'pmcid' or 'unknown'; URLs return
        whatever ``self._parse_url`` produces.
    """
    identifier = identifier.strip()

    # Check if URL
    if identifier.startswith(('http://', 'https://')):
        return self._parse_url(identifier)

    lowered = identifier.lower()

    # Check for DOI, either bare ("10.xxxx/...") or labelled ("doi:10.xxxx/...")
    if identifier.startswith('10.'):
        return ('doi', identifier)
    if lowered.startswith('doi:'):
        candidate = identifier[len('doi:'):].strip()
        # Only accept the label when a DOI really follows it; anything
        # else falls through and is reported unchanged as 'unknown'.
        if candidate.startswith('10.'):
            return ('doi', candidate)

    # Check for arXiv ID (new-style YYMM.NNNNN with optional version)
    if re.match(r'^\d{4}\.\d{4,5}(v\d+)?$', identifier):
        return ('arxiv', identifier)
    if lowered.startswith('arxiv:'):
        # Drop only the leading label, not later occurrences of the text.
        return ('arxiv', identifier[len('arXiv:'):].strip())

    # Check for PMID: a bare number of at least 7 digits; shorter numbers
    # are left as 'unknown'.
    if identifier.isdigit() and len(identifier) >= 7:
        return ('pmid', identifier)

    # Check for PMCID ("PMC" followed by digits); normalised to upper case
    if lowered.startswith('pmc') and identifier[3:].isdigit():
        return ('pmcid', identifier.upper())

    return ('unknown', identifier)
|
|
69
|
+
|
|
70
|
+
def _parse_url(self, url: str) -> Tuple[str, str]:
|
|
71
|
+
"""Parse URL to extract identifier type and value."""
|
|
72
|
+
parsed = urlparse(url)
|
|
73
|
+
|
|
74
|
+
# DOI URLs
|
|
75
|
+
if 'doi.org' in parsed.netloc:
|
|
76
|
+
doi = parsed.path.lstrip('/')
|
|
77
|
+
return ('doi', doi)
|
|
78
|
+
|
|
79
|
+
# PubMed URLs
|
|
80
|
+
if 'pubmed.ncbi.nlm.nih.gov' in parsed.netloc or 'ncbi.nlm.nih.gov/pubmed' in url:
|
|
81
|
+
pmid = re.search(r'/(\d+)', parsed.path)
|
|
82
|
+
if pmid:
|
|
83
|
+
return ('pmid', pmid.group(1))
|
|
84
|
+
|
|
85
|
+
# arXiv URLs
|
|
86
|
+
if 'arxiv.org' in parsed.netloc:
|
|
87
|
+
arxiv_id = re.search(r'/abs/(\d{4}\.\d{4,5})', parsed.path)
|
|
88
|
+
if arxiv_id:
|
|
89
|
+
return ('arxiv', arxiv_id.group(1))
|
|
90
|
+
|
|
91
|
+
# Nature, Science, Cell, etc. - try to extract DOI from URL
|
|
92
|
+
doi_match = re.search(r'10\.\d{4,}/[^\s/]+', url)
|
|
93
|
+
if doi_match:
|
|
94
|
+
return ('doi', doi_match.group())
|
|
95
|
+
|
|
96
|
+
return ('url', url)
|
|
97
|
+
|
|
98
|
+
def extract_from_doi(self, doi: str) -> Optional[Dict]:
|
|
99
|
+
"""
|
|
100
|
+
Extract metadata from DOI using CrossRef API.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
doi: Digital Object Identifier
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
Metadata dictionary or None
|
|
107
|
+
"""
|
|
108
|
+
url = f'https://api.crossref.org/works/{doi}'
|
|
109
|
+
|
|
110
|
+
try:
|
|
111
|
+
response = self.session.get(url, timeout=15)
|
|
112
|
+
|
|
113
|
+
if response.status_code == 200:
|
|
114
|
+
data = response.json()
|
|
115
|
+
message = data.get('message', {})
|
|
116
|
+
|
|
117
|
+
metadata = {
|
|
118
|
+
'type': 'doi',
|
|
119
|
+
'entry_type': self._crossref_type_to_bibtex(message.get('type')),
|
|
120
|
+
'doi': doi,
|
|
121
|
+
'title': message.get('title', [''])[0],
|
|
122
|
+
'authors': self._format_authors_crossref(message.get('author', [])),
|
|
123
|
+
'year': self._extract_year_crossref(message),
|
|
124
|
+
'journal': message.get('container-title', [''])[0] if message.get('container-title') else '',
|
|
125
|
+
'volume': str(message.get('volume', '')) if message.get('volume') else '',
|
|
126
|
+
'issue': str(message.get('issue', '')) if message.get('issue') else '',
|
|
127
|
+
'pages': message.get('page', ''),
|
|
128
|
+
'publisher': message.get('publisher', ''),
|
|
129
|
+
'url': f'https://doi.org/{doi}'
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return metadata
|
|
133
|
+
else:
|
|
134
|
+
print(f'Error: CrossRef API returned status {response.status_code} for DOI: {doi}', file=sys.stderr)
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
except Exception as e:
|
|
138
|
+
print(f'Error extracting metadata from DOI {doi}: {e}', file=sys.stderr)
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
def extract_from_pmid(self, pmid: str) -> Optional[Dict]:
|
|
142
|
+
"""
|
|
143
|
+
Extract metadata from PMID using PubMed E-utilities.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
pmid: PubMed ID
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
Metadata dictionary or None
|
|
150
|
+
"""
|
|
151
|
+
url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
|
|
152
|
+
params = {
|
|
153
|
+
'db': 'pubmed',
|
|
154
|
+
'id': pmid,
|
|
155
|
+
'retmode': 'xml',
|
|
156
|
+
'rettype': 'abstract'
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
if self.email:
|
|
160
|
+
params['email'] = self.email
|
|
161
|
+
|
|
162
|
+
api_key = os.getenv('NCBI_API_KEY')
|
|
163
|
+
if api_key:
|
|
164
|
+
params['api_key'] = api_key
|
|
165
|
+
|
|
166
|
+
try:
|
|
167
|
+
response = self.session.get(url, params=params, timeout=15)
|
|
168
|
+
|
|
169
|
+
if response.status_code == 200:
|
|
170
|
+
root = ET.fromstring(response.content)
|
|
171
|
+
article = root.find('.//PubmedArticle')
|
|
172
|
+
|
|
173
|
+
if article is None:
|
|
174
|
+
print(f'Error: No article found for PMID: {pmid}', file=sys.stderr)
|
|
175
|
+
return None
|
|
176
|
+
|
|
177
|
+
# Extract metadata from XML
|
|
178
|
+
medline_citation = article.find('.//MedlineCitation')
|
|
179
|
+
article_elem = medline_citation.find('.//Article')
|
|
180
|
+
journal = article_elem.find('.//Journal')
|
|
181
|
+
|
|
182
|
+
# Get DOI if available
|
|
183
|
+
doi = None
|
|
184
|
+
article_ids = article.findall('.//ArticleId')
|
|
185
|
+
for article_id in article_ids:
|
|
186
|
+
if article_id.get('IdType') == 'doi':
|
|
187
|
+
doi = article_id.text
|
|
188
|
+
break
|
|
189
|
+
|
|
190
|
+
metadata = {
|
|
191
|
+
'type': 'pmid',
|
|
192
|
+
'entry_type': 'article',
|
|
193
|
+
'pmid': pmid,
|
|
194
|
+
'title': article_elem.findtext('.//ArticleTitle', ''),
|
|
195
|
+
'authors': self._format_authors_pubmed(article_elem.findall('.//Author')),
|
|
196
|
+
'year': self._extract_year_pubmed(article_elem),
|
|
197
|
+
'journal': journal.findtext('.//Title', ''),
|
|
198
|
+
'volume': journal.findtext('.//JournalIssue/Volume', ''),
|
|
199
|
+
'issue': journal.findtext('.//JournalIssue/Issue', ''),
|
|
200
|
+
'pages': article_elem.findtext('.//Pagination/MedlinePgn', ''),
|
|
201
|
+
'doi': doi
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
return metadata
|
|
205
|
+
else:
|
|
206
|
+
print(f'Error: PubMed API returned status {response.status_code} for PMID: {pmid}', file=sys.stderr)
|
|
207
|
+
return None
|
|
208
|
+
|
|
209
|
+
except Exception as e:
|
|
210
|
+
print(f'Error extracting metadata from PMID {pmid}: {e}', file=sys.stderr)
|
|
211
|
+
return None
|
|
212
|
+
|
|
213
|
+
def extract_from_arxiv(self, arxiv_id: str) -> Optional[Dict]:
|
|
214
|
+
"""
|
|
215
|
+
Extract metadata from arXiv ID using arXiv API.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
arxiv_id: arXiv identifier
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
Metadata dictionary or None
|
|
222
|
+
"""
|
|
223
|
+
url = 'http://export.arxiv.org/api/query'
|
|
224
|
+
params = {
|
|
225
|
+
'id_list': arxiv_id,
|
|
226
|
+
'max_results': 1
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
try:
|
|
230
|
+
response = self.session.get(url, params=params, timeout=15)
|
|
231
|
+
|
|
232
|
+
if response.status_code == 200:
|
|
233
|
+
# Parse Atom XML
|
|
234
|
+
root = ET.fromstring(response.content)
|
|
235
|
+
ns = {'atom': 'http://www.w3.org/2005/Atom', 'arxiv': 'http://arxiv.org/schemas/atom'}
|
|
236
|
+
|
|
237
|
+
entry = root.find('atom:entry', ns)
|
|
238
|
+
if entry is None:
|
|
239
|
+
print(f'Error: No entry found for arXiv ID: {arxiv_id}', file=sys.stderr)
|
|
240
|
+
return None
|
|
241
|
+
|
|
242
|
+
# Extract DOI if published
|
|
243
|
+
doi_elem = entry.find('arxiv:doi', ns)
|
|
244
|
+
doi = doi_elem.text if doi_elem is not None else None
|
|
245
|
+
|
|
246
|
+
# Extract journal reference if published
|
|
247
|
+
journal_ref_elem = entry.find('arxiv:journal_ref', ns)
|
|
248
|
+
journal_ref = journal_ref_elem.text if journal_ref_elem is not None else None
|
|
249
|
+
|
|
250
|
+
# Get publication date
|
|
251
|
+
published = entry.findtext('atom:published', '', ns)
|
|
252
|
+
year = published[:4] if published else ''
|
|
253
|
+
|
|
254
|
+
# Get authors
|
|
255
|
+
authors = []
|
|
256
|
+
for author in entry.findall('atom:author', ns):
|
|
257
|
+
name = author.findtext('atom:name', '', ns)
|
|
258
|
+
if name:
|
|
259
|
+
authors.append(name)
|
|
260
|
+
|
|
261
|
+
metadata = {
|
|
262
|
+
'type': 'arxiv',
|
|
263
|
+
'entry_type': 'misc' if not doi else 'article',
|
|
264
|
+
'arxiv_id': arxiv_id,
|
|
265
|
+
'title': entry.findtext('atom:title', '', ns).strip().replace('\n', ' '),
|
|
266
|
+
'authors': ' and '.join(authors),
|
|
267
|
+
'year': year,
|
|
268
|
+
'doi': doi,
|
|
269
|
+
'journal_ref': journal_ref,
|
|
270
|
+
'abstract': entry.findtext('atom:summary', '', ns).strip().replace('\n', ' '),
|
|
271
|
+
'url': f'https://arxiv.org/abs/{arxiv_id}'
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
return metadata
|
|
275
|
+
else:
|
|
276
|
+
print(f'Error: arXiv API returned status {response.status_code} for ID: {arxiv_id}', file=sys.stderr)
|
|
277
|
+
return None
|
|
278
|
+
|
|
279
|
+
except Exception as e:
|
|
280
|
+
print(f'Error extracting metadata from arXiv {arxiv_id}: {e}', file=sys.stderr)
|
|
281
|
+
return None
|
|
282
|
+
|
|
283
|
+
def metadata_to_bibtex(self, metadata: Dict, citation_key: Optional[str] = None) -> str:
|
|
284
|
+
"""
|
|
285
|
+
Convert metadata dictionary to BibTeX format.
|
|
286
|
+
|
|
287
|
+
Args:
|
|
288
|
+
metadata: Metadata dictionary
|
|
289
|
+
citation_key: Optional custom citation key
|
|
290
|
+
|
|
291
|
+
Returns:
|
|
292
|
+
BibTeX string
|
|
293
|
+
"""
|
|
294
|
+
if not citation_key:
|
|
295
|
+
citation_key = self._generate_citation_key(metadata)
|
|
296
|
+
|
|
297
|
+
entry_type = metadata.get('entry_type', 'misc')
|
|
298
|
+
|
|
299
|
+
# Build BibTeX entry
|
|
300
|
+
lines = [f'@{entry_type}{{{citation_key},']
|
|
301
|
+
|
|
302
|
+
# Add fields
|
|
303
|
+
if metadata.get('authors'):
|
|
304
|
+
lines.append(f' author = {{{metadata["authors"]}}},')
|
|
305
|
+
|
|
306
|
+
if metadata.get('title'):
|
|
307
|
+
# Protect capitalization
|
|
308
|
+
title = self._protect_title(metadata['title'])
|
|
309
|
+
lines.append(f' title = {{{title}}},')
|
|
310
|
+
|
|
311
|
+
if entry_type == 'article' and metadata.get('journal'):
|
|
312
|
+
lines.append(f' journal = {{{metadata["journal"]}}},')
|
|
313
|
+
elif entry_type == 'misc' and metadata.get('type') == 'arxiv':
|
|
314
|
+
lines.append(f' howpublished = {{arXiv}},')
|
|
315
|
+
|
|
316
|
+
if metadata.get('year'):
|
|
317
|
+
lines.append(f' year = {{{metadata["year"]}}},')
|
|
318
|
+
|
|
319
|
+
if metadata.get('volume'):
|
|
320
|
+
lines.append(f' volume = {{{metadata["volume"]}}},')
|
|
321
|
+
|
|
322
|
+
if metadata.get('issue'):
|
|
323
|
+
lines.append(f' number = {{{metadata["issue"]}}},')
|
|
324
|
+
|
|
325
|
+
if metadata.get('pages'):
|
|
326
|
+
pages = metadata['pages'].replace('-', '--') # En-dash
|
|
327
|
+
lines.append(f' pages = {{{pages}}},')
|
|
328
|
+
|
|
329
|
+
if metadata.get('doi'):
|
|
330
|
+
lines.append(f' doi = {{{metadata["doi"]}}},')
|
|
331
|
+
elif metadata.get('url'):
|
|
332
|
+
lines.append(f' url = {{{metadata["url"]}}},')
|
|
333
|
+
|
|
334
|
+
if metadata.get('pmid'):
|
|
335
|
+
lines.append(f' note = {{PMID: {metadata["pmid"]}}},')
|
|
336
|
+
|
|
337
|
+
if metadata.get('type') == 'arxiv' and not metadata.get('doi'):
|
|
338
|
+
lines.append(f' note = {{Preprint}},')
|
|
339
|
+
|
|
340
|
+
# Remove trailing comma from last field
|
|
341
|
+
if lines[-1].endswith(','):
|
|
342
|
+
lines[-1] = lines[-1][:-1]
|
|
343
|
+
|
|
344
|
+
lines.append('}')
|
|
345
|
+
|
|
346
|
+
return '\n'.join(lines)
|
|
347
|
+
|
|
348
|
+
def _crossref_type_to_bibtex(self, crossref_type: str) -> str:
|
|
349
|
+
"""Map CrossRef type to BibTeX entry type."""
|
|
350
|
+
type_map = {
|
|
351
|
+
'journal-article': 'article',
|
|
352
|
+
'book': 'book',
|
|
353
|
+
'book-chapter': 'incollection',
|
|
354
|
+
'proceedings-article': 'inproceedings',
|
|
355
|
+
'posted-content': 'misc',
|
|
356
|
+
'dataset': 'misc',
|
|
357
|
+
'report': 'techreport'
|
|
358
|
+
}
|
|
359
|
+
return type_map.get(crossref_type, 'misc')
|
|
360
|
+
|
|
361
|
+
def _format_authors_crossref(self, authors: List[Dict]) -> str:
|
|
362
|
+
"""Format author list from CrossRef data."""
|
|
363
|
+
if not authors:
|
|
364
|
+
return ''
|
|
365
|
+
|
|
366
|
+
formatted = []
|
|
367
|
+
for author in authors:
|
|
368
|
+
given = author.get('given', '')
|
|
369
|
+
family = author.get('family', '')
|
|
370
|
+
if family:
|
|
371
|
+
if given:
|
|
372
|
+
formatted.append(f'{family}, {given}')
|
|
373
|
+
else:
|
|
374
|
+
formatted.append(family)
|
|
375
|
+
|
|
376
|
+
return ' and '.join(formatted)
|
|
377
|
+
|
|
378
|
+
def _format_authors_pubmed(self, authors: List) -> str:
|
|
379
|
+
"""Format author list from PubMed XML."""
|
|
380
|
+
formatted = []
|
|
381
|
+
for author in authors:
|
|
382
|
+
last_name = author.findtext('.//LastName', '')
|
|
383
|
+
fore_name = author.findtext('.//ForeName', '')
|
|
384
|
+
if last_name:
|
|
385
|
+
if fore_name:
|
|
386
|
+
formatted.append(f'{last_name}, {fore_name}')
|
|
387
|
+
else:
|
|
388
|
+
formatted.append(last_name)
|
|
389
|
+
|
|
390
|
+
return ' and '.join(formatted)
|
|
391
|
+
|
|
392
|
+
def _extract_year_crossref(self, message: Dict) -> str:
|
|
393
|
+
"""Extract year from CrossRef message."""
|
|
394
|
+
# Try published-print first, then published-online
|
|
395
|
+
date_parts = message.get('published-print', {}).get('date-parts', [[]])
|
|
396
|
+
if not date_parts or not date_parts[0]:
|
|
397
|
+
date_parts = message.get('published-online', {}).get('date-parts', [[]])
|
|
398
|
+
|
|
399
|
+
if date_parts and date_parts[0]:
|
|
400
|
+
return str(date_parts[0][0])
|
|
401
|
+
return ''
|
|
402
|
+
|
|
403
|
+
def _extract_year_pubmed(self, article: ET.Element) -> str:
|
|
404
|
+
"""Extract year from PubMed XML."""
|
|
405
|
+
year = article.findtext('.//Journal/JournalIssue/PubDate/Year', '')
|
|
406
|
+
if not year:
|
|
407
|
+
medline_date = article.findtext('.//Journal/JournalIssue/PubDate/MedlineDate', '')
|
|
408
|
+
if medline_date:
|
|
409
|
+
year_match = re.search(r'\d{4}', medline_date)
|
|
410
|
+
if year_match:
|
|
411
|
+
year = year_match.group()
|
|
412
|
+
return year
|
|
413
|
+
|
|
414
|
+
def _generate_citation_key(self, metadata: Dict) -> str:
|
|
415
|
+
"""Generate a citation key from metadata."""
|
|
416
|
+
# Get first author last name
|
|
417
|
+
authors = metadata.get('authors', '')
|
|
418
|
+
if authors:
|
|
419
|
+
first_author = authors.split(' and ')[0]
|
|
420
|
+
if ',' in first_author:
|
|
421
|
+
last_name = first_author.split(',')[0].strip()
|
|
422
|
+
else:
|
|
423
|
+
last_name = first_author.split()[-1] if first_author else 'Unknown'
|
|
424
|
+
else:
|
|
425
|
+
last_name = 'Unknown'
|
|
426
|
+
|
|
427
|
+
# Get year
|
|
428
|
+
year = metadata.get('year', '').strip()
|
|
429
|
+
if not year:
|
|
430
|
+
year = 'XXXX'
|
|
431
|
+
|
|
432
|
+
# Clean last name (remove special characters)
|
|
433
|
+
last_name = re.sub(r'[^a-zA-Z]', '', last_name)
|
|
434
|
+
|
|
435
|
+
# Get keyword from title
|
|
436
|
+
title = metadata.get('title', '')
|
|
437
|
+
words = re.findall(r'\b[a-zA-Z]{4,}\b', title)
|
|
438
|
+
keyword = words[0].lower() if words else 'paper'
|
|
439
|
+
|
|
440
|
+
return f'{last_name}{year}{keyword}'
|
|
441
|
+
|
|
442
|
+
def _protect_title(self, title: str) -> str:
|
|
443
|
+
"""Protect capitalization in title for BibTeX."""
|
|
444
|
+
# Protect common acronyms and proper nouns
|
|
445
|
+
protected_words = [
|
|
446
|
+
'DNA', 'RNA', 'CRISPR', 'COVID', 'HIV', 'AIDS', 'AlphaFold',
|
|
447
|
+
'Python', 'AI', 'ML', 'GPU', 'CPU', 'USA', 'UK', 'EU'
|
|
448
|
+
]
|
|
449
|
+
|
|
450
|
+
for word in protected_words:
|
|
451
|
+
title = re.sub(rf'\b{word}\b', f'{{{word}}}', title, flags=re.IGNORECASE)
|
|
452
|
+
|
|
453
|
+
return title
|
|
454
|
+
|
|
455
|
+
def extract(self, identifier: str) -> Optional[str]:
|
|
456
|
+
"""
|
|
457
|
+
Extract metadata and return BibTeX.
|
|
458
|
+
|
|
459
|
+
Args:
|
|
460
|
+
identifier: DOI, PMID, arXiv ID, or URL
|
|
461
|
+
|
|
462
|
+
Returns:
|
|
463
|
+
BibTeX string or None
|
|
464
|
+
"""
|
|
465
|
+
id_type, clean_id = self.identify_type(identifier)
|
|
466
|
+
|
|
467
|
+
print(f'Identified as {id_type}: {clean_id}', file=sys.stderr)
|
|
468
|
+
|
|
469
|
+
metadata = None
|
|
470
|
+
|
|
471
|
+
if id_type == 'doi':
|
|
472
|
+
metadata = self.extract_from_doi(clean_id)
|
|
473
|
+
elif id_type == 'pmid':
|
|
474
|
+
metadata = self.extract_from_pmid(clean_id)
|
|
475
|
+
elif id_type == 'arxiv':
|
|
476
|
+
metadata = self.extract_from_arxiv(clean_id)
|
|
477
|
+
else:
|
|
478
|
+
print(f'Error: Unknown identifier type: {identifier}', file=sys.stderr)
|
|
479
|
+
return None
|
|
480
|
+
|
|
481
|
+
if metadata:
|
|
482
|
+
return self.metadata_to_bibtex(metadata)
|
|
483
|
+
else:
|
|
484
|
+
return None
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _extract_with_hint(extractor, identifier, hint):
    """Extract BibTeX for *identifier*, using *hint* when sniffing fails.

    Args:
        extractor: A MetadataExtractor instance.
        identifier: Raw identifier string from the command line or a file.
        hint: 'doi', 'pmid', 'arxiv' or None; the type implied by the
            command-line flag the identifier was passed with.

    Returns:
        BibTeX string or None.
    """
    id_type, _ = extractor.identify_type(identifier)

    # Automatic detection wins whenever it finds a supported type, so the
    # behavior for already-working inputs is unchanged.
    if hint is None or id_type in ('doi', 'pmid', 'arxiv'):
        return extractor.extract(identifier)

    # Detection failed (e.g. a short PMID or an old-style arXiv ID):
    # trust the explicit flag instead of rejecting the identifier.
    fetchers = {
        'doi': extractor.extract_from_doi,
        'pmid': extractor.extract_from_pmid,
        'arxiv': extractor.extract_from_arxiv,
    }
    clean_id = identifier.strip()
    print(f'Identified as {hint}: {clean_id}', file=sys.stderr)
    metadata = fetchers[hint](clean_id)
    return extractor.metadata_to_bibtex(metadata) if metadata else None


def main():
    """Command-line interface.

    Collects identifiers from the flags and/or an input file, resolves each
    one to a BibTeX entry and writes the result to stdout or ``--output``.
    Progress and errors go to stderr.  Exits with status 1 when no
    identifier is given, the input file cannot be read, the output file
    cannot be written, or no entry could be extracted.
    """
    parser = argparse.ArgumentParser(
        description='Extract citation metadata from DOI, PMID, arXiv ID, or URL',
        epilog='Example: python extract_metadata.py --doi 10.1038/s41586-021-03819-2'
    )

    parser.add_argument('--doi', help='Digital Object Identifier')
    parser.add_argument('--pmid', help='PubMed ID')
    parser.add_argument('--arxiv', help='arXiv ID')
    parser.add_argument('--url', help='URL to article')
    parser.add_argument('-i', '--input', help='Input file with identifiers (one per line)')
    parser.add_argument('-o', '--output', help='Output file for BibTeX (default: stdout)')
    parser.add_argument('--format', choices=['bibtex', 'json'], default='bibtex', help='Output format')
    parser.add_argument('--email', help='Email for NCBI E-utilities (recommended)')

    args = parser.parse_args()

    # Collect (identifier, type hint) pairs; the hint remembers which
    # flag supplied the value.
    flagged = (
        (args.doi, 'doi'),
        (args.pmid, 'pmid'),
        (args.arxiv, 'arxiv'),
        (args.url, None),
    )
    identifiers = [(value, hint) for value, hint in flagged if value]

    if args.input:
        try:
            with open(args.input, 'r', encoding='utf-8') as f:
                for line in f:
                    entry = line.strip()
                    # Skip blank lines and '#' comment lines.
                    if entry and not entry.startswith('#'):
                        identifiers.append((entry, None))
        except (OSError, UnicodeError) as e:
            print(f'Error reading input file: {e}', file=sys.stderr)
            sys.exit(1)

    if not identifiers:
        parser.print_help()
        sys.exit(1)

    # Extract metadata
    extractor = MetadataExtractor(email=args.email)
    bibtex_entries = []
    total = len(identifiers)

    for position, (identifier, hint) in enumerate(identifiers, start=1):
        print(f'\nProcessing {position}/{total}...', file=sys.stderr)
        bibtex = _extract_with_hint(extractor, identifier, hint)
        if bibtex:
            bibtex_entries.append(bibtex)

        # Rate limiting: pause between requests, not after the last one.
        if position < total:
            time.sleep(0.5)

    if not bibtex_entries:
        print('Error: No successful extractions', file=sys.stderr)
        sys.exit(1)

    # Format output
    if args.format == 'bibtex':
        output = '\n\n'.join(bibtex_entries) + '\n'
    else:  # json
        output = json.dumps({
            'count': len(bibtex_entries),
            'entries': bibtex_entries
        }, indent=2)

    # Write output
    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output)
        except OSError as e:
            print(f'Error writing output file: {e}', file=sys.stderr)
            sys.exit(1)
        print(f'\nSuccessfully wrote {len(bibtex_entries)} entries to {args.output}', file=sys.stderr)
    else:
        print(output)

    print(f'\nExtracted {len(bibtex_entries)}/{total} entries', file=sys.stderr)
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|
569
|
+
|