@duckmind/deepquark-darwin-arm64 0.9.78 → 0.9.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/.deepquark/skills/bundled/data-storytelling/SKILL.md +453 -0
  2. package/.deepquark/skills/bundled/docx/LICENSE.txt +30 -0
  3. package/.deepquark/skills/bundled/docx/SKILL.md +481 -0
  4. package/.deepquark/skills/bundled/docx/scripts/__init__.py +1 -0
  5. package/.deepquark/skills/bundled/docx/scripts/accept_changes.py +135 -0
  6. package/.deepquark/skills/bundled/docx/scripts/comment.py +318 -0
  7. package/.deepquark/skills/bundled/docx/scripts/office/helpers/__init__.py +0 -0
  8. package/.deepquark/skills/bundled/docx/scripts/office/helpers/merge_runs.py +199 -0
  9. package/.deepquark/skills/bundled/docx/scripts/office/helpers/simplify_redlines.py +197 -0
  10. package/.deepquark/skills/bundled/docx/scripts/office/pack.py +159 -0
  11. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  12. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  13. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  14. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  15. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  16. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  17. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  18. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  19. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  20. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  21. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  22. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  23. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  24. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  25. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  26. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  27. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  28. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  29. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  30. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  31. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  32. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  33. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  34. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  35. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  36. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  37. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  38. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  39. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  40. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  41. package/.deepquark/skills/bundled/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  42. package/.deepquark/skills/bundled/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  43. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  44. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  45. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  46. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  47. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  48. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  49. package/.deepquark/skills/bundled/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  50. package/.deepquark/skills/bundled/docx/scripts/office/soffice.py +183 -0
  51. package/.deepquark/skills/bundled/docx/scripts/office/unpack.py +132 -0
  52. package/.deepquark/skills/bundled/docx/scripts/office/validate.py +111 -0
  53. package/.deepquark/skills/bundled/docx/scripts/office/validators/__init__.py +15 -0
  54. package/.deepquark/skills/bundled/docx/scripts/office/validators/base.py +847 -0
  55. package/.deepquark/skills/bundled/docx/scripts/office/validators/docx.py +446 -0
  56. package/.deepquark/skills/bundled/docx/scripts/office/validators/pptx.py +275 -0
  57. package/.deepquark/skills/bundled/docx/scripts/office/validators/redlining.py +247 -0
  58. package/.deepquark/skills/bundled/docx/scripts/templates/comments.xml +3 -0
  59. package/.deepquark/skills/bundled/docx/scripts/templates/commentsExtended.xml +3 -0
  60. package/.deepquark/skills/bundled/docx/scripts/templates/commentsExtensible.xml +3 -0
  61. package/.deepquark/skills/bundled/docx/scripts/templates/commentsIds.xml +3 -0
  62. package/.deepquark/skills/bundled/docx/scripts/templates/people.xml +3 -0
  63. package/.deepquark/skills/bundled/drawio-architect/SKILL.md +300 -0
  64. package/.deepquark/skills/bundled/drawio-architect/references/architecture-patterns.md +236 -0
  65. package/.deepquark/skills/bundled/drawio-architect/references/branding.md +180 -0
  66. package/.deepquark/skills/bundled/drawio-architect/references/cloud-icons.md +493 -0
  67. package/.deepquark/skills/bundled/drawio-architect/references/style-guide.md +268 -0
  68. package/.deepquark/skills/bundled/duckmind-deep-research/ARCHITECTURE_REVIEW.md +495 -0
  69. package/.deepquark/skills/bundled/duckmind-deep-research/AUTONOMY_VERIFICATION.md +420 -0
  70. package/.deepquark/skills/bundled/duckmind-deep-research/COMPETITIVE_ANALYSIS.md +179 -0
  71. package/.deepquark/skills/bundled/duckmind-deep-research/CONTEXT_OPTIMIZATION.md +293 -0
  72. package/.deepquark/skills/bundled/duckmind-deep-research/QUICK_START.md +167 -0
  73. package/.deepquark/skills/bundled/duckmind-deep-research/README.md +259 -0
  74. package/.deepquark/skills/bundled/duckmind-deep-research/SKILL.md +754 -0
  75. package/.deepquark/skills/bundled/duckmind-deep-research/WORD_PRECISION_AUDIT.md +476 -0
  76. package/.deepquark/skills/bundled/duckmind-deep-research/reference/methodology.md +384 -0
  77. package/.deepquark/skills/bundled/duckmind-deep-research/requirements.txt +10 -0
  78. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/citation_manager.py +177 -0
  79. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/md_to_html.py +330 -0
  80. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/research_engine.py +578 -0
  81. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/source_evaluator.py +292 -0
  82. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/validate_report.py +354 -0
  83. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/verify_citations.py +430 -0
  84. package/.deepquark/skills/bundled/duckmind-deep-research/scripts/verify_html.py +220 -0
  85. package/.deepquark/skills/bundled/duckmind-deep-research/templates/mckinsey_report_template.html +443 -0
  86. package/.deepquark/skills/bundled/duckmind-deep-research/templates/report_template.md +414 -0
  87. package/.deepquark/skills/bundled/duckmind-deep-research/tests/fixtures/invalid_report.md +27 -0
  88. package/.deepquark/skills/bundled/duckmind-deep-research/tests/fixtures/valid_report.md +114 -0
  89. package/.deepquark/skills/bundled/duckmind-multimodal/SKILL.md +171 -0
  90. package/.deepquark/skills/bundled/duckmind-multimodal/references/image-generation.md +131 -0
  91. package/.deepquark/skills/bundled/duckmind-multimodal/references/pdf-processing.md +120 -0
  92. package/.deepquark/skills/bundled/duckmind-transcribe/SKILL.md +70 -0
  93. package/.deepquark/skills/bundled/duckmind-transcribe/scripts/transcribe.sh +134 -0
  94. package/.deepquark/skills/bundled/excel-analysis/SKILL.md +247 -0
  95. package/.deepquark/skills/bundled/ge-payroll/SKILL.md +153 -0
  96. package/.deepquark/skills/bundled/ge-payroll/evals/evals.json +23 -0
  97. package/.deepquark/skills/bundled/ge-payroll/references/pain-points-improvements.md +106 -0
  98. package/.deepquark/skills/bundled/ge-payroll/references/process-detail.md +217 -0
  99. package/.deepquark/skills/bundled/ge-payroll/references/raci-stakeholders.md +85 -0
  100. package/.deepquark/skills/bundled/ge-payroll/references/timeline-mandays.md +64 -0
  101. package/.deepquark/skills/bundled/pdf/LICENSE.txt +30 -0
  102. package/.deepquark/skills/bundled/pdf/SKILL.md +314 -0
  103. package/.deepquark/skills/bundled/pdf/forms.md +294 -0
  104. package/.deepquark/skills/bundled/pdf/reference.md +612 -0
  105. package/.deepquark/skills/bundled/pdf/scripts/check_bounding_boxes.py +65 -0
  106. package/.deepquark/skills/bundled/pdf/scripts/check_fillable_fields.py +11 -0
  107. package/.deepquark/skills/bundled/pdf/scripts/convert_pdf_to_images.py +33 -0
  108. package/.deepquark/skills/bundled/pdf/scripts/create_validation_image.py +37 -0
  109. package/.deepquark/skills/bundled/pdf/scripts/extract_form_field_info.py +122 -0
  110. package/.deepquark/skills/bundled/pdf/scripts/extract_form_structure.py +115 -0
  111. package/.deepquark/skills/bundled/pdf/scripts/fill_fillable_fields.py +98 -0
  112. package/.deepquark/skills/bundled/pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
  113. package/.deepquark/skills/bundled/perplexity-search/SKILL.md +447 -0
  114. package/.deepquark/skills/bundled/perplexity-search/assets/.env.example +16 -0
  115. package/.deepquark/skills/bundled/perplexity-search/references/model_comparison.md +386 -0
  116. package/.deepquark/skills/bundled/perplexity-search/references/openrouter_setup.md +454 -0
  117. package/.deepquark/skills/bundled/perplexity-search/references/search_strategies.md +258 -0
  118. package/.deepquark/skills/bundled/perplexity-search/scripts/perplexity_search.py +277 -0
  119. package/.deepquark/skills/bundled/perplexity-search/scripts/setup_env.py +171 -0
  120. package/.deepquark/skills/bundled/pptx/LICENSE.txt +30 -0
  121. package/.deepquark/skills/bundled/pptx/SKILL.md +232 -0
  122. package/.deepquark/skills/bundled/pptx/editing.md +205 -0
  123. package/.deepquark/skills/bundled/pptx/pptxgenjs.md +420 -0
  124. package/.deepquark/skills/bundled/pptx/scripts/__init__.py +0 -0
  125. package/.deepquark/skills/bundled/pptx/scripts/add_slide.py +195 -0
  126. package/.deepquark/skills/bundled/pptx/scripts/clean.py +286 -0
  127. package/.deepquark/skills/bundled/pptx/scripts/office/helpers/__init__.py +0 -0
  128. package/.deepquark/skills/bundled/pptx/scripts/office/helpers/merge_runs.py +199 -0
  129. package/.deepquark/skills/bundled/pptx/scripts/office/helpers/simplify_redlines.py +197 -0
  130. package/.deepquark/skills/bundled/pptx/scripts/office/pack.py +159 -0
  131. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  132. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  133. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  134. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  135. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  136. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  137. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  138. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  139. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  140. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  141. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  142. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  143. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  144. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  145. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  146. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  147. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  148. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  149. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  150. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  151. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  152. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  153. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  154. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  155. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  156. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  157. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  158. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  159. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  160. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  161. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  162. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/mce/mc.xsd +75 -0
  163. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  164. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  165. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  166. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  167. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  168. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  169. package/.deepquark/skills/bundled/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  170. package/.deepquark/skills/bundled/pptx/scripts/office/soffice.py +183 -0
  171. package/.deepquark/skills/bundled/pptx/scripts/office/unpack.py +132 -0
  172. package/.deepquark/skills/bundled/pptx/scripts/office/validate.py +111 -0
  173. package/.deepquark/skills/bundled/pptx/scripts/office/validators/__init__.py +15 -0
  174. package/.deepquark/skills/bundled/pptx/scripts/office/validators/base.py +847 -0
  175. package/.deepquark/skills/bundled/pptx/scripts/office/validators/docx.py +446 -0
  176. package/.deepquark/skills/bundled/pptx/scripts/office/validators/pptx.py +275 -0
  177. package/.deepquark/skills/bundled/pptx/scripts/office/validators/redlining.py +247 -0
  178. package/.deepquark/skills/bundled/pptx/scripts/thumbnail.py +289 -0
  179. package/.deepquark/skills/bundled/text-to-pdf-automation/SKILL.md +91 -0
  180. package/.deepquark/skills/bundled/web-fetch/SKILL.md +56 -0
  181. package/.deepquark/skills/bundled/web-fetch/scripts/fetch.sh +54 -0
  182. package/.deepquark/skills/bundled/xlsx/LICENSE.txt +30 -0
  183. package/.deepquark/skills/bundled/xlsx/SKILL.md +292 -0
  184. package/.deepquark/skills/bundled/xlsx/scripts/office/helpers/__init__.py +0 -0
  185. package/.deepquark/skills/bundled/xlsx/scripts/office/helpers/merge_runs.py +199 -0
  186. package/.deepquark/skills/bundled/xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
  187. package/.deepquark/skills/bundled/xlsx/scripts/office/pack.py +159 -0
  188. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  189. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  190. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  191. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  192. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  193. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  194. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  195. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  196. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  197. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  198. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  199. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  200. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  201. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  202. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  203. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  204. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  205. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  206. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  207. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  208. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  209. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  210. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  211. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  212. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  213. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  214. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  215. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  216. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  217. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  218. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  219. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  220. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  221. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  222. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  223. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  224. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  225. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  226. package/.deepquark/skills/bundled/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  227. package/.deepquark/skills/bundled/xlsx/scripts/office/soffice.py +183 -0
  228. package/.deepquark/skills/bundled/xlsx/scripts/office/unpack.py +132 -0
  229. package/.deepquark/skills/bundled/xlsx/scripts/office/validate.py +111 -0
  230. package/.deepquark/skills/bundled/xlsx/scripts/office/validators/__init__.py +15 -0
  231. package/.deepquark/skills/bundled/xlsx/scripts/office/validators/base.py +847 -0
  232. package/.deepquark/skills/bundled/xlsx/scripts/office/validators/docx.py +446 -0
  233. package/.deepquark/skills/bundled/xlsx/scripts/office/validators/pptx.py +275 -0
  234. package/.deepquark/skills/bundled/xlsx/scripts/office/validators/redlining.py +247 -0
  235. package/.deepquark/skills/bundled/xlsx/scripts/recalc.py +184 -0
  236. package/.deepquark/skills/bundled/youtube-downloader/SKILL.md +99 -0
  237. package/.deepquark/skills/bundled/youtube-downloader/scripts/download_video.py +145 -0
  238. package/.deepquark/skills/bundled/youtube-transcribe-skill/SKILL.md +116 -0
  239. package/bin/deepquark +0 -0
  240. package/package.json +4 -3
@@ -0,0 +1,430 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Citation Verification Script (Enhanced with CiteGuard techniques)
4
+
5
+ Catches fabricated citations by checking:
6
+ 1. DOI resolution (via doi.org)
7
+ 2. Basic metadata matching (title similarity, year match)
8
+ 3. URL accessibility verification
9
+ 4. Hallucination pattern detection (generic titles, suspicious patterns)
10
+ 5. Flags suspicious entries for manual review
11
+
12
+ Enhanced in 2025 with:
13
+ - Content alignment checking (when URL available)
14
+ - Multi-source verification (DOI + URL + metadata cross-check)
15
+ - Advanced hallucination detection patterns
16
+ - Better false positive reduction
17
+
18
+ Usage:
19
+ python verify_citations.py --report [path]
20
+ python verify_citations.py --report [path] --strict # Fail on any unverified
21
+
22
+ Does NOT require API keys - uses free DOI resolver and heuristics.
23
+ """
24
+
25
+ import sys
26
+ import argparse
27
+ import re
28
+ from pathlib import Path
29
+ from typing import List, Dict, Tuple
30
+ from urllib import request, error
31
+ from urllib.parse import quote
32
+ import json
33
+ import time
34
+
35
+ class CitationVerifier:
36
+ """Verify citations in research report"""
37
+
38
def __init__(self, report_path: Path, strict_mode: bool = False):
    """Load the report text and set up empty result collections.

    Args:
        report_path: Location of the report file; its text is read
            immediately through ``_read_report``.
        strict_mode: Flag stored unchanged on the instance.
    """
    self.report_path = report_path
    self.strict_mode = strict_mode
    # _read_report() reads self.report_path, so it must run after the
    # assignment above.
    self.content = self._read_report()

    # Result collections, all empty to begin with.
    self.suspicious, self.verified, self.errors = [], [], []

    # (regex, description) pairs used by the hallucination check
    # (2025 CiteGuard enhancement).
    generic_study = (
        r'^(A |An |The )?(Study|Analysis|Review|Survey|Investigation) (of|on|into)',
        "Generic academic title pattern",
    )
    generic_advances = (
        r'^(Recent|Current|Modern|Contemporary) (Advances|Developments|Trends) in',
        "Generic 'advances' title pattern",
    )
    templated_review = (
        r'^[A-Z][a-z]+ [A-Z][a-z]+: A (Comprehensive|Complete|Systematic) (Review|Analysis|Guide)$',
        "Too perfect, templated structure",
    )
    self.suspicious_patterns = [generic_study, generic_advances, templated_review]
57
+
58
def _read_report(self) -> str:
    """Return the full text of ``self.report_path`` decoded as UTF-8.

    Any failure while opening or reading the file is printed and the
    process exits with status 1 instead of propagating the exception.
    """
    try:
        handle = open(self.report_path, 'r', encoding='utf-8')
        with handle:
            text = handle.read()
    except Exception as exc:
        print(f"L ERROR: Cannot read report: {exc}")
        sys.exit(1)
    else:
        return text
66
+
67
def extract_bibliography(self) -> List[Dict]:
    """Extract numbered bibliography entries from the report text.

    The section is located by a ``## Bibliography`` heading
    (case-insensitive) and runs until the next ``##`` or end of text.
    An entry starts on a line of the form ``[N] ...``; following
    non-blank lines that do not start a new entry are treated as
    continuations and joined to it with a single space.

    Fields are parsed from the complete joined entry, so a year, quoted
    title, DOI or URL that appears on a continuation line is still
    picked up. Trailing sentence punctuation and unbalanced closing
    brackets are trimmed from the DOI and URL so that a reference ending
    in ``.../xyz123.`` does not yield an unresolvable identifier.

    Returns:
        A list of dicts with keys ``num``, ``raw``, ``year``, ``title``,
        ``doi`` and ``url``; fields that cannot be found are ``None``.
        Returns ``[]`` and records an error in ``self.errors`` when no
        bibliography section exists.
    """
    section = re.search(
        r'## Bibliography(.*?)(?=##|\Z)',
        self.content,
        re.DOTALL | re.IGNORECASE,
    )
    if not section:
        self.errors.append("No Bibliography section found")
        return []

    closers = {')': '(', ']': '[', '>': '<'}

    def _trim_trailing(token: str) -> str:
        # Remove punctuation that belongs to the surrounding sentence,
        # keeping closing brackets only when they are balanced in-token.
        while token:
            last = token[-1]
            if last in '.,;:"\'':
                token = token[:-1]
            elif last in closers and token.count(last) > token.count(closers[last]):
                token = token[:-1]
            else:
                break
        return token

    # Pass 1: group physical lines into logical [num, raw] entries.
    grouped = []
    for line in section.group(1).strip().split('\n'):
        line = line.strip()
        if not line:
            continue
        start = re.match(r'^\[(\d+)\]\s+(.+)$', line)
        if start:
            grouped.append([start.group(1), start.group(2)])
        elif grouped:
            # Continuation of the current entry.
            grouped[-1][1] += ' ' + line

    # Pass 2: parse "Author (Year). "Title". Venue. URL" from the full text.
    entries = []
    for num, raw in grouped:
        year_match = re.search(r'\((\d{4})\)', raw)
        title_match = re.search(r'"([^"]+)"', raw)
        doi_match = re.search(r'doi\.org/(10\.\S+)', raw)
        url_match = re.search(r'https?://[^\s\)]+', raw)

        doi = _trim_trailing(doi_match.group(1)) if doi_match else None
        url = _trim_trailing(url_match.group(0)) if url_match else None

        entries.append({
            'num': num,
            'raw': raw,
            'year': year_match.group(1) if year_match else None,
            'title': title_match.group(1) if title_match else None,
            'doi': doi or None,
            'url': url or None,
        })

    return entries
119
+
120
def verify_doi(self, doi: str) -> Tuple[bool, Dict]:
    """Resolve a DOI through doi.org and return basic metadata.

    The request asks for CSL-JSON via the ``Accept`` header. Metadata
    extraction is tolerant of missing or oddly shaped fields, so a DOI
    that resolves is not reported as a failure merely because, for
    example, ``issued.date-parts`` is empty.

    Args:
        doi: DOI string such as ``10.1000/xyz123``; falsy values
            short-circuit without any network access.

    Returns:
        ``(True, metadata)`` on success, where ``metadata`` has keys
        ``title``, ``year``, ``authors`` and ``venue`` (``year`` is
        ``None`` when unavailable). ``(False, {})`` for a falsy DOI, and
        ``(False, {'error': message})`` when resolution fails. This
        method does not raise.
    """
    if not doi:
        return False, {}

    try:
        # Use content negotiation to get JSON metadata
        req = request.Request(f"https://doi.org/{quote(doi)}")
        req.add_header('Accept', 'application/vnd.citationstyles.csl+json')

        with request.urlopen(req, timeout=10) as response:
            data = json.loads(response.read().decode('utf-8'))
    except error.HTTPError as e:
        if e.code == 404:
            return False, {'error': 'DOI not found (404)'}
        return False, {'error': f'HTTP {e.code}'}
    except Exception as e:
        # Best-effort check: network and decoding problems become a
        # failed verification rather than an exception.
        return False, {'error': str(e)}

    if not isinstance(data, dict):
        return False, {'error': 'Unexpected metadata format'}

    def _as_text(value) -> str:
        # Tolerate list-valued fields by taking the first element.
        if isinstance(value, list):
            value = value[0] if value else ''
        return value if isinstance(value, str) else ''

    # issued.date-parts is expected to look like [[year, month, day]],
    # but may be missing, empty or null.
    year = None
    issued = data.get('issued')
    date_parts = issued.get('date-parts') if isinstance(issued, dict) else None
    if isinstance(date_parts, list) and date_parts:
        first = date_parts[0]
        if isinstance(first, list) and first:
            year = first[0]

    authors = [
        f"{a.get('family', '')} {a.get('given', '')}"
        for a in (data.get('author') or [])
        if isinstance(a, dict)
    ]

    return True, {
        'title': _as_text(data.get('title', '')),
        'year': year,
        'authors': authors,
        'venue': _as_text(data.get('container-title', '')),
    }
152
+
153
def verify_url(self, url: str) -> Tuple[bool, str]:
    """Check whether a URL is reachable (2025 CiteGuard enhancement).

    A HEAD request is tried first so nothing is downloaded. If the
    server answers 405 or 501 (HEAD not allowed / not implemented), the
    check is retried once with GET, because such a response says nothing
    about whether the page exists.

    Args:
        url: URL to probe; falsy values short-circuit without any
            network access.

    Returns:
        ``(accessible, status_message)``. ``accessible`` is ``True``
        only for a final HTTP 200. This method does not raise.
    """
    if not url:
        return False, "No URL"

    def _request_status(method: str) -> int:
        req = request.Request(url, method=method)
        req.add_header('User-Agent', 'Mozilla/5.0 (Research Citation Verifier)')
        # The body is never read; leaving the context closes the connection.
        with request.urlopen(req, timeout=10) as response:
            return response.status

    try:
        try:
            status = _request_status('HEAD')
        except error.HTTPError as head_err:
            if head_err.code not in (405, 501):
                raise
            status = _request_status('GET')
    except error.HTTPError as e:
        return False, f"HTTP {e.code}"
    except error.URLError as e:
        return False, f"URL error: {e.reason}"
    except Exception as e:
        return False, f"Connection error: {str(e)[:50]}"

    if status == 200:
        return True, "URL accessible"
    return False, f"HTTP {status}"
177
+
178
def detect_hallucination_patterns(self, entry: Dict) -> List[str]:
    """Detect common LLM hallucination patterns in a citation (2025 CiteGuard).

    Checks, in order: the title against ``self.suspicious_patterns``,
    very short generic titles, placeholder text, and year consistency
    (recent year with no DOI/URL, year in the future, pre-2000 year with
    modern AI vocabulary).

    Args:
        entry: Bibliography entry dict; reads ``title``, ``year``,
            ``doi`` and ``url``.

    Returns:
        A list of human-readable issue strings, empty when nothing is
        flagged. Entries without a title are not checked at all and
        return an empty list.
    """
    # Function-scope import: the future-year check needs today's date.
    import datetime

    issues = []
    title = entry.get('title', '')

    if not title:
        return issues

    lowered = title.lower()

    # Check against suspicious patterns
    for pattern, description in self.suspicious_patterns:
        if re.match(pattern, title, re.IGNORECASE):
            issues.append(f"Suspicious title pattern: {description}")

    # Check for overly generic titles
    generic_words = ['overview', 'introduction', 'guide', 'handbook', 'manual']
    if any(word in lowered for word in generic_words) and len(title.split()) < 5:
        issues.append("Very generic short title")

    # Check for placeholder-like titles
    if any(x in lowered for x in ['tbd', 'todo', 'placeholder', 'example']):
        issues.append("Placeholder text in title")

    # Check for inconsistent metadata
    if entry.get('year'):
        year = int(entry['year'])
        # Very recent without DOI or URL is suspicious
        if year >= 2024 and not entry.get('doi') and not entry.get('url'):
            issues.append("Recent year (2024+) with no verification method")
        # Compare with the actual current year rather than a fixed
        # constant, which would go stale.
        if year > datetime.date.today().year:
            issues.append(f"Future year: {year}")
        # Match AI terms as whole words; a plain substring test would
        # flag "pain", "brain" or "trained" because they contain "ai".
        mentions_ai = re.search(
            r'\b(?:ai|llms?|(?:chat)?gpt\w*|transformers?)\b', lowered
        )
        if year < 2000 and mentions_ai:
            issues.append(f"Anachronistic: pre-2000 ({year}) citation mentioning modern AI terms")

    return issues
217
+
218
def check_title_similarity(self, title1: str, title2: str) -> float:
    """Score two titles by the overlap of their word sets.

    Each title is lowercased, every character that is neither a word
    character nor whitespace is replaced by a space, and the result is
    split into a set of words. The score is the size of the intersection
    divided by the size of the union.

    Returns:
        A float between 0.0 and 1.0; 0.0 when either title is falsy or
        produces no words.
    """
    if not (title1 and title2):
        return 0.0

    tokens_a, tokens_b = (
        set(re.sub(r'[^\w\s]', ' ', text.lower()).split())
        for text in (title1, title2)
    )
    if not (tokens_a and tokens_b):
        return 0.0

    # Both sets are non-empty here, so the union cannot be empty.
    shared = tokens_a & tokens_b
    combined = tokens_a | tokens_b
    return len(shared) / len(combined)
242
+
243
+ def verify_entry(self, entry: Dict) -> Dict:
244
+ """Verify a single bibliography entry (Enhanced 2025 with CiteGuard)"""
245
+ result = {
246
+ 'num': entry['num'],
247
+ 'status': 'unknown',
248
+ 'issues': [],
249
+ 'metadata': {},
250
+ 'verification_methods': []
251
+ }
252
+
253
+ # STEP 1: Run hallucination detection (CiteGuard 2025)
254
+ hallucination_issues = self.detect_hallucination_patterns(entry)
255
+ if hallucination_issues:
256
+ result['issues'].extend(hallucination_issues)
257
+ result['status'] = 'suspicious'
258
+
259
+ # STEP 2: Has DOI?
260
+ if entry['doi']:
261
+ print(f" [{entry['num']}] Checking DOI {entry['doi']}...", end=' ')
262
+ success, metadata = self.verify_doi(entry['doi'])
263
+
264
+ if success:
265
+ result['metadata'] = metadata
266
+ result['status'] = 'verified'
267
+ print("")
268
+
269
+ # Check title similarity if we have both
270
+ if entry['title'] and metadata.get('title'):
271
+ similarity = self.check_title_similarity(
272
+ entry['title'],
273
+ metadata['title']
274
+ )
275
+
276
+ if similarity < 0.5:
277
+ result['issues'].append(
278
+ f"Title mismatch (similarity: {similarity:.1%})"
279
+ )
280
+ result['status'] = 'suspicious'
281
+
282
+ # Check year match
283
+ if entry['year'] and metadata.get('year'):
284
+ if int(entry['year']) != int(metadata['year']):
285
+ result['issues'].append(
286
+ f"Year mismatch: report says {entry['year']}, DOI says {metadata['year']}"
287
+ )
288
+ result['status'] = 'suspicious'
289
+
290
+ else:
291
+ print(f"✗ {metadata.get('error', 'Failed')}")
292
+ result['status'] = 'unverified'
293
+ result['issues'].append(f"DOI resolution failed: {metadata.get('error', 'unknown')}")
294
+
295
+ # STEP 3: Check URL accessibility (if no DOI or DOI failed)
296
+ if entry['url'] and result['status'] != 'verified':
297
+ url_ok, url_status = self.verify_url(entry['url'])
298
+ if url_ok:
299
+ result['verification_methods'].append('URL')
300
+ # Upgrade status if URL verifies
301
+ if result['status'] in ['unknown', 'no_doi', 'unverified']:
302
+ result['status'] = 'url_verified'
303
+ print(f" [{entry['num']}] URL accessible ✓")
304
+ else:
305
+ result['issues'].append(f"URL check failed: {url_status}")
306
+
307
+ # STEP 4: Final fallback - no verification method
308
+ if not entry['doi'] and not entry['url']:
309
+ if 'No DOI provided' not in ' '.join(result['issues']):
310
+ result['issues'].append("No DOI or URL - cannot verify")
311
+ result['status'] = 'suspicious'
312
+
313
+ return result
314
+
315
+ def verify_all(self):
316
+ """Verify all bibliography entries"""
317
+ print(f"\n{'='*60}")
318
+ print(f"CITATION VERIFICATION: {self.report_path.name}")
319
+ print(f"{'='*60}\n")
320
+
321
+ entries = self.extract_bibliography()
322
+
323
+ if not entries:
324
+ print("L No bibliography entries found\n")
325
+ return False
326
+
327
+ print(f"Found {len(entries)} citations\n")
328
+
329
+ results = []
330
+ for entry in entries:
331
+ result = self.verify_entry(entry)
332
+ results.append(result)
333
+
334
+ # Rate limiting
335
+ time.sleep(0.5)
336
+
337
+ # Summarize
338
+ print(f"\n{'='*60}")
339
+ print(f"VERIFICATION SUMMARY")
340
+ print(f"{'='*60}\n")
341
+
342
+ verified = [r for r in results if r['status'] == 'verified']
343
+ url_verified = [r for r in results if r['status'] == 'url_verified']
344
+ suspicious = [r for r in results if r['status'] == 'suspicious']
345
+ unverified = [r for r in results if r['status'] in ['unverified', 'no_doi', 'unknown']]
346
+
347
+ print(f'DOI Verified: {len(verified)}/{len(results)}')
348
+ print(f'URL Verified: {len(url_verified)}/{len(results)}')
349
+ print(f'Suspicious: {len(suspicious)}/{len(results)}')
350
+ print(f'Unverified: {len(unverified)}/{len(results)}')
351
+ print()
352
+
353
+ if suspicious:
354
+ print('SUSPICIOUS CITATIONS (Manual Review Needed):')
355
+ for r in suspicious:
356
+ print(f"\n [{r['num']}]")
357
+ for issue in r['issues']:
358
+ print(f" - {issue}")
359
+ print()
360
+
361
+ if unverified and len(unverified) > 0:
362
+ print('UNVERIFIED CITATIONS (Could not check):')
363
+ for r in unverified:
364
+ print(f" [{r['num']}] {r['issues'][0] if r['issues'] else 'Unknown'}")
365
+ print()
366
+
367
+ # Decision (Enhanced 2025 - includes URL-verified as acceptable)
368
+ total_verified = len(verified) + len(url_verified)
369
+
370
+ if suspicious:
371
+ print('WARNING: Suspicious citations detected')
372
+ if self.strict_mode:
373
+ print(' STRICT MODE: Failing due to suspicious citations')
374
+ return False
375
+ else:
376
+ print(' (Continuing in non-strict mode)')
377
+
378
+ if self.strict_mode and unverified:
379
+ print('STRICT MODE: Unverified citations found')
380
+ return False
381
+
382
+ if total_verified / len(results) < 0.5:
383
+ print('WARNING: Less than 50% citations verified')
384
+ return True # Pass with warning
385
+ else:
386
+ print('CITATION VERIFICATION PASSED')
387
+ return True
388
+
389
+
390
def main():
    """Command-line entry point for citation verification.

    Parses --report/-r (required) and --strict, exits with status 1 when
    the report file does not exist, otherwise runs the verifier and exits
    with 0 on pass or 1 on failure.
    """
    cli = argparse.ArgumentParser(
        description="Verify citations in research report",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python verify_citations.py --report report.md

Note: Requires internet connection to check DOIs.
Uses free DOI resolver - no API key needed.
"""
    )
    cli.add_argument(
        '--report', '-r',
        type=str,
        required=True,
        help='Path to research report markdown file'
    )
    cli.add_argument(
        '--strict',
        action='store_true',
        help='Strict mode: fail on any unverified or suspicious citations'
    )

    options = cli.parse_args()
    target = Path(options.report)

    if not target.exists():
        print(f"ERROR: Report file not found: {target}")
        sys.exit(1)

    outcome = CitationVerifier(target, strict_mode=options.strict).verify_all()
    sys.exit(1 if not outcome else 0)
427
+
428
+
429
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ HTML Report Verification Script
4
+ Validates that HTML reports are properly generated with all sections from MD
5
+ """
6
+
7
+ import argparse
8
+ import re
9
+ from pathlib import Path
10
+ from typing import List, Tuple
11
+
12
+
13
class HTMLVerifier:
    """Verify HTML research reports against their markdown source.

    Each check appends messages to ``self.errors`` (fatal) or
    ``self.warnings`` (non-fatal); ``verify`` runs every check and reports.
    """

    # Explicit emoji blocks only. A single wide range such as
    # U+24C2..U+1F251 also spans CJK, Hangul and box-drawing characters,
    # which would flag ordinary non-Latin text as emojis.
    _EMOJI_PATTERN = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags
        "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
        "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
        "\U0001F200-\U0001F251"  # enclosed ideographic supplement
        "\U00002600-\U000026FF"  # miscellaneous symbols
        "\U00002702-\U000027B0"  # dingbats
        "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows
        "\U000024C2"  # circled latin capital letter M
        "]+",
        flags=re.UNICODE
    )

    def __init__(self, html_path: Path, md_path: Path):
        """Store the two report paths and start with empty result lists."""
        self.html_path = html_path
        self.md_path = md_path
        self.errors: List[str] = []
        self.warnings: List[str] = []

    def verify(self) -> bool:
        """
        Run all verification checks

        Returns:
            True if all checks pass, False otherwise
        """
        print(f"\n{'='*60}")
        print("HTML REPORT VERIFICATION")
        print(f"{'='*60}\n")

        print(f"HTML File: {self.html_path}")
        print(f"MD File: {self.md_path}\n")

        # Read files. Decode explicitly as UTF-8 so the result does not
        # depend on the platform's default encoding.
        try:
            html_content = self.html_path.read_text(encoding='utf-8')
            md_content = self.md_path.read_text(encoding='utf-8')
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError for undecodable files.
            self.errors.append(f"Failed to read files: {e}")
            # Show the failure; otherwise the recorded error is never printed.
            self._print_results()
            return False

        # Run checks
        self._check_sections(html_content, md_content)
        self._check_no_placeholders(html_content)
        self._check_no_emojis(html_content)
        self._check_structure(html_content)
        self._check_citations(html_content, md_content)
        self._check_bibliography(html_content, md_content)

        # Report results
        self._print_results()

        return len(self.errors) == 0

    def _check_sections(self, html: str, md: str) -> None:
        """Verify all markdown sections are present in HTML"""
        # Extract section headings from markdown
        md_sections = re.findall(r'^## (.+)$', md, re.MULTILINE)

        # Extract sections from HTML
        html_sections = re.findall(r'<h2 class="section-title">(.+?)</h2>', html)

        # Check if we have placeholder sections like <div class="section">#</div>
        placeholder_sections = re.findall(r'<div class="section">#</div>', html)

        if placeholder_sections:
            self.errors.append(
                f"Found {len(placeholder_sections)} placeholder sections (empty '#' divs) - content not converted properly"
            )

        # Compare section counts
        if len(md_sections) > len(html_sections) + 1:  # +1 for bibliography which is separate
            self.errors.append(
                f"Section count mismatch: MD has {len(md_sections)} sections, HTML has only {len(html_sections)} + bibliography"
            )
            missing = set(md_sections) - set(html_sections)
            if missing:
                self.errors.append(f"Missing sections in HTML: {missing}")

        # Verify Executive Summary is present
        if "Executive Summary" in md and "Executive Summary" not in html:
            self.errors.append("Executive Summary missing from HTML")

    def _check_no_placeholders(self, html: str) -> None:
        """Check for common placeholders that shouldn't be in final report"""
        placeholders = [
            '{{TITLE}}', '{{DATE}}', '{{CONTENT}}', '{{BIBLIOGRAPHY}}',
            '{{METRICS_DASHBOARD}}', '{{SOURCE_COUNT}}', 'TODO', 'TBD',
            'PLACEHOLDER', 'FIXME'
        ]

        found = [placeholder for placeholder in placeholders if placeholder in html]

        if found:
            self.errors.append(f"Found unreplaced placeholders: {', '.join(found)}")

    def _check_no_emojis(self, html: str) -> None:
        """Verify no emojis are present in HTML"""
        emojis = self._EMOJI_PATTERN.findall(html)
        if emojis:
            unique_emojis = set(emojis)
            self.errors.append(f"Found {len(emojis)} emojis in HTML (should be none): {unique_emojis}")

    def _check_structure(self, html: str) -> None:
        """Verify HTML has proper structure"""
        required_elements = [
            ('<html', 'HTML tag'),
            ('<head', 'head tag'),
            ('<body', 'body tag'),
            ('<title>', 'title tag'),
            ('class="header"', 'header section'),
            ('class="content"', 'content section'),
            ('class="bibliography"', 'bibliography section'),
        ]

        for element, name in required_elements:
            if element not in html:
                self.errors.append(f"Missing {name} in HTML")

        # Check for unclosed tags (basic check based on substring counts)
        open_divs = html.count('<div')
        close_divs = html.count('</div>')

        if abs(open_divs - close_divs) > 2:  # Allow small discrepancy
            self.warnings.append(
                f"Possible unclosed divs: {open_divs} opening tags, {close_divs} closing tags"
            )

    def _check_citations(self, html: str, md: str) -> None:
        """Verify citations are present"""
        # Extract citations from markdown
        md_citations = set(re.findall(r'\[(\d+)\]', md))

        # Extract citations from HTML (excluding bibliography)
        html_content = html.split('class="bibliography"')[0] if 'class="bibliography"' in html else html
        html_citations = set(re.findall(r'\[(\d+)\]', html_content))

        if len(md_citations) > 0 and len(html_citations) == 0:
            self.errors.append("No citations found in HTML content (but present in MD)")

        if len(md_citations) > len(html_citations) * 1.5:  # Allow some variation
            self.warnings.append(
                f"Fewer citations in HTML ({len(html_citations)}) than MD ({len(md_citations)})"
            )

    def _check_bibliography(self, html: str, md: str) -> None:
        """Verify bibliography is present and formatted"""
        if '## Bibliography' in md:
            if 'class="bibliography"' not in html:
                self.errors.append("Bibliography section missing from HTML")
            elif 'class="bib-entry"' not in html:
                self.warnings.append("Bibliography present but entries not properly formatted")

    def _print_results(self) -> None:
        """Print verification results"""
        print(f"\n{'-'*60}")
        print("VERIFICATION RESULTS")
        print(f"{'-'*60}\n")

        if self.errors:
            print(f"❌ ERRORS ({len(self.errors)}):")
            for i, error in enumerate(self.errors, 1):
                print(f" {i}. {error}")
            print()

        if self.warnings:
            print(f"⚠️ WARNINGS ({len(self.warnings)}):")
            for i, warning in enumerate(self.warnings, 1):
                print(f" {i}. {warning}")
            print()

        if not self.errors and not self.warnings:
            print("✅ All checks passed! HTML report is valid.")
            print()

        print(f"{'-'*60}\n")
195
+
196
+
197
def main():
    """Main entry point: return 0 when the HTML report verifies, 1 otherwise."""
    cli = argparse.ArgumentParser(description='Verify HTML research report')
    cli.add_argument('--html', type=Path, required=True, help='Path to HTML report')
    cli.add_argument('--md', type=Path, required=True, help='Path to markdown report')

    options = cli.parse_args()

    # Both inputs must exist; the HTML path is checked first.
    for label, path in (('HTML', options.html), ('Markdown', options.md)):
        if not path.exists():
            print(f"Error: {label} file not found: {path}")
            return 1

    passed = HTMLVerifier(options.html, options.md).verify()
    if passed:
        return 0
    return 1
217
+
218
+
219
# Run the CLI only when executed as a script. SystemExit is raised directly
# because the bare exit() helper is injected by the site module and is not
# guaranteed to exist (e.g. under `python -S`).
if __name__ == "__main__":
    raise SystemExit(main())