apex-ruby 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (501)
  1. checksums.yaml +7 -0
  2. data/README.md +119 -0
  3. data/apex-ruby.gemspec +31 -0
  4. data/ext/apex_ext/apex_ext.c +215 -0
  5. data/ext/apex_ext/apex_src/BENCHMARK.md +32 -0
  6. data/ext/apex_ext/apex_src/BENCHMARK_COMPARISON.md +67 -0
  7. data/ext/apex_ext/apex_src/CHANGELOG.md +2454 -0
  8. data/ext/apex_ext/apex_src/CMakeLists.txt +454 -0
  9. data/ext/apex_ext/apex_src/Dockerfile.linux-build +15 -0
  10. data/ext/apex_ext/apex_src/Formula/apex.rb +38 -0
  11. data/ext/apex_ext/apex_src/Info.plist.in +27 -0
  12. data/ext/apex_ext/apex_src/LICENSE +21 -0
  13. data/ext/apex_ext/apex_src/Package.swift +160 -0
  14. data/ext/apex_ext/apex_src/PackageSupport/README.md +17 -0
  15. data/ext/apex_ext/apex_src/PackageSupport/cmark-gfm/cmark-gfm_export.h +20 -0
  16. data/ext/apex_ext/apex_src/PackageSupport/cmark-gfm/cmark-gfm_version.h +14 -0
  17. data/ext/apex_ext/apex_src/PackageSupport/cmark-gfm/cmark_gfm_spm_stub.c +4 -0
  18. data/ext/apex_ext/apex_src/PackageSupport/cmark-gfm/config.h +41 -0
  19. data/ext/apex_ext/apex_src/README.md +452 -0
  20. data/ext/apex_ext/apex_src/VERSION +1 -0
  21. data/ext/apex_ext/apex_src/apex-header-2-rb@2x.webp +0 -0
  22. data/ext/apex_ext/apex_src/apex-plugins.json.example +20 -0
  23. data/ext/apex_ext/apex_src/apex.pc.in +11 -0
  24. data/ext/apex_ext/apex_src/cli/main.c +2720 -0
  25. data/ext/apex_ext/apex_src/debug_test.sh +22 -0
  26. data/ext/apex_ext/apex_src/docs/API_REFERENCE.md +451 -0
  27. data/ext/apex_ext/apex_src/docs/ARCHITECTURE.md +166 -0
  28. data/ext/apex_ext/apex_src/docs/CMARK_INTEGRATION.md +220 -0
  29. data/ext/apex_ext/apex_src/docs/CRITICMARKUP.md +501 -0
  30. data/ext/apex_ext/apex_src/docs/DEBUGGING.md +73 -0
  31. data/ext/apex_ext/apex_src/docs/FINAL_STATUS.md +391 -0
  32. data/ext/apex_ext/apex_src/docs/FINAL_STATUS_UPDATE.md +237 -0
  33. data/ext/apex_ext/apex_src/docs/FUTURE_FEATURES.md +456 -0
  34. data/ext/apex_ext/apex_src/docs/IAL_FEATURES.md +210 -0
  35. data/ext/apex_ext/apex_src/docs/IAL_STATUS.md +344 -0
  36. data/ext/apex_ext/apex_src/docs/INTEGRATION_EXAMPLE.m +144 -0
  37. data/ext/apex_ext/apex_src/docs/LIMITATIONS_RESOLVED.md +278 -0
  38. data/ext/apex_ext/apex_src/docs/OUTPUT_MODES.md +321 -0
  39. data/ext/apex_ext/apex_src/docs/PROGRESS.md +167 -0
  40. data/ext/apex_ext/apex_src/docs/STANDALONE_FEATURE.md +174 -0
  41. data/ext/apex_ext/apex_src/docs/TABLE_SPANS_STATUS.md +243 -0
  42. data/ext/apex_ext/apex_src/docs/TEST_COVERAGE.md +316 -0
  43. data/ext/apex_ext/apex_src/docs/USER_GUIDE.md +803 -0
  44. data/ext/apex_ext/apex_src/docs/WIKI_LINKS_ISSUE.md +91 -0
  45. data/ext/apex_ext/apex_src/documentation/README.md +160 -0
  46. data/ext/apex_ext/apex_src/documentation/docsets/Apex Command Line Options.cheatsheet.txt +365 -0
  47. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Info.plist +24 -0
  48. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/C-API.html +1737 -0
  49. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Citations.html +1420 -0
  50. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Command-Line-Options.html +3574 -0
  51. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Configuration.html +1603 -0
  52. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Credits.html +910 -0
  53. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Examples.html +1168 -0
  54. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Getting-Started.html +1003 -0
  55. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Header-IDs.html +1308 -0
  56. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Home.html +1078 -0
  57. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Inline-Attribute-Lists.html +1622 -0
  58. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Installation.html +1168 -0
  59. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Limitations-and-Roadmap.html +1698 -0
  60. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Metadata-Transforms.html +1531 -0
  61. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Modes.html +1980 -0
  62. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Multi-File-Documents.html +1368 -0
  63. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Pandoc-Integration.html +1151 -0
  64. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Plugins.html +2861 -0
  65. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Syntax.html +3981 -0
  66. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Troubleshooting.html +1454 -0
  67. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Usage.html +1200 -0
  68. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/Documents/Xcode-Integration.html +2066 -0
  69. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/docSet.dsidx +0 -0
  70. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/optimizedIndex.dsidx +0 -0
  71. data/ext/apex_ext/apex_src/documentation/docsets/Apex.docset/Contents/Resources/tempOptimizedIndex.dsidx +0 -0
  72. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Info.plist +22 -0
  73. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans.woff +0 -0
  74. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Bold.woff +0 -0
  75. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Bold_Italic.woff +0 -0
  76. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Extrabold.woff +0 -0
  77. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Extrabold_Italic.woff +0 -0
  78. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Italic.woff +0 -0
  79. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Semibold.woff +0 -0
  80. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/cheatset_resources/Open_Sans_Semibold_Italic.woff +0 -0
  81. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/index.html +914 -0
  82. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/Documents/style.css +399 -0
  83. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/docSet.dsidx +0 -0
  84. data/ext/apex_ext/apex_src/documentation/docsets/ApexCLI.docset/Contents/Resources/optimizedIndex.dsidx +0 -0
  85. data/ext/apex_ext/apex_src/documentation/generate_app_docs.rb +772 -0
  86. data/ext/apex_ext/apex_src/documentation/generate_app_docs_ai.rb +678 -0
  87. data/ext/apex_ext/apex_src/documentation/generate_docset.rb +873 -0
  88. data/ext/apex_ext/apex_src/documentation/generate_single_html.rb +733 -0
  89. data/ext/apex_ext/apex_src/documentation/html/apex-docs.html +17073 -0
  90. data/ext/apex_ext/apex_src/documentation/shared_scripts.js +64 -0
  91. data/ext/apex_ext/apex_src/documentation/shared_styles.css +646 -0
  92. data/ext/apex_ext/apex_src/documentation/transform_for_app.example.md +260 -0
  93. data/ext/apex_ext/apex_src/examples/bracketed_spans_demo.md +119 -0
  94. data/ext/apex_ext/apex_src/examples/emoji_span_plugin.yml +11 -0
  95. data/ext/apex_ext/apex_src/examples/example.html +53 -0
  96. data/ext/apex_ext/apex_src/examples/example.md +85 -0
  97. data/ext/apex_ext/apex_src/examples/fenced_divs_demo.md +158 -0
  98. data/ext/apex_ext/apex_src/examples/kbd.md +8 -0
  99. data/ext/apex_ext/apex_src/examples/kbd_plugin.rb +250 -0
  100. data/ext/apex_ext/apex_src/examples/kbd_plugin.yml +9 -0
  101. data/ext/apex_ext/apex_src/icon/apexicon-outline-black.png +0 -0
  102. data/ext/apex_ext/apex_src/icon/apexicon-outline-black@2x.png +0 -0
  103. data/ext/apex_ext/apex_src/icon/apexicon-outline-mark.png +0 -0
  104. data/ext/apex_ext/apex_src/icon/apexicon-outline-mark@2x.png +0 -0
  105. data/ext/apex_ext/apex_src/icon/apexicon-outline-white.png +0 -0
  106. data/ext/apex_ext/apex_src/icon/apexicon-outline-white@2x.png +0 -0
  107. data/ext/apex_ext/apex_src/icon/apexicon.png +0 -0
  108. data/ext/apex_ext/apex_src/icon/apexicon@2x.png +0 -0
  109. data/ext/apex_ext/apex_src/include/apex/apex.h +247 -0
  110. data/ext/apex_ext/apex_src/include/apex/buffer.h +93 -0
  111. data/ext/apex_ext/apex_src/include/apex/module.modulemap +16 -0
  112. data/ext/apex_ext/apex_src/include/apex/parser.h +150 -0
  113. data/ext/apex_ext/apex_src/include/apex/renderer.h +39 -0
  114. data/ext/apex_ext/apex_src/man/apex-config.5 +374 -0
  115. data/ext/apex_ext/apex_src/man/apex-config.5.md +260 -0
  116. data/ext/apex_ext/apex_src/man/apex-plugins.7 +456 -0
  117. data/ext/apex_ext/apex_src/man/apex-plugins.7.md +365 -0
  118. data/ext/apex_ext/apex_src/man/apex.1 +828 -0
  119. data/ext/apex_ext/apex_src/man/apex.1.md +643 -0
  120. data/ext/apex_ext/apex_src/man/apex.1.new +338 -0
  121. data/ext/apex_ext/apex_src/objc/Apex.swift +237 -0
  122. data/ext/apex_ext/apex_src/objc/NSString+Apex.h +117 -0
  123. data/ext/apex_ext/apex_src/objc/NSString+Apex.m +332 -0
  124. data/ext/apex_ext/apex_src/src/_README.md +358 -0
  125. data/ext/apex_ext/apex_src/src/apex.c +6326 -0
  126. data/ext/apex_ext/apex_src/src/buffer.c +93 -0
  127. data/ext/apex_ext/apex_src/src/extensions/abbreviations.c +362 -0
  128. data/ext/apex_ext/apex_src/src/extensions/abbreviations.h +45 -0
  129. data/ext/apex_ext/apex_src/src/extensions/advanced_footnotes.c +184 -0
  130. data/ext/apex_ext/apex_src/src/extensions/advanced_footnotes.h +50 -0
  131. data/ext/apex_ext/apex_src/src/extensions/advanced_tables.c +1897 -0
  132. data/ext/apex_ext/apex_src/src/extensions/advanced_tables.h +42 -0
  133. data/ext/apex_ext/apex_src/src/extensions/callouts.c +215 -0
  134. data/ext/apex_ext/apex_src/src/extensions/callouts.h +53 -0
  135. data/ext/apex_ext/apex_src/src/extensions/citations.c +2042 -0
  136. data/ext/apex_ext/apex_src/src/extensions/citations.h +163 -0
  137. data/ext/apex_ext/apex_src/src/extensions/critic.c +329 -0
  138. data/ext/apex_ext/apex_src/src/extensions/critic.h +48 -0
  139. data/ext/apex_ext/apex_src/src/extensions/definition_list.c +1670 -0
  140. data/ext/apex_ext/apex_src/src/extensions/definition_list.h +42 -0
  141. data/ext/apex_ext/apex_src/src/extensions/emoji.c +710 -0
  142. data/ext/apex_ext/apex_src/src/extensions/emoji.h +38 -0
  143. data/ext/apex_ext/apex_src/src/extensions/emoji_data.h +942 -0
  144. data/ext/apex_ext/apex_src/src/extensions/fenced_divs.c +925 -0
  145. data/ext/apex_ext/apex_src/src/extensions/fenced_divs.h +43 -0
  146. data/ext/apex_ext/apex_src/src/extensions/github-emoji.txt +869 -0
  147. data/ext/apex_ext/apex_src/src/extensions/grid_tables.c +1121 -0
  148. data/ext/apex_ext/apex_src/src/extensions/grid_tables.h +33 -0
  149. data/ext/apex_ext/apex_src/src/extensions/header_ids.c +626 -0
  150. data/ext/apex_ext/apex_src/src/extensions/header_ids.h +60 -0
  151. data/ext/apex_ext/apex_src/src/extensions/highlight.c +135 -0
  152. data/ext/apex_ext/apex_src/src/extensions/highlight.h +16 -0
  153. data/ext/apex_ext/apex_src/src/extensions/html_markdown.c +408 -0
  154. data/ext/apex_ext/apex_src/src/extensions/html_markdown.h +42 -0
  155. data/ext/apex_ext/apex_src/src/extensions/ial.c +4084 -0
  156. data/ext/apex_ext/apex_src/src/extensions/ial.h +145 -0
  157. data/ext/apex_ext/apex_src/src/extensions/includes.c +1536 -0
  158. data/ext/apex_ext/apex_src/src/extensions/includes.h +54 -0
  159. data/ext/apex_ext/apex_src/src/extensions/index.c +967 -0
  160. data/ext/apex_ext/apex_src/src/extensions/index.h +90 -0
  161. data/ext/apex_ext/apex_src/src/extensions/inline_footnotes.c +205 -0
  162. data/ext/apex_ext/apex_src/src/extensions/inline_footnotes.h +34 -0
  163. data/ext/apex_ext/apex_src/src/extensions/inline_tables.c +332 -0
  164. data/ext/apex_ext/apex_src/src/extensions/inline_tables.h +13 -0
  165. data/ext/apex_ext/apex_src/src/extensions/insert.c +248 -0
  166. data/ext/apex_ext/apex_src/src/extensions/insert.h +18 -0
  167. data/ext/apex_ext/apex_src/src/extensions/math.c +279 -0
  168. data/ext/apex_ext/apex_src/src/extensions/math.h +32 -0
  169. data/ext/apex_ext/apex_src/src/extensions/metadata.c +3046 -0
  170. data/ext/apex_ext/apex_src/src/extensions/metadata.h +125 -0
  171. data/ext/apex_ext/apex_src/src/extensions/relaxed_tables.c +1297 -0
  172. data/ext/apex_ext/apex_src/src/extensions/relaxed_tables.h +39 -0
  173. data/ext/apex_ext/apex_src/src/extensions/special_markers.c +194 -0
  174. data/ext/apex_ext/apex_src/src/extensions/special_markers.h +29 -0
  175. data/ext/apex_ext/apex_src/src/extensions/sup_sub.c +405 -0
  176. data/ext/apex_ext/apex_src/src/extensions/sup_sub.h +16 -0
  177. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.c +468 -0
  178. data/ext/apex_ext/apex_src/src/extensions/syntax_highlight.h +44 -0
  179. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.c +2679 -0
  180. data/ext/apex_ext/apex_src/src/extensions/table_html_postprocess.h +23 -0
  181. data/ext/apex_ext/apex_src/src/extensions/toc.c +255 -0
  182. data/ext/apex_ext/apex_src/src/extensions/toc.h +34 -0
  183. data/ext/apex_ext/apex_src/src/extensions/wiki_links.c +624 -0
  184. data/ext/apex_ext/apex_src/src/extensions/wiki_links.h +58 -0
  185. data/ext/apex_ext/apex_src/src/html_renderer.c +2762 -0
  186. data/ext/apex_ext/apex_src/src/html_renderer.h +126 -0
  187. data/ext/apex_ext/apex_src/src/parser.c +227 -0
  188. data/ext/apex_ext/apex_src/src/plugins.c +895 -0
  189. data/ext/apex_ext/apex_src/src/plugins.h +39 -0
  190. data/ext/apex_ext/apex_src/src/plugins_env.c +187 -0
  191. data/ext/apex_ext/apex_src/src/plugins_remote.c +263 -0
  192. data/ext/apex_ext/apex_src/src/pretty_html.c +358 -0
  193. data/ext/apex_ext/apex_src/src/renderer.c +241 -0
  194. data/ext/apex_ext/apex_src/src/utf8.c +56 -0
  195. data/ext/apex_ext/apex_src/test-linux-build.sh +20 -0
  196. data/ext/apex_ext/apex_src/test.html +103 -0
  197. data/ext/apex_ext/apex_src/test_coverage.sh +121 -0
  198. data/ext/apex_ext/apex_src/test_ial_fenced.md +6 -0
  199. data/ext/apex_ext/apex_src/test_math_norm.py +79 -0
  200. data/ext/apex_ext/apex_src/test_pandoc_output.html +48 -0
  201. data/ext/apex_ext/apex_src/test_spm.sh +107 -0
  202. data/ext/apex_ext/apex_src/tests/ApexSPMTest/main.swift +50 -0
  203. data/ext/apex_ext/apex_src/tests/BENCHMARK_RESULTS.md +229 -0
  204. data/ext/apex_ext/apex_src/tests/CMakeLists.txt +24 -0
  205. data/ext/apex_ext/apex_src/tests/README.md +146 -0
  206. data/ext/apex_ext/apex_src/tests/benchmark.sh +113 -0
  207. data/ext/apex_ext/apex_src/tests/benchmark_comparison.sh +166 -0
  208. data/ext/apex_ext/apex_src/tests/compare_header_ids.sh +31 -0
  209. data/ext/apex_ext/apex_src/tests/fixtures/basic/headers.md +25 -0
  210. data/ext/apex_ext/apex_src/tests/fixtures/basic/list-interruption.md +24 -0
  211. data/ext/apex_ext/apex_src/tests/fixtures/basic/misc_markup.md +33 -0
  212. data/ext/apex_ext/apex_src/tests/fixtures/basic/test_basic.md +26 -0
  213. data/ext/apex_ext/apex_src/tests/fixtures/code/code-blocks.md +260 -0
  214. data/ext/apex_ext/apex_src/tests/fixtures/combine_summary/SUMMARY.md +6 -0
  215. data/ext/apex_ext/apex_src/tests/fixtures/combine_summary/chapter1.md +7 -0
  216. data/ext/apex_ext/apex_src/tests/fixtures/combine_summary/index.txt +9 -0
  217. data/ext/apex_ext/apex_src/tests/fixtures/combine_summary/intro.md +5 -0
  218. data/ext/apex_ext/apex_src/tests/fixtures/combine_summary/section1_1.md +5 -0
  219. data/ext/apex_ext/apex_src/tests/fixtures/comprehensive_test.md +620 -0
  220. data/ext/apex_ext/apex_src/tests/fixtures/debug_ref_image_ial.md +3 -0
  221. data/ext/apex_ext/apex_src/tests/fixtures/demos/ial.md +11 -0
  222. data/ext/apex_ext/apex_src/tests/fixtures/demos/ial_demo.md +177 -0
  223. data/ext/apex_ext/apex_src/tests/fixtures/extensions/emoji-autocorrect.md +94 -0
  224. data/ext/apex_ext/apex_src/tests/fixtures/extensions/emoji_test.md +3 -0
  225. data/ext/apex_ext/apex_src/tests/fixtures/extensions/kbd_test.md +3 -0
  226. data/ext/apex_ext/apex_src/tests/fixtures/ial/bracketed_spans_test.md +74 -0
  227. data/ext/apex_ext/apex_src/tests/fixtures/images/image_and_encoding_test.md +27 -0
  228. data/ext/apex_ext/apex_src/tests/fixtures/images/multimarkdown_image_attributes_test.md +60 -0
  229. data/ext/apex_ext/apex_src/tests/fixtures/images/pandoc_ial_image_test.md +27 -0
  230. data/ext/apex_ext/apex_src/tests/fixtures/images/width_height_conversion_test.md +94 -0
  231. data/ext/apex_ext/apex_src/tests/fixtures/img-in-div.md +16 -0
  232. data/ext/apex_ext/apex_src/tests/fixtures/includes/code.py +4 -0
  233. data/ext/apex_ext/apex_src/tests/fixtures/includes/data.csv +5 -0
  234. data/ext/apex_ext/apex_src/tests/fixtures/includes/data.tsv +5 -0
  235. data/ext/apex_ext/apex_src/tests/fixtures/includes/image.png +2 -0
  236. data/ext/apex_ext/apex_src/tests/fixtures/includes/metadata_options.yml +11 -0
  237. data/ext/apex_ext/apex_src/tests/fixtures/includes/nested.md +8 -0
  238. data/ext/apex_ext/apex_src/tests/fixtures/includes/raw.html +4 -0
  239. data/ext/apex_ext/apex_src/tests/fixtures/includes/simple.md +7 -0
  240. data/ext/apex_ext/apex_src/tests/fixtures/includes/test_image.png +0 -0
  241. data/ext/apex_ext/apex_src/tests/fixtures/large_doc.md +1094 -0
  242. data/ext/apex_ext/apex_src/tests/fixtures/metadata_options.yml +11 -0
  243. data/ext/apex_ext/apex_src/tests/fixtures/output/gfm_header_id_test.md +96 -0
  244. data/ext/apex_ext/apex_src/tests/fixtures/output/test_citations.md +43 -0
  245. data/ext/apex_ext/apex_src/tests/fixtures/output/test_def_list_links.md +12 -0
  246. data/ext/apex_ext/apex_src/tests/fixtures/output/test_index_mmark.md +53 -0
  247. data/ext/apex_ext/apex_src/tests/fixtures/output/test_index_textindex.md +37 -0
  248. data/ext/apex_ext/apex_src/tests/fixtures/tables/advanced_tables_test.md +93 -0
  249. data/ext/apex_ext/apex_src/tests/fixtures/tables/inline_tables_test.md +38 -0
  250. data/ext/apex_ext/apex_src/tests/fixtures/tables/relaxed-table.md +12 -0
  251. data/ext/apex_ext/apex_src/tests/fixtures/tables/table_cr_line_endings.md +15 -0
  252. data/ext/apex_ext/apex_src/tests/fixtures/tables/table_no_trailing_newline.md +15 -0
  253. data/ext/apex_ext/apex_src/tests/generate_gfm_ids.sh +105 -0
  254. data/ext/apex_ext/apex_src/tests/generate_ial_demo.sh +143 -0
  255. data/ext/apex_ext/apex_src/tests/gfm_id_comparison_summary.md +96 -0
  256. data/ext/apex_ext/apex_src/tests/gh_api_test.md +6 -0
  257. data/ext/apex_ext/apex_src/tests/ial_demo.html +186 -0
  258. data/ext/apex_ext/apex_src/tests/include_code.py +19 -0
  259. data/ext/apex_ext/apex_src/tests/include_snippet.md +15 -0
  260. data/ext/apex_ext/apex_src/tests/multi_file_cli_test.sh +64 -0
  261. data/ext/apex_ext/apex_src/tests/sample_data.csv +7 -0
  262. data/ext/apex_ext/apex_src/tests/table_escaped_ltlt.md +4 -0
  263. data/ext/apex_ext/apex_src/tests/test_basic.c +74 -0
  264. data/ext/apex_ext/apex_src/tests/test_extensions.c +2116 -0
  265. data/ext/apex_ext/apex_src/tests/test_helpers.c +183 -0
  266. data/ext/apex_ext/apex_src/tests/test_helpers.h +91 -0
  267. data/ext/apex_ext/apex_src/tests/test_ial.c +282 -0
  268. data/ext/apex_ext/apex_src/tests/test_links.c +418 -0
  269. data/ext/apex_ext/apex_src/tests/test_marked_integration.c +265 -0
  270. data/ext/apex_ext/apex_src/tests/test_metadata.c +908 -0
  271. data/ext/apex_ext/apex_src/tests/test_output.c +1118 -0
  272. data/ext/apex_ext/apex_src/tests/test_plugins.c +219 -0
  273. data/ext/apex_ext/apex_src/tests/test_refs.bib +31 -0
  274. data/ext/apex_ext/apex_src/tests/test_runner.c +244 -0
  275. data/ext/apex_ext/apex_src/tests/test_syntax_highlight.c +198 -0
  276. data/ext/apex_ext/apex_src/tests/test_tables.c +862 -0
  277. data/ext/apex_ext/apex_src/tests/update_benchmarks.sh +9 -0
  278. data/ext/apex_ext/apex_src/tests/yaml_test.md +13 -0
  279. data/ext/apex_ext/apex_src/tests.rb +39 -0
  280. data/ext/apex_ext/apex_src/vendor/cmark-gfm/CMakeLists.txt +48 -0
  281. data/ext/apex_ext/apex_src/vendor/cmark-gfm/COPYING +170 -0
  282. data/ext/apex_ext/apex_src/vendor/cmark-gfm/CheckFileOffsetBits.c +14 -0
  283. data/ext/apex_ext/apex_src/vendor/cmark-gfm/CheckFileOffsetBits.cmake +43 -0
  284. data/ext/apex_ext/apex_src/vendor/cmark-gfm/FindAsan.cmake +74 -0
  285. data/ext/apex_ext/apex_src/vendor/cmark-gfm/Makefile.nmake +38 -0
  286. data/ext/apex_ext/apex_src/vendor/cmark-gfm/README.md +206 -0
  287. data/ext/apex_ext/apex_src/vendor/cmark-gfm/api_test/CMakeLists.txt +30 -0
  288. data/ext/apex_ext/apex_src/vendor/cmark-gfm/api_test/cplusplus.cpp +15 -0
  289. data/ext/apex_ext/apex_src/vendor/cmark-gfm/api_test/cplusplus.h +16 -0
  290. data/ext/apex_ext/apex_src/vendor/cmark-gfm/api_test/harness.c +111 -0
  291. data/ext/apex_ext/apex_src/vendor/cmark-gfm/api_test/harness.h +35 -0
  292. data/ext/apex_ext/apex_src/vendor/cmark-gfm/api_test/main.c +1169 -0
  293. data/ext/apex_ext/apex_src/vendor/cmark-gfm/appveyor.yml +21 -0
  294. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-bq-flat.md +16 -0
  295. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-bq-nested.md +13 -0
  296. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-code.md +11 -0
  297. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-fences.md +14 -0
  298. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-heading.md +9 -0
  299. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-hr.md +10 -0
  300. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-html.md +32 -0
  301. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-lheading.md +8 -0
  302. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-list-flat.md +67 -0
  303. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-list-nested.md +36 -0
  304. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-ref-flat.md +15 -0
  305. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/block-ref-nested.md +17 -0
  306. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-autolink.md +14 -0
  307. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-backticks.md +3 -0
  308. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-em-flat.md +5 -0
  309. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-em-nested.md +5 -0
  310. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-em-worst.md +5 -0
  311. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-entity.md +11 -0
  312. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-escape.md +15 -0
  313. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-html.md +44 -0
  314. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-links-flat.md +23 -0
  315. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-links-nested.md +13 -0
  316. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/inline-newlines.md +24 -0
  317. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/lorem1.md +13 -0
  318. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/samples/rawtabs.md +18 -0
  319. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/statistics.py +595 -0
  320. data/ext/apex_ext/apex_src/vendor/cmark-gfm/bench/stats.py +19 -0
  321. data/ext/apex_ext/apex_src/vendor/cmark-gfm/benchmarks.md +33 -0
  322. data/ext/apex_ext/apex_src/vendor/cmark-gfm/changelog.txt +1245 -0
  323. data/ext/apex_ext/apex_src/vendor/cmark-gfm/data/CaseFolding.txt +1495 -0
  324. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/CMakeLists.txt +119 -0
  325. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/autolink.c +508 -0
  326. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/autolink.h +8 -0
  327. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/cmark-gfm-core-extensions.h +54 -0
  328. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/core-extensions.c +27 -0
  329. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/ext_scanners.c +879 -0
  330. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/ext_scanners.h +24 -0
  331. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/ext_scanners.re +92 -0
  332. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/strikethrough.c +167 -0
  333. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/strikethrough.h +9 -0
  334. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/table.c +917 -0
  335. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/table.h +12 -0
  336. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/tagfilter.c +60 -0
  337. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/tagfilter.h +8 -0
  338. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/tasklist.c +156 -0
  339. data/ext/apex_ext/apex_src/vendor/cmark-gfm/extensions/tasklist.h +8 -0
  340. data/ext/apex_ext/apex_src/vendor/cmark-gfm/fuzz/CMakeLists.txt +22 -0
  341. data/ext/apex_ext/apex_src/vendor/cmark-gfm/fuzz/README.md +12 -0
  342. data/ext/apex_ext/apex_src/vendor/cmark-gfm/fuzz/fuzz_quadratic.c +91 -0
  343. data/ext/apex_ext/apex_src/vendor/cmark-gfm/fuzz/fuzz_quadratic_brackets.c +110 -0
  344. data/ext/apex_ext/apex_src/vendor/cmark-gfm/fuzz/fuzzloop.sh +28 -0
  345. data/ext/apex_ext/apex_src/vendor/cmark-gfm/man/CMakeLists.txt +10 -0
  346. data/ext/apex_ext/apex_src/vendor/cmark-gfm/man/make_man_page.py +133 -0
  347. data/ext/apex_ext/apex_src/vendor/cmark-gfm/man/man1/cmark-gfm.1 +78 -0
  348. data/ext/apex_ext/apex_src/vendor/cmark-gfm/man/man3/cmark-gfm.3 +1041 -0
  349. data/ext/apex_ext/apex_src/vendor/cmark-gfm/nmake.bat +1 -0
  350. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/CMakeLists.txt +230 -0
  351. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/arena.c +104 -0
  352. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/blocks.c +1622 -0
  353. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/buffer.c +278 -0
  354. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/buffer.h +116 -0
  355. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/case_fold_switch.inc +4327 -0
  356. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/chunk.h +135 -0
  357. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/cmark-gfm-extension_api.h +737 -0
  358. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/cmark-gfm.h +833 -0
  359. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/cmark-gfm_version.h.in +7 -0
  360. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/cmark.c +55 -0
  361. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/cmark_ctype.c +44 -0
  362. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/cmark_ctype.h +33 -0
  363. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/commonmark.c +514 -0
  364. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/config.h.in +76 -0
  365. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/entities.inc +2138 -0
  366. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/footnotes.c +63 -0
  367. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/footnotes.h +27 -0
  368. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/houdini.h +57 -0
  369. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/houdini_href_e.c +100 -0
  370. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/houdini_html_e.c +66 -0
  371. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/houdini_html_u.c +149 -0
  372. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/html.c +502 -0
  373. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/html.h +27 -0
  374. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/inlines.c +1788 -0
  375. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/inlines.h +29 -0
  376. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/iterator.c +159 -0
  377. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/iterator.h +26 -0
  378. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/latex.c +468 -0
  379. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/libcmark-gfm.pc.in +10 -0
  380. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/linked_list.c +37 -0
  381. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/main.c +328 -0
  382. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/man.c +274 -0
  383. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/map.c +129 -0
  384. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/map.h +44 -0
  385. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/node.c +1045 -0
  386. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/node.h +167 -0
  387. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/parser.h +59 -0
  388. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/plaintext.c +218 -0
  389. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/plugin.c +36 -0
  390. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/plugin.h +34 -0
  391. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/references.c +43 -0
  392. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/references.h +26 -0
  393. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/registry.c +63 -0
  394. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/registry.h +24 -0
  395. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/render.c +213 -0
  396. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/render.h +62 -0
  397. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/scanners.c +14056 -0
  398. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/scanners.h +70 -0
  399. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/scanners.re +365 -0
  400. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/syntax_extension.c +149 -0
  401. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/syntax_extension.h +34 -0
  402. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/utf8.c +317 -0
  403. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/utf8.h +35 -0
  404. data/ext/apex_ext/apex_src/vendor/cmark-gfm/src/xml.c +182 -0
  405. data/ext/apex_ext/apex_src/vendor/cmark-gfm/suppressions +10 -0
  406. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/CMakeLists.txt +114 -0
  407. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/afl_test_cases/test.md +49 -0
  408. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/cmark-fuzz.c +58 -0
  409. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/cmark.py +105 -0
  410. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/entity_tests.py +67 -0
  411. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/extensions-full-info-string.txt +0 -0
  412. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/extensions-table-prefer-style-attributes.txt +38 -0
  413. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/extensions.txt +920 -0
  414. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/fuzzing_dictionary +67 -0
  415. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/normalize.py +194 -0
  416. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/pathological_tests.py +160 -0
  417. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/regression.txt +375 -0
  418. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/roundtrip_tests.py +50 -0
  419. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/run-cmark-fuzz +4 -0
  420. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/smart_punct.txt +177 -0
  421. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/spec.txt +10212 -0
  422. data/ext/apex_ext/apex_src/vendor/cmark-gfm/test/spec_tests.py +152 -0
  423. data/ext/apex_ext/apex_src/vendor/cmark-gfm/toolchain-mingw32.cmake +17 -0
  424. data/ext/apex_ext/apex_src/vendor/cmark-gfm/tools/Dockerfile +41 -0
  425. data/ext/apex_ext/apex_src/vendor/cmark-gfm/tools/appveyor-build.bat +13 -0
  426. data/ext/apex_ext/apex_src/vendor/cmark-gfm/tools/make_entities_inc.py +32 -0
  427. data/ext/apex_ext/apex_src/vendor/cmark-gfm/tools/mkcasefold.pl +22 -0
  428. data/ext/apex_ext/apex_src/vendor/cmark-gfm/tools/xml2md.xsl +319 -0
  429. data/ext/apex_ext/apex_src/vendor/cmark-gfm/tools/xml2md_gfm.xsl +80 -0
  430. data/ext/apex_ext/apex_src/vendor/cmark-gfm/why-cmark-and-not-x.md +104 -0
  431. data/ext/apex_ext/apex_src/vendor/cmark-gfm/wrappers/wrapper.js +6 -0
  432. data/ext/apex_ext/apex_src/vendor/cmark-gfm/wrappers/wrapper.py +37 -0
  433. data/ext/apex_ext/apex_src/vendor/cmark-gfm/wrappers/wrapper.rb +15 -0
  434. data/ext/apex_ext/apex_src/vendor/cmark-gfm/wrappers/wrapper.rkt +208 -0
  435. data/ext/apex_ext/apex_src/vendor/cmark-gfm/wrappers/wrapper_ext.py +109 -0
  436. data/ext/apex_ext/apex_src/vendor/libyaml/CMakeLists.txt +160 -0
  437. data/ext/apex_ext/apex_src/vendor/libyaml/Changes +372 -0
  438. data/ext/apex_ext/apex_src/vendor/libyaml/License +20 -0
  439. data/ext/apex_ext/apex_src/vendor/libyaml/Makefile.am +51 -0
  440. data/ext/apex_ext/apex_src/vendor/libyaml/ReadMe.md +46 -0
  441. data/ext/apex_ext/apex_src/vendor/libyaml/announcement.msg +89 -0
  442. data/ext/apex_ext/apex_src/vendor/libyaml/bootstrap +3 -0
  443. data/ext/apex_ext/apex_src/vendor/libyaml/cmake/config.h.in +4 -0
  444. data/ext/apex_ext/apex_src/vendor/libyaml/configure.ac +73 -0
  445. data/ext/apex_ext/apex_src/vendor/libyaml/doc/doxygen.cfg +222 -0
  446. data/ext/apex_ext/apex_src/vendor/libyaml/docker/README.mkd +17 -0
  447. data/ext/apex_ext/apex_src/vendor/libyaml/docker/alpine-3.7 +26 -0
  448. data/ext/apex_ext/apex_src/vendor/libyaml/docker/fedora-25 +26 -0
  449. data/ext/apex_ext/apex_src/vendor/libyaml/docker/ubuntu-14.04 +29 -0
  450. data/ext/apex_ext/apex_src/vendor/libyaml/docker/ubuntu-16.04 +24 -0
  451. data/ext/apex_ext/apex_src/vendor/libyaml/examples/anchors.yaml +10 -0
  452. data/ext/apex_ext/apex_src/vendor/libyaml/examples/array.yaml +2 -0
  453. data/ext/apex_ext/apex_src/vendor/libyaml/examples/global-tag.yaml +14 -0
  454. data/ext/apex_ext/apex_src/vendor/libyaml/examples/json.yaml +1 -0
  455. data/ext/apex_ext/apex_src/vendor/libyaml/examples/mapping.yaml +2 -0
  456. data/ext/apex_ext/apex_src/vendor/libyaml/examples/numbers.yaml +1 -0
  457. data/ext/apex_ext/apex_src/vendor/libyaml/examples/strings.yaml +7 -0
  458. data/ext/apex_ext/apex_src/vendor/libyaml/examples/tags.yaml +7 -0
  459. data/ext/apex_ext/apex_src/vendor/libyaml/examples/yaml-version.yaml +3 -0
  460. data/ext/apex_ext/apex_src/vendor/libyaml/include/Makefile.am +17 -0
  461. data/ext/apex_ext/apex_src/vendor/libyaml/include/yaml.h +1999 -0
  462. data/ext/apex_ext/apex_src/vendor/libyaml/pkg/ReadMe.md +77 -0
  463. data/ext/apex_ext/apex_src/vendor/libyaml/pkg/docker/Dockerfile +32 -0
  464. data/ext/apex_ext/apex_src/vendor/libyaml/pkg/docker/output/ReadMe +1 -0
  465. data/ext/apex_ext/apex_src/vendor/libyaml/pkg/docker/scripts/libyaml-dist.sh +23 -0
  466. data/ext/apex_ext/apex_src/vendor/libyaml/regression-inputs/clusterfuzz-testcase-minimized-5607885063061504.yml +1 -0
  467. data/ext/apex_ext/apex_src/vendor/libyaml/src/Makefile.am +4 -0
  468. data/ext/apex_ext/apex_src/vendor/libyaml/src/api.c +1393 -0
  469. data/ext/apex_ext/apex_src/vendor/libyaml/src/dumper.c +394 -0
  470. data/ext/apex_ext/apex_src/vendor/libyaml/src/emitter.c +2358 -0
  471. data/ext/apex_ext/apex_src/vendor/libyaml/src/loader.c +544 -0
  472. data/ext/apex_ext/apex_src/vendor/libyaml/src/parser.c +1416 -0
  473. data/ext/apex_ext/apex_src/vendor/libyaml/src/reader.c +469 -0
  474. data/ext/apex_ext/apex_src/vendor/libyaml/src/scanner.c +3598 -0
  475. data/ext/apex_ext/apex_src/vendor/libyaml/src/writer.c +141 -0
  476. data/ext/apex_ext/apex_src/vendor/libyaml/src/yaml_private.h +684 -0
  477. data/ext/apex_ext/apex_src/vendor/libyaml/tests/CMakeLists.txt +27 -0
  478. data/ext/apex_ext/apex_src/vendor/libyaml/tests/Makefile.am +9 -0
  479. data/ext/apex_ext/apex_src/vendor/libyaml/tests/ReadMe.md +63 -0
  480. data/ext/apex_ext/apex_src/vendor/libyaml/tests/example-deconstructor-alt.c +800 -0
  481. data/ext/apex_ext/apex_src/vendor/libyaml/tests/example-deconstructor.c +1127 -0
  482. data/ext/apex_ext/apex_src/vendor/libyaml/tests/example-reformatter-alt.c +217 -0
  483. data/ext/apex_ext/apex_src/vendor/libyaml/tests/example-reformatter.c +202 -0
  484. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-all-tests.sh +29 -0
  485. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-dumper.c +314 -0
  486. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-emitter-test-suite.c +290 -0
  487. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-emitter.c +327 -0
  488. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-loader.c +63 -0
  489. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-parser-test-suite.c +196 -0
  490. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-parser.c +88 -0
  491. data/ext/apex_ext/apex_src/vendor/libyaml/tests/run-scanner.c +63 -0
  492. data/ext/apex_ext/apex_src/vendor/libyaml/tests/test-reader.c +354 -0
  493. data/ext/apex_ext/apex_src/vendor/libyaml/tests/test-version.c +29 -0
  494. data/ext/apex_ext/apex_src/vendor/libyaml/yaml-0.1.pc.in +10 -0
  495. data/ext/apex_ext/apex_src/vendor/libyaml/yamlConfig.cmake.in +16 -0
  496. data/ext/apex_ext/extconf.rb +103 -0
  497. data/lib/apex/configurable.rb +46 -0
  498. data/lib/apex/document.rb +66 -0
  499. data/lib/apex/version.rb +15 -0
  500. data/lib/apex.rb +28 -0
  501. metadata +544 -0
@@ -0,0 +1,2762 @@
1
+ /**
2
+ * Custom HTML Renderer for Apex
3
+ * Implementation
4
+ */
5
+
6
+ #include "html_renderer.h"
7
+ #include "table.h" /* For CMARK_NODE_TABLE */
8
+ #include "extensions/header_ids.h"
9
+ #include <string.h>
10
+ #include <stdlib.h>
11
+ #include <stdio.h>
12
+ #include <stdbool.h>
13
+ #include <ctype.h>
14
+
15
+ /**
16
+ * Inject attributes into HTML opening tags
17
+ * This postprocesses the HTML output to add attributes stored in user_data
18
+ */
19
+ __attribute__((unused))
20
+ static char *inject_attributes_in_html(const char *html, cmark_node *document) {
21
+ if (!html || !document) return html ? strdup(html) : NULL;
22
+
23
+ /* For now, we'll use a simpler approach: */
24
+ /* Since we can't easily modify cmark's renderer, we'll inject attributes */
25
+ /* by pattern matching on the HTML output */
26
+
27
+ /* This is a simplified implementation */
28
+ /* A full implementation would require forking cmark's HTML renderer */
29
+
30
+ return strdup(html);
31
+ }
32
+
33
+ /**
34
+ * Walk AST and collect nodes with attributes
35
+ */
36
+ typedef struct attr_node {
37
+ cmark_node *node;
38
+ char *attrs;
39
+ cmark_node_type node_type;
40
+ int element_index; /* nth element of this type (0=first p, 1=second p, etc.) */
41
+ char *text_fingerprint; /* First 50 chars of text content for matching */
42
+ struct attr_node *next;
43
+ } attr_node;
44
+
45
/**
 * Report whether the attribute name [name, name + len) is one of the
 * internal attributes that must not be emitted as IAL output.
 * The comparison is length-exact, so prefixes never match.
 */
static bool ial_is_internal_attr_name(const char *name, size_t len) {
    static const char *const internal_names[] = {
        "data-caption", "data-remove", "colspan", "rowspan"
    };
    for (size_t i = 0; i < sizeof internal_names / sizeof internal_names[0]; i++) {
        if (strlen(internal_names[i]) == len && memcmp(name, internal_names[i], len) == 0) {
            return true;
        }
    }
    return false;
}

/**
 * Given a pointer to the character right after an attribute name, return a
 * pointer just past the optional `=value` part. Values may be double-quoted,
 * single-quoted (a backslash escapes the next character) or unquoted
 * (terminated by a space or tab). Without '=' the pointer is returned as is.
 */
static const char *ial_skip_attr_value(const char *p) {
    if (*p != '=') return p;
    p++;

    if (*p == '"' || *p == '\'') {
        char quote = *p++;
        while (*p && *p != quote) {
            if (*p == '\\' && p[1]) p++;   /* keep the escaped character */
            p++;
        }
        if (*p == quote) p++;              /* consume the closing quote */
    } else {
        while (*p && *p != ' ' && *p != '\t') p++;
    }
    return p;
}

/**
 * Extract IAL attributes (id, class, key="value") from an attribute string,
 * dropping the internal attributes data-caption, data-remove, colspan and
 * rowspan.
 *
 * @param attrs  NUL-terminated attribute string; may be NULL.
 * @return Newly allocated string in which every kept attribute is preceded
 *         by a single space (so the result starts with ' '), or NULL when
 *         `attrs` is NULL, no attribute survives filtering, or allocation
 *         fails. The caller must free() the result.
 */
static char *extract_ial_from_table_attrs(const char *attrs) {
    if (!attrs) return NULL;

    size_t result_cap = strlen(attrs) + 1;
    char *result = malloc(result_cap);
    if (!result) return NULL;
    size_t used = 0;
    result[0] = '\0';

    const char *p = attrs;
    while (*p && isspace((unsigned char)*p)) p++;

    while (*p) {
        const char *attr_start = p;

        /* The name runs up to '=', a space or a tab. */
        const char *name_end = p;
        while (*name_end && *name_end != '=' && *name_end != ' ' && *name_end != '\t') {
            name_end++;
        }
        const char *attr_end = ial_skip_attr_value(name_end);

        if (!ial_is_internal_attr_name(attr_start, (size_t)(name_end - attr_start))) {
            size_t attr_len = (size_t)(attr_end - attr_start);

            /* Room is needed for a separating space, the attribute and the
             * terminator; the leading space can push the output past the
             * input length, hence the growth path. */
            if (used + attr_len + 2 >= result_cap) {
                size_t new_cap = (used + attr_len + 2) * 2;
                char *grown = realloc(result, new_cap);
                if (!grown) {
                    free(result);
                    return NULL;
                }
                result = grown;
                result_cap = new_cap;
            }

            if (used == 0 || result[used - 1] != ' ') {
                result[used++] = ' ';
            }
            memcpy(result + used, attr_start, attr_len);
            used += attr_len;
            result[used] = '\0';
        }

        p = attr_end;
        while (*p && isspace((unsigned char)*p)) p++;
    }

    if (used == 0) {
        /* Nothing but internal attributes (or an empty string). */
        free(result);
        return NULL;
    }
    return result;
}
156
+
157
/*
 * Running per-element-type counts. Each counter yields the zero-based index
 * of the next element of that type; the same structure is used for the AST
 * walk and for the scan over the rendered HTML so the indices can be compared.
 */
typedef struct {
    /* Block-level elements */
    int para_count, heading_count, table_count, blockquote_count;
    int list_count, item_count, code_count;
    /* Inline elements */
    int link_count, image_count, strong_count, emph_count, code_inline_count;
} element_counters;
172
+
173
+ /**
174
+ * Get text fingerprint from node (first 50 chars for matching)
175
+ */
176
+ static char *get_node_text_fingerprint(cmark_node *node) {
177
+ if (!node) return NULL;
178
+
179
+ cmark_node_type type = cmark_node_get_type(node);
180
+
181
+ /* For headings, get the literal */
182
+ if (type == CMARK_NODE_HEADING) {
183
+ cmark_node *text = cmark_node_first_child(node);
184
+ if (text && cmark_node_get_type(text) == CMARK_NODE_TEXT) {
185
+ const char *literal = cmark_node_get_literal(text);
186
+ if (literal) {
187
+ size_t len = strlen(literal);
188
+ if (len > 50) len = 50;
189
+ char *fingerprint = malloc(len + 1);
190
+ if (fingerprint) {
191
+ memcpy(fingerprint, literal, len);
192
+ fingerprint[len] = '\0';
193
+ return fingerprint;
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+ /* For paragraphs, get text from first text node */
200
+ if (type == CMARK_NODE_PARAGRAPH) {
201
+ cmark_node *child = cmark_node_first_child(node);
202
+ if (child && cmark_node_get_type(child) == CMARK_NODE_TEXT) {
203
+ const char *literal = cmark_node_get_literal(child);
204
+ if (literal) {
205
+ size_t len = strlen(literal);
206
+ if (len > 50) len = 50;
207
+ char *fingerprint = malloc(len + 1);
208
+ if (fingerprint) {
209
+ memcpy(fingerprint, literal, len);
210
+ fingerprint[len] = '\0';
211
+ return fingerprint;
212
+ }
213
+ }
214
+ }
215
+ }
216
+
217
+ /* For links, use the URL */
218
+ if (type == CMARK_NODE_LINK) {
219
+ const char *url = cmark_node_get_url(node);
220
+ if (url) {
221
+ size_t len = strlen(url);
222
+ if (len > 50) len = 50;
223
+ char *fingerprint = malloc(len + 1);
224
+ if (fingerprint) {
225
+ memcpy(fingerprint, url, len);
226
+ fingerprint[len] = '\0';
227
+ return fingerprint;
228
+ }
229
+ }
230
+ }
231
+
232
+ /* For images, use the URL */
233
+ if (type == CMARK_NODE_IMAGE) {
234
+ const char *url = cmark_node_get_url(node);
235
+ if (url) {
236
+ size_t len = strlen(url);
237
+ if (len > 50) len = 50;
238
+ char *fingerprint = malloc(len + 1);
239
+ if (fingerprint) {
240
+ memcpy(fingerprint, url, len);
241
+ fingerprint[len] = '\0';
242
+ return fingerprint;
243
+ }
244
+ }
245
+ }
246
+
247
+ return NULL;
248
+ }
249
+
250
+ static void collect_nodes_with_attrs_recursive(cmark_node *node, attr_node **list, element_counters *counters) {
251
+ if (!node) return;
252
+
253
+ cmark_node_type type = cmark_node_get_type(node);
254
+
255
+ /* Increment counter for this element type */
256
+ int elem_idx = -1;
257
+ if (type == CMARK_NODE_PARAGRAPH) elem_idx = counters->para_count++;
258
+ else if (type >= CMARK_NODE_HEADING && type <= CMARK_NODE_HEADING + 5) elem_idx = counters->heading_count++;
259
+ else if (type == CMARK_NODE_TABLE) {
260
+ /* For tables, increment the counter first, then use (count - 1) as the index */
261
+ /* This ensures the index matches the HTML renderer's count of <table> tags */
262
+ elem_idx = counters->table_count++;
263
+ }
264
+ else if (type == CMARK_NODE_BLOCK_QUOTE) elem_idx = counters->blockquote_count++;
265
+ else if (type == CMARK_NODE_LIST) elem_idx = counters->list_count++;
266
+ else if (type == CMARK_NODE_ITEM) elem_idx = counters->item_count++;
267
+ else if (type == CMARK_NODE_CODE_BLOCK) elem_idx = counters->code_count++;
268
+ /* Inline elements need indices too - each type has its own counter */
269
+ else if (type == CMARK_NODE_LINK) elem_idx = counters->link_count++;
270
+ else if (type == CMARK_NODE_IMAGE) elem_idx = counters->image_count++;
271
+ else if (type == CMARK_NODE_STRONG) elem_idx = counters->strong_count++;
272
+ else if (type == CMARK_NODE_EMPH) elem_idx = counters->emph_count++;
273
+ else if (type == CMARK_NODE_CODE) elem_idx = counters->code_inline_count++;
274
+
275
+ /* Check if this node has attributes */
276
+ void *user_data = cmark_node_get_user_data(node);
277
+ if (user_data) {
278
+ attr_node *new_node = malloc(sizeof(attr_node));
279
+ if (new_node) {
280
+ new_node->node = node;
281
+ new_node->attrs = (char *)user_data;
282
+ new_node->node_type = type;
283
+ new_node->element_index = elem_idx;
284
+ new_node->text_fingerprint = get_node_text_fingerprint(node);
285
+ new_node->next = *list;
286
+ *list = new_node;
287
+ }
288
+
289
+ /* If node is marked for removal, don't traverse children */
290
+ if (strstr((char *)user_data, "data-remove")) {
291
+ return;
292
+ }
293
+ }
294
+
295
+ /* Recurse */
296
+ for (cmark_node *child = cmark_node_first_child(node); child; child = cmark_node_next(child)) {
297
+ collect_nodes_with_attrs_recursive(child, list, counters);
298
+ }
299
+ }
300
+
301
+ static void collect_nodes_with_attrs(cmark_node *node, attr_node **list) {
302
+ element_counters counters = {0};
303
+ collect_nodes_with_attrs_recursive(node, list, &counters);
304
+
305
+ /* Reverse the list to get document order */
306
+ attr_node *reversed = NULL;
307
+ while (*list) {
308
+ attr_node *next = (*list)->next;
309
+ (*list)->next = reversed;
310
+ reversed = *list;
311
+ *list = next;
312
+ }
313
+ *list = reversed;
314
+ }
315
+
316
+ /**
317
+ * Enhanced HTML rendering with attribute support
318
+ */
319
+ char *apex_render_html_with_attributes(cmark_node *document, int options) {
320
+ if (!document) return NULL;
321
+
322
+ /* First, render normally */
323
+ char *html = cmark_render_html(document, options, NULL);
324
+ if (!html) return NULL;
325
+
326
+ /* Collect all nodes with attributes */
327
+ attr_node *attr_list = NULL;
328
+ collect_nodes_with_attrs(document, &attr_list);
329
+
330
+ if (!attr_list) {
331
+ return html; /* No attributes to inject */
332
+ }
333
+
334
+ /* Build new HTML with attributes injected */
335
+ size_t html_len = strlen(html);
336
+
337
+ /* Calculate needed capacity: original HTML + all attribute strings */
338
+ size_t attrs_size = 0;
339
+ for (attr_node *a = attr_list; a; a = a->next) {
340
+ attrs_size += strlen(a->attrs);
341
+ }
342
+ size_t capacity = html_len + attrs_size + 1024; /* +1KB buffer */
343
+ char *output = malloc(capacity);
344
+ if (!output) {
345
+ /* Clean up attr list */
346
+ while (attr_list) {
347
+ attr_node *next = attr_list->next;
348
+ free(attr_list);
349
+ attr_list = next;
350
+ }
351
+ return html;
352
+ }
353
+
354
+ const char *read = html;
355
+ char *write = output;
356
+ size_t remaining = capacity;
357
+
358
+ /* Track which attributes we've used */
359
+ int attr_count = 0;
360
+ for (attr_node *a = attr_list; a; a = a->next) attr_count++;
361
+ bool *used = calloc(attr_count + 1, sizeof(bool));
362
+
363
+ /* Track element counts in HTML (same as AST walker) */
364
+ element_counters html_counters = {0};
365
+
366
+ /* Process HTML, injecting attributes */
367
+ while (*read) {
368
+ /* Check if we're at an opening tag */
369
+ if (*read == '<' && read[1] != '/' && read[1] != '!') {
370
+ const char *tag_start = read + 1;
371
+ const char *tag_name_end = tag_start;
372
+
373
+ /* Get tag name */
374
+ while (*tag_name_end && !isspace((unsigned char)*tag_name_end) &&
375
+ *tag_name_end != '>' && *tag_name_end != '/') {
376
+ tag_name_end++;
377
+ }
378
+
379
+ /* Find the end of the tag (> or />) */
380
+ const char *tag_end = tag_name_end;
381
+ while (*tag_end && *tag_end != '>') tag_end++;
382
+
383
+ /* Check if this is a block tag or table cell we care about */
384
+ int tag_len = tag_name_end - tag_start;
385
+
386
+ /* Determine element type and increment counter */
387
+ cmark_node_type elem_type = 0;
388
+ int elem_idx = -1;
389
+
390
+ if (tag_len == 1 && *tag_start == 'p') {
391
+ elem_type = CMARK_NODE_PARAGRAPH;
392
+ elem_idx = html_counters.para_count++;
393
+ } else if (tag_len == 2 && tag_start[0] == 'h' && tag_start[1] >= '1' && tag_start[1] <= '6') {
394
+ elem_type = CMARK_NODE_HEADING;
395
+ elem_idx = html_counters.heading_count++;
396
+ } else if (tag_len == 10 && memcmp(tag_start, "blockquote", 10) == 0) {
397
+ elem_type = CMARK_NODE_BLOCK_QUOTE;
398
+ elem_idx = html_counters.blockquote_count++;
399
+ } else if (tag_len == 5 && memcmp(tag_start, "table", 5) == 0) {
400
+ elem_type = CMARK_NODE_TABLE;
401
+ elem_idx = html_counters.table_count++;
402
+ } else if (tag_len == 2 && (memcmp(tag_start, "ul", 2) == 0 || memcmp(tag_start, "ol", 2) == 0)) {
403
+ elem_type = CMARK_NODE_LIST;
404
+ elem_idx = html_counters.list_count++;
405
+ } else if (tag_len == 2 && memcmp(tag_start, "li", 2) == 0) {
406
+ elem_type = CMARK_NODE_ITEM;
407
+ elem_idx = html_counters.item_count++;
408
+ } else if (tag_len == 3 && memcmp(tag_start, "pre", 3) == 0) {
409
+ elem_type = CMARK_NODE_CODE_BLOCK;
410
+ elem_idx = html_counters.code_count++;
411
+ } else if (tag_len == 1 && *tag_start == 'a') {
412
+ /* Links - inline elements */
413
+ elem_type = CMARK_NODE_LINK;
414
+ elem_idx = html_counters.link_count++;
415
+ } else if (tag_len == 3 && memcmp(tag_start, "img", 3) == 0) {
416
+ /* Images - inline elements */
417
+ elem_type = CMARK_NODE_IMAGE;
418
+ elem_idx = html_counters.image_count++;
419
+ } else if (tag_len == 6 && memcmp(tag_start, "strong", 6) == 0) {
420
+ /* Strong - inline elements */
421
+ elem_type = CMARK_NODE_STRONG;
422
+ elem_idx = html_counters.strong_count++;
423
+ } else if (tag_len == 2 && memcmp(tag_start, "em", 2) == 0) {
424
+ /* Emphasis - inline elements */
425
+ elem_type = CMARK_NODE_EMPH;
426
+ elem_idx = html_counters.emph_count++;
427
+ } else if (tag_len == 4 && memcmp(tag_start, "code", 4) == 0) {
428
+ /* Code - inline elements */
429
+ elem_type = CMARK_NODE_CODE;
430
+ elem_idx = html_counters.code_inline_count++;
431
+ }
432
+
433
+ /* Check if we should skip this element (marked for removal) */
434
+ /* We do this BEFORE the main matching to remove elements first */
435
+ bool should_remove = false;
436
+ int removal_idx = -1;
437
+ if (elem_type != 0) {
438
+ int check_idx = 0;
439
+ for (attr_node *a = attr_list; a; a = a->next, check_idx++) {
440
+ /* Check by element type and index for removal */
441
+ if (!used[check_idx] &&
442
+ (a->node_type == elem_type ||
443
+ (elem_type == CMARK_NODE_HEADING && a->node_type >= CMARK_NODE_HEADING && a->node_type <= CMARK_NODE_HEADING + 5)) &&
444
+ a->element_index == elem_idx) {
445
+ if (strstr(a->attrs, "data-remove")) {
446
+ should_remove = true;
447
+ removal_idx = check_idx;
448
+ break;
449
+ }
450
+ /* Found matching element but not for removal - stop checking */
451
+ break;
452
+ }
453
+ }
454
+ }
455
+
456
+ if (should_remove) {
457
+ /* Skip this entire element */
458
+ const char *close_start = read;
459
+ int depth = 1;
460
+ while (*close_start && depth > 0) {
461
+ if (*close_start == '<') {
462
+ if (close_start[1] == '/') {
463
+ /* Closing tag */
464
+ const char *tag_check = close_start + 2;
465
+ if (memcmp(tag_check, tag_start, tag_len) == 0 &&
466
+ (tag_check[tag_len] == '>' || isspace((unsigned char)tag_check[tag_len]))) {
467
+ depth--;
468
+ if (depth == 0) {
469
+ /* Found matching close tag */
470
+ while (*close_start && *close_start != '>') close_start++;
471
+ if (*close_start == '>') close_start++;
472
+ read = close_start;
473
+ if (removal_idx >= 0) used[removal_idx] = true;
474
+ goto skip_element;
475
+ }
476
+ }
477
+ } else if (close_start[1] != '!' && close_start[1] != '?') {
478
+ /* Another opening tag of same type */
479
+ const char *tag_check = close_start + 1;
480
+ if (memcmp(tag_check, tag_start, tag_len) == 0 &&
481
+ (tag_check[tag_len] == '>' || tag_check[tag_len] == ' ')) {
482
+ depth++;
483
+ }
484
+ }
485
+ }
486
+ close_start++;
487
+ }
488
+ }
489
+
490
+ skip_element:
491
+ if (read != html && *read != '<') {
492
+ continue; /* We skipped an element */
493
+ }
494
+
495
+ /* Handle both block and inline elements with attributes */
496
+ if (elem_type != 0) {
497
+ /* Extract fingerprint for matching */
498
+ char html_fingerprint[51] = {0};
499
+ int fp_idx = 0;
500
+
501
+ if (elem_type == CMARK_NODE_LINK || elem_type == CMARK_NODE_IMAGE) {
502
+ /* For links/images, extract the href/src attribute */
503
+ const char *url_attr = (elem_type == CMARK_NODE_LINK) ? "href=\"" : "src=\"";
504
+ const char *url_start = strstr(read, url_attr);
505
+ if (url_start) {
506
+ url_start += strlen(url_attr);
507
+ const char *url_end = strchr(url_start, '"');
508
+ if (url_end) {
509
+ size_t url_len = url_end - url_start;
510
+ if (url_len > 50) url_len = 50;
511
+ memcpy(html_fingerprint, url_start, url_len);
512
+ html_fingerprint[url_len] = '\0';
513
+ fp_idx = url_len;
514
+ }
515
+ }
516
+ } else if (elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH || elem_type == CMARK_NODE_CODE) {
517
+ /* For inline elements (strong, emph, code), extract text content */
518
+ const char *content_start = tag_name_end;
519
+ while (*content_start && *content_start != '>') content_start++;
520
+ if (*content_start == '>') content_start++;
521
+
522
+ const char *text_p = content_start;
523
+ while (*text_p && *text_p != '<' && fp_idx < 50) {
524
+ html_fingerprint[fp_idx++] = *text_p++;
525
+ }
526
+ html_fingerprint[fp_idx] = '\0';
527
+ } else {
528
+ /* For block elements, extract text content */
529
+ const char *content_start = tag_name_end;
530
+ while (*content_start && *content_start != '>') content_start++;
531
+ if (*content_start == '>') content_start++;
532
+
533
+ const char *text_p = content_start;
534
+ while (*text_p && *text_p != '<' && fp_idx < 50) {
535
+ html_fingerprint[fp_idx++] = *text_p++;
536
+ }
537
+ html_fingerprint[fp_idx] = '\0';
538
+ }
539
+
540
+ /* Find matching attribute - try fingerprint first, then index */
541
+ attr_node *matching = NULL;
542
+ int idx = 0;
543
+
544
+ /* For tables, use sequential matching (first unused table) since index may not match */
545
+ if (elem_type == CMARK_NODE_TABLE) {
546
+ for (attr_node *a = attr_list; a; a = a->next, idx++) {
547
+ if (used[idx]) continue;
548
+ if (a->node_type == CMARK_NODE_TABLE) {
549
+ matching = a;
550
+ used[idx] = true;
551
+ break;
552
+ }
553
+ }
554
+ } else {
555
+ /* For other elements, use the existing matching logic */
556
+ for (attr_node *a = attr_list; a; a = a->next, idx++) {
557
+ if (used[idx]) continue;
558
+
559
+ /* Check type match (including inline elements) */
560
+ bool type_match = (a->node_type == elem_type ||
561
+ (elem_type == CMARK_NODE_HEADING && a->node_type >= CMARK_NODE_HEADING && a->node_type <= CMARK_NODE_HEADING + 5));
562
+
563
+ if (!type_match) continue;
564
+
565
+ /* Try fingerprint match first (works for both block and inline) */
566
+ if (a->text_fingerprint && fp_idx > 0 &&
567
+ strncmp(a->text_fingerprint, html_fingerprint, 50) == 0) {
568
+ /* For inline elements, also check element_index to handle duplicates */
569
+ if (elem_type == CMARK_NODE_LINK || elem_type == CMARK_NODE_IMAGE ||
570
+ elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH ||
571
+ elem_type == CMARK_NODE_CODE) {
572
+ if (a->element_index == elem_idx) {
573
+ matching = a;
574
+ used[idx] = true;
575
+ break;
576
+ }
577
+ } else {
578
+ /* For other elements, fingerprint match is sufficient */
579
+ matching = a;
580
+ used[idx] = true;
581
+ break;
582
+ }
583
+ }
584
+
585
+ /* Fall back to index match if no fingerprint */
586
+ if (!a->text_fingerprint && a->element_index == elem_idx) {
587
+ matching = a;
588
+ used[idx] = true;
589
+ break;
590
+ }
591
+ }
592
+ }
593
+
594
+ if (matching) {
595
+ /* Skip internal attributes and table span attributes */
596
+ /* Table spans are handled by apex_inject_table_attributes() */
597
+ /* For tables, we need to extract IAL attributes even if data-caption is present */
598
+ bool skip_all = false;
599
+ bool extract_ial_from_caption = false;
600
+
601
+ if (strstr(matching->attrs, "data-remove") ||
602
+ strstr(matching->attrs, "colspan=") ||
603
+ strstr(matching->attrs, "rowspan=")) {
604
+ skip_all = true;
605
+ } else if (strstr(matching->attrs, "data-caption") && elem_type == CMARK_NODE_TABLE) {
606
+ /* For tables with captions, extract IAL attributes (id, class, etc.) but skip data-caption */
607
+ extract_ial_from_caption = true;
608
+ }
609
+
610
+ if (skip_all) {
611
+ /* These are handled elsewhere, don't inject here */
612
+ } else if (extract_ial_from_caption) {
613
+ /* Extract IAL attributes from the attribute string, excluding data-caption */
614
+ char *ial_attrs = extract_ial_from_table_attrs(matching->attrs);
615
+ if (ial_attrs && *ial_attrs) {
616
+ /* Find where to inject attributes (before closing > of <table> tag) */
617
+ const char *inject_point = tag_end;
618
+ if (*inject_point == '>') {
619
+ /* Copy up to injection point */
620
+ size_t prefix_len = inject_point - read;
621
+ if (prefix_len <= remaining) {
622
+ memcpy(write, read, prefix_len);
623
+ write += prefix_len;
624
+ remaining -= prefix_len;
625
+ }
626
+
627
+ /* Inject IAL attributes - ensure leading space */
628
+ /* Check if we need to add a space before the attributes */
629
+ bool needs_leading_space = (ial_attrs[0] != ' ');
630
+ size_t ial_len = strlen(ial_attrs);
631
+ size_t total_len = ial_len + (needs_leading_space ? 1 : 0);
632
+
633
+ if (total_len <= remaining) {
634
+ if (needs_leading_space) {
635
+ *write++ = ' ';
636
+ remaining--;
637
+ }
638
+ memcpy(write, ial_attrs, ial_len);
639
+ write += ial_len;
640
+ remaining -= ial_len;
641
+ } else {
642
+ /* Buffer too small - need to expand */
643
+ size_t current_pos = write - output;
644
+ size_t new_cap = (current_pos + total_len + 1) * 2;
645
+ char *new_output = realloc(output, new_cap);
646
+ if (new_output) {
647
+ output = new_output;
648
+ write = output + current_pos;
649
+ remaining = new_cap - current_pos;
650
+ if (needs_leading_space) {
651
+ *write++ = ' ';
652
+ remaining--;
653
+ }
654
+ memcpy(write, ial_attrs, ial_len);
655
+ write += ial_len;
656
+ remaining -= ial_len;
657
+ }
658
+ }
659
+ if (remaining > 0) {
660
+ *write++ = '>';
661
+ remaining--;
662
+ }
663
+ read = inject_point + 1;
664
+ free(ial_attrs);
665
+ continue;
666
+ }
667
+ }
668
+ if (ial_attrs) free(ial_attrs);
669
+ /* No IAL attributes to inject, but table still needs to be copied - fall through */
670
+ } else {
671
+ /* Find where to inject attributes */
672
+ const char *inject_point = NULL;
673
+
674
+ if (elem_type == CMARK_NODE_IMAGE || elem_type == CMARK_NODE_LINK ||
675
+ elem_type == CMARK_NODE_STRONG || elem_type == CMARK_NODE_EMPH ||
676
+ elem_type == CMARK_NODE_CODE) {
677
+ /* For inline elements (img, a), inject before the closing > or /> */
678
+ /* Find the closing > for this tag */
679
+ const char *close_pos = tag_end;
680
+ bool is_self_closing = false;
681
+ if (*close_pos == '>') {
682
+ /* Check if it's a self-closing tag /> */
683
+ if (close_pos > tag_name_end && close_pos[-1] == '/') {
684
+ inject_point = close_pos - 1; /* Before /> */
685
+ is_self_closing = true;
686
+ } else {
687
+ inject_point = close_pos; /* Before > */
688
+ }
689
+ } else {
690
+ /* Fallback: after tag name if we can't find > */
691
+ inject_point = tag_name_end;
692
+ while (*inject_point && isspace((unsigned char)*inject_point) && *inject_point != '>') inject_point++;
693
+ }
694
+
695
+ /* Copy up to injection point (but for self-closing tags, don't include the space before /) */
696
+ size_t prefix_len;
697
+ if (is_self_closing && inject_point > read && inject_point[-1] == ' ') {
698
+ /* Don't copy the space before / - we'll add it back after attributes */
699
+ prefix_len = inject_point - read - 1;
700
+ } else {
701
+ prefix_len = inject_point - read;
702
+ }
703
+
704
+ if (prefix_len < remaining && prefix_len > 0) {
705
+ memcpy(write, read, prefix_len);
706
+ write += prefix_len;
707
+ remaining -= prefix_len;
708
+ }
709
+
710
+ /* Always add a space before attributes (they need to be separated from existing attributes) */
711
+ /* The only exception is if inject_point is at > and there's already a space before it */
712
+ /* But since we're injecting attributes, we always need a space before them */
713
+ if (remaining > 0) {
714
+ *write++ = ' ';
715
+ remaining--;
716
+ }
717
+
718
+ /* Inject attributes */
719
+ size_t attr_len = strlen(matching->attrs);
720
+ if (attr_len <= remaining) {
721
+ memcpy(write, matching->attrs, attr_len);
722
+ write += attr_len;
723
+ remaining -= attr_len;
724
+ }
725
+
726
+ /* For self-closing tags, ensure space before / */
727
+ if (is_self_closing && remaining > 0) {
728
+ *write++ = ' ';
729
+ remaining--;
730
+ }
731
+
732
+ read = inject_point;
733
+ } else {
734
+ /* For block elements, inject after tag name */
735
+ inject_point = tag_name_end;
736
+ while (*inject_point && isspace((unsigned char)*inject_point)) inject_point++;
737
+
738
+ /* Copy up to injection point */
739
+ size_t prefix_len = inject_point - read;
740
+ if (prefix_len < remaining) {
741
+ memcpy(write, read, prefix_len);
742
+ write += prefix_len;
743
+ remaining -= prefix_len;
744
+ }
745
+
746
+ /* Always add space before attributes for block elements */
747
+ /* We're injecting right after the tag name, so we need a space */
748
+ /* Check if there's already whitespace to avoid doubling spaces */
749
+ bool needs_space = true;
750
+ if (inject_point > read) {
751
+ /* Check the character immediately before inject_point */
752
+ const char *before_inject = inject_point - 1;
753
+ if (isspace((unsigned char)*before_inject)) {
754
+ /* There's already whitespace, don't add another */
755
+ needs_space = false;
756
+ }
757
+ }
758
+
759
+ if (needs_space && remaining > 0) {
760
+ *write++ = ' ';
761
+ remaining--;
762
+ }
763
+
764
+ /* Inject attributes */
765
+ size_t attr_len = strlen(matching->attrs);
766
+ if (attr_len <= remaining) {
767
+ memcpy(write, matching->attrs, attr_len);
768
+ write += attr_len;
769
+ remaining -= attr_len;
770
+ }
771
+
772
+ read = inject_point;
773
+ }
774
+ continue;
775
+ }
776
+ }
777
+ }
778
+ }
779
+
780
+ /* Copy character */
781
+ if (remaining > 0) {
782
+ *write++ = *read++;
783
+ remaining--;
784
+ } else {
785
+ read++;
786
+ }
787
+ }
788
+
789
+ free(used);
790
+
791
+ *write = '\0';
792
+
793
+ /* Clean up */
794
+ while (attr_list) {
795
+ attr_node *next = attr_list->next;
796
+ free(attr_list->text_fingerprint);
797
+ free(attr_list);
798
+ attr_list = next;
799
+ }
800
+
801
+ free(html);
802
+ return output;
803
+ }
804
+
805
+ /**
806
+ * Inject header IDs into HTML output
807
+ */
808
+ char *apex_inject_header_ids(const char *html, cmark_node *document, bool generate_ids, bool use_anchors, int id_format) {
809
+ if (!html || !document || !generate_ids) {
810
+ return html ? strdup(html) : NULL;
811
+ }
812
+
813
+ /* Collect all headers from AST with their IDs */
814
+ typedef struct header_id_map {
815
+ char *text;
816
+ char *id;
817
+ int index;
818
+ struct header_id_map *next;
819
+ } header_id_map;
820
+
821
+ header_id_map *header_map = NULL;
822
+ int header_count = 0;
823
+
824
+ /* Walk AST to collect headers */
825
+ cmark_iter *iter = cmark_iter_new(document);
826
+ cmark_event_type event;
827
+ while ((event = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
828
+ cmark_node *node = cmark_iter_get_node(iter);
829
+ if (event == CMARK_EVENT_ENTER && cmark_node_get_type(node) == CMARK_NODE_HEADING) {
830
+ char *text = apex_extract_heading_text(node);
831
+ char *id = NULL;
832
+
833
+ /* Check if ID already exists from IAL or manual ID (stored in user_data) */
834
+ char *user_data = (char *)cmark_node_get_user_data(node);
835
+ if (user_data) {
836
+ /* Look for id="..." in user_data */
837
+ const char *id_attr = strstr(user_data, "id=\"");
838
+ if (id_attr) {
839
+ const char *id_start = id_attr + 4;
840
+ const char *id_end = strchr(id_start, '"');
841
+ if (id_end && id_end > id_start) {
842
+ size_t id_len = id_end - id_start;
843
+ id = malloc(id_len + 1);
844
+ if (id) {
845
+ memcpy(id, id_start, id_len);
846
+ id[id_len] = '\0';
847
+ }
848
+ }
849
+ }
850
+ }
851
+
852
+ /* If no manual/IAL ID, generate one automatically */
853
+ if (!id) {
854
+ id = apex_generate_header_id(text, (apex_id_format_t)id_format);
855
+ }
856
+
857
+ header_id_map *entry = malloc(sizeof(header_id_map));
858
+ if (entry) {
859
+ entry->text = text;
860
+ entry->id = id;
861
+ entry->index = header_count++;
862
+ entry->next = header_map;
863
+ header_map = entry;
864
+ } else {
865
+ free(text);
866
+ free(id);
867
+ }
868
+ }
869
+ }
870
+ cmark_iter_free(iter);
871
+
872
+ if (!header_map) {
873
+ return strdup(html);
874
+ }
875
+
876
+ /* Reverse the list to get document order */
877
+ header_id_map *reversed = NULL;
878
+ while (header_map) {
879
+ header_id_map *next = header_map->next;
880
+ header_map->next = reversed;
881
+ reversed = header_map;
882
+ header_map = next;
883
+ }
884
+ header_map = reversed;
885
+
886
+ /* Process HTML to inject IDs */
887
+ size_t html_len = strlen(html);
888
+ size_t capacity = html_len + header_count * 100; /* Extra space for IDs */
889
+ char *output = malloc(capacity + 1); /* +1 for null terminator */
890
+ if (!output) {
891
+ /* Clean up */
892
+ while (header_map) {
893
+ header_id_map *next = header_map->next;
894
+ free(header_map->text);
895
+ free(header_map->id);
896
+ free(header_map);
897
+ header_map = next;
898
+ }
899
+ return strdup(html);
900
+ }
901
+
902
+ const char *read = html;
903
+ char *write = output;
904
+ size_t remaining = capacity; /* Reserve 1 byte for null terminator */
905
+ int current_header_idx = 0;
906
+
907
+ while (*read) {
908
+ /* Look for header opening tags: <h1>, <h2>, etc. */
909
+ if (*read == '<' && read[1] == 'h' &&
910
+ read[2] >= '1' && read[2] <= '6' &&
911
+ (read[3] == '>' || isspace((unsigned char)read[3]))) {
912
+
913
+ /* Find the end of the tag */
914
+ const char *tag_start = read;
915
+ const char *tag_end = read + 3;
916
+ while (*tag_end && *tag_end != '>') tag_end++;
917
+ if (*tag_end != '>') {
918
+ /* Malformed tag, just copy */
919
+ if (remaining > 0) {
920
+ *write++ = *read++;
921
+ remaining--;
922
+ } else {
923
+ read++;
924
+ }
925
+ continue;
926
+ }
927
+
928
+ /* Check if ID already exists in the tag */
929
+ bool has_id = false;
930
+ const char *id_attr = strstr(tag_start, "id=");
931
+ const char *id_start = NULL;
932
+ const char *id_end = NULL;
933
+ if (id_attr && id_attr < tag_end) {
934
+ has_id = true;
935
+ /* Find the ID value boundaries for replacement */
936
+ id_start = id_attr + 3; /* After 'id=' */
937
+ while (id_start < tag_end && (*id_start == ' ' || *id_start == '"' || *id_start == '\'')) {
938
+ id_start++;
939
+ }
940
+ id_end = id_start;
941
+ while (id_end < tag_end && *id_end != '"' && *id_end != '\'' && *id_end != ' ' && *id_end != '>') {
942
+ id_end++;
943
+ }
944
+ }
945
+
946
+ /* Get the header ID - always get it so we can replace existing IDs */
947
+ header_id_map *header = NULL;
948
+ if (current_header_idx < header_count) {
949
+ header = header_map;
950
+ for (int i = 0; i < current_header_idx && header; i++) {
951
+ header = header->next;
952
+ }
953
+ }
954
+
955
+ if (use_anchors && header && header->id) {
956
+ /* For anchor tags: copy the entire header tag, then inject anchor after '>' */
957
+ size_t tag_len = tag_end - tag_start + 1; /* Include '>' */
958
+ if (tag_len <= remaining) {
959
+ memcpy(write, tag_start, tag_len);
960
+ write += tag_len;
961
+ remaining -= tag_len;
962
+ }
963
+ read = tag_end + 1;
964
+
965
+ /* Inject anchor tag after the header tag */
966
+ char anchor_tag[512];
967
+ snprintf(anchor_tag, sizeof(anchor_tag),
968
+ "<a href=\"#%s\" aria-hidden=\"true\" class=\"anchor\" id=\"%s\"></a>",
969
+ header->id, header->id);
970
+ size_t anchor_len = strlen(anchor_tag);
971
+ if (anchor_len <= remaining) {
972
+ memcpy(write, anchor_tag, anchor_len);
973
+ write += anchor_len;
974
+ remaining -= anchor_len;
975
+ }
976
+ current_header_idx++;
977
+ } else if (!use_anchors && header && header->id) {
978
+ /* For header IDs: replace existing ID or inject new one */
979
+ if (has_id && id_attr) {
980
+ /* Replace existing ID: copy up to id=, skip old ID value, inject new ID, copy rest */
981
+ size_t before_id_len = id_attr - tag_start;
982
+ if (before_id_len <= remaining) {
983
+ memcpy(write, tag_start, before_id_len);
984
+ write += before_id_len;
985
+ remaining -= before_id_len;
986
+ }
987
+
988
+ /* Find the end of the old ID attribute value */
989
+ const char *old_id_end = id_attr + 3; /* After 'id=' */
990
+ /* Skip whitespace and opening quote */
991
+ while (old_id_end < tag_end && (old_id_end[0] == ' ' || old_id_end[0] == '"' || old_id_end[0] == '\'')) {
992
+ old_id_end++;
993
+ }
994
+ /* Skip the ID value until closing quote or space or > */
995
+ while (old_id_end < tag_end && old_id_end[0] != '"' && old_id_end[0] != '\'' && old_id_end[0] != ' ' && old_id_end[0] != '>') {
996
+ old_id_end++;
997
+ }
998
+ /* Skip closing quote if present */
999
+ if (old_id_end < tag_end && (old_id_end[0] == '"' || old_id_end[0] == '\'')) {
1000
+ old_id_end++;
1001
+ }
1002
+
1003
+ /* Inject new id="..." */
1004
+ char id_attr_str[512];
1005
+ snprintf(id_attr_str, sizeof(id_attr_str), "id=\"%s\"", header->id);
1006
+ size_t id_len = strlen(id_attr_str);
1007
+ if (id_len <= remaining) {
1008
+ memcpy(write, id_attr_str, id_len);
1009
+ write += id_len;
1010
+ remaining -= id_len;
1011
+ }
1012
+
1013
+ /* Copy rest of tag from after old ID until '>' */
1014
+ read = old_id_end;
1015
+ while (read < tag_end && *read != '>') {
1016
+ if (remaining > 0) {
1017
+ *write++ = *read++;
1018
+ remaining--;
1019
+ } else {
1020
+ read++;
1021
+ }
1022
+ }
1023
+
1024
+ /* Copy closing '>' */
1025
+ if (read < tag_end && *read == '>') {
1026
+ if (remaining > 0) {
1027
+ *write++ = *read++;
1028
+ remaining--;
1029
+ } else {
1030
+ read++;
1031
+ }
1032
+ }
1033
+ current_header_idx++;
1034
+ } else {
1035
+ /* No existing ID: copy tag up to '>', inject id attribute, then copy '>' */
1036
+ const char *after_tag_name = tag_start + 3;
1037
+ while (*after_tag_name && *after_tag_name != '>' && !isspace((unsigned char)*after_tag_name)) {
1038
+ after_tag_name++;
1039
+ }
1040
+
1041
+ /* Copy '<hN' */
1042
+ size_t tag_prefix_len = after_tag_name - tag_start;
1043
+ if (tag_prefix_len <= remaining) {
1044
+ memcpy(write, tag_start, tag_prefix_len);
1045
+ write += tag_prefix_len;
1046
+ remaining -= tag_prefix_len;
1047
+ }
1048
+ read = after_tag_name;
1049
+
1050
+ /* Copy any existing attributes before injecting id */
1051
+ const char *attr_start = read;
1052
+ while (*read && *read != '>') {
1053
+ read++;
1054
+ }
1055
+
1056
+ /* If there are existing attributes, copy them */
1057
+ if (read > attr_start) {
1058
+ size_t attr_len = read - attr_start;
1059
+ if (attr_len <= remaining) {
1060
+ memcpy(write, attr_start, attr_len);
1061
+ write += attr_len;
1062
+ remaining -= attr_len;
1063
+ }
1064
+ }
1065
+
1066
+ /* Add space before id attribute if needed */
1067
+ if ((read > attr_start || *read == '>') && remaining > 0) {
1068
+ *write++ = ' ';
1069
+ remaining--;
1070
+ }
1071
+
1072
+ /* Inject id="..." */
1073
+ char id_attr_str[512];
1074
+ snprintf(id_attr_str, sizeof(id_attr_str), "id=\"%s\"", header->id);
1075
+ size_t id_len = strlen(id_attr_str);
1076
+ if (id_len <= remaining) {
1077
+ memcpy(write, id_attr_str, id_len);
1078
+ write += id_len;
1079
+ remaining -= id_len;
1080
+ }
1081
+
1082
+ /* Copy closing '>' */
1083
+ if (*read == '>') {
1084
+ if (remaining > 0) {
1085
+ *write++ = *read++;
1086
+ remaining--;
1087
+ } else {
1088
+ read++;
1089
+ }
1090
+ }
1091
+ }
1092
+ current_header_idx++;
1093
+ } else {
1094
+ /* No ID to inject, just copy the tag */
1095
+ size_t tag_len = tag_end - tag_start + 1;
1096
+ if (tag_len <= remaining) {
1097
+ memcpy(write, tag_start, tag_len);
1098
+ write += tag_len;
1099
+ remaining -= tag_len;
1100
+ }
1101
+ read = tag_end + 1;
1102
+ if (!has_id) {
1103
+ current_header_idx++;
1104
+ }
1105
+ }
1106
+ } else {
1107
+ /* Copy character */
1108
+ if (remaining > 0) {
1109
+ *write++ = *read++;
1110
+ remaining--;
1111
+ } else {
1112
+ read++;
1113
+ }
1114
+ }
1115
+ }
1116
+
1117
+ /* Ensure we have space for null terminator */
1118
+ if (remaining < 1) {
1119
+ size_t used = write - output;
1120
+ size_t new_capacity = (used + 1) * 2;
1121
+ char *new_output = realloc(output, new_capacity + 1);
1122
+ if (new_output) {
1123
+ output = new_output;
1124
+ write = output + used;
1125
+ remaining = new_capacity - used;
1126
+ }
1127
+ }
1128
+ *write = '\0';
1129
+
1130
+ /* Clean up */
1131
+ while (header_map) {
1132
+ header_id_map *next = header_map->next;
1133
+ free(header_map->text);
1134
+ free(header_map->id);
1135
+ free(header_map);
1136
+ header_map = next;
1137
+ }
1138
+
1139
+ return output;
1140
+ }
1141
+
1142
/**
 * Clean up HTML tag spacing
 * - Compresses runs of whitespace between attributes of an opening tag to a
 *   single space
 * - Removes whitespace directly before the closing >
 *
 * Only a '<' immediately followed by an ASCII letter opens a tag, so a
 * literal "< " in text or script content is left untouched. Quoted attribute
 * values are copied verbatim so their whitespace (and any '>' inside them)
 * is preserved. Closing tags, comments and processing instructions are not
 * modified.
 *
 * @param html NUL-terminated HTML (may be NULL)
 * @return Newly allocated cleaned copy owned by the caller, or NULL when
 *         `html` is NULL or allocation fails
 */
char *apex_clean_html_tag_spacing(const char *html) {
    if (!html) return NULL;

    /* Characters are only dropped or replaced 1:1, so the output never grows */
    char *output = malloc(strlen(html) + 1);
    if (!output) return NULL;

    const char *read = html;
    char *write = output;
    bool in_tag = false;
    bool last_was_space = false;
    char quote = '\0'; /* Active quote character while inside an attribute value */

    while (*read) {
        unsigned char c = (unsigned char)*read;

        if (in_tag && quote) {
            /* Inside a quoted attribute value: copy verbatim */
            if (*read == quote) {
                quote = '\0';
            }
            last_was_space = false;
            *write++ = *read++;
        } else if (c == '<' && isalpha((unsigned char)read[1])) {
            /* Entering an opening tag */
            in_tag = true;
            last_was_space = false;
            *write++ = *read++;
        } else if (c == '>') {
            /* Exiting a tag - drop the space we emitted just before it */
            if (last_was_space && write > output && write[-1] == ' ') {
                write--;
            }
            in_tag = false;
            last_was_space = false;
            *write++ = *read++;
        } else if (in_tag && isspace(c)) {
            /* Whitespace between attributes: keep one space per run */
            if (!last_was_space) {
                *write++ = ' ';
                last_was_space = true;
            }
            read++;
        } else {
            /* Regular character; remember an opening quote inside a tag */
            if (in_tag && (c == '"' || c == '\'')) {
                quote = (char)c;
            }
            last_was_space = false;
            *write++ = *read++;
        }
    }

    *write = '\0';
    return output;
}
1192
+
1193
/**
 * Compress line breaks (and surrounding whitespace) between adjacent tags.
 *
 * Examples:
 *   "</table>\n\n\n<figure>" -> "</table>\n\n<figure>"
 *   "</p>  \n  <p>"          -> "</p>\n<p>"
 *
 * Strategy:
 * - After every '>' character, look ahead over any combination of
 *   spaces/tabs/newlines/carriage returns.
 * - If the next non-whitespace character is '<' and the skipped range held
 *   at least one line break, replace the whole range with "\n" (exactly one
 *   line break) or "\n\n" (two or more line breaks, i.e. one blank line).
 *   A CRLF pair counts as a single line break.
 * - Otherwise the whitespace is left untouched.
 *
 * Whitespace that is followed by text rather than a tag is never changed.
 *
 * @param html NUL-terminated HTML (may be NULL)
 * @return Newly allocated copy owned by the caller, or NULL when `html` is
 *         NULL or allocation fails
 */
char *apex_collapse_intertag_newlines(const char *html) {
    if (!html) return NULL;

    /* Each rewritten range shrinks or keeps its length, so len + 1 suffices */
    char *output = malloc(strlen(html) + 1);
    if (!output) return NULL;

    const char *read = html;
    char *write = output;

    while (*read) {
        char c = *read++;
        *write++ = c;
        if (c != '>') {
            continue;
        }

        /* Look ahead over whitespace between this tag and the next content */
        const char *look = read;
        int line_breaks = 0;
        for (;;) {
            if (*look == '\r') {
                line_breaks++;
                look += (look[1] == '\n') ? 2 : 1; /* CRLF is one line break */
            } else if (*look == '\n') {
                line_breaks++;
                look++;
            } else if (*look == ' ' || *look == '\t') {
                look++;
            } else {
                break;
            }
        }

        if (line_breaks > 0 && *look == '<') {
            /* Between two tags: keep one line break, or one blank line */
            *write++ = '\n';
            if (line_breaks >= 2) {
                *write++ = '\n';
            }
            read = look;
        }
        /* Otherwise the main loop copies the whitespace unchanged. Nothing is
         * consumed here, so the terminator is never stepped over. */
    }

    *write = '\0';
    return output;
}
1257
+
1258
/**
 * Decide whether the span [cell_start, cell_end) looks like a table
 * separator cell.
 *
 * Accepted characters: em dashes (U+2014, bytes E2 80 94), '-', ':', '|',
 * spaces/tabs/newlines/carriage returns, and <td...>/<th...>/</td>/</th>
 * tags. Anything else makes the cell a normal data cell.
 *
 * @return true only when nothing disallowed was seen AND at least one
 *         em dash, '-' or ':' was present (pipes, whitespace and tags alone
 *         do not count).
 */
static bool cell_contains_only_dashes(const char *cell_start, const char *cell_end) {
    bool saw_marker = false;
    const char *cur = cell_start;

    while (cur < cell_end) {
        const unsigned char ch = (unsigned char)*cur;

        /* Em dash, only when all three bytes lie inside the span */
        if (ch == 0xE2 && cur + 2 < cell_end &&
            (unsigned char)cur[1] == 0x80 && (unsigned char)cur[2] == 0x94) {
            saw_marker = true;
            cur += 3;
            continue;
        }

        switch (ch) {
        case '-':
        case ':':
            /* Alignment markers such as :---: count as separator content */
            saw_marker = true;
            cur++;
            continue;
        case '|':
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            /* Permitted filler that does not count as content */
            cur++;
            continue;
        case '<':
            break; /* Handled below */
        default:
            return false;
        }

        /* Only table-cell tags are tolerated inside a separator cell */
        if (strncmp(cur, "</td>", 5) == 0 || strncmp(cur, "</th>", 5) == 0) {
            cur += 5;
        } else if (strncmp(cur, "<td", 3) == 0 || strncmp(cur, "<th", 3) == 0) {
            /* Skip the opening tag through its '>' (or to the end of the span) */
            while (cur < cell_end && *cur != '>') {
                cur++;
            }
            if (cur < cell_end) {
                cur++;
            }
        } else {
            return false;
        }
    }

    return saw_marker;
}
1304
+
1305
/* True when the thead body [after_thead, thead_end) contains at least one
 * <th> cell and every <th> cell holds nothing but whitespace (dummy headers
 * produced for headerless tables). */
static bool relaxed_thead_is_blank(const char *after_thead, const char *thead_end) {
    bool found_any_th = false;
    const char *search = after_thead;

    while (search < thead_end) {
        bool is_th = strncmp(search, "<th", 3) == 0 &&
                     (search[3] == ' ' || search[3] == '\t' || search[3] == '>');
        if (!is_th) {
            search++;
            continue;
        }
        found_any_th = true;

        /* Closing '>' of the opening tag, then the matching </th> */
        const char *tag_end = strchr(search, '>');
        if (!tag_end || tag_end >= thead_end) {
            return false;
        }
        const char *th_end = strstr(tag_end, "</th>");
        if (!th_end || th_end >= thead_end) {
            return false;
        }
        for (const char *c = tag_end + 1; c < th_end; c++) {
            if (!isspace((unsigned char)*c)) {
                return false;
            }
        }
        search = th_end + 5; /* Move past </th> */
    }

    return found_any_th;
}

/* True when some <tr> row inside [tbody_start, tbody_end) consists solely of
 * separator cells. A row whose cells cannot be fully parsed (or that has no
 * cells) is counted as a separator, which keeps the thead untouched. */
static bool relaxed_tbody_has_separator_row(const char *tbody_start, const char *tbody_end) {
    const char *search = tbody_start;

    while (search < tbody_end) {
        if (strncmp(search, "<tr>", 4) != 0) {
            search++;
            continue;
        }
        const char *tr_end = strstr(search, "</tr>");
        if (!tr_end || tr_end >= tbody_end) {
            return false;
        }

        bool row_is_separator = true;
        const char *cell_start = search + 4;
        while (cell_start < tr_end) {
            bool is_td = strncmp(cell_start, "<td", 3) == 0;
            if (!is_td && strncmp(cell_start, "<th", 3) != 0) {
                cell_start++;
                continue;
            }
            const char *tag_end = strchr(cell_start, '>');
            if (!tag_end) break;
            tag_end++;

            const char *cell_end = strstr(tag_end, is_td ? "</td>" : "</th>");
            if (!cell_end) break;
            cell_end += 5;
            if (cell_end > tr_end) break;

            if (!cell_contains_only_dashes(tag_end, cell_end - 5)) {
                row_is_separator = false;
                break;
            }
            cell_start = cell_end;
        }

        if (row_is_separator) {
            return true;
        }
        search = tr_end + 5;
    }

    return false;
}

/**
 * Convert thead to tbody for relaxed tables ONLY
 * Converts <thead><tr><th>...</th></tr></thead> to <tbody><tr><td>...</td></tr></tbody>
 *
 * For every <thead>...</thead>:
 * - If it has <th> cells and all of them are empty, the whole thead is
 *   removed (dummy header of a headerless table).
 * - Otherwise, if the SAME table has a <tbody> after the thead and that
 *   tbody contains no separator row (see cell_contains_only_dashes), the
 *   table is treated as relaxed: <thead> becomes <tbody>, <th> becomes <td>,
 *   </thead> is dropped and an immediately following <tbody> is merged in.
 * - In every other case the thead is copied unchanged.
 *
 * @param html NUL-terminated HTML (may be NULL)
 * @return Newly allocated HTML owned by the caller, or NULL when `html` is
 *         NULL or allocation fails
 */
char *apex_convert_relaxed_table_headers(const char *html) {
    if (!html) return NULL;

    /* Every rewrite replaces text with text of equal or shorter length, so
     * the output never exceeds the input. len + 1 is also valid for "". */
    size_t len = strlen(html);
    char *output = malloc(len + 1);
    if (!output) return NULL;

    const char *read = html;
    char *write = output;

    while (*read) {
        const char *thead_end = NULL;
        if (strncmp(read, "<thead>", 7) == 0) {
            thead_end = strstr(read + 7, "</thead>");
        }
        if (!thead_end) {
            *write++ = *read++;
            continue;
        }
        const char *after_thead = read + 7;

        /* Dummy header with only empty cells: drop <thead>...</thead> */
        if (relaxed_thead_is_blank(after_thead, thead_end)) {
            read = thead_end + 8;
            continue;
        }

        /* The tbody must belong to this table, not to a later one */
        const char *tbody_start = strstr(after_thead, "<tbody>");
        const char *table_close = strstr(after_thead, "</table>");
        if (tbody_start && table_close && tbody_start > table_close) {
            tbody_start = NULL;
        }
        if (!tbody_start || tbody_start < thead_end) {
            *write++ = *read++;
            continue;
        }

        /* A separator row in tbody marks a regular table - keep thead */
        const char *tbody_end = strstr(tbody_start, "</tbody>");
        const char *table_end = strstr(tbody_start, "</table>");
        bool has_separator_row =
            tbody_end && (!table_end || tbody_end < table_end) &&
            relaxed_tbody_has_separator_row(tbody_start, tbody_end);
        if (has_separator_row) {
            *write++ = *read++;
            continue;
        }

        /* Relaxed table: convert thead to tbody */
        memcpy(write, "<tbody>", 7);
        write += 7;
        read = after_thead;

        while (read < thead_end + 8) {
            if (strncmp(read, "<th>", 4) == 0) {
                memcpy(write, "<td>", 4);
                write += 4;
                read += 4;
            } else if (strncmp(read, "</th>", 5) == 0) {
                memcpy(write, "</td>", 5);
                write += 5;
                read += 5;
            } else if (strncmp(read, "<th ", 4) == 0) {
                memcpy(write, "<td", 3);
                write += 3;
                read += 3;
                /* Copy attributes through the closing '>' */
                while (*read && *read != '>') {
                    *write++ = *read++;
                }
                if (*read == '>') {
                    *write++ = *read++;
                }
            } else if (strncmp(read, "</thead>", 8) == 0) {
                /* Drop </thead>; merge with a directly following <tbody> */
                read += 8;
                const char *next = read;
                while (*next == ' ' || *next == '\n' || *next == '\t') next++;
                if (strncmp(next, "<tbody>", 7) == 0) {
                    read = next + 7;
                }
                break;
            } else {
                *write++ = *read++;
            }
        }
    }

    *write = '\0';
    return output;
}
1552
+
1553
/**
 * Remove blank lines within tables
 *
 * Processes the input one '\n'-terminated line at a time. A line made up
 * solely of spaces, tabs and carriage returns is dropped (together with its
 * newline) while a <table> is open; every other line is copied verbatim.
 * Text after the last newline is always copied as-is.
 *
 * @param html NUL-terminated HTML (may be NULL)
 * @return Newly allocated copy owned by the caller, or NULL when `html` is
 *         NULL or allocation fails
 */
char *apex_remove_table_blank_lines(const char *html) {
    if (html == NULL) {
        return NULL;
    }

    char *result = malloc(strlen(html) + 1);
    if (result == NULL) {
        return NULL;
    }

    char *dst = result;
    bool inside_table = false;
    const char *line = html;
    const char *eol;

    while ((eol = strchr(line, '\n')) != NULL) {
        bool blank = true;

        /* Track table open/close state and blankness across this line */
        for (const char *p = line; p < eol; p++) {
            if (strncmp(p, "<table", 6) == 0 && (p[6] == '>' || p[6] == ' ')) {
                inside_table = true;
            } else if (strncmp(p, "</table>", 8) == 0) {
                inside_table = false;
            }
            if (*p != ' ' && *p != '\t' && *p != '\r') {
                blank = false;
            }
        }

        /* Keep the line (newline included) unless it is blank inside a table */
        if (!(inside_table && blank)) {
            size_t n = (size_t)(eol - line) + 1;
            memcpy(dst, line, n);
            dst += n;
        }
        line = eol + 1;
    }

    /* Unterminated final line: copied unconditionally */
    size_t tail = strlen(line);
    memcpy(dst, line, tail);
    dst += tail;

    *dst = '\0';
    return result;
}
1613
+
1614
/* True when every <td>/<th> cell in the row body [row_body, row_end) is a
 * separator cell; `row_end` points just past the row's "</tr>". A row with
 * no cells at all also yields true. A cell that cannot be parsed completely
 * (no '>' or no matching end tag inside the row) yields false, so real data
 * is never discarded on a parse failure. */
static bool separator_row_cells_all_dashes(const char *row_body, const char *row_end) {
    const char *cell_start = row_body;

    while (cell_start < row_end) {
        bool is_td = strncmp(cell_start, "<td", 3) == 0;
        if (!is_td && strncmp(cell_start, "<th", 3) != 0) {
            cell_start++;
            continue;
        }

        /* End of the opening tag */
        const char *tag_end = strchr(cell_start, '>');
        if (!tag_end) {
            return false;
        }
        tag_end++;

        /* Matching </td> or </th>, which must lie inside this row */
        const char *cell_end = strstr(tag_end, is_td ? "</td>" : "</th>");
        if (!cell_end || cell_end + 5 > row_end) {
            return false;
        }

        if (!cell_contains_only_dashes(tag_end, cell_end)) {
            return false;
        }
        cell_start = cell_end + 5;
    }

    return true;
}

/**
 * Remove table rows that contain only em dashes (separator rows incorrectly rendered as data rows)
 * This happens when smart typography converts --- to — in separator rows
 *
 * Only attribute-less <tr> rows inside a <table> are examined. A row is
 * removed when all of its cells pass cell_contains_only_dashes; rows with
 * no cells are removed as well. Rows whose cells cannot be parsed (for
 * example with omitted </td> end tags) are kept unchanged.
 *
 * @param html The HTML to process
 * @return Newly allocated HTML with separator rows removed (must be freed)
 */
char *apex_remove_table_separator_rows(const char *html) {
    if (!html) return NULL;

    /* Rows are only ever dropped, so the output never exceeds the input */
    char *output = malloc(strlen(html) + 1);
    if (!output) return NULL;

    const char *read = html;
    char *write = output;
    bool in_table = false;

    while (*read) {
        /* Check for table tags */
        if (strncmp(read, "<table", 6) == 0 && (read[6] == '>' || read[6] == ' ')) {
            in_table = true;
        } else if (strncmp(read, "</table>", 8) == 0) {
            in_table = false;
        } else if (in_table && strncmp(read, "<tr>", 4) == 0) {
            const char *row_body = read + 4;
            const char *row_close = strstr(row_body, "</tr>");

            if (row_close && separator_row_cells_all_dashes(row_body, row_close + 5)) {
                /* Skip this entire row */
                read = row_close + 5;
                continue;
            }

            /* Keep the row: emit "<tr>" and continue with its contents */
            memcpy(write, read, 4);
            write += 4;
            read = row_body;
            continue;
        }

        /* Copy character */
        *write++ = *read++;
    }

    *write = '\0';
    return output;
}
1712
+
1713
/**
 * Adjust header levels in HTML based on Base Header Level metadata.
 *
 * Every <hN>, <hN ...> and </hN> tag (N in 1..6, lowercase) is shifted by
 * (base_header_level - 1) and clamped at h6, e.g. Base Header Level 2 turns
 * h1 into h2, h2 into h3, and so on.
 *
 * Only the level digit of a tag ever changes, so the result always has
 * exactly the same length as the input. The function therefore makes one
 * exact-size copy and patches the digits in place; no growth logic is needed.
 *
 * @param html              NUL-terminated HTML; may be NULL.
 * @param base_header_level Level that h1 should map to. Values outside 2..6
 *                          leave the HTML unchanged.
 * @return Newly allocated string (caller frees), or NULL if html is NULL or
 *         allocation fails.
 */
char *apex_adjust_header_levels(const char *html, int base_header_level) {
    if (!html) {
        return NULL;
    }

    size_t len = strlen(html);
    char *output = malloc(len + 1);
    if (!output) {
        return NULL;
    }
    memcpy(output, html, len + 1);

    /* Level 1 and out-of-range values mean "no shift": plain copy. */
    if (base_header_level <= 1 || base_header_level > 6) {
        return output;
    }

    const int shift = base_header_level - 1;
    char *p = output;

    while (*p) {
        if (*p != '<') {
            p++;
            continue;
        }

        /* Locate the level digit of a closing or opening header tag.
         * The && chains stop at the first mismatch, so they never read
         * past the terminating NUL. */
        char *digit = NULL;
        if (p[1] == '/' && p[2] == 'h' &&
            p[3] >= '1' && p[3] <= '6' && p[4] == '>') {
            digit = p + 3;                      /* </hN> */
        } else if (p[1] == 'h' && p[2] >= '1' && p[2] <= '6' &&
                   (p[3] == '>' || isspace((unsigned char)p[3]))) {
            digit = p + 2;                      /* <hN> or <hN ...> */
        }

        if (!digit) {
            p++;
            continue;
        }

        char *tag_end = strchr(p, '>');
        if (!tag_end) {
            /* Unterminated tag: there is no '>' anywhere after this point,
             * so nothing later can be a complete tag either. The remainder
             * is already copied verbatim. */
            break;
        }

        int new_level = (*digit - '0') + shift;
        if (new_level > 6) {
            new_level = 6;
        }
        *digit = (char)('0' + new_level);

        /* Skip the rest of the tag so attribute text is never re-scanned. */
        p = tag_end + 1;
    }

    return output;
}
1891
+
1892
/* Growable output buffer for apex_adjust_quote_language.
 * Invariant: cap > len, so one byte is always free for the terminator. */
typedef struct {
    char *data;
    size_t len;
    size_t cap;
} apex_quote_buf;

/* Append n bytes from src, growing the buffer when needed.
 * Returns false (buffer left untouched) if reallocation fails. */
static bool apex_quote_buf_append(apex_quote_buf *buf, const char *src, size_t n) {
    if (n >= buf->cap - buf->len) {
        size_t new_cap = (buf->len + n + 1) * 2;
        char *grown = realloc(buf->data, new_cap);
        if (!grown) {
            return false;
        }
        buf->data = grown;
        buf->cap = new_cap;
    }
    memcpy(buf->data + buf->len, src, n);
    buf->len += n;
    return true;
}

/**
 * Adjust quote styles in HTML based on Quotes Language metadata.
 *
 * Replaces the default English curly-quote entities (&ldquo; &rdquo;
 * &lsquo; &rsquo;) and their UTF-8 encoded characters (U+201C, U+201D,
 * U+2018, U+2019) with the language-specific entities.
 *
 * The language name is matched case-insensitively with all whitespace
 * removed. Recognised names: french/fr, german/de, germanguillemets,
 * spanish/es, dutch/nl, swedish/sv. English, an empty or NULL name, and any
 * unknown name leave the HTML unchanged.
 *
 * A replacement can be more than twice as long as what it replaces (a 3-byte
 * UTF-8 quote becomes the 13-byte "&laquo;&nbsp;"), so the output buffer
 * grows on demand; a replacement is never skipped for lack of space.
 *
 * @param html            NUL-terminated HTML; may be NULL.
 * @param quotes_language Language name or code; may be NULL.
 * @return Newly allocated string (caller frees), or NULL if html is NULL or
 *         allocation fails.
 */
char *apex_adjust_quote_language(const char *html, const char *quotes_language) {
    static const struct {
        const char *name;
        const char *code;          /* short alias, or NULL when there is none */
        const char *double_open;
        const char *double_close;
        const char *single_open;
        const char *single_close;
    } styles[] = {
        /* French: guillemets with non-breaking spaces */
        { "french", "fr", "&laquo;&nbsp;", "&nbsp;&raquo;", "&rsquo;", "&rsquo;" },
        /* German: low opening quote, high closing quote */
        { "german", "de", "&bdquo;", "&ldquo;", "&sbquo;", "&lsquo;" },
        /* German guillemets: pointing inwards */
        { "germanguillemets", NULL, "&raquo;", "&laquo;", "&rsaquo;", "&lsaquo;" },
        /* Spanish: guillemets */
        { "spanish", "es", "&laquo;", "&raquo;", "&lsquo;", "&rsquo;" },
        /* Dutch: same marks as German */
        { "dutch", "nl", "&bdquo;", "&ldquo;", "&sbquo;", "&lsquo;" },
        /* Swedish: the same right-hand mark on both sides */
        { "swedish", "sv", "&rdquo;", "&rdquo;", "&rsquo;", "&rsquo;" },
    };

    if (!html) {
        return NULL;
    }
    size_t html_len = strlen(html);

    /* Normalize the language name: lowercase, whitespace removed. */
    char normalized[64] = {0};
    size_t norm_len = 0;
    if (quotes_language) {
        for (const char *src = quotes_language;
             *src && norm_len < sizeof(normalized) - 1; src++) {
            if (!isspace((unsigned char)*src)) {
                normalized[norm_len++] = (char)tolower((unsigned char)*src);
            }
        }
    }

    const char *double_open = NULL;
    const char *double_close = NULL;
    const char *single_open = NULL;
    const char *single_close = NULL;
    for (size_t i = 0; i < sizeof(styles) / sizeof(styles[0]); i++) {
        if (strcmp(normalized, styles[i].name) == 0 ||
            (styles[i].code && strcmp(normalized, styles[i].code) == 0)) {
            double_open = styles[i].double_open;
            double_close = styles[i].double_close;
            single_open = styles[i].single_open;
            single_close = styles[i].single_close;
            break;
        }
    }

    if (!double_open) {
        /* English, unspecified or unknown language: unchanged copy. */
        char *copy = malloc(html_len + 1);
        if (copy) {
            memcpy(copy, html, html_len + 1);
        }
        return copy;
    }

    /* The "+ 1" keeps the buffer non-empty even for an empty input. */
    apex_quote_buf out = { NULL, 0, html_len * 2 + 1 };
    out.data = malloc(out.cap);
    if (!out.data) {
        return NULL;
    }

    const char *read = html;
    while (*read) {
        const char *replacement = NULL;
        size_t consumed = 0;

        if (*read == '&') {
            /* Named entities, all seven bytes long. */
            if (strncmp(read, "&ldquo;", 7) == 0) {
                replacement = double_open;
            } else if (strncmp(read, "&rdquo;", 7) == 0) {
                replacement = double_close;
            } else if (strncmp(read, "&lsquo;", 7) == 0) {
                replacement = single_open;
            } else if (strncmp(read, "&rsquo;", 7) == 0) {
                replacement = single_close;
            }
            consumed = 7;
        } else if ((unsigned char)read[0] == 0xE2 &&
                   (unsigned char)read[1] == 0x80) {
            /* UTF-8 encoded curly quotes: E2 80 9C/9D/98/99. */
            switch ((unsigned char)read[2]) {
            case 0x9C: replacement = double_open;  break;   /* U+201C */
            case 0x9D: replacement = double_close; break;   /* U+201D */
            case 0x98: replacement = single_open;  break;   /* U+2018 */
            case 0x99: replacement = single_close; break;   /* U+2019 */
            default: break;
            }
            consumed = 3;
        }

        bool ok;
        if (replacement) {
            ok = apex_quote_buf_append(&out, replacement, strlen(replacement));
            read += consumed;
        } else {
            ok = apex_quote_buf_append(&out, read, 1);
            read++;
        }
        if (!ok) {
            free(out.data);
            return NULL;
        }
    }

    out.data[out.len] = '\0';
    return out.data;
}
2090
+
2091
+ /**
2092
+ * Apply ARIA labels and accessibility attributes to HTML output
2093
+ * @param html The HTML output
2094
+ * @param document The AST document (currently unused but kept for consistency with other functions)
2095
+ * @return Newly allocated HTML with ARIA attributes injected (must be freed)
2096
+ */
2097
+ char *apex_apply_aria_labels(const char *html, cmark_node *document) {
2098
+ (void)document; /* Currently unused, but kept for API consistency */
2099
+
2100
+ if (!html) return NULL;
2101
+
2102
+ size_t html_len = strlen(html);
2103
+
2104
+ /* Two-pass approach: First pass collects figcaption IDs, second pass injects ARIA attributes */
2105
+
2106
+ /* Pass 1: Collect figcaption IDs and their positions */
2107
+ typedef struct caption_info {
2108
+ const char *figcaption_pos; /* Position in HTML where figcaption starts */
2109
+ char *caption_id; /* ID value (allocated) */
2110
+ const char *figure_start; /* Position of opening <figure> tag */
2111
+ struct caption_info *next;
2112
+ } caption_info;
2113
+
2114
+ caption_info *caption_list = NULL;
2115
+ int table_caption_counter = 0;
2116
+
2117
+ /* First pass: find all figcaptions in table-figures and collect their IDs */
2118
+ const char *search = html;
2119
+ while (*search) {
2120
+ if (*search == '<' && strncmp(search, "<figcaption", 11) == 0) {
2121
+ const char *cap_tag_start = search;
2122
+ const char *cap_tag_end = strchr(search, '>');
2123
+ if (cap_tag_end) {
2124
+ /* Check if we're in a table-figure context */
2125
+ const char *before_cap = search - 1;
2126
+ bool in_table_figure = false;
2127
+ const char *figure_start_pos = NULL;
2128
+ while (before_cap >= html && before_cap > search - 200) {
2129
+ if (*before_cap == '<' && strncmp(before_cap, "<figure", 7) == 0) {
2130
+ const char *class_check = strstr(before_cap, "class=\"table-figure\"");
2131
+ if (!class_check) {
2132
+ class_check = strstr(before_cap, "class='table-figure'");
2133
+ }
2134
+ if (class_check && class_check < cap_tag_start) {
2135
+ in_table_figure = true;
2136
+ figure_start_pos = before_cap;
2137
+ break;
2138
+ }
2139
+ }
2140
+ before_cap--;
2141
+ }
2142
+
2143
+ if (in_table_figure) {
2144
+ /* Check if ID already exists */
2145
+ const char *id_attr = strstr(cap_tag_start, "id=\"");
2146
+ if (!id_attr) {
2147
+ id_attr = strstr(cap_tag_start, "id='");
2148
+ }
2149
+
2150
+ char *caption_id = NULL;
2151
+ if (id_attr && id_attr < cap_tag_end) {
2152
+ /* Extract existing ID */
2153
+ const char *id_start = id_attr + 4;
2154
+ const char *id_end = strchr(id_start, '"');
2155
+ if (!id_end) id_end = strchr(id_start, '\'');
2156
+ if (id_end && id_end > id_start) {
2157
+ size_t id_len = id_end - id_start;
2158
+ caption_id = malloc(id_len + 1);
2159
+ if (caption_id) {
2160
+ memcpy(caption_id, id_start, id_len);
2161
+ caption_id[id_len] = '\0';
2162
+ }
2163
+ }
2164
+ } else {
2165
+ /* Generate ID */
2166
+ table_caption_counter++;
2167
+ caption_id = malloc(64);
2168
+ if (caption_id) {
2169
+ snprintf(caption_id, 64, "table-caption-%d", table_caption_counter);
2170
+ }
2171
+ }
2172
+
2173
+ if (caption_id) {
2174
+ caption_info *info = malloc(sizeof(caption_info));
2175
+ if (info) {
2176
+ info->figcaption_pos = cap_tag_start;
2177
+ info->caption_id = caption_id;
2178
+ info->figure_start = figure_start_pos;
2179
+ info->next = caption_list;
2180
+ caption_list = info;
2181
+ } else {
2182
+ free(caption_id);
2183
+ }
2184
+ }
2185
+ }
2186
+ }
2187
+ }
2188
+ search++;
2189
+ }
2190
+
2191
+ /* Allocate buffer with extra space for ARIA attributes */
2192
+ size_t capacity = html_len + 2048 + (caption_list ? strlen(caption_list->caption_id) * 10 : 0);
2193
+ char *output = malloc(capacity + 1);
2194
+ if (!output) {
2195
+ /* Free caption list */
2196
+ while (caption_list) {
2197
+ caption_info *next = caption_list->next;
2198
+ free(caption_list->caption_id);
2199
+ free(caption_list);
2200
+ caption_list = next;
2201
+ }
2202
+ return strdup(html);
2203
+ }
2204
+
2205
+ const char *read = html;
2206
+ char *write = output;
2207
+ size_t remaining = capacity;
2208
+
2209
+ /* Helper macro to append strings safely */
2210
+ #define APPEND_SAFE(str) do { \
2211
+ size_t len = strlen(str); \
2212
+ if (len <= remaining) { \
2213
+ memcpy(write, str, len); \
2214
+ write += len; \
2215
+ remaining -= len; \
2216
+ } \
2217
+ } while(0)
2218
+
2219
+ /* Helper macro to copy characters safely */
2220
+ #define COPY_CHAR(c) do { \
2221
+ if (remaining > 0) { \
2222
+ *write++ = (c); \
2223
+ remaining--; \
2224
+ } \
2225
+ } while(0)
2226
+
2227
+ while (*read) {
2228
+ /* Check for <nav class="toc"> */
2229
+ if (*read == '<' && strncmp(read, "<nav", 4) == 0) {
2230
+ const char *tag_start = read;
2231
+ const char *tag_end = strchr(read, '>');
2232
+ if (!tag_end) {
2233
+ COPY_CHAR(*read++);
2234
+ continue;
2235
+ }
2236
+
2237
+ /* Check if this is a TOC nav element */
2238
+ const char *class_attr = strstr(tag_start, "class=\"toc\"");
2239
+ if (!class_attr) {
2240
+ class_attr = strstr(tag_start, "class='toc'");
2241
+ }
2242
+
2243
+ if (class_attr && class_attr < tag_end) {
2244
+ /* Check if aria-label already exists */
2245
+ const char *aria_label = strstr(tag_start, "aria-label=");
2246
+ if (!aria_label || aria_label > tag_end) {
2247
+ /* Copy up to just before closing >, add aria-label, then close */
2248
+ size_t prefix_len = tag_end - tag_start;
2249
+ if (prefix_len <= remaining) {
2250
+ memcpy(write, tag_start, prefix_len);
2251
+ write += prefix_len;
2252
+ remaining -= prefix_len;
2253
+ }
2254
+
2255
+ /* Add aria-label before closing > */
2256
+ APPEND_SAFE(" aria-label=\"Table of contents\"");
2257
+ COPY_CHAR('>');
2258
+ read = tag_end + 1;
2259
+ continue;
2260
+ }
2261
+ }
2262
+ }
2263
+
2264
+ /* Check for <figure> */
2265
+ if (*read == '<' && strncmp(read, "<figure", 7) == 0) {
2266
+ const char *tag_start = read;
2267
+ const char *tag_end = strchr(read, '>');
2268
+ if (!tag_end) {
2269
+ COPY_CHAR(*read++);
2270
+ continue;
2271
+ }
2272
+
2273
+ /* Check if role already exists */
2274
+ const char *role_attr = strstr(tag_start, "role=");
2275
+ if (!role_attr || role_attr > tag_end) {
2276
+ /* Copy up to just before closing >, add role, then close */
2277
+ size_t prefix_len = tag_end - tag_start;
2278
+ if (prefix_len <= remaining) {
2279
+ memcpy(write, tag_start, prefix_len);
2280
+ write += prefix_len;
2281
+ remaining -= prefix_len;
2282
+ }
2283
+
2284
+ /* Add role="figure" before closing > */
2285
+ APPEND_SAFE(" role=\"figure\"");
2286
+ COPY_CHAR('>');
2287
+ read = tag_end + 1;
2288
+ continue;
2289
+ }
2290
+ }
2291
+
2292
+ /* Check for <table> */
2293
+ if (*read == '<' && strncmp(read, "<table", 6) == 0) {
2294
+ const char *tag_start = read;
2295
+ const char *tag_end = strchr(read, '>');
2296
+ if (!tag_end) {
2297
+ COPY_CHAR(*read++);
2298
+ continue;
2299
+ }
2300
+
2301
+ /* Check if role already exists */
2302
+ const char *role_attr = strstr(tag_start, "role=");
2303
+ bool needs_role = (!role_attr || role_attr > tag_end);
2304
+
2305
+ /* Check if aria-describedby already exists */
2306
+ const char *aria_desc = strstr(tag_start, "aria-describedby=");
2307
+ bool has_aria_desc = (aria_desc && aria_desc < tag_end);
2308
+
2309
+ /* Check if we're in a table-figure context and look for figcaption */
2310
+ bool in_table_figure = false;
2311
+ const char *before_table = read - 1;
2312
+ while (before_table >= html && before_table > read - 500) {
2313
+ if (*before_table == '<' && strncmp(before_table, "<figure", 7) == 0) {
2314
+ const char *class_check = strstr(before_table, "class=\"table-figure\"");
2315
+ if (!class_check) {
2316
+ class_check = strstr(before_table, "class='table-figure'");
2317
+ }
2318
+ if (class_check && class_check < tag_start) {
2319
+ in_table_figure = true;
2320
+ break;
2321
+ }
2322
+ }
2323
+ before_table--;
2324
+ }
2325
+
2326
+ /* Find figcaption ID for this table by checking caption_list */
2327
+ char *caption_id = NULL;
2328
+ if (in_table_figure && !has_aria_desc) {
2329
+ /* Find the figure_start for this table */
2330
+ const char *this_figure_start = NULL;
2331
+ const char *find_fig = read - 1;
2332
+ while (find_fig >= html && find_fig > read - 500) {
2333
+ if (*find_fig == '<' && strncmp(find_fig, "<figure", 7) == 0) {
2334
+ const char *class_check = strstr(find_fig, "class=\"table-figure\"");
2335
+ if (!class_check) {
2336
+ class_check = strstr(find_fig, "class='table-figure'");
2337
+ }
2338
+ if (class_check && class_check < tag_start) {
2339
+ this_figure_start = find_fig;
2340
+ break;
2341
+ }
2342
+ }
2343
+ find_fig--;
2344
+ }
2345
+
2346
+ /* Look for a caption in this figure (either before or after table) */
2347
+ if (this_figure_start) {
2348
+ for (caption_info *cap = caption_list; cap; cap = cap->next) {
2349
+ if (cap->figure_start == this_figure_start) {
2350
+ /* Found a caption in the same figure - use it regardless of position */
2351
+ caption_id = strdup(cap->caption_id);
2352
+ break;
2353
+ }
2354
+ }
2355
+ }
2356
+ }
2357
+
2358
+ if (needs_role || caption_id) {
2359
+ /* Copy up to just before closing >, add attributes, then close */
2360
+ size_t prefix_len = tag_end - tag_start;
2361
+ if (prefix_len <= remaining) {
2362
+ memcpy(write, tag_start, prefix_len);
2363
+ write += prefix_len;
2364
+ remaining -= prefix_len;
2365
+ }
2366
+
2367
+ /* Add role="table" if needed */
2368
+ if (needs_role) {
2369
+ APPEND_SAFE(" role=\"table\"");
2370
+ }
2371
+
2372
+ /* Add aria-describedby if we found a caption ID */
2373
+ if (caption_id) {
2374
+ char aria_desc_str[256];
2375
+ snprintf(aria_desc_str, sizeof(aria_desc_str), " aria-describedby=\"%s\"", caption_id);
2376
+ APPEND_SAFE(aria_desc_str);
2377
+ free(caption_id);
2378
+ }
2379
+
2380
+ COPY_CHAR('>');
2381
+ read = tag_end + 1;
2382
+ continue;
2383
+ }
2384
+ }
2385
+
2386
+ /* Check for <figcaption> within table-figure to add IDs if missing */
2387
+ if (*read == '<' && strncmp(read, "<figcaption", 11) == 0) {
2388
+ const char *tag_start = read;
2389
+ const char *tag_end = strchr(read, '>');
2390
+ if (!tag_end) {
2391
+ COPY_CHAR(*read++);
2392
+ continue;
2393
+ }
2394
+
2395
+ /* Find this figcaption in our caption_list */
2396
+ caption_info *this_caption = NULL;
2397
+ for (caption_info *cap = caption_list; cap; cap = cap->next) {
2398
+ if (cap->figcaption_pos == tag_start) {
2399
+ this_caption = cap;
2400
+ break;
2401
+ }
2402
+ }
2403
+
2404
+ if (this_caption) {
2405
+ /* Check if ID already exists in original HTML */
2406
+ const char *id_attr = strstr(tag_start, "id=\"");
2407
+ if (!id_attr) {
2408
+ id_attr = strstr(tag_start, "id='");
2409
+ }
2410
+
2411
+ if (!id_attr || id_attr > tag_end) {
2412
+ /* No ID in original, add the one we generated/collected */
2413
+ size_t prefix_len = tag_end - tag_start;
2414
+ if (prefix_len <= remaining) {
2415
+ memcpy(write, tag_start, prefix_len);
2416
+ write += prefix_len;
2417
+ remaining -= prefix_len;
2418
+ }
2419
+
2420
+ /* Add id attribute */
2421
+ char id_attr_str[128];
2422
+ snprintf(id_attr_str, sizeof(id_attr_str), " id=\"%s\"", this_caption->caption_id);
2423
+ APPEND_SAFE(id_attr_str);
2424
+ COPY_CHAR('>');
2425
+ read = tag_end + 1;
2426
+ continue;
2427
+ }
2428
+ }
2429
+ }
2430
+
2431
+ /* Default: copy character */
2432
+ COPY_CHAR(*read++);
2433
+ }
2434
+
2435
+ #undef APPEND_SAFE
2436
+ #undef COPY_CHAR
2437
+
2438
+ *write = '\0';
2439
+
2440
+ /* Free caption list */
2441
+ while (caption_list) {
2442
+ caption_info *next = caption_list->next;
2443
+ free(caption_list->caption_id);
2444
+ free(caption_list);
2445
+ caption_list = next;
2446
+ }
2447
+
2448
+ return output;
2449
+ }
2450
+
2451
/* Helper: strip ASCII whitespace from both ends of the len-byte attribute
 * value at s. Nothing is copied: *out_s points into s at the first kept
 * byte and *out_len is the number of kept bytes (0 when the value is empty
 * or whitespace only). */
static void apex_trim_attr_value(const char *s, size_t len,
                                 const char **out_s, size_t *out_len) {
    size_t first = 0;
    size_t past_last = len;

    /* Advance over leading whitespace. */
    while (first < past_last && isspace((unsigned char)s[first])) {
        first++;
    }
    /* Back up over trailing whitespace. */
    while (past_last > first && isspace((unsigned char)s[past_last - 1])) {
        past_last--;
    }

    *out_s = s + first;
    *out_len = past_last - first;
}
2461
+
2462
/* Growable output buffer for apex_convert_image_captions.
 * Invariant: cap > len, so one byte is always free for the terminator. */
typedef struct {
    char *data;
    size_t len;
    size_t cap;
} apex_imgcap_buf;

/* Append n bytes from src, growing the buffer when needed.
 * Returns false (buffer left untouched) if reallocation fails. */
static bool apex_imgcap_buf_append(apex_imgcap_buf *buf, const char *src, size_t n) {
    if (n >= buf->cap - buf->len) {
        size_t new_cap = (buf->len + n + 1) * 2;
        char *grown = realloc(buf->data, new_cap);
        if (!grown) {
            return false;
        }
        buf->data = grown;
        buf->cap = new_cap;
    }
    memcpy(buf->data + buf->len, src, n);
    buf->len += n;
    return true;
}

/**
 * Convert <img> tags to <figure> with <figcaption> when alt/title/caption are present.
 *
 * Caption selection for each <img ...> tag:
 *   - a non-empty caption="TEXT" attribute always wins (and suppresses the
 *     title/alt fallback even if TEXT is only whitespace);
 *   - otherwise, when enable_image_captions is set, the title is used, or
 *     the alt text when there is no title and title_captions_only is false.
 * The chosen text is trimmed; if nothing remains the tag is copied
 * unchanged. Otherwise the output is
 *   <figure><img ...><figcaption>TEXT</figcaption></figure>
 * with any caption= attribute removed from the emitted <img> tag. The
 * caption text is copied exactly as it appears in the attribute value.
 *
 * Valueless (boolean) attributes such as "ismap" are skipped without
 * consuming the attribute that follows them.
 *
 * @param html                  NUL-terminated HTML; may be NULL.
 * @param enable_image_captions Allow title/alt to act as the caption.
 * @param title_captions_only   With enable_image_captions: use title only.
 * @return Newly allocated string (caller frees), or NULL if html is NULL or
 *         allocation fails.
 */
char *apex_convert_image_captions(const char *html, bool enable_image_captions, bool title_captions_only) {
    if (!html) return NULL;

    size_t len = strlen(html);
    /* Allow extra space for <figure> and <figcaption> wrappers */
    apex_imgcap_buf out = { NULL, 0, len * 2 + 128 };
    out.data = malloc(out.cap);
    if (!out.data) return NULL;

    const char *read = html;
    bool ok = true;

    while (ok && *read) {
        /* Look for "<img" (any case) followed by whitespace, '>' or '/'.
         * The && chain stops at the first mismatch, so it never reads past
         * the terminating NUL. */
        bool is_img = *read == '<' &&
                      (read[1] == 'i' || read[1] == 'I') &&
                      (read[2] == 'm' || read[2] == 'M') &&
                      (read[3] == 'g' || read[3] == 'G') &&
                      (read[4] == ' ' || read[4] == '\t' || read[4] == '\r' ||
                       read[4] == '\n' || read[4] == '>' || read[4] == '/');
        if (!is_img) {
            /* Default: copy character */
            ok = apex_imgcap_buf_append(&out, read, 1);
            read++;
            continue;
        }

        const char *tag_start = read;

        /* Find the closing '>' of the tag, ignoring any '>' inside quotes. */
        const char *tag_end = read + 4;
        char quote_char = '\0';
        for (; *tag_end; tag_end++) {
            if (quote_char != '\0') {
                if (*tag_end == quote_char) {
                    quote_char = '\0';
                }
            } else if (*tag_end == '"' || *tag_end == '\'') {
                quote_char = *tag_end;
            } else if (*tag_end == '>') {
                break;
            }
        }

        if (!*tag_end) {
            /* Malformed (unterminated) tag - copy the rest and stop */
            ok = apex_imgcap_buf_append(&out, read, strlen(read));
            break;
        }

        /* Parse attributes between "<img" and '>' */
        const char *attr_start = tag_start + 4;
        const char *attr_end = tag_end;
        const char *title_val = NULL;
        size_t title_len = 0;
        const char *alt_val = NULL;
        size_t alt_len = 0;
        const char *caption_val = NULL;
        size_t caption_len = 0;
        const char *caption_attr_start = NULL; /* span of caption attr (for stripping) */
        const char *caption_attr_end = NULL;

        const char *q = attr_start;
        while (q < attr_end) {
            /* Skip whitespace */
            while (q < attr_end && isspace((unsigned char)*q)) q++;
            if (q >= attr_end || *q == '/' || *q == '>') break;

            const char *name_start = q;
            while (q < attr_end && !isspace((unsigned char)*q) &&
                   *q != '=' && *q != '>' && *q != '/') {
                q++;
            }
            const char *name_end = q;

            /* Skip whitespace before '=' */
            while (q < attr_end && isspace((unsigned char)*q)) q++;
            if (q >= attr_end || *q != '=') {
                /* Valueless attribute. q already sits on the next token (or
                 * on '/' or the end), so resume parsing right here; skipping
                 * any further would swallow the following attribute. The
                 * name is at least one byte long at this point, so the loop
                 * always makes progress. */
                continue;
            }
            q++; /* skip '=' */
            while (q < attr_end && isspace((unsigned char)*q)) q++;
            if (q >= attr_end) break;

            /* Parse value: quoted, or unquoted up to whitespace */
            const char *value_start = q;
            const char *value_end = NULL;
            if (*q == '"' || *q == '\'') {
                char qc = *q;
                value_start = q + 1;
                q++;
                while (q < attr_end && *q != qc) q++;
                value_end = q;
                if (q < attr_end) q++; /* skip closing quote */
            } else {
                while (q < attr_end && !isspace((unsigned char)*q) &&
                       *q != '>') {
                    q++;
                }
                value_end = q;
            }

            /* Compare attribute name case-insensitively */
            size_t name_len = (size_t)(name_end - name_start);
            size_t value_len = (size_t)(value_end - value_start);
            if (name_len == 5 && strncasecmp(name_start, "title", 5) == 0) {
                title_val = value_start;
                title_len = value_len;
            } else if (name_len == 3 && strncasecmp(name_start, "alt", 3) == 0) {
                alt_val = value_start;
                alt_len = value_len;
            } else if (name_len == 7 && strncasecmp(name_start, "caption", 7) == 0) {
                caption_val = value_start;
                caption_len = value_len;
                /* Include leading space so we strip " caption=\"...\"" */
                caption_attr_start =
                    (name_start > attr_start && isspace((unsigned char)name_start[-1]))
                        ? name_start - 1
                        : name_start;
                caption_attr_end = q;
            }
        }

        /* Determine caption text: caption= always wins; else title or alt per options */
        const char *caption = NULL;
        size_t caption_text_len = 0;
        if (caption_val != NULL && caption_len > 0) {
            apex_trim_attr_value(caption_val, caption_len, &caption, &caption_text_len);
        } else if (enable_image_captions) {
            if (title_val && title_len > 0) {
                apex_trim_attr_value(title_val, title_len, &caption, &caption_text_len);
            } else if (!title_captions_only && alt_val && alt_len > 0) {
                apex_trim_attr_value(alt_val, alt_len, &caption, &caption_text_len);
            }
        }

        size_t tag_len = (size_t)(tag_end - tag_start + 1);

        if (!caption || caption_text_len == 0) {
            /* No caption - copy tag as-is */
            ok = apex_imgcap_buf_append(&out, tag_start, tag_len);
            read = tag_end + 1;
            continue;
        }

        /* We have caption text - wrap in
         * <figure><img ...><figcaption>...</figcaption></figure>.
         * All source pointers refer to html, never to out.data, so buffer
         * growth between the appends cannot invalidate them. */
        ok = apex_imgcap_buf_append(&out, "<figure>", 8);

        if (ok && caption_attr_start != NULL && caption_attr_end != NULL) {
            /* Emit the <img> tag without its caption attribute */
            ok = apex_imgcap_buf_append(&out, tag_start,
                                        (size_t)(caption_attr_start - tag_start)) &&
                 apex_imgcap_buf_append(&out, caption_attr_end,
                                        (size_t)(tag_end + 1 - caption_attr_end));
        } else if (ok) {
            ok = apex_imgcap_buf_append(&out, tag_start, tag_len);
        }

        ok = ok &&
             apex_imgcap_buf_append(&out, "<figcaption>", 12) &&
             apex_imgcap_buf_append(&out, caption, caption_text_len) &&
             apex_imgcap_buf_append(&out, "</figcaption>", 13) &&
             apex_imgcap_buf_append(&out, "</figure>", 9);

        read = tag_end + 1;
    }

    if (!ok) {
        free(out.data);
        return NULL;
    }

    out.data[out.len] = '\0';
    return out.data;
}