daqing_kramdown 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (557) hide show
  1. checksums.yaml +7 -0
  2. data/AUTHORS +1 -0
  3. data/CONTRIBUTERS +78 -0
  4. data/COPYING +30 -0
  5. data/README.md +71 -0
  6. data/VERSION +1 -0
  7. data/bin/daqing_kramdown +132 -0
  8. data/data/kramdown/document.html +22 -0
  9. data/data/kramdown/document.latex +50 -0
  10. data/lib/kramdown.rb +10 -0
  11. data/lib/kramdown/converter.rb +68 -0
  12. data/lib/kramdown/converter/base.rb +261 -0
  13. data/lib/kramdown/converter/hash_ast.rb +38 -0
  14. data/lib/kramdown/converter/html.rb +535 -0
  15. data/lib/kramdown/converter/kramdown.rb +448 -0
  16. data/lib/kramdown/converter/latex.rb +625 -0
  17. data/lib/kramdown/converter/man.rb +300 -0
  18. data/lib/kramdown/converter/math_engine/mathjax.rb +32 -0
  19. data/lib/kramdown/converter/remove_html_tags.rb +57 -0
  20. data/lib/kramdown/converter/syntax_highlighter.rb +56 -0
  21. data/lib/kramdown/converter/syntax_highlighter/minted.rb +35 -0
  22. data/lib/kramdown/converter/syntax_highlighter/rouge.rb +85 -0
  23. data/lib/kramdown/converter/toc.rb +69 -0
  24. data/lib/kramdown/document.rb +139 -0
  25. data/lib/kramdown/element.rb +551 -0
  26. data/lib/kramdown/error.rb +17 -0
  27. data/lib/kramdown/options.rb +604 -0
  28. data/lib/kramdown/parser.rb +26 -0
  29. data/lib/kramdown/parser/base.rb +131 -0
  30. data/lib/kramdown/parser/html.rb +608 -0
  31. data/lib/kramdown/parser/kramdown.rb +376 -0
  32. data/lib/kramdown/parser/kramdown/abbreviation.rb +78 -0
  33. data/lib/kramdown/parser/kramdown/autolink.rb +31 -0
  34. data/lib/kramdown/parser/kramdown/blank_line.rb +30 -0
  35. data/lib/kramdown/parser/kramdown/block_boundary.rb +34 -0
  36. data/lib/kramdown/parser/kramdown/blockquote.rb +38 -0
  37. data/lib/kramdown/parser/kramdown/codeblock.rb +57 -0
  38. data/lib/kramdown/parser/kramdown/codespan.rb +54 -0
  39. data/lib/kramdown/parser/kramdown/emphasis.rb +61 -0
  40. data/lib/kramdown/parser/kramdown/eob.rb +26 -0
  41. data/lib/kramdown/parser/kramdown/escaped_chars.rb +25 -0
  42. data/lib/kramdown/parser/kramdown/extensions.rb +214 -0
  43. data/lib/kramdown/parser/kramdown/footnote.rb +64 -0
  44. data/lib/kramdown/parser/kramdown/header.rb +70 -0
  45. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +27 -0
  46. data/lib/kramdown/parser/kramdown/html.rb +162 -0
  47. data/lib/kramdown/parser/kramdown/html_entity.rb +34 -0
  48. data/lib/kramdown/parser/kramdown/line_break.rb +25 -0
  49. data/lib/kramdown/parser/kramdown/link.rb +149 -0
  50. data/lib/kramdown/parser/kramdown/list.rb +284 -0
  51. data/lib/kramdown/parser/kramdown/math.rb +53 -0
  52. data/lib/kramdown/parser/kramdown/paragraph.rb +62 -0
  53. data/lib/kramdown/parser/kramdown/smart_quotes.rb +174 -0
  54. data/lib/kramdown/parser/kramdown/table.rb +171 -0
  55. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +44 -0
  56. data/lib/kramdown/parser/markdown.rb +57 -0
  57. data/lib/kramdown/utils.rb +45 -0
  58. data/lib/kramdown/utils/configurable.rb +45 -0
  59. data/lib/kramdown/utils/entities.rb +344 -0
  60. data/lib/kramdown/utils/html.rb +84 -0
  61. data/lib/kramdown/utils/lru_cache.rb +41 -0
  62. data/lib/kramdown/utils/string_scanner.rb +81 -0
  63. data/lib/kramdown/utils/unidecoder.rb +50 -0
  64. data/lib/kramdown/version.rb +15 -0
  65. data/man/man1/kramdown.1 +0 -0
  66. data/test/run_tests.rb +46 -0
  67. data/test/test_files.rb +298 -0
  68. data/test/test_location.rb +216 -0
  69. data/test/test_string_scanner_kramdown.rb +27 -0
  70. data/test/testcases/block/01_blank_line/spaces.html +1 -0
  71. data/test/testcases/block/01_blank_line/spaces.text +3 -0
  72. data/test/testcases/block/01_blank_line/tabs.html +1 -0
  73. data/test/testcases/block/01_blank_line/tabs.text +6 -0
  74. data/test/testcases/block/02_eob/beginning.html +1 -0
  75. data/test/testcases/block/02_eob/beginning.text +3 -0
  76. data/test/testcases/block/02_eob/end.html +1 -0
  77. data/test/testcases/block/02_eob/end.text +3 -0
  78. data/test/testcases/block/02_eob/middle.html +1 -0
  79. data/test/testcases/block/02_eob/middle.text +5 -0
  80. data/test/testcases/block/03_paragraph/indented.html +18 -0
  81. data/test/testcases/block/03_paragraph/indented.html.gfm +18 -0
  82. data/test/testcases/block/03_paragraph/indented.text +19 -0
  83. data/test/testcases/block/03_paragraph/line_break_last_line.html +9 -0
  84. data/test/testcases/block/03_paragraph/line_break_last_line.text +9 -0
  85. data/test/testcases/block/03_paragraph/no_newline_at_end.html +5 -0
  86. data/test/testcases/block/03_paragraph/no_newline_at_end.text +5 -0
  87. data/test/testcases/block/03_paragraph/one_para.html +1 -0
  88. data/test/testcases/block/03_paragraph/one_para.text +1 -0
  89. data/test/testcases/block/03_paragraph/standalone_image.html +8 -0
  90. data/test/testcases/block/03_paragraph/standalone_image.text +6 -0
  91. data/test/testcases/block/03_paragraph/two_para.html +4 -0
  92. data/test/testcases/block/03_paragraph/two_para.text +4 -0
  93. data/test/testcases/block/03_paragraph/with_html_to_native.html +1 -0
  94. data/test/testcases/block/03_paragraph/with_html_to_native.options +1 -0
  95. data/test/testcases/block/03_paragraph/with_html_to_native.text +1 -0
  96. data/test/testcases/block/04_header/atx_header.html +57 -0
  97. data/test/testcases/block/04_header/atx_header.text +54 -0
  98. data/test/testcases/block/04_header/atx_header_no_newline_at_end.html +1 -0
  99. data/test/testcases/block/04_header/atx_header_no_newline_at_end.text +1 -0
  100. data/test/testcases/block/04_header/header_type_offset.html +11 -0
  101. data/test/testcases/block/04_header/header_type_offset.kramdown +12 -0
  102. data/test/testcases/block/04_header/header_type_offset.latex +12 -0
  103. data/test/testcases/block/04_header/header_type_offset.options +2 -0
  104. data/test/testcases/block/04_header/header_type_offset.text +13 -0
  105. data/test/testcases/block/04_header/setext_header.html +32 -0
  106. data/test/testcases/block/04_header/setext_header.text +39 -0
  107. data/test/testcases/block/04_header/setext_header_no_newline_at_end.html +1 -0
  108. data/test/testcases/block/04_header/setext_header_no_newline_at_end.text +2 -0
  109. data/test/testcases/block/04_header/with_auto_id_prefix.html +3 -0
  110. data/test/testcases/block/04_header/with_auto_id_prefix.options +2 -0
  111. data/test/testcases/block/04_header/with_auto_id_prefix.text +3 -0
  112. data/test/testcases/block/04_header/with_auto_id_stripping.html +1 -0
  113. data/test/testcases/block/04_header/with_auto_id_stripping.options +1 -0
  114. data/test/testcases/block/04_header/with_auto_id_stripping.text +1 -0
  115. data/test/testcases/block/04_header/with_auto_ids.html +21 -0
  116. data/test/testcases/block/04_header/with_auto_ids.options +2 -0
  117. data/test/testcases/block/04_header/with_auto_ids.text +24 -0
  118. data/test/testcases/block/05_blockquote/indented.html +25 -0
  119. data/test/testcases/block/05_blockquote/indented.text +14 -0
  120. data/test/testcases/block/05_blockquote/lazy.html +34 -0
  121. data/test/testcases/block/05_blockquote/lazy.text +20 -0
  122. data/test/testcases/block/05_blockquote/nested.html +10 -0
  123. data/test/testcases/block/05_blockquote/nested.text +6 -0
  124. data/test/testcases/block/05_blockquote/no_newline_at_end.html +4 -0
  125. data/test/testcases/block/05_blockquote/no_newline_at_end.text +2 -0
  126. data/test/testcases/block/05_blockquote/very_long_line.html +3 -0
  127. data/test/testcases/block/05_blockquote/very_long_line.text +1 -0
  128. data/test/testcases/block/05_blockquote/with_code_blocks.html +15 -0
  129. data/test/testcases/block/05_blockquote/with_code_blocks.text +11 -0
  130. data/test/testcases/block/06_codeblock/disable-highlighting.html +4 -0
  131. data/test/testcases/block/06_codeblock/disable-highlighting.options +1 -0
  132. data/test/testcases/block/06_codeblock/disable-highlighting.text +4 -0
  133. data/test/testcases/block/06_codeblock/error.html +4 -0
  134. data/test/testcases/block/06_codeblock/error.text +4 -0
  135. data/test/testcases/block/06_codeblock/guess_lang_css_class.html +15 -0
  136. data/test/testcases/block/06_codeblock/guess_lang_css_class.options +2 -0
  137. data/test/testcases/block/06_codeblock/guess_lang_css_class.text +13 -0
  138. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.latex +9 -0
  139. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.options +4 -0
  140. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.text +5 -0
  141. data/test/testcases/block/06_codeblock/highlighting-minted.latex +8 -0
  142. data/test/testcases/block/06_codeblock/highlighting-minted.options +3 -0
  143. data/test/testcases/block/06_codeblock/highlighting-minted.text +4 -0
  144. data/test/testcases/block/06_codeblock/highlighting-opts.html +6 -0
  145. data/test/testcases/block/06_codeblock/highlighting-opts.options +7 -0
  146. data/test/testcases/block/06_codeblock/highlighting-opts.text +4 -0
  147. data/test/testcases/block/06_codeblock/highlighting.html +5 -0
  148. data/test/testcases/block/06_codeblock/highlighting.options +5 -0
  149. data/test/testcases/block/06_codeblock/highlighting.text +4 -0
  150. data/test/testcases/block/06_codeblock/issue_gh45.html +164 -0
  151. data/test/testcases/block/06_codeblock/issue_gh45.test +188 -0
  152. data/test/testcases/block/06_codeblock/lazy.html +4 -0
  153. data/test/testcases/block/06_codeblock/lazy.text +5 -0
  154. data/test/testcases/block/06_codeblock/no_newline_at_end.html +2 -0
  155. data/test/testcases/block/06_codeblock/no_newline_at_end.text +1 -0
  156. data/test/testcases/block/06_codeblock/no_newline_at_end_1.html +2 -0
  157. data/test/testcases/block/06_codeblock/no_newline_at_end_1.text +2 -0
  158. data/test/testcases/block/06_codeblock/normal.html +13 -0
  159. data/test/testcases/block/06_codeblock/normal.text +10 -0
  160. data/test/testcases/block/06_codeblock/rouge/disabled.html +2 -0
  161. data/test/testcases/block/06_codeblock/rouge/disabled.options +4 -0
  162. data/test/testcases/block/06_codeblock/rouge/disabled.text +1 -0
  163. data/test/testcases/block/06_codeblock/rouge/multiple.html +11 -0
  164. data/test/testcases/block/06_codeblock/rouge/multiple.options +4 -0
  165. data/test/testcases/block/06_codeblock/rouge/multiple.text +11 -0
  166. data/test/testcases/block/06_codeblock/rouge/simple.html +10 -0
  167. data/test/testcases/block/06_codeblock/rouge/simple.options +3 -0
  168. data/test/testcases/block/06_codeblock/rouge/simple.text +9 -0
  169. data/test/testcases/block/06_codeblock/tilde_syntax.html +7 -0
  170. data/test/testcases/block/06_codeblock/tilde_syntax.text +9 -0
  171. data/test/testcases/block/06_codeblock/whitespace.html +3 -0
  172. data/test/testcases/block/06_codeblock/whitespace.text +3 -0
  173. data/test/testcases/block/06_codeblock/with_blank_line.html +13 -0
  174. data/test/testcases/block/06_codeblock/with_blank_line.text +12 -0
  175. data/test/testcases/block/06_codeblock/with_eob_marker.html +6 -0
  176. data/test/testcases/block/06_codeblock/with_eob_marker.text +5 -0
  177. data/test/testcases/block/06_codeblock/with_ial.html +6 -0
  178. data/test/testcases/block/06_codeblock/with_ial.text +5 -0
  179. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.html +24 -0
  180. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.options +2 -0
  181. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.text +33 -0
  182. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.html +8 -0
  183. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.options +2 -0
  184. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.text +11 -0
  185. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.html +3 -0
  186. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.options +2 -0
  187. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.text +4 -0
  188. data/test/testcases/block/07_horizontal_rule/error.html +7 -0
  189. data/test/testcases/block/07_horizontal_rule/error.text +7 -0
  190. data/test/testcases/block/07_horizontal_rule/normal.html +19 -0
  191. data/test/testcases/block/07_horizontal_rule/normal.text +20 -0
  192. data/test/testcases/block/07_horizontal_rule/sepspaces.html +3 -0
  193. data/test/testcases/block/07_horizontal_rule/sepspaces.text +3 -0
  194. data/test/testcases/block/07_horizontal_rule/septabs.html +3 -0
  195. data/test/testcases/block/07_horizontal_rule/septabs.text +3 -0
  196. data/test/testcases/block/08_list/brackets_in_item.latex +3 -0
  197. data/test/testcases/block/08_list/brackets_in_item.text +1 -0
  198. data/test/testcases/block/08_list/escaping.html +17 -0
  199. data/test/testcases/block/08_list/escaping.text +17 -0
  200. data/test/testcases/block/08_list/item_ial.html +10 -0
  201. data/test/testcases/block/08_list/item_ial.text +8 -0
  202. data/test/testcases/block/08_list/lazy.html +39 -0
  203. data/test/testcases/block/08_list/lazy.text +29 -0
  204. data/test/testcases/block/08_list/lazy_and_nested.html +9 -0
  205. data/test/testcases/block/08_list/lazy_and_nested.text +4 -0
  206. data/test/testcases/block/08_list/list_and_hr.html +9 -0
  207. data/test/testcases/block/08_list/list_and_hr.text +5 -0
  208. data/test/testcases/block/08_list/list_and_others.html +40 -0
  209. data/test/testcases/block/08_list/list_and_others.text +26 -0
  210. data/test/testcases/block/08_list/mixed.html +117 -0
  211. data/test/testcases/block/08_list/mixed.text +66 -0
  212. data/test/testcases/block/08_list/nested.html +17 -0
  213. data/test/testcases/block/08_list/nested.text +7 -0
  214. data/test/testcases/block/08_list/other_first_element.html +39 -0
  215. data/test/testcases/block/08_list/other_first_element.text +18 -0
  216. data/test/testcases/block/08_list/simple_ol.html +19 -0
  217. data/test/testcases/block/08_list/simple_ol.text +13 -0
  218. data/test/testcases/block/08_list/simple_ul.html +48 -0
  219. data/test/testcases/block/08_list/simple_ul.text +36 -0
  220. data/test/testcases/block/08_list/single_item.html +3 -0
  221. data/test/testcases/block/08_list/single_item.text +1 -0
  222. data/test/testcases/block/08_list/special_cases.html +62 -0
  223. data/test/testcases/block/08_list/special_cases.text +40 -0
  224. data/test/testcases/block/09_html/comment.html +18 -0
  225. data/test/testcases/block/09_html/comment.text +15 -0
  226. data/test/testcases/block/09_html/content_model/deflists.html +6 -0
  227. data/test/testcases/block/09_html/content_model/deflists.options +1 -0
  228. data/test/testcases/block/09_html/content_model/deflists.text +6 -0
  229. data/test/testcases/block/09_html/content_model/tables.html +14 -0
  230. data/test/testcases/block/09_html/content_model/tables.options +1 -0
  231. data/test/testcases/block/09_html/content_model/tables.text +14 -0
  232. data/test/testcases/block/09_html/html5_attributes.html +15 -0
  233. data/test/testcases/block/09_html/html5_attributes.text +15 -0
  234. data/test/testcases/block/09_html/html_after_block.html +7 -0
  235. data/test/testcases/block/09_html/html_after_block.text +5 -0
  236. data/test/testcases/block/09_html/html_and_codeblocks.html +15 -0
  237. data/test/testcases/block/09_html/html_and_codeblocks.options +1 -0
  238. data/test/testcases/block/09_html/html_and_codeblocks.text +13 -0
  239. data/test/testcases/block/09_html/html_and_headers.html +5 -0
  240. data/test/testcases/block/09_html/html_and_headers.text +6 -0
  241. data/test/testcases/block/09_html/html_to_native/code.html +10 -0
  242. data/test/testcases/block/09_html/html_to_native/code.text +9 -0
  243. data/test/testcases/block/09_html/html_to_native/comment.html +7 -0
  244. data/test/testcases/block/09_html/html_to_native/comment.text +8 -0
  245. data/test/testcases/block/09_html/html_to_native/emphasis.html +6 -0
  246. data/test/testcases/block/09_html/html_to_native/emphasis.text +6 -0
  247. data/test/testcases/block/09_html/html_to_native/entity.html +1 -0
  248. data/test/testcases/block/09_html/html_to_native/entity.text +1 -0
  249. data/test/testcases/block/09_html/html_to_native/header.html +6 -0
  250. data/test/testcases/block/09_html/html_to_native/header.options +2 -0
  251. data/test/testcases/block/09_html/html_to_native/header.text +6 -0
  252. data/test/testcases/block/09_html/html_to_native/list_dl.html +8 -0
  253. data/test/testcases/block/09_html/html_to_native/list_dl.text +8 -0
  254. data/test/testcases/block/09_html/html_to_native/list_ol.html +15 -0
  255. data/test/testcases/block/09_html/html_to_native/list_ol.text +17 -0
  256. data/test/testcases/block/09_html/html_to_native/list_ul.html +19 -0
  257. data/test/testcases/block/09_html/html_to_native/list_ul.text +22 -0
  258. data/test/testcases/block/09_html/html_to_native/options +1 -0
  259. data/test/testcases/block/09_html/html_to_native/paragraph.html +3 -0
  260. data/test/testcases/block/09_html/html_to_native/paragraph.text +4 -0
  261. data/test/testcases/block/09_html/html_to_native/table_normal.html +12 -0
  262. data/test/testcases/block/09_html/html_to_native/table_normal.text +12 -0
  263. data/test/testcases/block/09_html/html_to_native/table_simple.html +61 -0
  264. data/test/testcases/block/09_html/html_to_native/table_simple.text +71 -0
  265. data/test/testcases/block/09_html/html_to_native/typography.html +1 -0
  266. data/test/testcases/block/09_html/html_to_native/typography.text +1 -0
  267. data/test/testcases/block/09_html/invalid_html_1.html +5 -0
  268. data/test/testcases/block/09_html/invalid_html_1.text +5 -0
  269. data/test/testcases/block/09_html/invalid_html_2.html +5 -0
  270. data/test/testcases/block/09_html/invalid_html_2.text +5 -0
  271. data/test/testcases/block/09_html/markdown_attr.html +38 -0
  272. data/test/testcases/block/09_html/markdown_attr.text +38 -0
  273. data/test/testcases/block/09_html/not_parsed.html +24 -0
  274. data/test/testcases/block/09_html/not_parsed.text +24 -0
  275. data/test/testcases/block/09_html/parse_as_raw.html +35 -0
  276. data/test/testcases/block/09_html/parse_as_raw.htmlinput +34 -0
  277. data/test/testcases/block/09_html/parse_as_raw.options +1 -0
  278. data/test/testcases/block/09_html/parse_as_raw.text +33 -0
  279. data/test/testcases/block/09_html/parse_as_span.html +12 -0
  280. data/test/testcases/block/09_html/parse_as_span.htmlinput +12 -0
  281. data/test/testcases/block/09_html/parse_as_span.options +1 -0
  282. data/test/testcases/block/09_html/parse_as_span.text +9 -0
  283. data/test/testcases/block/09_html/parse_block_html.html +21 -0
  284. data/test/testcases/block/09_html/parse_block_html.options +1 -0
  285. data/test/testcases/block/09_html/parse_block_html.text +17 -0
  286. data/test/testcases/block/09_html/processing_instruction.html +12 -0
  287. data/test/testcases/block/09_html/processing_instruction.text +12 -0
  288. data/test/testcases/block/09_html/simple.html +60 -0
  289. data/test/testcases/block/09_html/simple.options +1 -0
  290. data/test/testcases/block/09_html/simple.text +55 -0
  291. data/test/testcases/block/09_html/standalone_image_in_div.htmlinput +7 -0
  292. data/test/testcases/block/09_html/standalone_image_in_div.text +8 -0
  293. data/test/testcases/block/09_html/textarea.html +8 -0
  294. data/test/testcases/block/09_html/textarea.text +8 -0
  295. data/test/testcases/block/09_html/xml.html +8 -0
  296. data/test/testcases/block/09_html/xml.text +7 -0
  297. data/test/testcases/block/10_ald/simple.html +2 -0
  298. data/test/testcases/block/10_ald/simple.text +8 -0
  299. data/test/testcases/block/11_ial/auto_id_and_ial.html +1 -0
  300. data/test/testcases/block/11_ial/auto_id_and_ial.options +1 -0
  301. data/test/testcases/block/11_ial/auto_id_and_ial.text +2 -0
  302. data/test/testcases/block/11_ial/nested.html +11 -0
  303. data/test/testcases/block/11_ial/nested.text +15 -0
  304. data/test/testcases/block/11_ial/simple.html +29 -0
  305. data/test/testcases/block/11_ial/simple.text +41 -0
  306. data/test/testcases/block/12_extension/comment.html +8 -0
  307. data/test/testcases/block/12_extension/comment.text +12 -0
  308. data/test/testcases/block/12_extension/ignored.html +8 -0
  309. data/test/testcases/block/12_extension/ignored.text +8 -0
  310. data/test/testcases/block/12_extension/nomarkdown.html +10 -0
  311. data/test/testcases/block/12_extension/nomarkdown.kramdown +20 -0
  312. data/test/testcases/block/12_extension/nomarkdown.latex +13 -0
  313. data/test/testcases/block/12_extension/nomarkdown.text +21 -0
  314. data/test/testcases/block/12_extension/options.html +21 -0
  315. data/test/testcases/block/12_extension/options.text +23 -0
  316. data/test/testcases/block/12_extension/options2.html +10 -0
  317. data/test/testcases/block/12_extension/options2.text +5 -0
  318. data/test/testcases/block/12_extension/options3.html +8 -0
  319. data/test/testcases/block/12_extension/options3.text +7 -0
  320. data/test/testcases/block/13_definition_list/auto_ids.html +15 -0
  321. data/test/testcases/block/13_definition_list/auto_ids.text +18 -0
  322. data/test/testcases/block/13_definition_list/definition_at_beginning.html +1 -0
  323. data/test/testcases/block/13_definition_list/definition_at_beginning.text +1 -0
  324. data/test/testcases/block/13_definition_list/deflist_ial.html +4 -0
  325. data/test/testcases/block/13_definition_list/deflist_ial.text +4 -0
  326. data/test/testcases/block/13_definition_list/item_ial.html +17 -0
  327. data/test/testcases/block/13_definition_list/item_ial.text +16 -0
  328. data/test/testcases/block/13_definition_list/multiple_terms.html +13 -0
  329. data/test/testcases/block/13_definition_list/multiple_terms.text +10 -0
  330. data/test/testcases/block/13_definition_list/no_def_list.html +2 -0
  331. data/test/testcases/block/13_definition_list/no_def_list.text +2 -0
  332. data/test/testcases/block/13_definition_list/para_wrapping.html +10 -0
  333. data/test/testcases/block/13_definition_list/para_wrapping.text +6 -0
  334. data/test/testcases/block/13_definition_list/separated_by_eob.html +8 -0
  335. data/test/testcases/block/13_definition_list/separated_by_eob.text +5 -0
  336. data/test/testcases/block/13_definition_list/simple.html +10 -0
  337. data/test/testcases/block/13_definition_list/simple.text +10 -0
  338. data/test/testcases/block/13_definition_list/styled_terms.html +4 -0
  339. data/test/testcases/block/13_definition_list/styled_terms.text +2 -0
  340. data/test/testcases/block/13_definition_list/too_much_space.html +3 -0
  341. data/test/testcases/block/13_definition_list/too_much_space.text +4 -0
  342. data/test/testcases/block/13_definition_list/with_blocks.html +38 -0
  343. data/test/testcases/block/13_definition_list/with_blocks.text +24 -0
  344. data/test/testcases/block/14_table/empty_tag_in_cell.html +8 -0
  345. data/test/testcases/block/14_table/empty_tag_in_cell.options +1 -0
  346. data/test/testcases/block/14_table/empty_tag_in_cell.text +1 -0
  347. data/test/testcases/block/14_table/errors.html +12 -0
  348. data/test/testcases/block/14_table/errors.text +13 -0
  349. data/test/testcases/block/14_table/escaping.html +52 -0
  350. data/test/testcases/block/14_table/escaping.text +19 -0
  351. data/test/testcases/block/14_table/footer.html +65 -0
  352. data/test/testcases/block/14_table/footer.text +25 -0
  353. data/test/testcases/block/14_table/header.html +117 -0
  354. data/test/testcases/block/14_table/header.text +39 -0
  355. data/test/testcases/block/14_table/no_table.html +3 -0
  356. data/test/testcases/block/14_table/no_table.text +3 -0
  357. data/test/testcases/block/14_table/simple.html +192 -0
  358. data/test/testcases/block/14_table/simple.text +53 -0
  359. data/test/testcases/block/14_table/table_with_footnote.html +25 -0
  360. data/test/testcases/block/14_table/table_with_footnote.latex +11 -0
  361. data/test/testcases/block/14_table/table_with_footnote.text +6 -0
  362. data/test/testcases/block/15_math/gh_128.html +1 -0
  363. data/test/testcases/block/15_math/gh_128.text +1 -0
  364. data/test/testcases/block/15_math/no_engine.html +3 -0
  365. data/test/testcases/block/15_math/no_engine.options +1 -0
  366. data/test/testcases/block/15_math/no_engine.text +2 -0
  367. data/test/testcases/block/15_math/normal.html +30 -0
  368. data/test/testcases/block/15_math/normal.text +30 -0
  369. data/test/testcases/block/16_toc/no_toc.html +14 -0
  370. data/test/testcases/block/16_toc/no_toc.text +16 -0
  371. data/test/testcases/block/16_toc/toc_exclude.html +35 -0
  372. data/test/testcases/block/16_toc/toc_exclude.options +1 -0
  373. data/test/testcases/block/16_toc/toc_exclude.text +19 -0
  374. data/test/testcases/block/16_toc/toc_levels.html +24 -0
  375. data/test/testcases/block/16_toc/toc_levels.options +2 -0
  376. data/test/testcases/block/16_toc/toc_levels.text +16 -0
  377. data/test/testcases/block/16_toc/toc_with_footnotes.html +13 -0
  378. data/test/testcases/block/16_toc/toc_with_footnotes.options +1 -0
  379. data/test/testcases/block/16_toc/toc_with_footnotes.text +6 -0
  380. data/test/testcases/block/16_toc/toc_with_links.html +8 -0
  381. data/test/testcases/block/16_toc/toc_with_links.options +2 -0
  382. data/test/testcases/block/16_toc/toc_with_links.text +8 -0
  383. data/test/testcases/cjk-line-break.html +4 -0
  384. data/test/testcases/cjk-line-break.options +1 -0
  385. data/test/testcases/cjk-line-break.text +12 -0
  386. data/test/testcases/encoding.html +46 -0
  387. data/test/testcases/encoding.text +28 -0
  388. data/test/testcases/man/example.man +123 -0
  389. data/test/testcases/man/example.text +85 -0
  390. data/test/testcases/man/heading-name-dash-description.man +4 -0
  391. data/test/testcases/man/heading-name-dash-description.text +1 -0
  392. data/test/testcases/man/heading-name-description.man +4 -0
  393. data/test/testcases/man/heading-name-description.text +2 -0
  394. data/test/testcases/man/heading-name-section-description.man +4 -0
  395. data/test/testcases/man/heading-name-section-description.text +1 -0
  396. data/test/testcases/man/heading-name-section.man +2 -0
  397. data/test/testcases/man/heading-name-section.text +1 -0
  398. data/test/testcases/man/heading-name.man +2 -0
  399. data/test/testcases/man/heading-name.text +1 -0
  400. data/test/testcases/man/sections.man +4 -0
  401. data/test/testcases/man/sections.text +11 -0
  402. data/test/testcases/man/text-escaping.man +8 -0
  403. data/test/testcases/man/text-escaping.text +7 -0
  404. data/test/testcases/span/01_link/empty.html +5 -0
  405. data/test/testcases/span/01_link/empty.text +5 -0
  406. data/test/testcases/span/01_link/empty_title.htmlinput +3 -0
  407. data/test/testcases/span/01_link/empty_title.text +7 -0
  408. data/test/testcases/span/01_link/image_in_a.html +5 -0
  409. data/test/testcases/span/01_link/image_in_a.text +5 -0
  410. data/test/testcases/span/01_link/imagelinks.html +15 -0
  411. data/test/testcases/span/01_link/imagelinks.text +18 -0
  412. data/test/testcases/span/01_link/inline.html +46 -0
  413. data/test/testcases/span/01_link/inline.text +48 -0
  414. data/test/testcases/span/01_link/latex_escaping.latex +6 -0
  415. data/test/testcases/span/01_link/latex_escaping.text +5 -0
  416. data/test/testcases/span/01_link/link_defs.html +9 -0
  417. data/test/testcases/span/01_link/link_defs.text +27 -0
  418. data/test/testcases/span/01_link/link_defs_with_ial.html +4 -0
  419. data/test/testcases/span/01_link/link_defs_with_ial.text +16 -0
  420. data/test/testcases/span/01_link/links_with_angle_brackets.html +3 -0
  421. data/test/testcases/span/01_link/links_with_angle_brackets.text +3 -0
  422. data/test/testcases/span/01_link/reference.html +37 -0
  423. data/test/testcases/span/01_link/reference.options +3 -0
  424. data/test/testcases/span/01_link/reference.text +53 -0
  425. data/test/testcases/span/02_emphasis/empty.html +3 -0
  426. data/test/testcases/span/02_emphasis/empty.text +3 -0
  427. data/test/testcases/span/02_emphasis/errors.html +9 -0
  428. data/test/testcases/span/02_emphasis/errors.text +9 -0
  429. data/test/testcases/span/02_emphasis/nesting.html +41 -0
  430. data/test/testcases/span/02_emphasis/nesting.text +36 -0
  431. data/test/testcases/span/02_emphasis/normal.html +65 -0
  432. data/test/testcases/span/02_emphasis/normal.options +1 -0
  433. data/test/testcases/span/02_emphasis/normal.text +63 -0
  434. data/test/testcases/span/03_codespan/empty.html +5 -0
  435. data/test/testcases/span/03_codespan/empty.text +5 -0
  436. data/test/testcases/span/03_codespan/errors.html +1 -0
  437. data/test/testcases/span/03_codespan/errors.text +1 -0
  438. data/test/testcases/span/03_codespan/highlighting-minted.latex +2 -0
  439. data/test/testcases/span/03_codespan/highlighting-minted.options +1 -0
  440. data/test/testcases/span/03_codespan/highlighting-minted.text +1 -0
  441. data/test/testcases/span/03_codespan/highlighting.html +1 -0
  442. data/test/testcases/span/03_codespan/highlighting.text +1 -0
  443. data/test/testcases/span/03_codespan/normal-css-class.html +1 -0
  444. data/test/testcases/span/03_codespan/normal-css-class.options +2 -0
  445. data/test/testcases/span/03_codespan/normal-css-class.text +1 -0
  446. data/test/testcases/span/03_codespan/normal.html +16 -0
  447. data/test/testcases/span/03_codespan/normal.text +16 -0
  448. data/test/testcases/span/03_codespan/rouge/disabled.html +1 -0
  449. data/test/testcases/span/03_codespan/rouge/disabled.options +4 -0
  450. data/test/testcases/span/03_codespan/rouge/disabled.text +1 -0
  451. data/test/testcases/span/03_codespan/rouge/simple.html +1 -0
  452. data/test/testcases/span/03_codespan/rouge/simple.options +1 -0
  453. data/test/testcases/span/03_codespan/rouge/simple.text +1 -0
  454. data/test/testcases/span/04_footnote/backlink_inline.html +79 -0
  455. data/test/testcases/span/04_footnote/backlink_inline.options +1 -0
  456. data/test/testcases/span/04_footnote/backlink_inline.text +38 -0
  457. data/test/testcases/span/04_footnote/backlink_text.html +9 -0
  458. data/test/testcases/span/04_footnote/backlink_text.options +1 -0
  459. data/test/testcases/span/04_footnote/backlink_text.text +3 -0
  460. data/test/testcases/span/04_footnote/definitions.html +17 -0
  461. data/test/testcases/span/04_footnote/definitions.latex +17 -0
  462. data/test/testcases/span/04_footnote/definitions.text +24 -0
  463. data/test/testcases/span/04_footnote/footnote_nr.html +12 -0
  464. data/test/testcases/span/04_footnote/footnote_nr.latex +2 -0
  465. data/test/testcases/span/04_footnote/footnote_nr.options +1 -0
  466. data/test/testcases/span/04_footnote/footnote_nr.text +4 -0
  467. data/test/testcases/span/04_footnote/footnote_prefix.html +12 -0
  468. data/test/testcases/span/04_footnote/footnote_prefix.options +1 -0
  469. data/test/testcases/span/04_footnote/footnote_prefix.text +4 -0
  470. data/test/testcases/span/04_footnote/inside_footnote.html +17 -0
  471. data/test/testcases/span/04_footnote/inside_footnote.text +9 -0
  472. data/test/testcases/span/04_footnote/markers.html +46 -0
  473. data/test/testcases/span/04_footnote/markers.latex +23 -0
  474. data/test/testcases/span/04_footnote/markers.options +2 -0
  475. data/test/testcases/span/04_footnote/markers.text +27 -0
  476. data/test/testcases/span/04_footnote/placement.html +11 -0
  477. data/test/testcases/span/04_footnote/placement.options +1 -0
  478. data/test/testcases/span/04_footnote/placement.text +8 -0
  479. data/test/testcases/span/04_footnote/regexp_problem.html +14 -0
  480. data/test/testcases/span/04_footnote/regexp_problem.options +2 -0
  481. data/test/testcases/span/04_footnote/regexp_problem.text +52 -0
  482. data/test/testcases/span/04_footnote/without_backlink.html +9 -0
  483. data/test/testcases/span/04_footnote/without_backlink.options +1 -0
  484. data/test/testcases/span/04_footnote/without_backlink.text +3 -0
  485. data/test/testcases/span/05_html/across_lines.html +1 -0
  486. data/test/testcases/span/05_html/across_lines.text +2 -0
  487. data/test/testcases/span/05_html/button.html +7 -0
  488. data/test/testcases/span/05_html/button.text +7 -0
  489. data/test/testcases/span/05_html/invalid.html +1 -0
  490. data/test/testcases/span/05_html/invalid.text +1 -0
  491. data/test/testcases/span/05_html/link_with_mailto.html +1 -0
  492. data/test/testcases/span/05_html/link_with_mailto.text +1 -0
  493. data/test/testcases/span/05_html/mark_element.html +3 -0
  494. data/test/testcases/span/05_html/mark_element.text +3 -0
  495. data/test/testcases/span/05_html/markdown_attr.html +6 -0
  496. data/test/testcases/span/05_html/markdown_attr.text +6 -0
  497. data/test/testcases/span/05_html/normal.html +43 -0
  498. data/test/testcases/span/05_html/normal.text +43 -0
  499. data/test/testcases/span/05_html/raw_span_elements.html +2 -0
  500. data/test/testcases/span/05_html/raw_span_elements.text +2 -0
  501. data/test/testcases/span/05_html/xml.html +5 -0
  502. data/test/testcases/span/05_html/xml.text +5 -0
  503. data/test/testcases/span/abbreviations/abbrev.html +21 -0
  504. data/test/testcases/span/abbreviations/abbrev.text +34 -0
  505. data/test/testcases/span/abbreviations/abbrev_defs.html +2 -0
  506. data/test/testcases/span/abbreviations/abbrev_defs.text +5 -0
  507. data/test/testcases/span/abbreviations/in_footnote.html +9 -0
  508. data/test/testcases/span/abbreviations/in_footnote.text +5 -0
  509. data/test/testcases/span/autolinks/url_links.html +15 -0
  510. data/test/testcases/span/autolinks/url_links.text +16 -0
  511. data/test/testcases/span/escaped_chars/normal.html +47 -0
  512. data/test/testcases/span/escaped_chars/normal.text +47 -0
  513. data/test/testcases/span/extension/comment.html +6 -0
  514. data/test/testcases/span/extension/comment.text +6 -0
  515. data/test/testcases/span/extension/ignored.html +1 -0
  516. data/test/testcases/span/extension/ignored.text +1 -0
  517. data/test/testcases/span/extension/nomarkdown.html +1 -0
  518. data/test/testcases/span/extension/nomarkdown.text +1 -0
  519. data/test/testcases/span/extension/options.html +1 -0
  520. data/test/testcases/span/extension/options.text +1 -0
  521. data/test/testcases/span/ial/simple.html +6 -0
  522. data/test/testcases/span/ial/simple.text +6 -0
  523. data/test/testcases/span/line_breaks/normal.html +11 -0
  524. data/test/testcases/span/line_breaks/normal.latex +12 -0
  525. data/test/testcases/span/line_breaks/normal.text +11 -0
  526. data/test/testcases/span/math/no_engine.html +1 -0
  527. data/test/testcases/span/math/no_engine.options +1 -0
  528. data/test/testcases/span/math/no_engine.text +1 -0
  529. data/test/testcases/span/math/normal.html +10 -0
  530. data/test/testcases/span/math/normal.text +10 -0
  531. data/test/testcases/span/text_substitutions/entities.html +6 -0
  532. data/test/testcases/span/text_substitutions/entities.options +1 -0
  533. data/test/testcases/span/text_substitutions/entities.text +6 -0
  534. data/test/testcases/span/text_substitutions/entities_as_char.html +1 -0
  535. data/test/testcases/span/text_substitutions/entities_as_char.options +2 -0
  536. data/test/testcases/span/text_substitutions/entities_as_char.text +1 -0
  537. data/test/testcases/span/text_substitutions/entities_as_input.html +1 -0
  538. data/test/testcases/span/text_substitutions/entities_as_input.options +1 -0
  539. data/test/testcases/span/text_substitutions/entities_as_input.text +1 -0
  540. data/test/testcases/span/text_substitutions/entities_numeric.html +1 -0
  541. data/test/testcases/span/text_substitutions/entities_numeric.options +1 -0
  542. data/test/testcases/span/text_substitutions/entities_numeric.text +1 -0
  543. data/test/testcases/span/text_substitutions/entities_symbolic.html +1 -0
  544. data/test/testcases/span/text_substitutions/entities_symbolic.options +1 -0
  545. data/test/testcases/span/text_substitutions/entities_symbolic.text +1 -0
  546. data/test/testcases/span/text_substitutions/greaterthan.html +1 -0
  547. data/test/testcases/span/text_substitutions/greaterthan.text +1 -0
  548. data/test/testcases/span/text_substitutions/lowerthan.html +1 -0
  549. data/test/testcases/span/text_substitutions/lowerthan.text +1 -0
  550. data/test/testcases/span/text_substitutions/typography.html +40 -0
  551. data/test/testcases/span/text_substitutions/typography.options +1 -0
  552. data/test/testcases/span/text_substitutions/typography.text +40 -0
  553. data/test/testcases/span/text_substitutions/typography_subst.html +3 -0
  554. data/test/testcases/span/text_substitutions/typography_subst.latex +4 -0
  555. data/test/testcases/span/text_substitutions/typography_subst.options +8 -0
  556. data/test/testcases/span/text_substitutions/typography_subst.text +3 -0
  557. metadata +659 -0
@@ -0,0 +1,26 @@
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ module Kramdown
11
+
12
+ # This module contains all available parsers. A parser takes an input string and converts the
13
+ # string to an element tree.
14
+ #
15
+ # New parsers should be derived from the Base class which provides common functionality - see its
16
+ # API documentation for how to create a custom parser class.
17
+ module Parser
18
+
19
+ autoload :Base, 'kramdown/parser/base'
20
+ autoload :Kramdown, 'kramdown/parser/kramdown'
21
+ autoload :Html, 'kramdown/parser/html'
22
+ autoload :Markdown, 'kramdown/parser/markdown'
23
+
24
+ end
25
+
26
+ end
@@ -0,0 +1,131 @@
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ require 'kramdown/utils'
11
+ require 'kramdown/parser'
12
+
13
+ module Kramdown
14
+
15
+ module Parser
16
+
17
+ # == \Base class for parsers
18
+ #
19
+ # This class serves as base class for parsers. It provides common methods that can/should be
20
+ # used by all parsers, especially by those using StringScanner(Kramdown) for parsing.
21
+ #
22
+ # A parser object is used as a throw-away object, i.e. it is only used for storing the needed
23
+ # state information during parsing. Therefore one can't instantiate a parser object directly but
24
+ # only use the Base::parse method.
25
+ #
26
+ # == Implementing a parser
27
+ #
28
+ # Implementing a new parser is rather easy: just derive a new class from this class and put it
29
+ # in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
30
+ # parser works correctly. Then you need to implement the +#parse+ method which has to contain
31
+ # the parsing code.
32
+ #
33
+ # Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
34
+ class Base
35
+
36
+ # The hash with the parsing options.
37
+ attr_reader :options
38
+
39
+ # The array with the parser warnings.
40
+ attr_reader :warnings
41
+
42
+ # The original source string.
43
+ attr_reader :source
44
+
45
+ # The root element of element tree that is created from the source string.
46
+ attr_reader :root
47
+
48
+ # Initialize the parser object with the +source+ string and the parsing +options+.
49
+ #
50
+ # The @root element, the @warnings array and @text_type (specifies the default type for newly
51
+ # created text nodes) are automatically initialized.
52
def initialize(source, options)
  @source = source
  @options = Kramdown::Options.merge(options)

  # Objects that cannot report an encoding simply yield +nil+ here.
  source_encoding = begin
    source.encoding
  rescue StandardError
    nil
  end
  @root = Element.new(:root, nil, nil, encoding: source_encoding, location: 1,
                      options: {}, abbrev_defs: {}, abbrev_attr: {})

  @warnings = []
  @text_type = :text
end
60
+ private_class_method(:new, :allocate)
61
+
62
+ # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
63
+ # return the root element of the element tree and an array with warning messages.
64
+ #
65
+ # Initializes a new instance of the calling class and then calls the +#parse+ method that must
66
+ # be implemented by each subclass.
67
def self.parse(source, options = {})
  # Parser instances are throw-away objects: build one, run it, hand back its results.
  instance = new(source, options)
  instance.parse
  [instance.root, instance.warnings]
end
72
+
73
+ # Parse the source string into an element tree.
74
+ #
75
+ # The parsing code should parse the source provided in @source and build an element tree the
76
+ # root of which should be @root.
77
+ #
78
+ # This is the only method that has to be implemented by sub-classes!
79
def parse
  # Name the offending class so a missing override is easy to track down. The exception class
  # is unchanged, so existing rescue clauses keep working.
  raise NotImplementedError, "#{self.class} must implement #parse"
end
82
+
83
+ # Add the given warning +text+ to the warning array.
84
def warning(text)
  # TODO: add position information
  @warnings.push(text)
end
88
+
89
+ # Modify the string +source+ to be usable by the parser (unifies line ending characters to
90
+ # +\n+ and makes sure +source+ ends with a new line character).
91
def adapt_source(source)
  unless source.valid_encoding?
    raise "The source text contains invalid characters for the used encoding #{source.encoding}"
  end

  # String#encode returns a copy, so the caller's string is left untouched.
  normalized = source.encode('UTF-8')
  normalized.gsub!(/\r\n?/, "\n")
  # Drop one trailing newline (if any) and re-add it so the result always ends with "\n".
  normalized.chomp!
  normalized << "\n"
end
100
+
101
+ # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
102
+ # +type+ element or creates a new text element with the given +type+.
103
def add_text(text, tree = @tree, type = @text_type)
  previous = tree.children.last
  # Merge into the preceding element when it already has the requested type.
  return previous.value << text if previous && previous.type == type
  # Never create an element for empty text.
  return if text.empty?

  location = (previous && previous.options[:location]) || tree.options[:location]
  tree.children << Element.new(type, text, nil, location: location)
end
112
+
113
+ # Extract the part of the StringScanner +strscan+ backed string specified by the +range+. This
114
+ # method treats the +range+ as byte offsets, independent of the string's encoding.
115
def extract_string(range, strscan)
  # +range+ holds byte offsets (callers pass StringScanner positions). String#byteslice slices
  # by bytes and keeps the string's encoding, so the scanner's string never has to be switched
  # to binary and back. That switch raised on frozen strings and mutated shared state.
  strscan.string.byteslice(range)
end
126
+
127
+ end
128
+
129
+ end
130
+
131
+ end
@@ -0,0 +1,608 @@
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ require 'rexml/parsers/baseparser'
11
+ require 'strscan'
12
+ require 'kramdown/utils'
13
+ require 'kramdown/parser'
14
+
15
+ module Kramdown
16
+
17
+ module Parser
18
+
19
+ # Used for parsing an HTML document.
20
+ #
21
+ # The parsing code is in the Parser module that can also be used by other parsers.
22
+ class Html < Base
23
+
24
+ # Contains all constants that are used when parsing.
25
+ module Constants
26
+
27
+ #:stopdoc:
28
+ # The following regexps are based on the ones used by REXML, with some slight modifications.
29
+ HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
30
+ HTML_COMMENT_RE = /<!--(.*?)-->/m
31
+ HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
32
+ HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(?:(\p{Word}+)|("|')(.*?)\3))?/m
33
+ HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(?:\p{Word}+|("|').*?\3))?)*)\s*(\/)?>/m
34
+ HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
35
+ HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
36
+
37
+ HTML_CONTENT_MODEL_BLOCK = %w[address applet article aside blockquote body
38
+ dd details div dl fieldset figure figcaption
39
+ footer form header hgroup iframe li main
40
+ map menu nav noscript object section summary td]
41
+ HTML_CONTENT_MODEL_SPAN = %w[a abbr acronym b bdo big button cite caption del dfn dt em
42
+ h1 h2 h3 h4 h5 h6 i ins label legend optgroup p q rb rbc
43
+ rp rt rtc ruby select small span strong sub sup th tt]
44
+ HTML_CONTENT_MODEL_RAW = %w[script style math option textarea pre code kbd samp var]
45
+ # The following elements are also parsed as raw since they need child elements that cannot
46
+ # be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol
47
+
48
+ HTML_CONTENT_MODEL = Hash.new {|h, k| h[k] = :raw }
49
+ HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block }
50
+ HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span }
51
+ HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw }
52
+
53
+ # Some HTML elements like script belong to both categories (i.e. are valid in block and
54
+ # span HTML) and don't appear therefore!
55
+ # script, textarea
56
+ HTML_SPAN_ELEMENTS = %w[a abbr acronym b big bdo br button cite code del dfn em i img input
57
+ ins kbd label mark option q rb rbc rp rt rtc ruby samp select small
58
+ span strong sub sup tt u var]
59
+ HTML_BLOCK_ELEMENTS = %w[address article aside applet body blockquote caption col colgroup
60
+ dd div dl dt fieldset figcaption footer form h1 h2 h3 h4 h5 h6
61
+ header hgroup hr html head iframe legend menu li main map nav ol
62
+ optgroup p pre section summary table tbody td th thead tfoot tr ul]
63
+ HTML_ELEMENTS_WITHOUT_BODY = %w[area base br col command embed hr img input keygen link
64
+ meta param source track wbr]
65
+
66
+ HTML_ELEMENT = Hash.new(false)
67
+ (HTML_SPAN_ELEMENTS + HTML_BLOCK_ELEMENTS + HTML_ELEMENTS_WITHOUT_BODY +
68
+ HTML_CONTENT_MODEL.keys).each do |a|
69
+ HTML_ELEMENT[a] = true
70
+ end
71
+ end
72
+
73
+ # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
74
+ # functionality. The only thing that must be provided by the class are instance variable
75
+ # @stack for storing the needed state and @src (instance of StringScanner) for the actual
76
+ # parsing.
77
+ module Parser
78
+
79
+ include Constants
80
+
81
+ # Process the HTML start tag that has already been scanned/checked via @src.
82
+ #
83
+ # Does the common processing steps and then yields to the caller for further processing
84
+ # (first parameter is the created element; the second parameter is +true+ if the HTML
85
+ # element is already closed, ie. contains no body; the third parameter specifies whether the
86
+ # body - and the end tag - need to be handled in case closed=false).
87
def handle_html_start_tag(line = nil) # :yields: el, closed, handle_body
  tag_name = @src[1]
  # Only names of known HTML elements are normalised to lowercase.
  tag_name.downcase! if HTML_ELEMENT[tag_name.downcase]
  self_closed = !@src[4].nil?
  attributes = parse_html_attributes(@src[2], line, HTML_ELEMENT[tag_name])

  element = Element.new(:html_element, tag_name, attributes, category: :block)
  element.options[:location] = line if line
  @tree.children << element

  # Elements that never have a body count as closed even without a trailing slash.
  self_closed ||= HTML_ELEMENTS_WITHOUT_BODY.include?(element.value)

  if %w[script style].include?(tag_name)
    # The body is consumed verbatim here, so the caller must not handle it again.
    handle_raw_html_tag(tag_name)
    yield(element, false, false)
  else
    yield(element, self_closed, true)
  end
end
107
+
108
+ # Parses the given string for HTML attributes and returns the resulting hash.
109
+ #
110
+ # If the optional +line+ parameter is supplied, it is used in warning messages.
111
+ #
112
+ # If the optional +in_html_tag+ parameter is set to +false+, attributes are not modified to
113
+ # contain only lowercase letters.
114
def parse_html_attributes(str, line = nil, in_html_tag = true)
  str.scan(HTML_ATTRIBUTE_RE).each_with_object({}) do |(name, bare_value, _quote, quoted_value), attrs|
    name.downcase! if in_html_tag
    if attrs.key?(name)
      warning("Duplicate HTML attribute '#{name}' on line #{line || '?'} - overwriting previous one")
    end
    # Attributes given without a value are stored as the empty string.
    attrs[name] = bare_value || quoted_value || ""
  end
end
125
+
126
+ # Handle the raw HTML tag at the current position.
127
def handle_raw_html_tag(name)
  start_pos = @src.pos
  target = @tree.children.last
  if @src.scan_until(/(?=<\/#{name}\s*>)/mi)
    # Everything up to (but excluding) the end tag is raw content; then consume the end tag.
    add_text(extract_string(start_pos...@src.pos, @src), target, :raw)
    @src.scan(HTML_TAG_CLOSE_RE)
  else
    # No end tag: treat the rest of the document as the raw content.
    add_text(@src.rest, target, :raw)
    @src.terminate
    warning("Found no end tag for '#{name}' - auto-closing it")
  end
end
138
+
139
+ HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
140
+
141
+ # Parse raw HTML from the current source position, storing the found elements in +el+.
142
+ # Parsing continues until one of the following criteria are fulfilled:
143
+ #
144
+ # - The end of the document is reached.
145
+ # - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
146
+ # element).
147
+ #
148
+ # When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
149
+ # providing the block given to this method.
150
def parse_raw_html(el, &block)
  @stack.push(@tree)
  @tree = el

  finished = false
  until @src.eos? || finished
    leading_text = @src.scan_until(HTML_RAW_START)
    if leading_text.nil?
      # No further markup ahead: the remainder of the document is plain text.
      add_text(@src.rest, @tree, :text)
      @src.terminate
      if @tree.type == :html_element
        warning("Found no end tag for '#{@tree.value}' on line " \
                "#{@tree.options[:location]} - auto-closing it")
      end
      finished = true
      next
    end

    add_text(leading_text, @tree, :text)
    line = @src.current_line_number
    if (comment = @src.scan(HTML_COMMENT_RE))
      @tree.children << Element.new(:xml_comment, comment, nil, category: :block, location: line)
    elsif (instruction = @src.scan(HTML_INSTRUCTION_RE))
      @tree.children << Element.new(:xml_pi, instruction, nil, category: :block, location: line)
    elsif @src.scan(HTML_TAG_RE)
      if method(:handle_html_start_tag).arity.abs >= 1
        handle_html_start_tag(line, &block)
      else
        handle_html_start_tag(&block) # DEPRECATED: method needs to accept line number in 2.0
      end
    elsif @src.scan(HTML_TAG_CLOSE_RE)
      # Known HTML elements are compared case-insensitively, everything else verbatim.
      closing_name = HTML_ELEMENT[@tree.value] ? @src[1].downcase : @src[1]
      if @tree.value == closing_name
        finished = true
      else
        add_text(@src.matched, @tree, :text)
        warning("Found invalidly used HTML closing tag for '#{@src[1]}' on " \
                "line #{line} - ignoring it")
      end
    else
      # A lone '<' that starts nothing recognisable: keep it as text.
      add_text(@src.getch, @tree, :text)
    end
  end

  @tree = @stack.pop
end
193
+
194
+ end
195
+
196
+ # Converts HTML elements to native elements if possible.
197
+ class ElementConverter
198
+
199
+ # :stopdoc:
200
+
201
+ include Constants
202
+ include ::Kramdown::Utils::Entities
203
+
204
+ REMOVE_TEXT_CHILDREN = %w[html head hgroup ol ul dl table colgroup tbody thead tfoot tr
205
+ select optgroup]
206
+ WRAP_TEXT_CHILDREN = %w[body section nav article aside header footer address div li dd
207
+ blockquote figure figcaption fieldset form]
208
+ REMOVE_WHITESPACE_CHILDREN = %w[body section nav article aside header footer address
209
+ div li dd blockquote figure figcaption td th fieldset form]
210
+ STRIP_WHITESPACE = %w[address article aside blockquote body caption dd div dl dt fieldset
211
+ figcaption form footer header h1 h2 h3 h4 h5 h6 legend li nav p
212
+ section td th]
213
+ SIMPLE_ELEMENTS = %w[em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl
214
+ li dl dt dd]
215
+
216
+ def initialize(root)
217
+ @root = root
218
+ end
219
+
220
# Convenience entry point: build a converter for +root+ and process +el+ (defaults to +root+).
def self.convert(root, el = root)
  converter = new(root)
  converter.process(el)
end
223
+
224
+ # Convert the element +el+ and its children.
225
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  case el.type
  when :root
    el.children.each { |child| process(child) }
    remove_whitespace_children(el)
    return
  when :xml_comment, :xml_pi
    # Comments and processing instructions take their category from the enclosing element.
    parent_tag =
      if parent.nil?
        'div'
      elsif parent.type == :html_element
        parent.value
      else
        {code_span: 'code', code_block: 'pre', header: 'h1'}.fetch(parent.type) { parent.type.to_s }
      end
    category = HTML_CONTENT_MODEL[parent_tag] == :span ? :span : :block
    el.options.replace(category: category)
    return
  when :html_element
    # handled below
  else
    return
  end

  tag = el.value
  converter = "convert_#{tag}"
  # A dedicated convert_<tag> method takes precedence over the generic handling.
  return send(converter, el) if do_conversion && self.class.method_defined?(converter)

  remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(tag)

  if do_conversion && SIMPLE_ELEMENTS.include?(tag)
    set_basics(el, tag.intern)
    process_children(el, do_conversion, preserve_text)
  else
    process_html_element(el, do_conversion, preserve_text)
  end

  return unless do_conversion

  strip_whitespace(el) if STRIP_WHITESPACE.include?(tag)
  remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(tag)
  wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(tag)
end
270
+
271
# Process all children of +el+ in place. Each text child is replaced by the elements that
# process_text returns; every other child is processed recursively and kept.
def process_children(el, do_conversion = true, preserve_text = false)
  el.children.map! do |child|
    next process_text(child.value, preserve_text || !do_conversion) if child.type == :text

    process(child, do_conversion, preserve_text, el)
    child
  end.flatten!
end
281
+
282
+ # Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
283
+ # entities in entity elements.
284
def process_text(raw, preserve = false)
  raw.gsub!(/\s+/, ' ') unless preserve
  scanner = Kramdown::Utils::StringScanner.new(raw)
  elements = []
  until scanner.eos?
    leading = scanner.scan_until(/(?=#{HTML_ENTITY_RE})/o)
    if leading.nil?
      # No entity ahead: the remainder is plain text.
      elements << Element.new(:text, scanner.rest)
      scanner.terminate
      next
    end

    elements << Element.new(:text, leading)
    scanner.scan(HTML_ENTITY_RE)
    # Named entity (String), decimal reference or hexadecimal reference (Integer).
    ref = scanner[1] || (scanner[2]&.to_i) || scanner[3].hex
    converted =
      if %w[lsquo rsquo ldquo rdquo].include?(ref)
        Element.new(:smart_quote, ref.intern)
      elsif %w[mdash ndash hellip laquo raquo].include?(ref)
        Element.new(:typographic_sym, ref.intern)
      else
        begin
          Element.new(:entity, entity(ref), nil, original: scanner.matched)
        rescue ::Kramdown::Error
          # Unknown entity: rewind to just after the '&' and emit an ampersand entity instead.
          scanner.pos -= scanner.matched_size - 1
          Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
        end
      end
    elements << converted
  end
  elements
end
312
+
313
# Keep +el+ as an HTML element: reset its options to category and content model, then process
# its children.
def process_html_element(el, do_conversion = true, preserve_text = false)
  category = HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block
  content_model = do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw
  el.options.replace(category: category, content_model: content_model)
  process_children(el, do_conversion, preserve_text)
end
318
+
319
# Delete every direct :text child of +el+ in place.
def remove_text_children(el)
  el.children.delete_if do |child|
    child.type == :text
  end
end
322
+
323
# Group each run of consecutive non-block or text children of +el+ into a transparent :p
# element. Block children are kept as they are and end the current run.
def wrap_text_children(el)
  wrapped = []
  paragraph = nil
  el.children.each do |child|
    if !child.block? || child.type == :text
      if paragraph.nil?
        paragraph = Element.new(:p, nil, nil, transparent: true)
        wrapped << paragraph
      end
      paragraph.children << child
    else
      wrapped << child
      paragraph = nil
    end
  end
  el.children = wrapped
end
341
+
342
# Strip leading whitespace from the first child and trailing whitespace from the last child of
# +el+, in place, when those children are :text elements.
def strip_whitespace(el)
  return if el.children.empty?

  head = el.children.first
  head.value.lstrip! if head.type == :text
  tail = el.children.last
  tail.value.rstrip! if tail.type == :text
end
351
+
352
# Drop whitespace-only :text children of +el+ that are the first child, the last child, or sit
# directly between two block elements.
def remove_whitespace_children(el)
  siblings = el.children
  last_index = siblings.length - 1
  el.children = siblings.reject.with_index do |child, idx|
    next false unless child.type == :text && child.value.strip.empty?

    idx == 0 || idx == last_index ||
      (siblings[idx - 1].block? && siblings[idx + 1].block?)
  end
end
361
+
362
+ def set_basics(el, type, opts = {})
363
+ el.type = type
364
+ el.options.replace(opts)
365
+ el.value = nil
366
+ end
367
+
368
# Append the values of all :text elements in the subtree rooted at +el+ to +raw+, depth first.
def extract_text(el, raw)
  raw << el.value.to_s if el.type == :text
  el.children.each do |child|
    extract_text(child, raw)
  end
end
372
+
373
# Keep +textarea+ as an HTML element, with conversion enabled and its text preserved.
def convert_textarea(el)
  do_conversion = true
  preserve_text = true
  process_html_element(el, do_conversion, preserve_text)
end
376
+
377
# Convert +a+ to a native :a element only when it has an +href+ attribute; otherwise keep it
# as an unconverted HTML element.
def convert_a(el)
  return process_html_element(el, false) unless el.attr['href']

  set_basics(el, :a)
  process_children(el)
end
385
+
386
+ EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}
387
# Convert an emphasis tag to the native type given by EMPHASIS_TYPE_MAP. Content that starts
# or ends with whitespace is left as an unconverted HTML element.
def convert_em(el)
  content = +''
  extract_text(el, content)
  if content =~ /\A\s|\s\z/
    process_html_element(el, false)
  else
    set_basics(el, EMPHASIS_TYPE_MAP[el.value])
    process_children(el)
  end
end
397
+ %w[b strong i].each do |i|
398
+ alias_method("convert_#{i}".to_sym, :convert_em)
399
+ end
400
+
401
# Convert a heading tag to a :header element. The level is the digit in the tag name and the
# concatenated text content is stored in options[:raw_text].
def convert_h1(el)
  level = el.value[1..1].to_i
  set_basics(el, :header, level: level)
  raw_text = +''
  el.options[:raw_text] = raw_text
  extract_text(el, raw_text)
  process_children(el)
end
406
+ %w[h2 h3 h4 h5 h6].each do |i|
407
+ alias_method("convert_#{i}".to_sym, :convert_h1)
408
+ end
409
+
410
# Convert +code+ to :codespan and +pre+ (via the convert_pre alias) to :codeblock when the
# content reduces to a single text string; otherwise keep it as an HTML element.
def convert_code(el)
  raw = +''
  extract_text(el, raw)
  result = process_text(raw, true)
  begin
    flattened = result.each_with_object(+'') do |c, buffer|
      case c.type
      when :text
        buffer << c.value
      when :entity
        # <, >, " and & are emitted as the literal characters.
        buffer << if [60, 62, 34, 38].include?(c.value.code_point)
                    c.value.code_point.chr
                  else
                    c.value.char
                  end
      when :smart_quote, :typographic_sym
        buffer << entity(c.value.to_s).char
      else
        raise "Bug - please report"
      end
    end
    result.clear
    result << Element.new(:text, flattened)
  rescue StandardError
    # Best effort: on any failure +result+ keeps its content and is checked below.
  end

  if result.length > 1 || result.first.type != :text
    return process_html_element(el, false, true)
  end

  if el.value == 'code'
    set_basics(el, :codespan)
    el.attr['class']&.gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '')
  else
    set_basics(el, :codeblock)
    if el.children.size == 1 && el.children.first.value == 'code'
      # Carry a language-* class from the nested <code> over to the code block.
      language = (el.children.first.attr['class'] || '').scan(/\blanguage-\S+/).first
      el.attr['class'] = "#{language} #{el.attr['class']}".rstrip if language
    end
  end
  el.value = result.first.value
  el.children.clear
end
451
+ alias convert_pre convert_code
452
+
453
# Convert a +table+ to a native :table element when is_simple_table? accepts it; otherwise
# keep it as an unconverted HTML element.
#
# For a simple table this sets options[:alignment] from the cells' +text-align+ styles,
# retypes :th cells to :td and wraps bare rows in a :tbody element.
def convert_table(el)
  unless is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  calc_alignment = lambda do |c|
    if c.type == :tr
      el.options[:alignment] = c.children.map do |td|
        if td.attr['style']
          # Remove the consumed text-align declaration; drop the attribute if nothing is left.
          td.attr['style'].slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
          td.attr.delete('style') if td.attr['style'].strip.empty?
          $1 ? $1.to_sym : :default
        else
          :default
        end
      end
    else
      c.children.each { |cc| calc_alignment.call(cc) }
    end
  end
  calc_alignment.call(el)
  el.children.delete_if { |c| c.type == :html_element }

  change_th_type = lambda do |c|
    if c.type == :th
      c.type = :td
    else
      c.children.each { |cc| change_th_type.call(cc) }
    end
  end
  change_th_type.call(el)

  # An empty table has no first child at this point, so use safe navigation instead of
  # calling #type on nil.
  if el.children.first&.type == :tr
    tbody = Element.new(:tbody)
    tbody.children = el.children
    el.children = [tbody]
  end
end
495
+
496
# Check whether the HTML table +el+ can be converted to a native table. It qualifies when its
# cells hold no block-level HTML elements, all rows have the same number of cells and the same
# left/center/right/default alignment, and the rows are either direct +td+ rows or grouped into
# thead (+th+ cells) / tbody / tfoot (+td+ cells) with at least one tbody.
def is_simple_table?(el)
  cell = ->(node) { node.value == 'th' || node.value == 'td' }

  phrasing_only = lambda do |node|
    node.children.all? do |child|
      (child.type == :text || !HTML_BLOCK_ELEMENTS.include?(child.value)) && phrasing_only.call(child)
    end
  end
  # A proc (not a lambda) on purpose: +return+ leaves is_simple_table? itself.
  verify_cells = proc do |node|
    if cell.call(node)
      return false unless phrasing_only.call(node)
    else
      node.children.each { |child| verify_cells.call(child) }
    end
  end
  verify_cells.call(el)

  # 0 = no row seen yet, -1 = rows with differing cell counts were found.
  nr_cells = 0
  count_cells = lambda do |node|
    if node.value == 'tr'
      count = node.children.count { |child| cell.call(child) }
      next if count == nr_cells

      nr_cells = nr_cells == 0 ? count : -1
    else
      node.children.each { |child| count_cells.call(child) }
    end
  end
  count_cells.call(el)
  return false if nr_cells == -1

  alignment = nil
  verify_alignment = proc do |node|
    if node.value == 'tr'
      row_alignment = node.children.select { |child| cell.call(child) }.map do |td|
        md = /text-align:\s+(center|left|right|justify|inherit)/.match(td.attr['style'].to_s)
        return false if md && (md[1] == 'justify' || md[1] == 'inherit')
        md.nil? ? :default : md[1]
      end
      alignment = row_alignment if alignment.nil?
      return false if alignment != row_alignment
    else
      node.children.each { |child| verify_alignment.call(child) }
    end
  end
  verify_alignment.call(el)

  rows_of = lambda do |node, cell_tag|
    node.children.all? do |row|
      (row.value == 'tr' || row.type == :text) &&
        row.children.all? { |c| c.value == cell_tag || c.type == :text }
    end
  end
  rows_of.call(el, 'td') ||
    (el.children.all? do |section|
      section.type == :text || (section.value == 'thead' && rows_of.call(section, 'th')) ||
        ((section.value == 'tfoot' || section.value == 'tbody') && rows_of.call(section, 'td'))
    end && el.children.any? { |section| section.value == 'tbody' })
end
555
+
556
# Math script tags become math elements; every other script stays an HTML element.
def convert_script(el)
  is_math_tag?(el) ? handle_math_tag(el) : process_html_element(el)
end
563
+
564
# Truthy (the match index) when the +type+ attribute of +el+ contains +math/tex+, else +nil+.
def is_math_tag?(el)
  script_type = el.attr['type'].to_s
  /\bmath\/tex\b/ =~ script_type
end
567
+
568
# Turn a math script tag into a :math element. The category is :block when the +type+
# attribute contains +mode=display+ and :span otherwise. The value is the first child's value
# with an optional CDATA wrapper stripped, and the +type+ attribute is removed.
def handle_math_tag(el)
  set_basics(el, :math, category: (el.attr['type'] =~ /mode=display/ ? :block : :span))
  # An empty script tag has no child at all, so fall back to an empty formula instead of
  # calling #value on nil.
  content = el.children.shift
  el.value = if content
               content.value.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1')
             else
               +''
             end
  el.attr.delete('type')
end
573
+
574
+ end
575
+
576
+ include Parser
577
+
578
+ # Parse the source string provided on initialization as HTML document.
579
def parse
  @stack = []
  @tree = @root
  @src = Kramdown::Utils::StringScanner.new(adapt_source(source))

  # Consume the document prologue: processing instructions, doctype and comments.
  loop do
    if (instruction = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/o))
      @tree.children << Element.new(:xml_pi, instruction.strip, nil, category: :block)
    elsif @src.scan(/\s*#{HTML_DOCTYPE_RE}/o)
      # ignore the doctype
    elsif (comment = @src.scan(/\s*#{HTML_COMMENT_RE}/o))
      @tree.children << Element.new(:xml_comment, comment.strip, nil, category: :block)
    else
      break
    end
  end

  # Descend into every element that is still open and whose body has not been consumed yet.
  tag_handler = lambda do |element, closed, handle_body|
    parse_raw_html(element, &tag_handler) if !closed && handle_body
  end
  parse_raw_html(@tree, &tag_handler)

  ElementConverter.convert(@tree)
end
602
+
603
+ end
604
+
605
+ end
606
+
607
+ end
608
+