daqing_kramdown 2.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (557) hide show
  1. checksums.yaml +7 -0
  2. data/AUTHORS +1 -0
  3. data/CONTRIBUTERS +78 -0
  4. data/COPYING +30 -0
  5. data/README.md +71 -0
  6. data/VERSION +1 -0
  7. data/bin/daqing_kramdown +132 -0
  8. data/data/kramdown/document.html +22 -0
  9. data/data/kramdown/document.latex +50 -0
  10. data/lib/kramdown.rb +10 -0
  11. data/lib/kramdown/converter.rb +68 -0
  12. data/lib/kramdown/converter/base.rb +261 -0
  13. data/lib/kramdown/converter/hash_ast.rb +38 -0
  14. data/lib/kramdown/converter/html.rb +535 -0
  15. data/lib/kramdown/converter/kramdown.rb +448 -0
  16. data/lib/kramdown/converter/latex.rb +625 -0
  17. data/lib/kramdown/converter/man.rb +300 -0
  18. data/lib/kramdown/converter/math_engine/mathjax.rb +32 -0
  19. data/lib/kramdown/converter/remove_html_tags.rb +57 -0
  20. data/lib/kramdown/converter/syntax_highlighter.rb +56 -0
  21. data/lib/kramdown/converter/syntax_highlighter/minted.rb +35 -0
  22. data/lib/kramdown/converter/syntax_highlighter/rouge.rb +85 -0
  23. data/lib/kramdown/converter/toc.rb +69 -0
  24. data/lib/kramdown/document.rb +139 -0
  25. data/lib/kramdown/element.rb +551 -0
  26. data/lib/kramdown/error.rb +17 -0
  27. data/lib/kramdown/options.rb +604 -0
  28. data/lib/kramdown/parser.rb +26 -0
  29. data/lib/kramdown/parser/base.rb +131 -0
  30. data/lib/kramdown/parser/html.rb +608 -0
  31. data/lib/kramdown/parser/kramdown.rb +376 -0
  32. data/lib/kramdown/parser/kramdown/abbreviation.rb +78 -0
  33. data/lib/kramdown/parser/kramdown/autolink.rb +31 -0
  34. data/lib/kramdown/parser/kramdown/blank_line.rb +30 -0
  35. data/lib/kramdown/parser/kramdown/block_boundary.rb +34 -0
  36. data/lib/kramdown/parser/kramdown/blockquote.rb +38 -0
  37. data/lib/kramdown/parser/kramdown/codeblock.rb +57 -0
  38. data/lib/kramdown/parser/kramdown/codespan.rb +54 -0
  39. data/lib/kramdown/parser/kramdown/emphasis.rb +61 -0
  40. data/lib/kramdown/parser/kramdown/eob.rb +26 -0
  41. data/lib/kramdown/parser/kramdown/escaped_chars.rb +25 -0
  42. data/lib/kramdown/parser/kramdown/extensions.rb +214 -0
  43. data/lib/kramdown/parser/kramdown/footnote.rb +64 -0
  44. data/lib/kramdown/parser/kramdown/header.rb +70 -0
  45. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +27 -0
  46. data/lib/kramdown/parser/kramdown/html.rb +162 -0
  47. data/lib/kramdown/parser/kramdown/html_entity.rb +34 -0
  48. data/lib/kramdown/parser/kramdown/line_break.rb +25 -0
  49. data/lib/kramdown/parser/kramdown/link.rb +149 -0
  50. data/lib/kramdown/parser/kramdown/list.rb +284 -0
  51. data/lib/kramdown/parser/kramdown/math.rb +53 -0
  52. data/lib/kramdown/parser/kramdown/paragraph.rb +62 -0
  53. data/lib/kramdown/parser/kramdown/smart_quotes.rb +174 -0
  54. data/lib/kramdown/parser/kramdown/table.rb +171 -0
  55. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +44 -0
  56. data/lib/kramdown/parser/markdown.rb +57 -0
  57. data/lib/kramdown/utils.rb +45 -0
  58. data/lib/kramdown/utils/configurable.rb +45 -0
  59. data/lib/kramdown/utils/entities.rb +344 -0
  60. data/lib/kramdown/utils/html.rb +84 -0
  61. data/lib/kramdown/utils/lru_cache.rb +41 -0
  62. data/lib/kramdown/utils/string_scanner.rb +81 -0
  63. data/lib/kramdown/utils/unidecoder.rb +50 -0
  64. data/lib/kramdown/version.rb +15 -0
  65. data/man/man1/kramdown.1 +0 -0
  66. data/test/run_tests.rb +46 -0
  67. data/test/test_files.rb +298 -0
  68. data/test/test_location.rb +216 -0
  69. data/test/test_string_scanner_kramdown.rb +27 -0
  70. data/test/testcases/block/01_blank_line/spaces.html +1 -0
  71. data/test/testcases/block/01_blank_line/spaces.text +3 -0
  72. data/test/testcases/block/01_blank_line/tabs.html +1 -0
  73. data/test/testcases/block/01_blank_line/tabs.text +6 -0
  74. data/test/testcases/block/02_eob/beginning.html +1 -0
  75. data/test/testcases/block/02_eob/beginning.text +3 -0
  76. data/test/testcases/block/02_eob/end.html +1 -0
  77. data/test/testcases/block/02_eob/end.text +3 -0
  78. data/test/testcases/block/02_eob/middle.html +1 -0
  79. data/test/testcases/block/02_eob/middle.text +5 -0
  80. data/test/testcases/block/03_paragraph/indented.html +18 -0
  81. data/test/testcases/block/03_paragraph/indented.html.gfm +18 -0
  82. data/test/testcases/block/03_paragraph/indented.text +19 -0
  83. data/test/testcases/block/03_paragraph/line_break_last_line.html +9 -0
  84. data/test/testcases/block/03_paragraph/line_break_last_line.text +9 -0
  85. data/test/testcases/block/03_paragraph/no_newline_at_end.html +5 -0
  86. data/test/testcases/block/03_paragraph/no_newline_at_end.text +5 -0
  87. data/test/testcases/block/03_paragraph/one_para.html +1 -0
  88. data/test/testcases/block/03_paragraph/one_para.text +1 -0
  89. data/test/testcases/block/03_paragraph/standalone_image.html +8 -0
  90. data/test/testcases/block/03_paragraph/standalone_image.text +6 -0
  91. data/test/testcases/block/03_paragraph/two_para.html +4 -0
  92. data/test/testcases/block/03_paragraph/two_para.text +4 -0
  93. data/test/testcases/block/03_paragraph/with_html_to_native.html +1 -0
  94. data/test/testcases/block/03_paragraph/with_html_to_native.options +1 -0
  95. data/test/testcases/block/03_paragraph/with_html_to_native.text +1 -0
  96. data/test/testcases/block/04_header/atx_header.html +57 -0
  97. data/test/testcases/block/04_header/atx_header.text +54 -0
  98. data/test/testcases/block/04_header/atx_header_no_newline_at_end.html +1 -0
  99. data/test/testcases/block/04_header/atx_header_no_newline_at_end.text +1 -0
  100. data/test/testcases/block/04_header/header_type_offset.html +11 -0
  101. data/test/testcases/block/04_header/header_type_offset.kramdown +12 -0
  102. data/test/testcases/block/04_header/header_type_offset.latex +12 -0
  103. data/test/testcases/block/04_header/header_type_offset.options +2 -0
  104. data/test/testcases/block/04_header/header_type_offset.text +13 -0
  105. data/test/testcases/block/04_header/setext_header.html +32 -0
  106. data/test/testcases/block/04_header/setext_header.text +39 -0
  107. data/test/testcases/block/04_header/setext_header_no_newline_at_end.html +1 -0
  108. data/test/testcases/block/04_header/setext_header_no_newline_at_end.text +2 -0
  109. data/test/testcases/block/04_header/with_auto_id_prefix.html +3 -0
  110. data/test/testcases/block/04_header/with_auto_id_prefix.options +2 -0
  111. data/test/testcases/block/04_header/with_auto_id_prefix.text +3 -0
  112. data/test/testcases/block/04_header/with_auto_id_stripping.html +1 -0
  113. data/test/testcases/block/04_header/with_auto_id_stripping.options +1 -0
  114. data/test/testcases/block/04_header/with_auto_id_stripping.text +1 -0
  115. data/test/testcases/block/04_header/with_auto_ids.html +21 -0
  116. data/test/testcases/block/04_header/with_auto_ids.options +2 -0
  117. data/test/testcases/block/04_header/with_auto_ids.text +24 -0
  118. data/test/testcases/block/05_blockquote/indented.html +25 -0
  119. data/test/testcases/block/05_blockquote/indented.text +14 -0
  120. data/test/testcases/block/05_blockquote/lazy.html +34 -0
  121. data/test/testcases/block/05_blockquote/lazy.text +20 -0
  122. data/test/testcases/block/05_blockquote/nested.html +10 -0
  123. data/test/testcases/block/05_blockquote/nested.text +6 -0
  124. data/test/testcases/block/05_blockquote/no_newline_at_end.html +4 -0
  125. data/test/testcases/block/05_blockquote/no_newline_at_end.text +2 -0
  126. data/test/testcases/block/05_blockquote/very_long_line.html +3 -0
  127. data/test/testcases/block/05_blockquote/very_long_line.text +1 -0
  128. data/test/testcases/block/05_blockquote/with_code_blocks.html +15 -0
  129. data/test/testcases/block/05_blockquote/with_code_blocks.text +11 -0
  130. data/test/testcases/block/06_codeblock/disable-highlighting.html +4 -0
  131. data/test/testcases/block/06_codeblock/disable-highlighting.options +1 -0
  132. data/test/testcases/block/06_codeblock/disable-highlighting.text +4 -0
  133. data/test/testcases/block/06_codeblock/error.html +4 -0
  134. data/test/testcases/block/06_codeblock/error.text +4 -0
  135. data/test/testcases/block/06_codeblock/guess_lang_css_class.html +15 -0
  136. data/test/testcases/block/06_codeblock/guess_lang_css_class.options +2 -0
  137. data/test/testcases/block/06_codeblock/guess_lang_css_class.text +13 -0
  138. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.latex +9 -0
  139. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.options +4 -0
  140. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.text +5 -0
  141. data/test/testcases/block/06_codeblock/highlighting-minted.latex +8 -0
  142. data/test/testcases/block/06_codeblock/highlighting-minted.options +3 -0
  143. data/test/testcases/block/06_codeblock/highlighting-minted.text +4 -0
  144. data/test/testcases/block/06_codeblock/highlighting-opts.html +6 -0
  145. data/test/testcases/block/06_codeblock/highlighting-opts.options +7 -0
  146. data/test/testcases/block/06_codeblock/highlighting-opts.text +4 -0
  147. data/test/testcases/block/06_codeblock/highlighting.html +5 -0
  148. data/test/testcases/block/06_codeblock/highlighting.options +5 -0
  149. data/test/testcases/block/06_codeblock/highlighting.text +4 -0
  150. data/test/testcases/block/06_codeblock/issue_gh45.html +164 -0
  151. data/test/testcases/block/06_codeblock/issue_gh45.test +188 -0
  152. data/test/testcases/block/06_codeblock/lazy.html +4 -0
  153. data/test/testcases/block/06_codeblock/lazy.text +5 -0
  154. data/test/testcases/block/06_codeblock/no_newline_at_end.html +2 -0
  155. data/test/testcases/block/06_codeblock/no_newline_at_end.text +1 -0
  156. data/test/testcases/block/06_codeblock/no_newline_at_end_1.html +2 -0
  157. data/test/testcases/block/06_codeblock/no_newline_at_end_1.text +2 -0
  158. data/test/testcases/block/06_codeblock/normal.html +13 -0
  159. data/test/testcases/block/06_codeblock/normal.text +10 -0
  160. data/test/testcases/block/06_codeblock/rouge/disabled.html +2 -0
  161. data/test/testcases/block/06_codeblock/rouge/disabled.options +4 -0
  162. data/test/testcases/block/06_codeblock/rouge/disabled.text +1 -0
  163. data/test/testcases/block/06_codeblock/rouge/multiple.html +11 -0
  164. data/test/testcases/block/06_codeblock/rouge/multiple.options +4 -0
  165. data/test/testcases/block/06_codeblock/rouge/multiple.text +11 -0
  166. data/test/testcases/block/06_codeblock/rouge/simple.html +10 -0
  167. data/test/testcases/block/06_codeblock/rouge/simple.options +3 -0
  168. data/test/testcases/block/06_codeblock/rouge/simple.text +9 -0
  169. data/test/testcases/block/06_codeblock/tilde_syntax.html +7 -0
  170. data/test/testcases/block/06_codeblock/tilde_syntax.text +9 -0
  171. data/test/testcases/block/06_codeblock/whitespace.html +3 -0
  172. data/test/testcases/block/06_codeblock/whitespace.text +3 -0
  173. data/test/testcases/block/06_codeblock/with_blank_line.html +13 -0
  174. data/test/testcases/block/06_codeblock/with_blank_line.text +12 -0
  175. data/test/testcases/block/06_codeblock/with_eob_marker.html +6 -0
  176. data/test/testcases/block/06_codeblock/with_eob_marker.text +5 -0
  177. data/test/testcases/block/06_codeblock/with_ial.html +6 -0
  178. data/test/testcases/block/06_codeblock/with_ial.text +5 -0
  179. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.html +24 -0
  180. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.options +2 -0
  181. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.text +33 -0
  182. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.html +8 -0
  183. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.options +2 -0
  184. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.text +11 -0
  185. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.html +3 -0
  186. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.options +2 -0
  187. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.text +4 -0
  188. data/test/testcases/block/07_horizontal_rule/error.html +7 -0
  189. data/test/testcases/block/07_horizontal_rule/error.text +7 -0
  190. data/test/testcases/block/07_horizontal_rule/normal.html +19 -0
  191. data/test/testcases/block/07_horizontal_rule/normal.text +20 -0
  192. data/test/testcases/block/07_horizontal_rule/sepspaces.html +3 -0
  193. data/test/testcases/block/07_horizontal_rule/sepspaces.text +3 -0
  194. data/test/testcases/block/07_horizontal_rule/septabs.html +3 -0
  195. data/test/testcases/block/07_horizontal_rule/septabs.text +3 -0
  196. data/test/testcases/block/08_list/brackets_in_item.latex +3 -0
  197. data/test/testcases/block/08_list/brackets_in_item.text +1 -0
  198. data/test/testcases/block/08_list/escaping.html +17 -0
  199. data/test/testcases/block/08_list/escaping.text +17 -0
  200. data/test/testcases/block/08_list/item_ial.html +10 -0
  201. data/test/testcases/block/08_list/item_ial.text +8 -0
  202. data/test/testcases/block/08_list/lazy.html +39 -0
  203. data/test/testcases/block/08_list/lazy.text +29 -0
  204. data/test/testcases/block/08_list/lazy_and_nested.html +9 -0
  205. data/test/testcases/block/08_list/lazy_and_nested.text +4 -0
  206. data/test/testcases/block/08_list/list_and_hr.html +9 -0
  207. data/test/testcases/block/08_list/list_and_hr.text +5 -0
  208. data/test/testcases/block/08_list/list_and_others.html +40 -0
  209. data/test/testcases/block/08_list/list_and_others.text +26 -0
  210. data/test/testcases/block/08_list/mixed.html +117 -0
  211. data/test/testcases/block/08_list/mixed.text +66 -0
  212. data/test/testcases/block/08_list/nested.html +17 -0
  213. data/test/testcases/block/08_list/nested.text +7 -0
  214. data/test/testcases/block/08_list/other_first_element.html +39 -0
  215. data/test/testcases/block/08_list/other_first_element.text +18 -0
  216. data/test/testcases/block/08_list/simple_ol.html +19 -0
  217. data/test/testcases/block/08_list/simple_ol.text +13 -0
  218. data/test/testcases/block/08_list/simple_ul.html +48 -0
  219. data/test/testcases/block/08_list/simple_ul.text +36 -0
  220. data/test/testcases/block/08_list/single_item.html +3 -0
  221. data/test/testcases/block/08_list/single_item.text +1 -0
  222. data/test/testcases/block/08_list/special_cases.html +62 -0
  223. data/test/testcases/block/08_list/special_cases.text +40 -0
  224. data/test/testcases/block/09_html/comment.html +18 -0
  225. data/test/testcases/block/09_html/comment.text +15 -0
  226. data/test/testcases/block/09_html/content_model/deflists.html +6 -0
  227. data/test/testcases/block/09_html/content_model/deflists.options +1 -0
  228. data/test/testcases/block/09_html/content_model/deflists.text +6 -0
  229. data/test/testcases/block/09_html/content_model/tables.html +14 -0
  230. data/test/testcases/block/09_html/content_model/tables.options +1 -0
  231. data/test/testcases/block/09_html/content_model/tables.text +14 -0
  232. data/test/testcases/block/09_html/html5_attributes.html +15 -0
  233. data/test/testcases/block/09_html/html5_attributes.text +15 -0
  234. data/test/testcases/block/09_html/html_after_block.html +7 -0
  235. data/test/testcases/block/09_html/html_after_block.text +5 -0
  236. data/test/testcases/block/09_html/html_and_codeblocks.html +15 -0
  237. data/test/testcases/block/09_html/html_and_codeblocks.options +1 -0
  238. data/test/testcases/block/09_html/html_and_codeblocks.text +13 -0
  239. data/test/testcases/block/09_html/html_and_headers.html +5 -0
  240. data/test/testcases/block/09_html/html_and_headers.text +6 -0
  241. data/test/testcases/block/09_html/html_to_native/code.html +10 -0
  242. data/test/testcases/block/09_html/html_to_native/code.text +9 -0
  243. data/test/testcases/block/09_html/html_to_native/comment.html +7 -0
  244. data/test/testcases/block/09_html/html_to_native/comment.text +8 -0
  245. data/test/testcases/block/09_html/html_to_native/emphasis.html +6 -0
  246. data/test/testcases/block/09_html/html_to_native/emphasis.text +6 -0
  247. data/test/testcases/block/09_html/html_to_native/entity.html +1 -0
  248. data/test/testcases/block/09_html/html_to_native/entity.text +1 -0
  249. data/test/testcases/block/09_html/html_to_native/header.html +6 -0
  250. data/test/testcases/block/09_html/html_to_native/header.options +2 -0
  251. data/test/testcases/block/09_html/html_to_native/header.text +6 -0
  252. data/test/testcases/block/09_html/html_to_native/list_dl.html +8 -0
  253. data/test/testcases/block/09_html/html_to_native/list_dl.text +8 -0
  254. data/test/testcases/block/09_html/html_to_native/list_ol.html +15 -0
  255. data/test/testcases/block/09_html/html_to_native/list_ol.text +17 -0
  256. data/test/testcases/block/09_html/html_to_native/list_ul.html +19 -0
  257. data/test/testcases/block/09_html/html_to_native/list_ul.text +22 -0
  258. data/test/testcases/block/09_html/html_to_native/options +1 -0
  259. data/test/testcases/block/09_html/html_to_native/paragraph.html +3 -0
  260. data/test/testcases/block/09_html/html_to_native/paragraph.text +4 -0
  261. data/test/testcases/block/09_html/html_to_native/table_normal.html +12 -0
  262. data/test/testcases/block/09_html/html_to_native/table_normal.text +12 -0
  263. data/test/testcases/block/09_html/html_to_native/table_simple.html +61 -0
  264. data/test/testcases/block/09_html/html_to_native/table_simple.text +71 -0
  265. data/test/testcases/block/09_html/html_to_native/typography.html +1 -0
  266. data/test/testcases/block/09_html/html_to_native/typography.text +1 -0
  267. data/test/testcases/block/09_html/invalid_html_1.html +5 -0
  268. data/test/testcases/block/09_html/invalid_html_1.text +5 -0
  269. data/test/testcases/block/09_html/invalid_html_2.html +5 -0
  270. data/test/testcases/block/09_html/invalid_html_2.text +5 -0
  271. data/test/testcases/block/09_html/markdown_attr.html +38 -0
  272. data/test/testcases/block/09_html/markdown_attr.text +38 -0
  273. data/test/testcases/block/09_html/not_parsed.html +24 -0
  274. data/test/testcases/block/09_html/not_parsed.text +24 -0
  275. data/test/testcases/block/09_html/parse_as_raw.html +35 -0
  276. data/test/testcases/block/09_html/parse_as_raw.htmlinput +34 -0
  277. data/test/testcases/block/09_html/parse_as_raw.options +1 -0
  278. data/test/testcases/block/09_html/parse_as_raw.text +33 -0
  279. data/test/testcases/block/09_html/parse_as_span.html +12 -0
  280. data/test/testcases/block/09_html/parse_as_span.htmlinput +12 -0
  281. data/test/testcases/block/09_html/parse_as_span.options +1 -0
  282. data/test/testcases/block/09_html/parse_as_span.text +9 -0
  283. data/test/testcases/block/09_html/parse_block_html.html +21 -0
  284. data/test/testcases/block/09_html/parse_block_html.options +1 -0
  285. data/test/testcases/block/09_html/parse_block_html.text +17 -0
  286. data/test/testcases/block/09_html/processing_instruction.html +12 -0
  287. data/test/testcases/block/09_html/processing_instruction.text +12 -0
  288. data/test/testcases/block/09_html/simple.html +60 -0
  289. data/test/testcases/block/09_html/simple.options +1 -0
  290. data/test/testcases/block/09_html/simple.text +55 -0
  291. data/test/testcases/block/09_html/standalone_image_in_div.htmlinput +7 -0
  292. data/test/testcases/block/09_html/standalone_image_in_div.text +8 -0
  293. data/test/testcases/block/09_html/textarea.html +8 -0
  294. data/test/testcases/block/09_html/textarea.text +8 -0
  295. data/test/testcases/block/09_html/xml.html +8 -0
  296. data/test/testcases/block/09_html/xml.text +7 -0
  297. data/test/testcases/block/10_ald/simple.html +2 -0
  298. data/test/testcases/block/10_ald/simple.text +8 -0
  299. data/test/testcases/block/11_ial/auto_id_and_ial.html +1 -0
  300. data/test/testcases/block/11_ial/auto_id_and_ial.options +1 -0
  301. data/test/testcases/block/11_ial/auto_id_and_ial.text +2 -0
  302. data/test/testcases/block/11_ial/nested.html +11 -0
  303. data/test/testcases/block/11_ial/nested.text +15 -0
  304. data/test/testcases/block/11_ial/simple.html +29 -0
  305. data/test/testcases/block/11_ial/simple.text +41 -0
  306. data/test/testcases/block/12_extension/comment.html +8 -0
  307. data/test/testcases/block/12_extension/comment.text +12 -0
  308. data/test/testcases/block/12_extension/ignored.html +8 -0
  309. data/test/testcases/block/12_extension/ignored.text +8 -0
  310. data/test/testcases/block/12_extension/nomarkdown.html +10 -0
  311. data/test/testcases/block/12_extension/nomarkdown.kramdown +20 -0
  312. data/test/testcases/block/12_extension/nomarkdown.latex +13 -0
  313. data/test/testcases/block/12_extension/nomarkdown.text +21 -0
  314. data/test/testcases/block/12_extension/options.html +21 -0
  315. data/test/testcases/block/12_extension/options.text +23 -0
  316. data/test/testcases/block/12_extension/options2.html +10 -0
  317. data/test/testcases/block/12_extension/options2.text +5 -0
  318. data/test/testcases/block/12_extension/options3.html +8 -0
  319. data/test/testcases/block/12_extension/options3.text +7 -0
  320. data/test/testcases/block/13_definition_list/auto_ids.html +15 -0
  321. data/test/testcases/block/13_definition_list/auto_ids.text +18 -0
  322. data/test/testcases/block/13_definition_list/definition_at_beginning.html +1 -0
  323. data/test/testcases/block/13_definition_list/definition_at_beginning.text +1 -0
  324. data/test/testcases/block/13_definition_list/deflist_ial.html +4 -0
  325. data/test/testcases/block/13_definition_list/deflist_ial.text +4 -0
  326. data/test/testcases/block/13_definition_list/item_ial.html +17 -0
  327. data/test/testcases/block/13_definition_list/item_ial.text +16 -0
  328. data/test/testcases/block/13_definition_list/multiple_terms.html +13 -0
  329. data/test/testcases/block/13_definition_list/multiple_terms.text +10 -0
  330. data/test/testcases/block/13_definition_list/no_def_list.html +2 -0
  331. data/test/testcases/block/13_definition_list/no_def_list.text +2 -0
  332. data/test/testcases/block/13_definition_list/para_wrapping.html +10 -0
  333. data/test/testcases/block/13_definition_list/para_wrapping.text +6 -0
  334. data/test/testcases/block/13_definition_list/separated_by_eob.html +8 -0
  335. data/test/testcases/block/13_definition_list/separated_by_eob.text +5 -0
  336. data/test/testcases/block/13_definition_list/simple.html +10 -0
  337. data/test/testcases/block/13_definition_list/simple.text +10 -0
  338. data/test/testcases/block/13_definition_list/styled_terms.html +4 -0
  339. data/test/testcases/block/13_definition_list/styled_terms.text +2 -0
  340. data/test/testcases/block/13_definition_list/too_much_space.html +3 -0
  341. data/test/testcases/block/13_definition_list/too_much_space.text +4 -0
  342. data/test/testcases/block/13_definition_list/with_blocks.html +38 -0
  343. data/test/testcases/block/13_definition_list/with_blocks.text +24 -0
  344. data/test/testcases/block/14_table/empty_tag_in_cell.html +8 -0
  345. data/test/testcases/block/14_table/empty_tag_in_cell.options +1 -0
  346. data/test/testcases/block/14_table/empty_tag_in_cell.text +1 -0
  347. data/test/testcases/block/14_table/errors.html +12 -0
  348. data/test/testcases/block/14_table/errors.text +13 -0
  349. data/test/testcases/block/14_table/escaping.html +52 -0
  350. data/test/testcases/block/14_table/escaping.text +19 -0
  351. data/test/testcases/block/14_table/footer.html +65 -0
  352. data/test/testcases/block/14_table/footer.text +25 -0
  353. data/test/testcases/block/14_table/header.html +117 -0
  354. data/test/testcases/block/14_table/header.text +39 -0
  355. data/test/testcases/block/14_table/no_table.html +3 -0
  356. data/test/testcases/block/14_table/no_table.text +3 -0
  357. data/test/testcases/block/14_table/simple.html +192 -0
  358. data/test/testcases/block/14_table/simple.text +53 -0
  359. data/test/testcases/block/14_table/table_with_footnote.html +25 -0
  360. data/test/testcases/block/14_table/table_with_footnote.latex +11 -0
  361. data/test/testcases/block/14_table/table_with_footnote.text +6 -0
  362. data/test/testcases/block/15_math/gh_128.html +1 -0
  363. data/test/testcases/block/15_math/gh_128.text +1 -0
  364. data/test/testcases/block/15_math/no_engine.html +3 -0
  365. data/test/testcases/block/15_math/no_engine.options +1 -0
  366. data/test/testcases/block/15_math/no_engine.text +2 -0
  367. data/test/testcases/block/15_math/normal.html +30 -0
  368. data/test/testcases/block/15_math/normal.text +30 -0
  369. data/test/testcases/block/16_toc/no_toc.html +14 -0
  370. data/test/testcases/block/16_toc/no_toc.text +16 -0
  371. data/test/testcases/block/16_toc/toc_exclude.html +35 -0
  372. data/test/testcases/block/16_toc/toc_exclude.options +1 -0
  373. data/test/testcases/block/16_toc/toc_exclude.text +19 -0
  374. data/test/testcases/block/16_toc/toc_levels.html +24 -0
  375. data/test/testcases/block/16_toc/toc_levels.options +2 -0
  376. data/test/testcases/block/16_toc/toc_levels.text +16 -0
  377. data/test/testcases/block/16_toc/toc_with_footnotes.html +13 -0
  378. data/test/testcases/block/16_toc/toc_with_footnotes.options +1 -0
  379. data/test/testcases/block/16_toc/toc_with_footnotes.text +6 -0
  380. data/test/testcases/block/16_toc/toc_with_links.html +8 -0
  381. data/test/testcases/block/16_toc/toc_with_links.options +2 -0
  382. data/test/testcases/block/16_toc/toc_with_links.text +8 -0
  383. data/test/testcases/cjk-line-break.html +4 -0
  384. data/test/testcases/cjk-line-break.options +1 -0
  385. data/test/testcases/cjk-line-break.text +12 -0
  386. data/test/testcases/encoding.html +46 -0
  387. data/test/testcases/encoding.text +28 -0
  388. data/test/testcases/man/example.man +123 -0
  389. data/test/testcases/man/example.text +85 -0
  390. data/test/testcases/man/heading-name-dash-description.man +4 -0
  391. data/test/testcases/man/heading-name-dash-description.text +1 -0
  392. data/test/testcases/man/heading-name-description.man +4 -0
  393. data/test/testcases/man/heading-name-description.text +2 -0
  394. data/test/testcases/man/heading-name-section-description.man +4 -0
  395. data/test/testcases/man/heading-name-section-description.text +1 -0
  396. data/test/testcases/man/heading-name-section.man +2 -0
  397. data/test/testcases/man/heading-name-section.text +1 -0
  398. data/test/testcases/man/heading-name.man +2 -0
  399. data/test/testcases/man/heading-name.text +1 -0
  400. data/test/testcases/man/sections.man +4 -0
  401. data/test/testcases/man/sections.text +11 -0
  402. data/test/testcases/man/text-escaping.man +8 -0
  403. data/test/testcases/man/text-escaping.text +7 -0
  404. data/test/testcases/span/01_link/empty.html +5 -0
  405. data/test/testcases/span/01_link/empty.text +5 -0
  406. data/test/testcases/span/01_link/empty_title.htmlinput +3 -0
  407. data/test/testcases/span/01_link/empty_title.text +7 -0
  408. data/test/testcases/span/01_link/image_in_a.html +5 -0
  409. data/test/testcases/span/01_link/image_in_a.text +5 -0
  410. data/test/testcases/span/01_link/imagelinks.html +15 -0
  411. data/test/testcases/span/01_link/imagelinks.text +18 -0
  412. data/test/testcases/span/01_link/inline.html +46 -0
  413. data/test/testcases/span/01_link/inline.text +48 -0
  414. data/test/testcases/span/01_link/latex_escaping.latex +6 -0
  415. data/test/testcases/span/01_link/latex_escaping.text +5 -0
  416. data/test/testcases/span/01_link/link_defs.html +9 -0
  417. data/test/testcases/span/01_link/link_defs.text +27 -0
  418. data/test/testcases/span/01_link/link_defs_with_ial.html +4 -0
  419. data/test/testcases/span/01_link/link_defs_with_ial.text +16 -0
  420. data/test/testcases/span/01_link/links_with_angle_brackets.html +3 -0
  421. data/test/testcases/span/01_link/links_with_angle_brackets.text +3 -0
  422. data/test/testcases/span/01_link/reference.html +37 -0
  423. data/test/testcases/span/01_link/reference.options +3 -0
  424. data/test/testcases/span/01_link/reference.text +53 -0
  425. data/test/testcases/span/02_emphasis/empty.html +3 -0
  426. data/test/testcases/span/02_emphasis/empty.text +3 -0
  427. data/test/testcases/span/02_emphasis/errors.html +9 -0
  428. data/test/testcases/span/02_emphasis/errors.text +9 -0
  429. data/test/testcases/span/02_emphasis/nesting.html +41 -0
  430. data/test/testcases/span/02_emphasis/nesting.text +36 -0
  431. data/test/testcases/span/02_emphasis/normal.html +65 -0
  432. data/test/testcases/span/02_emphasis/normal.options +1 -0
  433. data/test/testcases/span/02_emphasis/normal.text +63 -0
  434. data/test/testcases/span/03_codespan/empty.html +5 -0
  435. data/test/testcases/span/03_codespan/empty.text +5 -0
  436. data/test/testcases/span/03_codespan/errors.html +1 -0
  437. data/test/testcases/span/03_codespan/errors.text +1 -0
  438. data/test/testcases/span/03_codespan/highlighting-minted.latex +2 -0
  439. data/test/testcases/span/03_codespan/highlighting-minted.options +1 -0
  440. data/test/testcases/span/03_codespan/highlighting-minted.text +1 -0
  441. data/test/testcases/span/03_codespan/highlighting.html +1 -0
  442. data/test/testcases/span/03_codespan/highlighting.text +1 -0
  443. data/test/testcases/span/03_codespan/normal-css-class.html +1 -0
  444. data/test/testcases/span/03_codespan/normal-css-class.options +2 -0
  445. data/test/testcases/span/03_codespan/normal-css-class.text +1 -0
  446. data/test/testcases/span/03_codespan/normal.html +16 -0
  447. data/test/testcases/span/03_codespan/normal.text +16 -0
  448. data/test/testcases/span/03_codespan/rouge/disabled.html +1 -0
  449. data/test/testcases/span/03_codespan/rouge/disabled.options +4 -0
  450. data/test/testcases/span/03_codespan/rouge/disabled.text +1 -0
  451. data/test/testcases/span/03_codespan/rouge/simple.html +1 -0
  452. data/test/testcases/span/03_codespan/rouge/simple.options +1 -0
  453. data/test/testcases/span/03_codespan/rouge/simple.text +1 -0
  454. data/test/testcases/span/04_footnote/backlink_inline.html +79 -0
  455. data/test/testcases/span/04_footnote/backlink_inline.options +1 -0
  456. data/test/testcases/span/04_footnote/backlink_inline.text +38 -0
  457. data/test/testcases/span/04_footnote/backlink_text.html +9 -0
  458. data/test/testcases/span/04_footnote/backlink_text.options +1 -0
  459. data/test/testcases/span/04_footnote/backlink_text.text +3 -0
  460. data/test/testcases/span/04_footnote/definitions.html +17 -0
  461. data/test/testcases/span/04_footnote/definitions.latex +17 -0
  462. data/test/testcases/span/04_footnote/definitions.text +24 -0
  463. data/test/testcases/span/04_footnote/footnote_nr.html +12 -0
  464. data/test/testcases/span/04_footnote/footnote_nr.latex +2 -0
  465. data/test/testcases/span/04_footnote/footnote_nr.options +1 -0
  466. data/test/testcases/span/04_footnote/footnote_nr.text +4 -0
  467. data/test/testcases/span/04_footnote/footnote_prefix.html +12 -0
  468. data/test/testcases/span/04_footnote/footnote_prefix.options +1 -0
  469. data/test/testcases/span/04_footnote/footnote_prefix.text +4 -0
  470. data/test/testcases/span/04_footnote/inside_footnote.html +17 -0
  471. data/test/testcases/span/04_footnote/inside_footnote.text +9 -0
  472. data/test/testcases/span/04_footnote/markers.html +46 -0
  473. data/test/testcases/span/04_footnote/markers.latex +23 -0
  474. data/test/testcases/span/04_footnote/markers.options +2 -0
  475. data/test/testcases/span/04_footnote/markers.text +27 -0
  476. data/test/testcases/span/04_footnote/placement.html +11 -0
  477. data/test/testcases/span/04_footnote/placement.options +1 -0
  478. data/test/testcases/span/04_footnote/placement.text +8 -0
  479. data/test/testcases/span/04_footnote/regexp_problem.html +14 -0
  480. data/test/testcases/span/04_footnote/regexp_problem.options +2 -0
  481. data/test/testcases/span/04_footnote/regexp_problem.text +52 -0
  482. data/test/testcases/span/04_footnote/without_backlink.html +9 -0
  483. data/test/testcases/span/04_footnote/without_backlink.options +1 -0
  484. data/test/testcases/span/04_footnote/without_backlink.text +3 -0
  485. data/test/testcases/span/05_html/across_lines.html +1 -0
  486. data/test/testcases/span/05_html/across_lines.text +2 -0
  487. data/test/testcases/span/05_html/button.html +7 -0
  488. data/test/testcases/span/05_html/button.text +7 -0
  489. data/test/testcases/span/05_html/invalid.html +1 -0
  490. data/test/testcases/span/05_html/invalid.text +1 -0
  491. data/test/testcases/span/05_html/link_with_mailto.html +1 -0
  492. data/test/testcases/span/05_html/link_with_mailto.text +1 -0
  493. data/test/testcases/span/05_html/mark_element.html +3 -0
  494. data/test/testcases/span/05_html/mark_element.text +3 -0
  495. data/test/testcases/span/05_html/markdown_attr.html +6 -0
  496. data/test/testcases/span/05_html/markdown_attr.text +6 -0
  497. data/test/testcases/span/05_html/normal.html +43 -0
  498. data/test/testcases/span/05_html/normal.text +43 -0
  499. data/test/testcases/span/05_html/raw_span_elements.html +2 -0
  500. data/test/testcases/span/05_html/raw_span_elements.text +2 -0
  501. data/test/testcases/span/05_html/xml.html +5 -0
  502. data/test/testcases/span/05_html/xml.text +5 -0
  503. data/test/testcases/span/abbreviations/abbrev.html +21 -0
  504. data/test/testcases/span/abbreviations/abbrev.text +34 -0
  505. data/test/testcases/span/abbreviations/abbrev_defs.html +2 -0
  506. data/test/testcases/span/abbreviations/abbrev_defs.text +5 -0
  507. data/test/testcases/span/abbreviations/in_footnote.html +9 -0
  508. data/test/testcases/span/abbreviations/in_footnote.text +5 -0
  509. data/test/testcases/span/autolinks/url_links.html +15 -0
  510. data/test/testcases/span/autolinks/url_links.text +16 -0
  511. data/test/testcases/span/escaped_chars/normal.html +47 -0
  512. data/test/testcases/span/escaped_chars/normal.text +47 -0
  513. data/test/testcases/span/extension/comment.html +6 -0
  514. data/test/testcases/span/extension/comment.text +6 -0
  515. data/test/testcases/span/extension/ignored.html +1 -0
  516. data/test/testcases/span/extension/ignored.text +1 -0
  517. data/test/testcases/span/extension/nomarkdown.html +1 -0
  518. data/test/testcases/span/extension/nomarkdown.text +1 -0
  519. data/test/testcases/span/extension/options.html +1 -0
  520. data/test/testcases/span/extension/options.text +1 -0
  521. data/test/testcases/span/ial/simple.html +6 -0
  522. data/test/testcases/span/ial/simple.text +6 -0
  523. data/test/testcases/span/line_breaks/normal.html +11 -0
  524. data/test/testcases/span/line_breaks/normal.latex +12 -0
  525. data/test/testcases/span/line_breaks/normal.text +11 -0
  526. data/test/testcases/span/math/no_engine.html +1 -0
  527. data/test/testcases/span/math/no_engine.options +1 -0
  528. data/test/testcases/span/math/no_engine.text +1 -0
  529. data/test/testcases/span/math/normal.html +10 -0
  530. data/test/testcases/span/math/normal.text +10 -0
  531. data/test/testcases/span/text_substitutions/entities.html +6 -0
  532. data/test/testcases/span/text_substitutions/entities.options +1 -0
  533. data/test/testcases/span/text_substitutions/entities.text +6 -0
  534. data/test/testcases/span/text_substitutions/entities_as_char.html +1 -0
  535. data/test/testcases/span/text_substitutions/entities_as_char.options +2 -0
  536. data/test/testcases/span/text_substitutions/entities_as_char.text +1 -0
  537. data/test/testcases/span/text_substitutions/entities_as_input.html +1 -0
  538. data/test/testcases/span/text_substitutions/entities_as_input.options +1 -0
  539. data/test/testcases/span/text_substitutions/entities_as_input.text +1 -0
  540. data/test/testcases/span/text_substitutions/entities_numeric.html +1 -0
  541. data/test/testcases/span/text_substitutions/entities_numeric.options +1 -0
  542. data/test/testcases/span/text_substitutions/entities_numeric.text +1 -0
  543. data/test/testcases/span/text_substitutions/entities_symbolic.html +1 -0
  544. data/test/testcases/span/text_substitutions/entities_symbolic.options +1 -0
  545. data/test/testcases/span/text_substitutions/entities_symbolic.text +1 -0
  546. data/test/testcases/span/text_substitutions/greaterthan.html +1 -0
  547. data/test/testcases/span/text_substitutions/greaterthan.text +1 -0
  548. data/test/testcases/span/text_substitutions/lowerthan.html +1 -0
  549. data/test/testcases/span/text_substitutions/lowerthan.text +1 -0
  550. data/test/testcases/span/text_substitutions/typography.html +40 -0
  551. data/test/testcases/span/text_substitutions/typography.options +1 -0
  552. data/test/testcases/span/text_substitutions/typography.text +40 -0
  553. data/test/testcases/span/text_substitutions/typography_subst.html +3 -0
  554. data/test/testcases/span/text_substitutions/typography_subst.latex +4 -0
  555. data/test/testcases/span/text_substitutions/typography_subst.options +8 -0
  556. data/test/testcases/span/text_substitutions/typography_subst.text +3 -0
  557. metadata +659 -0
@@ -0,0 +1,26 @@
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
module Kramdown

  # This module contains all available parsers. A parser takes an input string and converts the
  # string to an element tree.
  #
  # New parsers should be derived from the Base class which provides common functionality - see its
  # API documentation for how to create a custom parser class.
  module Parser

    # The parser classes are registered for autoloading from the listed files.
    autoload :Base, 'kramdown/parser/base'
    autoload :Kramdown, 'kramdown/parser/kramdown'
    autoload :Html, 'kramdown/parser/html'
    autoload :Markdown, 'kramdown/parser/markdown'

  end

end
@@ -0,0 +1,131 @@
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ require 'kramdown/utils'
11
+ require 'kramdown/parser'
12
+
13
+ module Kramdown
14
+
15
+ module Parser
16
+
17
+ # == \Base class for parsers
18
+ #
19
+ # This class serves as base class for parsers. It provides common methods that can/should be
20
+ # used by all parsers, especially by those using StringScanner(Kramdown) for parsing.
21
+ #
22
+ # A parser object is used as a throw-away object, i.e. it is only used for storing the needed
23
+ # state information during parsing. Therefore one can't instantiate a parser object directly but
24
+ # only use the Base::parse method.
25
+ #
26
+ # == Implementing a parser
27
+ #
28
+ # Implementing a new parser is rather easy: just derive a new class from this class and put it
29
+ # in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
30
+ # parser works correctly. Then you need to implement the +#parse+ method which has to contain
31
+ # the parsing code.
32
+ #
33
+ # Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
34
+ class Base
35
+
36
+ # The hash with the parsing options.
37
+ attr_reader :options
38
+
39
+ # The array with the parser warnings.
40
+ attr_reader :warnings
41
+
42
+ # The original source string.
43
+ attr_reader :source
44
+
45
+ # The root element of the element tree that is created from the source string.
46
+ attr_reader :root
47
+
48
+ # Initialize the parser object with the +source+ string and the parsing +options+.
49
+ #
50
+ # The @root element, the @warnings array and @text_type (specifies the default type for newly
51
+ # created text nodes) are automatically initialized.
52
def initialize(source, options)
  @source = source
  @options = Kramdown::Options.merge(options)
  # Any error raised while querying the encoding (e.g. a source object without #encoding)
  # results in a nil encoding for the root element.
  source_encoding = (source.encoding rescue nil)
  @root = Element.new(:root, nil, nil, encoding: source_encoding, location: 1,
                      options: {}, abbrev_defs: {}, abbrev_attr: {})
  @warnings = []
  @text_type = :text
end
60
+ private_class_method(:new, :allocate)
61
+
62
+ # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
63
+ # return the root element of the element tree and an array with warning messages.
64
+ #
65
+ # Initializes a new instance of the calling class and then calls the +#parse+ method that must
66
+ # be implemented by each subclass.
67
def self.parse(source, options = {})
  # +new+ is private on the class, so this is the only public way to run a parser.
  instance = new(source, options)
  instance.parse
  return instance.root, instance.warnings
end
72
+
73
+ # Parse the source string into an element tree.
74
+ #
75
+ # The parsing code should parse the source provided in @source and build an element tree the
76
+ # root of which should be @root.
77
+ #
78
+ # This is the only method that has to be implemented by sub-classes!
79
def parse
  # The base implementation always raises; sub-classes provide the actual parsing code.
  raise NotImplementedError
end
82
+
83
+ # Add the given warning +text+ to the warning array.
84
def warning(text)
  # TODO: add position information
  @warnings.push(text)
end
88
+
89
+ # Modify the string +source+ to be usable by the parser (unifies line ending characters to
90
+ # +\n+ and makes sure +source+ ends with a new line character).
91
def adapt_source(source)
  raise "The source text contains invalid characters for the used encoding #{source.encoding}" unless source.valid_encoding?

  # Work on a transcoded copy so the caller's string is never modified: unify line endings,
  # then make sure the text ends with exactly the newline it had (or a single added one).
  source.encode('UTF-8').gsub(/\r\n?/, "\n").chomp << "\n"
end
100
+
101
+ # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
102
+ # +type+ element or creates a new text element with the given +type+.
103
def add_text(text, tree = @tree, type = @text_type)
  previous = tree.children.last
  # Merge into the preceding element when it already has the requested type.
  return previous.value << text if previous && previous.type == type
  # Never create an element for empty text.
  return if text.empty?

  # The new element takes the location of the preceding sibling, falling back to the parent's.
  location = (previous && previous.options[:location]) || tree.options[:location]
  tree.children << Element.new(type, text, nil, location: location)
end
112
+
113
# Extract the part of the StringScanner +strscan+ backed string specified by the +range+ of
# byte offsets.
#
# The scanner's string is sliced by byte position without being modified, so this also works
# when that string is frozen. The returned string has the same encoding as the scanner's
# string.
def extract_string(range, strscan)
  strscan.string.byteslice(range)
end
126
+
127
+ end
128
+
129
+ end
130
+
131
+ end
@@ -0,0 +1,608 @@
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ require 'rexml/parsers/baseparser'
11
+ require 'strscan'
12
+ require 'kramdown/utils'
13
+ require 'kramdown/parser'
14
+
15
+ module Kramdown
16
+
17
+ module Parser
18
+
19
+ # Used for parsing an HTML document.
20
+ #
21
+ # The parsing code is in the Parser module that can also be used by other parsers.
22
+ class Html < Base
23
+
24
+ # Contains all constants that are used when parsing.
25
module Constants

  #:stopdoc:
  # The following regexps are based on the ones used by REXML, with some slight modifications.
  HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
  HTML_COMMENT_RE = /<!--(.*?)-->/m
  HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
  # Captures: 1 = attribute name, 2 = unquoted value, 3 = quote character, 4 = quoted value.
  HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(?:(\p{Word}+)|("|')(.*?)\3))?/m
  # Captures: 1 = tag name, 2 = attribute string, 3 = quote character, 4 = '/' of a
  # self-closing tag.
  HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(?:\p{Word}+|("|').*?\3))?)*)\s*(\/)?>/m
  HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
  # Captures: 1 = entity name, 2 = decimal code point, 3 = hexadecimal code point.
  HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/

  HTML_CONTENT_MODEL_BLOCK = %w[address applet article aside blockquote body
                                dd details div dl fieldset figure figcaption
                                footer form header hgroup iframe li main
                                map menu nav noscript object section summary td]
  HTML_CONTENT_MODEL_SPAN = %w[a abbr acronym b bdo big button cite caption del dfn dt em
                               h1 h2 h3 h4 h5 h6 i ins label legend optgroup p q rb rbc
                               rp rt rtc ruby select small span strong sub sup th tt]
  HTML_CONTENT_MODEL_RAW = %w[script style math option textarea pre code kbd samp var]
  # The following elements are also parsed as raw since they need child elements that cannot
  # be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol

  # Maps a tag name to its content model. Looking up a name that is not listed yields :raw
  # and, because of the default block, stores that entry in the hash.
  HTML_CONTENT_MODEL = Hash.new {|h, k| h[k] = :raw }
  HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block }
  HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span }
  HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw }

  # Some HTML elements like script belong to both categories (i.e. are valid in block and
  # span HTML) and don't appear therefore!
  # script, textarea
  HTML_SPAN_ELEMENTS = %w[a abbr acronym b big bdo br button cite code del dfn em i img input
                          ins kbd label mark option q rb rbc rp rt rtc ruby samp select small
                          span strong sub sup tt u var]
  HTML_BLOCK_ELEMENTS = %w[address article aside applet body blockquote caption col colgroup
                           dd div dl dt fieldset figcaption footer form h1 h2 h3 h4 h5 h6
                           header hgroup hr html head iframe legend menu li main map nav ol
                           optgroup p pre section summary table tbody td th thead tfoot tr ul]
  HTML_ELEMENTS_WITHOUT_BODY = %w[area base br col command embed hr img input keygen link
                                  meta param source track wbr]

  # Lookup table answering "is this a known HTML element name?"; unknown names yield false.
  # It is filled from all the lists above plus the keys present in HTML_CONTENT_MODEL at
  # this point.
  HTML_ELEMENT = Hash.new(false)
  (HTML_SPAN_ELEMENTS + HTML_BLOCK_ELEMENTS + HTML_ELEMENTS_WITHOUT_BODY +
    HTML_CONTENT_MODEL.keys).each do |a|
    HTML_ELEMENT[a] = true
  end
end
72
+
73
+ # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
74
+ # functionality. The only thing that must be provided by the class are instance variable
75
+ # @stack for storing the needed state and @src (instance of StringScanner) for the actual
76
+ # parsing.
77
+ module Parser
78
+
79
+ include Constants
80
+
81
+ # Process the HTML start tag that has already been scanned/checked via @src.
82
+ #
83
+ # Does the common processing steps and then yields to the caller for further processing
84
+ # (first parameter is the created element; the second parameter is +true+ if the HTML
85
+ # element is already closed, ie. contains no body; the third parameter specifies whether the
86
+ # body - and the end tag - need to be handled in case closed=false).
87
def handle_html_start_tag(line = nil) # :yields: el, closed, handle_body
  tag_name = @src[1]
  # Only names of known HTML elements are normalized to lowercase; others keep their case.
  known_element = HTML_ELEMENT[tag_name.downcase]
  tag_name.downcase! if known_element
  self_closed = !@src[4].nil?
  attributes = parse_html_attributes(@src[2], line, known_element)

  el = Element.new(:html_element, tag_name, attributes, category: :block)
  el.options[:location] = line if line
  @tree.children << el

  # Elements that never have a body count as closed even without a trailing slash.
  self_closed ||= HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)

  if %w[script style].include?(tag_name)
    # The content of these elements is consumed verbatim right here, so the caller must not
    # handle the body.
    handle_raw_html_tag(tag_name)
    yield(el, false, false)
  else
    yield(el, self_closed, true)
  end
end
107
+
108
+ # Parses the given string for HTML attributes and returns the resulting hash.
109
+ #
110
+ # If the optional +line+ parameter is supplied, it is used in warning messages.
111
+ #
112
+ # If the optional +in_html_tag+ parameter is set to +false+, attributes are not modified to
113
+ # contain only lowercase letters.
114
def parse_html_attributes(str, line = nil, in_html_tag = true)
  str.scan(HTML_ATTRIBUTE_RE).each_with_object({}) do |(key, bare_value, _quote, quoted_value), result|
    key.downcase! if in_html_tag
    if result.key?(key)
      warning("Duplicate HTML attribute '#{key}' on line #{line || '?'} - overwriting previous one")
    end
    # An attribute without a value is stored with an empty string.
    result[key] = bare_value || quoted_value || ""
  end
end
125
+
126
+ # Handle the raw HTML tag at the current position.
127
def handle_raw_html_tag(name)
  curpos = @src.pos
  # The name is escaped so that characters which are valid in tag names but special in
  # regexps (e.g. '.') are matched literally. The look-ahead leaves the scanner positioned
  # directly in front of the end tag.
  if @src.scan_until(/(?=<\/#{Regexp.escape(name)}\s*>)/mi)
    # Everything between the start tag and the end tag becomes raw text of the element that
    # was added last, then the end tag itself is consumed.
    add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
    @src.scan(HTML_TAG_CLOSE_RE)
  else
    # No end tag: the rest of the document is treated as the element's content.
    add_text(@src.rest, @tree.children.last, :raw)
    @src.terminate
    warning("Found no end tag for '#{name}' - auto-closing it")
  end
end
138
+
139
+ HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
140
+
141
+ # Parse raw HTML from the current source position, storing the found elements in +el+.
142
+ # Parsing continues until one of the following criteria are fulfilled:
143
+ #
144
+ # - The end of the document is reached.
145
+ # - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
146
+ # element).
147
+ #
148
+ # When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
149
+ # providing the block given to this method.
150
def parse_raw_html(el, &block)
  # Make +el+ the current tree; the previous one is restored at the end.
  @stack.push(@tree)
  @tree = el

  done = false
  while !@src.eos? && !done
    # HTML_RAW_START is a look-ahead, so +result+ is the text in front of the next '<'
    # candidate and the scanner stops right before it.
    if (result = @src.scan_until(HTML_RAW_START))
      add_text(result, @tree, :text)
      line = @src.current_line_number
      if (result = @src.scan(HTML_COMMENT_RE))
        @tree.children << Element.new(:xml_comment, result, nil, category: :block, location: line)
      elsif (result = @src.scan(HTML_INSTRUCTION_RE))
        @tree.children << Element.new(:xml_pi, result, nil, category: :block, location: line)
      elsif @src.scan(HTML_TAG_RE)
        # Backwards compatibility: an overriding implementation may not accept the line number.
        if method(:handle_html_start_tag).arity.abs >= 1
          handle_html_start_tag(line, &block)
        else
          handle_html_start_tag(&block) # DEPRECATED: method needs to accept line number in 2.0
        end
      elsif @src.scan(HTML_TAG_CLOSE_RE)
        # The end tag is compared case-insensitively only if the current element is a known
        # HTML element.
        if @tree.value == (HTML_ELEMENT[@tree.value] ? @src[1].downcase : @src[1])
          done = true
        else
          # A stray end tag is kept as plain text.
          add_text(@src.matched, @tree, :text)
          warning("Found invalidly used HTML closing tag for '#{@src[1]}' on " \
                  "line #{line} - ignoring it")
        end
      else
        # The '<' candidate matched none of the constructs above: consume one character as
        # text and continue scanning.
        add_text(@src.getch, @tree, :text)
      end
    else
      # No further markup: the remaining source is text of the current element.
      add_text(@src.rest, @tree, :text)
      @src.terminate
      if @tree.type == :html_element
        warning("Found no end tag for '#{@tree.value}' on line " \
                "#{@tree.options[:location]} - auto-closing it")
      end
      done = true
    end
  end

  @tree = @stack.pop
end
193
+
194
+ end
195
+
196
+ # Converts HTML elements to native elements if possible.
197
+ class ElementConverter
198
+
199
+ # :stopdoc:
200
+
201
+ include Constants
202
+ include ::Kramdown::Utils::Entities
203
+
204
+ REMOVE_TEXT_CHILDREN = %w[html head hgroup ol ul dl table colgroup tbody thead tfoot tr
205
+ select optgroup]
206
+ WRAP_TEXT_CHILDREN = %w[body section nav article aside header footer address div li dd
207
+ blockquote figure figcaption fieldset form]
208
+ REMOVE_WHITESPACE_CHILDREN = %w[body section nav article aside header footer address
209
+ div li dd blockquote figure figcaption td th fieldset form]
210
+ STRIP_WHITESPACE = %w[address article aside blockquote body caption dd div dl dt fieldset
211
+ figcaption form footer header h1 h2 h3 h4 h5 h6 legend li nav p
212
+ section td th]
213
+ SIMPLE_ELEMENTS = %w[em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl
214
+ li dl dt dd]
215
+
216
# Create a converter and remember +root+ in @root.
def initialize(root)
  @root = root
end
219
+
220
# Convenience entry point: create a converter for +root+ and process the element +el+
# (which defaults to +root+ itself).
def self.convert(root, el = root)
  new(root).process(el)
end
223
+
224
+ # Convert the element +el+ and its children.
225
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  case el.type
  when :xml_comment, :xml_pi
    # Comments and processing instructions take the category (span or block) implied by
    # the tag name of their parent; without a parent a 'div' is assumed.
    parent_tag =
      if parent.nil?
        'div'
      elsif parent.type == :html_element
        parent.value
      else
        {code_span: 'code', code_block: 'pre', header: 'h1'}.fetch(parent.type) { parent.type.to_s }
      end
    category = HTML_CONTENT_MODEL[parent_tag] == :span ? :span : :block
    el.options.replace(category: category)
    return
  when :root
    el.children.each {|child| process(child) }
    remove_whitespace_children(el)
    return
  when :html_element
    # handled below
  else
    return
  end

  # The tag name has to be remembered now because a conversion resets the element's value.
  tag = el.value
  converter = "convert_#{tag}"
  return send(converter, el) if do_conversion && self.class.method_defined?(converter)

  remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(tag)

  if do_conversion && SIMPLE_ELEMENTS.include?(tag)
    set_basics(el, tag.intern)
    process_children(el, do_conversion, preserve_text)
  else
    process_html_element(el, do_conversion, preserve_text)
  end

  if do_conversion
    strip_whitespace(el) if STRIP_WHITESPACE.include?(tag)
    remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(tag)
    wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(tag)
  end
end
270
+
271
# Process all children of +el+ in place: text children are replaced by the elements that
# #process_text returns for them, all other children are processed recursively.
def process_children(el, do_conversion = true, preserve_text = false)
  keep_whitespace = preserve_text || !do_conversion
  el.children.map! do |child|
    next process_text(child.value, keep_whitespace) if child.type == :text

    process(child, do_conversion, preserve_text, el)
    child
  end
  # Text children were replaced by arrays of elements, so the list has to be flattened.
  el.children.flatten!
end
281
+
282
+ # Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
283
+ # entities in entity elements.
284
def process_text(raw, preserve = false)
  # Note: whitespace compression modifies +raw+ itself.
  raw.gsub!(/\s+/, ' ') unless preserve
  src = Kramdown::Utils::StringScanner.new(raw)
  result = []
  until src.eos?
    # The look-ahead stops right in front of the next entity; +tmp+ is the text before it.
    if (tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/o))
      result << Element.new(:text, tmp)
      src.scan(HTML_ENTITY_RE)
      # Either the entity name (String) or the numeric code point (Integer, from the
      # decimal or the hexadecimal form).
      val = src[1] || (src[2]&.to_i) || src[3].hex
      result << if %w[lsquo rsquo ldquo rdquo].include?(val)
                  Element.new(:smart_quote, val.intern)
                elsif %w[mdash ndash hellip laquo raquo].include?(val)
                  Element.new(:typographic_sym, val.intern)
                else
                  begin
                    Element.new(:entity, entity(val), nil, original: src.matched)
                  rescue ::Kramdown::Error
                    # The entity lookup failed: rewind to just after the first character of
                    # the match (the '&') and emit that ampersand as an 'amp' entity; the
                    # rest of the match is then scanned again as ordinary text.
                    src.pos -= src.matched_size - 1
                    Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
                  end
                end
    else
      # No further entity: the remainder is plain text.
      result << Element.new(:text, src.rest)
      src.terminate
    end
  end
  result
end
312
+
313
# Keep +el+ as HTML element: reset its options to the category and content model derived
# from its tag name (the content model is always :raw without conversion) and process its
# children.
def process_html_element(el, do_conversion = true, preserve_text = false)
  category = HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block
  content_model = do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw
  el.options.replace(category: category, content_model: content_model)
  process_children(el, do_conversion, preserve_text)
end
318
+
319
# Delete all direct children of +el+ that are text elements (in place).
def remove_text_children(el)
  el.children.delete_if {|c| c.type == :text }
end
322
+
323
# Wrap each run of consecutive children of +el+ that are text or not block-level in a
# transparent :p element; block-level children are kept as they are and end the current
# run. The children of +el+ are replaced by the resulting list.
def wrap_text_children(el)
  wrapped = []
  in_paragraph = false
  el.children.each do |child|
    if !child.block? || child.type == :text
      unless in_paragraph
        wrapped << Element.new(:p, nil, nil, transparent: true)
        in_paragraph = true
      end
      wrapped.last.children << child
    else
      wrapped << child
      in_paragraph = false
    end
  end
  el.children = wrapped
end
341
+
342
# Remove leading whitespace from the first child and trailing whitespace from the last
# child of +el+, each only if it is a text element (the values are modified in place).
def strip_whitespace(el)
  children = el.children
  return if children.empty?

  head = children.first
  tail = children.last
  head.value.lstrip! if head.type == :text
  tail.value.rstrip! if tail.type == :text
end
351
+
352
# Drop whitespace-only text children of +el+ that are the first child, the last child or
# located between two block-level siblings.
def remove_whitespace_children(el)
  siblings = el.children
  last_index = siblings.length - 1
  el.children = siblings.reject.with_index do |child, index|
    next false unless child.type == :text && child.value.strip.empty?

    index == 0 || index == last_index ||
      (siblings[index - 1].block? && siblings[index + 1].block?)
  end
end
361
+
362
# Turn +el+ into an element of the given +type+: its options are replaced by +opts+ and its
# value is reset to +nil+.
def set_basics(el, type, opts = {})
  el.type = type
  el.options.replace(opts)
  el.value = nil
end
367
+
368
# Append the values of all text elements found in +el+ and its descendants (in document
# order) to the string +raw+.
def extract_text(el, raw)
  if el.type == :text
    raw << el.value.to_s
  end
  el.children.each do |child|
    extract_text(child, raw)
  end
end
372
+
373
# A textarea stays an HTML element; its children are processed with conversion enabled and
# with the text preserved (second and third argument).
def convert_textarea(el)
  process_html_element(el, true, true)
end
376
+
377
# Convert an anchor to a native :a element if it has an 'href' attribute; otherwise it is
# kept as HTML element without converting its content.
def convert_a(el)
  return process_html_element(el, false) unless el.attr['href']

  set_basics(el, :a)
  process_children(el)
end
385
+
386
+ EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}
387
# Convert an emphasis-like element to the native type given by EMPHASIS_TYPE_MAP, unless
# its text content starts or ends with whitespace - then it is kept as HTML element.
def convert_em(el)
  content = +''
  extract_text(el, content)
  if content.match?(/\A\s|\s\z/)
    process_html_element(el, false)
  else
    set_basics(el, EMPHASIS_TYPE_MAP[el.value])
    process_children(el)
  end
end
397
+ %w[b strong i].each do |i|
398
+ alias_method("convert_#{i}".to_sym, :convert_em)
399
+ end
400
+
401
# Convert a heading element to a native :header element.
def convert_h1(el)
  # The level is the digit of the tag name; read it before the value is reset.
  level = el.value[1..1].to_i
  set_basics(el, :header, level: level)
  # The plain text of the heading is collected into the :raw_text option.
  raw_text = el.options[:raw_text] = +''
  extract_text(el, raw_text)
  process_children(el)
end
406
+ %w[h2 h3 h4 h5 h6].each do |i|
407
+ alias_method("convert_#{i}".to_sym, :convert_h1)
408
+ end
409
+
410
# Convert a 'code' element to a native :codespan or - via the alias +convert_pre+ - a 'pre'
# element to a native :codeblock, provided that its text content can be reduced to a single
# plain string. Otherwise the element is kept as HTML element.
def convert_code(el)
  raw = +''
  extract_text(el, raw)
  result = process_text(raw, true)
  begin
    # Try to collapse the text/entity elements into one string.
    str = result.inject(+'') do |mem, c|
      if c.type == :text
        mem << c.value
      elsif c.type == :entity
        # 60, 62, 34 and 38 are the code points of '<', '>', '"' and '&'.
        mem << if [60, 62, 34, 38].include?(c.value.code_point)
                 c.value.code_point.chr
               else
                 c.value.char
               end
      elsif c.type == :smart_quote || c.type == :typographic_sym
        mem << entity(c.value.to_s).char
      else
        raise "Bug - please report"
      end
    end
    result.clear
    result << Element.new(:text, str)
  rescue StandardError
    # Deliberately ignored (this includes the "Bug" error raised above). If building the
    # string failed, +result+ still holds the elements returned by process_text and the
    # check below falls back to keeping the HTML element.
  end
  if result.length > 1 || result.first.type != :text
    process_html_element(el, false, true)
  else
    if el.value == 'code'
      set_basics(el, :codespan)
      # Strip a "highlighter-*" class name from the class attribute.
      el.attr['class']&.gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '')
    else
      set_basics(el, :codeblock)
      # For a single nested 'code' child, its first "language-*" class is put in front of
      # the element's own classes.
      if el.children.size == 1 && el.children.first.value == 'code'
        value = (el.children.first.attr['class'] || '').scan(/\blanguage-\S+/).first
        el.attr['class'] = "#{value} #{el.attr['class']}".rstrip if value
      end
    end
    el.value = result.first.value
    el.children.clear
  end
end
451
+ alias convert_pre convert_code
452
+
453
# Convert a simple HTML table to a native :table element; every other table is kept as HTML
# element without converting its content.
def convert_table(el)
  # A table without a single row (e.g. "<table></table>") cannot be represented as native
  # table: no alignment information could be derived from it and, if it has no children at
  # all, there is no first child to inspect below. Such tables are kept as HTML.
  contains_row = lambda do |c|
    (c.type == :html_element && c.value == 'tr') || c.children.any? {|cc| contains_row.call(cc) }
  end
  unless contains_row.call(el) && is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  # Derive the column alignment from the 'text-align' style of the cells of each row and
  # remove that part from the style attribute (the attribute is deleted if nothing is left).
  calc_alignment = lambda do |c|
    if c.type == :tr
      el.options[:alignment] = c.children.map do |td|
        if td.attr['style']
          td.attr['style'].slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
          td.attr.delete('style') if td.attr['style'].strip.empty?
          $1 ? $1.to_sym : :default
        else
          :default
        end
      end
    else
      c.children.each {|cc| calc_alignment.call(cc) }
    end
  end
  calc_alignment.call(el)
  # Children that could not be converted to native elements are dropped.
  el.children.delete_if {|c| c.type == :html_element }

  # Header cells are represented as :td elements, too.
  change_th_type = lambda do |c|
    if c.type == :th
      c.type = :td
    else
      c.children.each {|cc| change_th_type.call(cc) }
    end
  end
  change_th_type.call(el)

  # Rows that are direct children of the table are wrapped in a :tbody element.
  if el.children.first.type == :tr
    tbody = Element.new(:tbody)
    tbody.children = el.children
    el.children = [tbody]
  end
end
495
+
496
# Return +true+ if the HTML table +el+ satisfies all of the checks below:
#
# - the cells contain no block-level HTML elements,
# - all rows have the same number of cells,
# - no cell uses 'justify' or 'inherit' as text alignment and all rows use the same
#   alignments,
# - the table consists either only of rows with 'td' cells, or only of 'thead' (rows with
#   'th' cells), 'tbody' and 'tfoot' (rows with 'td' cells) with at least one 'tbody'.
#
# Text children are ignored by the structural checks.
def is_simple_table?(el)
  only_phrasing_content = lambda do |c|
    c.children.all? do |cc|
      (cc.type == :text || !HTML_BLOCK_ELEMENTS.include?(cc.value)) && only_phrasing_content.call(cc)
    end
  end
  # This is a proc (not a lambda) on purpose: 'return false' leaves the whole method.
  check_cells = proc do |c|
    if c.value == 'th' || c.value == 'td'
      return false unless only_phrasing_content.call(c)
    else
      c.children.each {|cc| check_cells.call(cc) }
    end
  end
  check_cells.call(el)

  # nr_cells holds the cell count of the rows seen so far (0 = none yet) or -1 once a row
  # with a differing count has been found.
  nr_cells = 0
  check_nr_cells = lambda do |t|
    if t.value == 'tr'
      count = t.children.select {|cc| cc.value == 'th' || cc.value == 'td' }.length
      if count != nr_cells
        if nr_cells == 0
          nr_cells = count
        else
          nr_cells = -1
          # Inside a lambda, 'break' only ends the current lambda call.
          break
        end
      end
    else
      t.children.each {|cc| check_nr_cells.call(cc) }
    end
  end
  check_nr_cells.call(el)
  return false if nr_cells == -1

  alignment = nil
  # Again a proc so that 'return false' leaves the whole method.
  check_alignment = proc do |t|
    if t.value == 'tr'
      cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td' }.map do |cell|
        md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
        return false if md && (md[1] == 'justify' || md[1] == 'inherit')
        md.nil? ? :default : md[1]
      end
      # The alignments of the first row are the reference for all following rows.
      alignment = cur_alignment if alignment.nil?
      return false if alignment != cur_alignment
    else
      t.children.each {|cc| check_alignment.call(cc) }
    end
  end
  check_alignment.call(el)

  # True if +t+ contains only rows (and text) whose cells all have the tag name +type+.
  check_rows = lambda do |t, type|
    t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text }}
  end
  check_rows.call(el, 'td') ||
    (el.children.all? do |t|
      t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
        ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
    end && el.children.any? {|t| t.value == 'tbody' })
end
555
+
556
# Script elements carrying math are converted to native math elements, all others are kept
# as HTML elements.
def convert_script(el)
  if is_math_tag?(el)
    handle_math_tag(el)
  else
    process_html_element(el)
  end
end
563
+
564
# Truthy (the match position) if the 'type' attribute of +el+ contains "math/tex", +nil+
# otherwise - also when the attribute is missing.
def is_math_tag?(el)
  el.attr['type'].to_s =~ /\bmath\/tex\b/
end
567
+
568
# Convert the math script element +el+ to a native :math element. It becomes a block
# element if its 'type' attribute contains "mode=display", a span element otherwise.
def handle_math_tag(el)
  set_basics(el, :math, category: (el.attr['type'] =~ /mode=display/ ? :block : :span))
  # An empty script element has no text child at all; treat it as empty math content
  # instead of failing on +nil+.
  source = el.children.shift&.value || ''
  # Strip an (optionally '%'-commented) CDATA wrapper around the math content.
  el.value = source.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1')
  el.attr.delete('type')
end
573
+
574
+ end
575
+
576
+ include Parser
577
+
578
+ # Parse the source string provided on initialization as HTML document.
579
def parse
  @stack, @tree = [], @root
  @src = Kramdown::Utils::StringScanner.new(adapt_source(source))

  # Consume the document prolog: any sequence of processing instructions, doctype
  # declarations and comments (each optionally preceded by whitespace).
  loop do
    if (instruction = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/o))
      @tree.children << Element.new(:xml_pi, instruction.strip, nil, category: :block)
    elsif @src.scan(/\s*#{HTML_DOCTYPE_RE}/o)
      # ignore the doctype
    elsif (comment = @src.scan(/\s*#{HTML_COMMENT_RE}/o))
      @tree.children << Element.new(:xml_comment, comment.strip, nil, category: :block)
    else
      break
    end
  end

  # Every start tag that is neither closed nor already handled is parsed recursively.
  tag_handler = lambda do |c, closed, handle_body|
    parse_raw_html(c, &tag_handler) if !closed && handle_body
  end
  parse_raw_html(@tree, &tag_handler)

  ElementConverter.convert(@tree)
end
602
+
603
+ end
604
+
605
+ end
606
+
607
+ end
608
+