kramdown 0.14.2 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (323) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +63 -1
  3. data/COPYING +17 -11
  4. data/README.md +35 -14
  5. data/VERSION +1 -1
  6. data/bin/kramdown +92 -40
  7. data/data/kramdown/document.html +4 -0
  8. data/data/kramdown/document.latex +7 -0
  9. data/lib/kramdown.rb +3 -16
  10. data/lib/kramdown/converter.rb +42 -16
  11. data/lib/kramdown/converter/base.rb +102 -38
  12. data/lib/kramdown/converter/hash_ast.rb +38 -0
  13. data/lib/kramdown/converter/html.rb +232 -141
  14. data/lib/kramdown/converter/kramdown.rb +122 -104
  15. data/lib/kramdown/converter/latex.rb +95 -78
  16. data/lib/kramdown/converter/man.rb +300 -0
  17. data/lib/kramdown/converter/math_engine/mathjax.rb +32 -0
  18. data/lib/kramdown/converter/remove_html_tags.rb +8 -17
  19. data/lib/kramdown/converter/syntax_highlighter.rb +56 -0
  20. data/lib/kramdown/converter/syntax_highlighter/minted.rb +35 -0
  21. data/lib/kramdown/converter/syntax_highlighter/rouge.rb +85 -0
  22. data/lib/kramdown/converter/toc.rb +7 -20
  23. data/lib/kramdown/document.rb +30 -37
  24. data/lib/kramdown/element.rb +54 -27
  25. data/lib/kramdown/error.rb +3 -16
  26. data/lib/kramdown/options.rb +392 -247
  27. data/lib/kramdown/parser.rb +3 -16
  28. data/lib/kramdown/parser/base.rb +28 -33
  29. data/lib/kramdown/parser/html.rb +151 -119
  30. data/lib/kramdown/parser/kramdown.rb +87 -50
  31. data/lib/kramdown/parser/kramdown/abbreviation.rb +33 -27
  32. data/lib/kramdown/parser/kramdown/autolink.rb +7 -25
  33. data/lib/kramdown/parser/kramdown/blank_line.rb +6 -19
  34. data/lib/kramdown/parser/kramdown/block_boundary.rb +6 -18
  35. data/lib/kramdown/parser/kramdown/blockquote.rb +6 -19
  36. data/lib/kramdown/parser/kramdown/codeblock.rb +15 -24
  37. data/lib/kramdown/parser/kramdown/codespan.rb +20 -22
  38. data/lib/kramdown/parser/kramdown/emphasis.rb +15 -24
  39. data/lib/kramdown/parser/kramdown/eob.rb +3 -16
  40. data/lib/kramdown/parser/kramdown/escaped_chars.rb +3 -16
  41. data/lib/kramdown/parser/kramdown/extensions.rb +66 -56
  42. data/lib/kramdown/parser/kramdown/footnote.rb +21 -31
  43. data/lib/kramdown/parser/kramdown/header.rb +37 -37
  44. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +5 -17
  45. data/lib/kramdown/parser/kramdown/html.rb +47 -56
  46. data/lib/kramdown/parser/kramdown/html_entity.rb +9 -19
  47. data/lib/kramdown/parser/kramdown/line_break.rb +4 -17
  48. data/lib/kramdown/parser/kramdown/link.rb +39 -38
  49. data/lib/kramdown/parser/kramdown/list.rb +124 -82
  50. data/lib/kramdown/parser/kramdown/math.rb +12 -24
  51. data/lib/kramdown/parser/kramdown/paragraph.rb +23 -24
  52. data/lib/kramdown/parser/kramdown/smart_quotes.rb +26 -66
  53. data/lib/kramdown/parser/kramdown/table.rb +41 -48
  54. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +14 -22
  55. data/lib/kramdown/parser/markdown.rb +11 -23
  56. data/lib/kramdown/utils.rb +21 -18
  57. data/lib/kramdown/utils/configurable.rb +45 -0
  58. data/lib/kramdown/utils/entities.rb +287 -292
  59. data/lib/kramdown/utils/html.rb +27 -30
  60. data/lib/kramdown/utils/lru_cache.rb +41 -0
  61. data/lib/kramdown/utils/string_scanner.rb +81 -0
  62. data/lib/kramdown/utils/unidecoder.rb +50 -0
  63. data/lib/kramdown/version.rb +4 -17
  64. data/man/man1/kramdown.1 +340 -347
  65. data/test/run_tests.rb +7 -20
  66. data/test/test_files.rb +188 -100
  67. data/test/test_location.rb +216 -0
  68. data/test/test_string_scanner_kramdown.rb +27 -0
  69. data/test/testcases/block/03_paragraph/indented.html.gfm +18 -0
  70. data/test/testcases/block/03_paragraph/line_break_last_line.html +9 -0
  71. data/test/testcases/block/03_paragraph/line_break_last_line.text +9 -0
  72. data/test/testcases/block/03_paragraph/standalone_image.html +8 -0
  73. data/test/testcases/block/03_paragraph/standalone_image.text +6 -0
  74. data/test/testcases/block/03_paragraph/with_html_to_native.html +1 -0
  75. data/test/testcases/block/03_paragraph/with_html_to_native.options +1 -0
  76. data/test/testcases/block/03_paragraph/with_html_to_native.text +1 -0
  77. data/test/testcases/block/04_header/atx_header.html +15 -1
  78. data/test/testcases/block/04_header/atx_header.text +14 -1
  79. data/test/testcases/block/04_header/setext_header.html +3 -1
  80. data/test/testcases/block/04_header/setext_header.text +4 -1
  81. data/test/testcases/block/04_header/with_auto_id_stripping.html +1 -0
  82. data/test/testcases/block/04_header/with_auto_id_stripping.options +1 -0
  83. data/test/testcases/block/04_header/with_auto_id_stripping.text +1 -0
  84. data/test/testcases/block/04_header/with_auto_ids.html +2 -0
  85. data/test/testcases/block/04_header/with_auto_ids.options +1 -0
  86. data/test/testcases/block/04_header/with_auto_ids.text +2 -0
  87. data/test/testcases/block/06_codeblock/guess_lang_css_class.html +15 -0
  88. data/test/testcases/block/06_codeblock/guess_lang_css_class.options +2 -0
  89. data/test/testcases/block/06_codeblock/guess_lang_css_class.text +13 -0
  90. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.latex +9 -0
  91. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.options +4 -0
  92. data/test/testcases/block/06_codeblock/highlighting-minted-with-opts.text +5 -0
  93. data/test/testcases/block/06_codeblock/highlighting-minted.latex +8 -0
  94. data/test/testcases/block/06_codeblock/highlighting-minted.options +3 -0
  95. data/test/testcases/block/06_codeblock/highlighting-minted.text +4 -0
  96. data/test/testcases/block/06_codeblock/highlighting-opts.html +6 -0
  97. data/test/testcases/block/06_codeblock/highlighting-opts.options +7 -0
  98. data/test/testcases/block/06_codeblock/highlighting-opts.text +4 -0
  99. data/test/testcases/block/06_codeblock/highlighting.html +5 -6
  100. data/test/testcases/block/06_codeblock/issue_gh45.html +164 -0
  101. data/test/testcases/block/06_codeblock/issue_gh45.test +188 -0
  102. data/test/testcases/block/06_codeblock/rouge/disabled.html +2 -0
  103. data/test/testcases/block/06_codeblock/rouge/disabled.options +4 -0
  104. data/test/testcases/block/06_codeblock/rouge/disabled.text +1 -0
  105. data/test/testcases/block/06_codeblock/rouge/multiple.html +11 -0
  106. data/test/testcases/block/06_codeblock/rouge/multiple.options +4 -0
  107. data/test/testcases/block/06_codeblock/rouge/multiple.text +11 -0
  108. data/test/testcases/block/06_codeblock/rouge/simple.html +10 -0
  109. data/test/testcases/block/06_codeblock/rouge/simple.options +3 -0
  110. data/test/testcases/block/06_codeblock/rouge/simple.text +9 -0
  111. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block.options +1 -1
  112. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.html +8 -0
  113. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.options +2 -0
  114. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_any_char.text +11 -0
  115. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.html +3 -0
  116. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.options +2 -0
  117. data/test/testcases/block/06_codeblock/with_lang_in_fenced_block_name_with_dash.text +4 -0
  118. data/test/testcases/block/07_horizontal_rule/error.html +2 -2
  119. data/test/testcases/block/07_horizontal_rule/normal.html +2 -0
  120. data/test/testcases/block/07_horizontal_rule/normal.text +3 -0
  121. data/test/testcases/block/08_list/brackets_in_item.latex +3 -0
  122. data/test/testcases/block/08_list/brackets_in_item.text +1 -0
  123. data/test/testcases/block/08_list/lazy_and_nested.html +9 -0
  124. data/test/testcases/block/08_list/lazy_and_nested.text +4 -0
  125. data/test/testcases/block/09_html/html5_attributes.html +2 -0
  126. data/test/testcases/block/09_html/html5_attributes.text +2 -0
  127. data/test/testcases/block/09_html/html_after_block.html +7 -0
  128. data/test/testcases/block/09_html/html_after_block.text +5 -0
  129. data/test/testcases/block/09_html/html_to_native/table_simple.html +13 -0
  130. data/test/testcases/block/09_html/html_to_native/table_simple.text +15 -0
  131. data/test/testcases/block/09_html/html_to_native/typography.html +1 -1
  132. data/test/testcases/block/09_html/not_parsed.html +1 -1
  133. data/test/testcases/block/09_html/processing_instruction.html +5 -6
  134. data/test/testcases/block/09_html/simple.html +1 -5
  135. data/test/testcases/block/09_html/simple.text +1 -5
  136. data/test/testcases/block/09_html/standalone_image_in_div.htmlinput +7 -0
  137. data/test/testcases/block/09_html/standalone_image_in_div.text +8 -0
  138. data/test/testcases/block/09_html/textarea.html +8 -0
  139. data/test/testcases/block/09_html/textarea.text +8 -0
  140. data/test/testcases/block/09_html/xml.html +8 -0
  141. data/test/testcases/block/09_html/xml.text +7 -0
  142. data/test/testcases/block/11_ial/simple.html +5 -1
  143. data/test/testcases/block/11_ial/simple.text +8 -1
  144. data/test/testcases/block/12_extension/options.html +4 -4
  145. data/test/testcases/block/12_extension/options.text +2 -0
  146. data/test/testcases/block/12_extension/options2.html +4 -4
  147. data/test/testcases/block/12_extension/options3.html +7 -6
  148. data/test/testcases/block/12_extension/options3.text +2 -2
  149. data/test/testcases/block/13_definition_list/auto_ids.html +15 -0
  150. data/test/testcases/block/13_definition_list/auto_ids.text +18 -0
  151. data/test/testcases/block/13_definition_list/item_ial.html +5 -0
  152. data/test/testcases/block/13_definition_list/item_ial.text +8 -0
  153. data/test/testcases/block/14_table/empty_tag_in_cell.html +8 -0
  154. data/test/testcases/block/14_table/empty_tag_in_cell.options +1 -0
  155. data/test/testcases/block/14_table/empty_tag_in_cell.text +1 -0
  156. data/test/testcases/block/14_table/errors.html +4 -0
  157. data/test/testcases/block/14_table/errors.text +4 -0
  158. data/test/testcases/block/14_table/header.html +21 -0
  159. data/test/testcases/block/14_table/header.text +7 -0
  160. data/test/testcases/block/14_table/simple.html +22 -7
  161. data/test/testcases/block/14_table/simple.text +4 -0
  162. data/test/testcases/block/14_table/table_with_footnote.html +4 -4
  163. data/test/testcases/block/15_math/gh_128.html +1 -0
  164. data/test/testcases/block/15_math/gh_128.text +1 -0
  165. data/test/testcases/block/15_math/no_engine.html +3 -0
  166. data/test/testcases/block/15_math/no_engine.options +1 -0
  167. data/test/testcases/block/15_math/no_engine.text +2 -0
  168. data/test/testcases/block/15_math/normal.html +17 -14
  169. data/test/testcases/block/15_math/normal.text +2 -0
  170. data/test/testcases/block/16_toc/toc_exclude.html +7 -7
  171. data/test/testcases/block/16_toc/toc_levels.html +5 -5
  172. data/test/testcases/block/16_toc/toc_levels.text +1 -1
  173. data/test/testcases/block/16_toc/toc_with_footnotes.html +5 -5
  174. data/test/testcases/block/16_toc/toc_with_links.html +8 -0
  175. data/test/testcases/block/16_toc/toc_with_links.options +2 -0
  176. data/test/testcases/block/16_toc/toc_with_links.text +8 -0
  177. data/test/testcases/cjk-line-break.html +4 -0
  178. data/test/testcases/cjk-line-break.options +1 -0
  179. data/test/testcases/cjk-line-break.text +12 -0
  180. data/test/testcases/man/example.man +123 -0
  181. data/test/testcases/man/example.text +85 -0
  182. data/test/testcases/man/heading-name-dash-description.man +4 -0
  183. data/test/testcases/man/heading-name-dash-description.text +1 -0
  184. data/test/testcases/man/heading-name-description.man +4 -0
  185. data/test/testcases/man/heading-name-description.text +2 -0
  186. data/test/testcases/man/heading-name-section-description.man +4 -0
  187. data/test/testcases/man/heading-name-section-description.text +1 -0
  188. data/test/testcases/man/heading-name-section.man +2 -0
  189. data/test/testcases/man/heading-name-section.text +1 -0
  190. data/test/testcases/man/heading-name.man +2 -0
  191. data/test/testcases/man/heading-name.text +1 -0
  192. data/test/testcases/man/sections.man +4 -0
  193. data/test/testcases/man/sections.text +11 -0
  194. data/test/testcases/man/text-escaping.man +8 -0
  195. data/test/testcases/man/text-escaping.text +7 -0
  196. data/test/testcases/span/01_link/empty.html +1 -1
  197. data/test/testcases/span/01_link/empty_title.htmlinput +3 -0
  198. data/test/testcases/span/01_link/empty_title.text +7 -0
  199. data/test/testcases/span/01_link/imagelinks.html +1 -0
  200. data/test/testcases/span/01_link/imagelinks.text +2 -0
  201. data/test/testcases/span/01_link/inline.html +1 -1
  202. data/test/testcases/span/01_link/latex_escaping.latex +6 -0
  203. data/test/testcases/span/01_link/latex_escaping.text +5 -0
  204. data/test/testcases/span/01_link/link_defs.html +1 -1
  205. data/test/testcases/span/01_link/link_defs.text +2 -1
  206. data/test/testcases/span/01_link/link_defs_with_ial.html +4 -0
  207. data/test/testcases/span/01_link/link_defs_with_ial.text +16 -0
  208. data/test/testcases/span/01_link/reference.html +3 -3
  209. data/test/testcases/span/02_emphasis/nesting.html +3 -0
  210. data/test/testcases/span/02_emphasis/nesting.text +4 -1
  211. data/test/testcases/span/02_emphasis/normal.html +19 -0
  212. data/test/testcases/span/02_emphasis/normal.options +1 -0
  213. data/test/testcases/span/02_emphasis/normal.text +17 -0
  214. data/test/testcases/span/03_codespan/highlighting-minted.latex +2 -0
  215. data/test/testcases/span/03_codespan/highlighting-minted.options +1 -0
  216. data/test/testcases/span/03_codespan/highlighting-minted.text +1 -0
  217. data/test/testcases/span/03_codespan/highlighting.html +1 -1
  218. data/test/testcases/span/03_codespan/normal-css-class.html +1 -0
  219. data/test/testcases/span/03_codespan/normal-css-class.options +2 -0
  220. data/test/testcases/span/03_codespan/normal-css-class.text +1 -0
  221. data/test/testcases/span/03_codespan/rouge/disabled.html +1 -0
  222. data/test/testcases/span/03_codespan/rouge/disabled.options +4 -0
  223. data/test/testcases/span/03_codespan/rouge/disabled.text +1 -0
  224. data/test/testcases/span/03_codespan/rouge/simple.html +1 -0
  225. data/test/testcases/span/03_codespan/rouge/simple.options +1 -0
  226. data/test/testcases/span/03_codespan/rouge/simple.text +1 -0
  227. data/test/testcases/span/04_footnote/backlink_inline.html +79 -0
  228. data/test/testcases/span/04_footnote/backlink_inline.options +1 -0
  229. data/test/testcases/span/04_footnote/backlink_inline.text +38 -0
  230. data/test/testcases/span/04_footnote/backlink_text.html +9 -0
  231. data/test/testcases/span/04_footnote/backlink_text.options +1 -0
  232. data/test/testcases/span/04_footnote/backlink_text.text +3 -0
  233. data/test/testcases/span/04_footnote/definitions.latex +2 -2
  234. data/test/testcases/span/04_footnote/footnote_nr.html +6 -6
  235. data/test/testcases/span/04_footnote/footnote_prefix.html +12 -0
  236. data/test/testcases/span/04_footnote/footnote_prefix.options +1 -0
  237. data/test/testcases/span/04_footnote/footnote_prefix.text +4 -0
  238. data/test/testcases/span/04_footnote/inside_footnote.html +17 -0
  239. data/test/testcases/span/04_footnote/inside_footnote.text +9 -0
  240. data/test/testcases/span/04_footnote/markers.html +16 -16
  241. data/test/testcases/span/04_footnote/markers.latex +3 -3
  242. data/test/testcases/span/04_footnote/markers.options +2 -0
  243. data/test/testcases/span/04_footnote/markers.text +2 -1
  244. data/test/testcases/span/04_footnote/placement.html +11 -0
  245. data/test/testcases/span/04_footnote/placement.options +1 -0
  246. data/test/testcases/span/04_footnote/placement.text +8 -0
  247. data/test/testcases/span/04_footnote/regexp_problem.html +14 -0
  248. data/test/testcases/span/04_footnote/regexp_problem.options +2 -0
  249. data/test/testcases/span/04_footnote/regexp_problem.text +52 -0
  250. data/test/testcases/span/04_footnote/without_backlink.html +9 -0
  251. data/test/testcases/span/04_footnote/without_backlink.options +1 -0
  252. data/test/testcases/span/04_footnote/without_backlink.text +3 -0
  253. data/test/testcases/span/05_html/button.html +7 -0
  254. data/test/testcases/span/05_html/button.text +7 -0
  255. data/test/testcases/span/05_html/mark_element.html +3 -0
  256. data/test/testcases/span/05_html/mark_element.text +3 -0
  257. data/test/testcases/span/05_html/normal.html +10 -1
  258. data/test/testcases/span/05_html/normal.text +9 -0
  259. data/test/testcases/span/05_html/raw_span_elements.html +2 -0
  260. data/test/testcases/span/05_html/raw_span_elements.text +2 -0
  261. data/test/testcases/span/05_html/xml.html +5 -0
  262. data/test/testcases/span/05_html/xml.text +5 -0
  263. data/test/testcases/span/abbreviations/abbrev.html +14 -1
  264. data/test/testcases/span/abbreviations/abbrev.text +18 -2
  265. data/test/testcases/span/abbreviations/in_footnote.html +9 -0
  266. data/test/testcases/span/abbreviations/in_footnote.text +5 -0
  267. data/test/testcases/span/autolinks/url_links.html +5 -4
  268. data/test/testcases/span/autolinks/url_links.text +1 -0
  269. data/test/testcases/span/line_breaks/normal.html +2 -2
  270. data/test/testcases/span/line_breaks/normal.latex +2 -2
  271. data/test/testcases/span/math/no_engine.html +1 -0
  272. data/test/testcases/span/math/no_engine.options +1 -0
  273. data/test/testcases/span/math/no_engine.text +1 -0
  274. data/test/testcases/span/math/normal.html +4 -3
  275. data/test/testcases/span/math/normal.text +2 -1
  276. data/test/testcases/span/text_substitutions/entities_as_char.html +1 -1
  277. data/test/testcases/span/text_substitutions/entities_as_char.options +1 -0
  278. data/test/testcases/span/text_substitutions/entities_as_char.text +1 -1
  279. data/test/testcases/span/text_substitutions/typography.html +22 -0
  280. data/test/testcases/span/text_substitutions/typography.text +22 -0
  281. data/test/testcases/span/text_substitutions/typography_subst.html +3 -0
  282. data/test/testcases/span/text_substitutions/typography_subst.latex +4 -0
  283. data/test/testcases/span/text_substitutions/typography_subst.options +8 -0
  284. data/test/testcases/span/text_substitutions/typography_subst.text +3 -0
  285. metadata +218 -67
  286. data/ChangeLog +0 -7436
  287. data/GPL +0 -674
  288. data/Rakefile +0 -306
  289. data/benchmark/benchmark.rb +0 -36
  290. data/benchmark/benchmark.sh +0 -74
  291. data/benchmark/generate_data.rb +0 -119
  292. data/benchmark/mdbasics.text +0 -306
  293. data/benchmark/mdsyntax.text +0 -888
  294. data/benchmark/testing.sh +0 -9
  295. data/benchmark/timing.sh +0 -10
  296. data/doc/bg.png +0 -0
  297. data/doc/default.scss.css +0 -181
  298. data/doc/default.template +0 -68
  299. data/doc/design.scss.css +0 -441
  300. data/doc/documentation.page +0 -84
  301. data/doc/documentation.template +0 -20
  302. data/doc/index.page +0 -94
  303. data/doc/installation.page +0 -88
  304. data/doc/links.markdown +0 -6
  305. data/doc/metainfo +0 -3
  306. data/doc/news.feed +0 -10
  307. data/doc/news.page +0 -29
  308. data/doc/options.page +0 -10
  309. data/doc/quickref.page +0 -598
  310. data/doc/sidebar.template +0 -21
  311. data/doc/syntax.page +0 -1700
  312. data/doc/tests.page +0 -91
  313. data/doc/virtual +0 -2
  314. data/lib/kramdown/compatibility.rb +0 -49
  315. data/lib/kramdown/utils/ordered_hash.rb +0 -100
  316. data/setup.rb +0 -1585
  317. data/test/testcases/block/07_horizontal_rule/error.html.19 +0 -7
  318. data/test/testcases/block/09_html/html_to_native/typography.html.19 +0 -1
  319. data/test/testcases/block/09_html/simple.html.19 +0 -64
  320. data/test/testcases/block/14_table/simple.html.19 +0 -177
  321. data/test/testcases/span/01_link/inline.html.19 +0 -46
  322. data/test/testcases/span/01_link/reference.html.19 +0 -37
  323. data/test/testcases/span/text_substitutions/entities_as_char.html.19 +0 -1
@@ -1,22 +1,9 @@
1
- # -*- coding: utf-8 -*-
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009-2012 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
- # This file is part of kramdown.
7
- #
8
- # kramdown is free software: you can redistribute it and/or modify
9
- # it under the terms of the GNU General Public License as published by
10
- # the Free Software Foundation, either version 3 of the License, or
11
- # (at your option) any later version.
12
- #
13
- # This program is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
- # GNU General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU General Public License
19
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
6
+ # This file is part of kramdown which is licensed under the MIT.
20
7
  #++
21
8
  #
22
9
 
@@ -1,25 +1,15 @@
1
- # -*- coding: utf-8 -*-
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009-2012 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
- # This file is part of kramdown.
7
- #
8
- # kramdown is free software: you can redistribute it and/or modify
9
- # it under the terms of the GNU General Public License as published by
10
- # the Free Software Foundation, either version 3 of the License, or
11
- # (at your option) any later version.
12
- #
13
- # This program is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
- # GNU General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU General Public License
19
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
6
+ # This file is part of kramdown which is licensed under the MIT.
20
7
  #++
21
8
  #
22
9
 
10
+ require 'kramdown/utils'
11
+ require 'kramdown/parser'
12
+
23
13
  module Kramdown
24
14
 
25
15
  module Parser
@@ -27,7 +17,7 @@ module Kramdown
27
17
  # == \Base class for parsers
28
18
  #
29
19
  # This class serves as base class for parsers. It provides common methods that can/should be
30
- # used by all parsers, especially by those using StringScanner for parsing.
20
+ # used by all parsers, especially by those using StringScanner(Kramdown) for parsing.
31
21
  #
32
22
  # A parser object is used as a throw-away object, i.e. it is only used for storing the needed
33
23
  # state information during parsing. Therefore one can't instantiate a parser object directly but
@@ -62,7 +52,8 @@ module Kramdown
62
52
  def initialize(source, options)
63
53
  @source = source
64
54
  @options = Kramdown::Options.merge(options)
65
- @root = Element.new(:root, nil, nil, :encoding => (source.encoding rescue nil))
55
+ @root = Element.new(:root, nil, nil, encoding: (source.encoding rescue nil), location: 1,
56
+ options: {}, abbrev_defs: {}, abbrev_attr: {})
66
57
  @warnings = []
67
58
  @text_type = :text
68
59
  end
@@ -92,22 +83,30 @@ module Kramdown
92
83
  # Add the given warning +text+ to the warning array.
93
84
  def warning(text)
94
85
  @warnings << text
95
- #TODO: add position information
86
+ # TODO: add position information
96
87
  end
97
88
 
98
89
  # Modify the string +source+ to be usable by the parser (unifies line ending characters to
99
90
  # +\n+ and makes sure +source+ ends with a new line character).
100
91
  def adapt_source(source)
101
- source.gsub(/\r\n?/, "\n").chomp + "\n"
92
+ unless source.valid_encoding?
93
+ raise "The source text contains invalid characters for the used encoding #{source.encoding}"
94
+ end
95
+ source = source.encode('UTF-8')
96
+ source.gsub!(/\r\n?/, "\n")
97
+ source.chomp!
98
+ source << "\n"
102
99
  end
103
100
 
104
101
  # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
105
102
  # +type+ element or creates a new text element with the given +type+.
106
103
  def add_text(text, tree = @tree, type = @text_type)
107
- if tree.children.last && tree.children.last.type == type
108
- tree.children.last.value << text
104
+ last = tree.children.last
105
+ if last && last.type == type
106
+ last.value << text
109
107
  elsif !text.empty?
110
- tree.children << Element.new(type, text)
108
+ location = (last && last.options[:location] || tree.options[:location])
109
+ tree.children << Element.new(type, text, nil, location: location)
111
110
  end
112
111
  end
113
112
 
@@ -115,16 +114,12 @@ module Kramdown
115
114
  # method works correctly under Ruby 1.8 and Ruby 1.9.
116
115
  def extract_string(range, strscan)
117
116
  result = nil
118
- if strscan.string.respond_to?(:encoding)
119
- begin
120
- enc = strscan.string.encoding
121
- strscan.string.force_encoding('ASCII-8BIT')
122
- result = strscan.string[range].force_encoding(enc)
123
- ensure
124
- strscan.string.force_encoding(enc)
125
- end
126
- else
127
- result = strscan.string[range]
117
+ begin
118
+ enc = strscan.string.encoding
119
+ strscan.string.force_encoding('ASCII-8BIT')
120
+ result = strscan.string[range].force_encoding(enc)
121
+ ensure
122
+ strscan.string.force_encoding(enc)
128
123
  end
129
124
  result
130
125
  end
@@ -1,33 +1,22 @@
1
- # -*- coding: utf-8 -*-
1
+ # -*- coding: utf-8; frozen_string_literal: true -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009-2012 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
- # This file is part of kramdown.
7
- #
8
- # kramdown is free software: you can redistribute it and/or modify
9
- # it under the terms of the GNU General Public License as published by
10
- # the Free Software Foundation, either version 3 of the License, or
11
- # (at your option) any later version.
12
- #
13
- # This program is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
- # GNU General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU General Public License
19
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
6
+ # This file is part of kramdown which is licensed under the MIT.
20
7
  #++
21
8
  #
22
9
 
23
10
  require 'rexml/parsers/baseparser'
24
11
  require 'strscan'
12
+ require 'kramdown/utils'
13
+ require 'kramdown/parser'
25
14
 
26
15
  module Kramdown
27
16
 
28
17
  module Parser
29
18
 
30
- # Used for parsing a HTML document.
19
+ # Used for parsing an HTML document.
31
20
  #
32
21
  # The parsing code is in the Parser module that can also be used by other parsers.
33
22
  class Html < Base
@@ -40,38 +29,47 @@ module Kramdown
40
29
  HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
41
30
  HTML_COMMENT_RE = /<!--(.*?)-->/m
42
31
  HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
43
- HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(["'])(.*?)\2)?/m
44
- HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(["']).*?\3)?)*)\s*(\/)?>/m
32
+ HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(?:(\p{Word}+)|("|')(.*?)\3))?/m
33
+ HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(?:\p{Word}+|("|').*?\3))?)*)\s*(\/)?>/m
45
34
  HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
46
35
  HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
47
36
 
48
- HTML_CONTENT_MODEL_BLOCK = %w{address applet article aside button blockquote body
49
- dd div dl fieldset figure figcaption footer form header hgroup iframe li map menu nav
50
- noscript object section td}
51
- HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym b bdo big button cite caption del dfn dt em
52
- h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
53
- rp rt rtc ruby samp select small span strong sub sup summary th tt var}
54
- HTML_CONTENT_MODEL_RAW = %w{script style math option textarea pre code}
37
+ HTML_CONTENT_MODEL_BLOCK = %w[address applet article aside blockquote body
38
+ dd details div dl fieldset figure figcaption
39
+ footer form header hgroup iframe li main
40
+ map menu nav noscript object section summary td]
41
+ HTML_CONTENT_MODEL_SPAN = %w[a abbr acronym b bdo big button cite caption del dfn dt em
42
+ h1 h2 h3 h4 h5 h6 i ins label legend optgroup p q rb rbc
43
+ rp rt rtc ruby select small span strong sub sup th tt]
44
+ HTML_CONTENT_MODEL_RAW = %w[script style math option textarea pre code kbd samp var]
55
45
  # The following elements are also parsed as raw since they need child elements that cannot
56
46
  # be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol
57
47
 
58
- HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
59
- HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
60
- HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span}
61
- HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw}
48
+ HTML_CONTENT_MODEL = Hash.new {|h, k| h[k] = :raw }
49
+ HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block }
50
+ HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span }
51
+ HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw }
62
52
 
63
53
  # Some HTML elements like script belong to both categories (i.e. are valid in block and
64
54
  # span HTML) and don't appear therefore!
65
- HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
66
- ins kbd label option q rb rbc rp rt rtc ruby samp select small span
67
- strong sub sup textarea tt var}
68
- HTML_BLOCK_ELEMENTS = %w{address article aside applet body button blockquote caption col colgroup dd div dl dt fieldset
69
- figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend menu
70
- li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul}
71
- HTML_ELEMENTS_WITHOUT_BODY = %w{area base br col command embed hr img input keygen link meta param source track wbr}
55
+ # script, textarea
56
+ HTML_SPAN_ELEMENTS = %w[a abbr acronym b big bdo br button cite code del dfn em i img input
57
+ ins kbd label mark option q rb rbc rp rt rtc ruby samp select small
58
+ span strong sub sup tt u var]
59
+ HTML_BLOCK_ELEMENTS = %w[address article aside applet body blockquote caption col colgroup
60
+ dd div dl dt fieldset figcaption footer form h1 h2 h3 h4 h5 h6
61
+ header hgroup hr html head iframe legend menu li main map nav ol
62
+ optgroup p pre section summary table tbody td th thead tfoot tr ul]
63
+ HTML_ELEMENTS_WITHOUT_BODY = %w[area base br col command embed hr img input keygen link
64
+ meta param source track wbr]
65
+
66
+ HTML_ELEMENT = Hash.new(false)
67
+ (HTML_SPAN_ELEMENTS + HTML_BLOCK_ELEMENTS + HTML_ELEMENTS_WITHOUT_BODY +
68
+ HTML_CONTENT_MODEL.keys).each do |a|
69
+ HTML_ELEMENT[a] = true
70
+ end
72
71
  end
73
72
 
74
-
75
73
  # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
76
74
  # functionality. The only thing that must be provided by the class are instance variable
77
75
  # @stack for storing the needed state and @src (instance of StringScanner) for the actual
@@ -86,17 +84,17 @@ module Kramdown
86
84
  # (first parameter is the created element; the second parameter is +true+ if the HTML
87
85
  # element is already closed, ie. contains no body; the third parameter specifies whether the
88
86
  # body - and the end tag - need to be handled in case closed=false).
89
- def handle_html_start_tag # :yields: el, closed, handle_body
90
- name = @src[1].downcase
87
+ def handle_html_start_tag(line = nil) # :yields: el, closed, handle_body
88
+ name = @src[1]
89
+ name.downcase! if HTML_ELEMENT[name.downcase]
91
90
  closed = !@src[4].nil?
92
- attrs = Utils::OrderedHash.new
93
- @src[2].scan(HTML_ATTRIBUTE_RE).each {|attr,sep,val| attrs[attr.downcase] = val || ""}
91
+ attrs = parse_html_attributes(@src[2], line, HTML_ELEMENT[name])
94
92
 
95
- el = Element.new(:html_element, name, attrs, :category => :block)
93
+ el = Element.new(:html_element, name, attrs, category: :block)
94
+ el.options[:location] = line if line
96
95
  @tree.children << el
97
96
 
98
97
  if !closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
99
- warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
100
98
  closed = true
101
99
  end
102
100
  if name == 'script' || name == 'style'
@@ -107,6 +105,24 @@ module Kramdown
107
105
  end
108
106
  end
109
107
 
108
+ # Parses the given string for HTML attributes and returns the resulting hash.
109
+ #
110
+ # If the optional +line+ parameter is supplied, it is used in warning messages.
111
+ #
112
+ # If the optional +in_html_tag+ parameter is set to +false+, attributes are not modified to
113
+ # contain only lowercase letters.
114
+ def parse_html_attributes(str, line = nil, in_html_tag = true)
115
+ attrs = {}
116
+ str.scan(HTML_ATTRIBUTE_RE).each do |attr, val, _sep, quoted_val|
117
+ attr.downcase! if in_html_tag
118
+ if attrs.key?(attr)
119
+ warning("Duplicate HTML attribute '#{attr}' on line #{line || '?'} - overwriting previous one")
120
+ end
121
+ attrs[attr] = val || quoted_val || ""
122
+ end
123
+ attrs
124
+ end
125
+
110
126
  # Handle the raw HTML tag at the current position.
111
127
  def handle_raw_html_tag(name)
112
128
  curpos = @src.pos
@@ -137,19 +153,26 @@ module Kramdown
137
153
 
138
154
  done = false
139
155
  while !@src.eos? && !done
140
- if result = @src.scan_until(HTML_RAW_START)
156
+ if (result = @src.scan_until(HTML_RAW_START))
141
157
  add_text(result, @tree, :text)
142
- if result = @src.scan(HTML_COMMENT_RE)
143
- @tree.children << Element.new(:xml_comment, result, nil, :category => :block)
144
- elsif result = @src.scan(HTML_INSTRUCTION_RE)
145
- @tree.children << Element.new(:xml_pi, result, nil, :category => :block)
158
+ line = @src.current_line_number
159
+ if (result = @src.scan(HTML_COMMENT_RE))
160
+ @tree.children << Element.new(:xml_comment, result, nil, category: :block, location: line)
161
+ elsif (result = @src.scan(HTML_INSTRUCTION_RE))
162
+ @tree.children << Element.new(:xml_pi, result, nil, category: :block, location: line)
146
163
  elsif @src.scan(HTML_TAG_RE)
147
- handle_html_start_tag(&block)
164
+ if method(:handle_html_start_tag).arity.abs >= 1
165
+ handle_html_start_tag(line, &block)
166
+ else
167
+ handle_html_start_tag(&block) # DEPRECATED: method needs to accept line number in 2.0
168
+ end
148
169
  elsif @src.scan(HTML_TAG_CLOSE_RE)
149
- if @tree.value == @src[1].downcase
170
+ if @tree.value == (HTML_ELEMENT[@tree.value] ? @src[1].downcase : @src[1])
150
171
  done = true
151
172
  else
152
- warning("Found invalidly used HTML closing tag for '#{@src[1].downcase}' - ignoring it")
173
+ add_text(@src.matched, @tree, :text)
174
+ warning("Found invalidly used HTML closing tag for '#{@src[1]}' on " \
175
+ "line #{line} - ignoring it")
153
176
  end
154
177
  else
155
178
  add_text(@src.getch, @tree, :text)
@@ -157,7 +180,10 @@ module Kramdown
157
180
  else
158
181
  add_text(@src.rest, @tree, :text)
159
182
  @src.terminate
160
- warning("Found no end tag for '#{@tree.value}' - auto-closing it") if @tree.type == :html_element
183
+ if @tree.type == :html_element
184
+ warning("Found no end tag for '#{@tree.value}' on line " \
185
+ "#{@tree.options[:location]} - auto-closing it")
186
+ end
161
187
  done = true
162
188
  end
163
189
  end
@@ -167,7 +193,6 @@ module Kramdown
167
193
 
168
194
  end
169
195
 
170
-
171
196
  # Converts HTML elements to native elements if possible.
172
197
  class ElementConverter
173
198
 
@@ -176,14 +201,17 @@ module Kramdown
176
201
  include Constants
177
202
  include ::Kramdown::Utils::Entities
178
203
 
179
- REMOVE_TEXT_CHILDREN = %w{html head hgroup ol ul dl table colgroup tbody thead tfoot tr select optgroup}
180
- WRAP_TEXT_CHILDREN = %w{body section nav article aside header footer address div li dd blockquote figure
181
- figcaption fieldset form}
182
- REMOVE_WHITESPACE_CHILDREN = %w{body section nav article aside header footer address
183
- div li dd blockquote figure figcaption td th fieldset form}
184
- STRIP_WHITESPACE = %w{address article aside blockquote body caption dd div dl dt fieldset figcaption form footer
185
- header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
186
- SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
204
+ REMOVE_TEXT_CHILDREN = %w[html head hgroup ol ul dl table colgroup tbody thead tfoot tr
205
+ select optgroup]
206
+ WRAP_TEXT_CHILDREN = %w[body section nav article aside header footer address div li dd
207
+ blockquote figure figcaption fieldset form]
208
+ REMOVE_WHITESPACE_CHILDREN = %w[body section nav article aside header footer address
209
+ div li dd blockquote figure figcaption td th fieldset form]
210
+ STRIP_WHITESPACE = %w[address article aside blockquote body caption dd div dl dt fieldset
211
+ figcaption form footer header h1 h2 h3 h4 h5 h6 legend li nav p
212
+ section td th]
213
+ SIMPLE_ELEMENTS = %w[em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl
214
+ li dl dt dd]
187
215
 
188
216
  def initialize(root)
189
217
  @root = root
@@ -208,11 +236,11 @@ module Kramdown
208
236
  else parent.type.to_s
209
237
  end
210
238
  end
211
- el.options.replace({:category => (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block)})
239
+ el.options.replace(category: (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block))
212
240
  return
213
241
  when :html_element
214
242
  when :root
215
- el.children.each {|c| process(c)}
243
+ el.children.each {|c| process(c) }
216
244
  remove_whitespace_children(el)
217
245
  return
218
246
  else return
@@ -255,20 +283,20 @@ module Kramdown
255
283
  # entities in entity elements.
256
284
  def process_text(raw, preserve = false)
257
285
  raw.gsub!(/\s+/, ' ') unless preserve
258
- src = StringScanner.new(raw)
286
+ src = Kramdown::Utils::StringScanner.new(raw)
259
287
  result = []
260
- while !src.eos?
261
- if tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/)
288
+ until src.eos?
289
+ if (tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/o))
262
290
  result << Element.new(:text, tmp)
263
291
  src.scan(HTML_ENTITY_RE)
264
- val = src[1] || (src[2] && src[2].to_i) || src[3].hex
265
- result << if %w{lsquo rsquo ldquo rdquo}.include?(val)
292
+ val = src[1] || (src[2]&.to_i) || src[3].hex
293
+ result << if %w[lsquo rsquo ldquo rdquo].include?(val)
266
294
  Element.new(:smart_quote, val.intern)
267
- elsif %w{mdash ndash hellip laquo raquo}.include?(val)
295
+ elsif %w[mdash ndash hellip laquo raquo].include?(val)
268
296
  Element.new(:typographic_sym, val.intern)
269
297
  else
270
298
  begin
271
- Element.new(:entity, entity(val), nil, :original => src.matched)
299
+ Element.new(:entity, entity(val), nil, original: src.matched)
272
300
  rescue ::Kramdown::Error
273
301
  src.pos -= src.matched_size - 1
274
302
  Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
@@ -283,22 +311,22 @@ module Kramdown
283
311
  end
284
312
 
285
313
  def process_html_element(el, do_conversion = true, preserve_text = false)
286
- el.options.replace(:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
287
- :content_model => (do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw))
314
+ el.options.replace(category: HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
315
+ content_model: (do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw))
288
316
  process_children(el, do_conversion, preserve_text)
289
317
  end
290
318
 
291
319
  def remove_text_children(el)
292
- el.children.delete_if {|c| c.type == :text}
320
+ el.children.delete_if {|c| c.type == :text }
293
321
  end
294
322
 
295
323
  def wrap_text_children(el)
296
324
  tmp = []
297
325
  last_is_p = false
298
326
  el.children.each do |c|
299
- if Element.category(c) != :block || c.type == :text
300
- if !last_is_p
301
- tmp << Element.new(:p, nil, nil, :transparent => true)
327
+ if !c.block? || c.type == :text
328
+ unless last_is_p
329
+ tmp << Element.new(:p, nil, nil, transparent: true)
302
330
  last_is_p = true
303
331
  end
304
332
  tmp.last.children << c
@@ -326,8 +354,8 @@ module Kramdown
326
354
  el.children = el.children.reject do |c|
327
355
  i += 1
328
356
  c.type == :text && c.value.strip.empty? &&
329
- (i == 0 || i == el.children.length - 1 || (Element.category(el.children[i-1]) == :block &&
330
- Element.category(el.children[i+1]) == :block))
357
+ (i == 0 || i == el.children.length - 1 || ((el.children[i - 1]).block? &&
358
+ (el.children[i + 1]).block?))
331
359
  end
332
360
  end
333
361
 
@@ -339,7 +367,11 @@ module Kramdown
339
367
 
340
368
  def extract_text(el, raw)
341
369
  raw << el.value.to_s if el.type == :text
342
- el.children.each {|c| extract_text(c, raw)}
370
+ el.children.each {|c| extract_text(c, raw) }
371
+ end
372
+
373
+ def convert_textarea(el)
374
+ process_html_element(el, true, true)
343
375
  end
344
376
 
345
377
  def convert_a(el)
@@ -353,7 +385,7 @@ module Kramdown
353
385
 
354
386
  EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}
355
387
  def convert_em(el)
356
- text = ''
388
+ text = +''
357
389
  extract_text(el, text)
358
390
  if text =~ /\A\s/ || text =~ /\s\z/
359
391
  process_html_element(el, false)
@@ -362,53 +394,53 @@ module Kramdown
362
394
  process_children(el)
363
395
  end
364
396
  end
365
- %w{b strong i}.each do |i|
397
+ %w[b strong i].each do |i|
366
398
  alias_method("convert_#{i}".to_sym, :convert_em)
367
399
  end
368
400
 
369
401
  def convert_h1(el)
370
- set_basics(el, :header, :level => el.value[1..1].to_i)
371
- extract_text(el, el.options[:raw_text] = '')
402
+ set_basics(el, :header, level: el.value[1..1].to_i)
403
+ extract_text(el, el.options[:raw_text] = +'')
372
404
  process_children(el)
373
405
  end
374
- %w{h2 h3 h4 h5 h6}.each do |i|
406
+ %w[h2 h3 h4 h5 h6].each do |i|
375
407
  alias_method("convert_#{i}".to_sym, :convert_h1)
376
408
  end
377
409
 
378
410
  def convert_code(el)
379
- raw = ''
411
+ raw = +''
380
412
  extract_text(el, raw)
381
413
  result = process_text(raw, true)
382
414
  begin
383
- str = result.inject('') do |mem, c|
415
+ str = result.inject(+'') do |mem, c|
384
416
  if c.type == :text
385
417
  mem << c.value
386
418
  elsif c.type == :entity
387
- value_char = c.value.char
388
- if value_char.respond_to?(:encode)
389
- mem << value_char.encode(@root.options[:encoding])
390
- elsif [60, 62, 34, 38].include?(c.value.code_point)
391
- mem << c.value.code_point.chr
392
- end
419
+ mem << if [60, 62, 34, 38].include?(c.value.code_point)
420
+ c.value.code_point.chr
421
+ else
422
+ c.value.char
423
+ end
393
424
  elsif c.type == :smart_quote || c.type == :typographic_sym
394
- mem << entity(c.value.to_s).char.encode(@root.options[:encoding])
425
+ mem << entity(c.value.to_s).char
395
426
  else
396
427
  raise "Bug - please report"
397
428
  end
398
429
  end
399
430
  result.clear
400
431
  result << Element.new(:text, str)
401
- rescue
432
+ rescue StandardError
402
433
  end
403
434
  if result.length > 1 || result.first.type != :text
404
435
  process_html_element(el, false, true)
405
436
  else
406
437
  if el.value == 'code'
407
438
  set_basics(el, :codespan)
439
+ el.attr['class']&.gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '')
408
440
  else
409
441
  set_basics(el, :codeblock)
410
442
  if el.children.size == 1 && el.children.first.value == 'code'
411
- value = (el.children.first.attr['class'] || '').scan(/\blanguage-\w+\b/).first
443
+ value = (el.children.first.attr['class'] || '').scan(/\blanguage-\S+/).first
412
444
  el.attr['class'] = "#{value} #{el.attr['class']}".rstrip if value
413
445
  end
414
446
  end
@@ -416,10 +448,10 @@ module Kramdown
416
448
  el.children.clear
417
449
  end
418
450
  end
419
- alias :convert_pre :convert_code
451
+ alias convert_pre convert_code
420
452
 
421
453
  def convert_table(el)
422
- if !is_simple_table?(el)
454
+ unless is_simple_table?(el)
423
455
  process_html_element(el, false)
424
456
  return
425
457
  end
@@ -433,23 +465,23 @@ module Kramdown
433
465
  if td.attr['style']
434
466
  td.attr['style'].slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
435
467
  td.attr.delete('style') if td.attr['style'].strip.empty?
436
- $1.to_sym
468
+ $1 ? $1.to_sym : :default
437
469
  else
438
470
  :default
439
471
  end
440
472
  end
441
473
  else
442
- c.children.each {|cc| calc_alignment.call(cc)}
474
+ c.children.each {|cc| calc_alignment.call(cc) }
443
475
  end
444
476
  end
445
477
  calc_alignment.call(el)
446
- el.children.delete_if {|c| c.type == :html_element}
478
+ el.children.delete_if {|c| c.type == :html_element }
447
479
 
448
480
  change_th_type = lambda do |c|
449
481
  if c.type == :th
450
482
  c.type = :td
451
483
  else
452
- c.children.each {|cc| change_th_type.call(cc)}
484
+ c.children.each {|cc| change_th_type.call(cc) }
453
485
  end
454
486
  end
455
487
  change_th_type.call(el)
@@ -467,11 +499,11 @@ module Kramdown
467
499
  (cc.type == :text || !HTML_BLOCK_ELEMENTS.include?(cc.value)) && only_phrasing_content.call(cc)
468
500
  end
469
501
  end
470
- check_cells = Proc.new do |c|
502
+ check_cells = proc do |c|
471
503
  if c.value == 'th' || c.value == 'td'
472
- return false if !only_phrasing_content.call(c)
504
+ return false unless only_phrasing_content.call(c)
473
505
  else
474
- c.children.each {|cc| check_cells.call(cc)}
506
+ c.children.each {|cc| check_cells.call(cc) }
475
507
  end
476
508
  end
477
509
  check_cells.call(el)
@@ -479,7 +511,7 @@ module Kramdown
479
511
  nr_cells = 0
480
512
  check_nr_cells = lambda do |t|
481
513
  if t.value == 'tr'
482
- count = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.length
514
+ count = t.children.select {|cc| cc.value == 'th' || cc.value == 'td' }.length
483
515
  if count != nr_cells
484
516
  if nr_cells == 0
485
517
  nr_cells = count
@@ -489,16 +521,16 @@ module Kramdown
489
521
  end
490
522
  end
491
523
  else
492
- t.children.each {|cc| check_nr_cells.call(cc)}
524
+ t.children.each {|cc| check_nr_cells.call(cc) }
493
525
  end
494
526
  end
495
527
  check_nr_cells.call(el)
496
528
  return false if nr_cells == -1
497
529
 
498
530
  alignment = nil
499
- check_alignment = Proc.new do |t|
531
+ check_alignment = proc do |t|
500
532
  if t.value == 'tr'
501
- cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.map do |cell|
533
+ cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td' }.map do |cell|
502
534
  md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
503
535
  return false if md && (md[1] == 'justify' || md[1] == 'inherit')
504
536
  md.nil? ? :default : md[1]
@@ -506,19 +538,19 @@ module Kramdown
506
538
  alignment = cur_alignment if alignment.nil?
507
539
  return false if alignment != cur_alignment
508
540
  else
509
- t.children.each {|cc| check_alignment.call(cc)}
541
+ t.children.each {|cc| check_alignment.call(cc) }
510
542
  end
511
543
  end
512
544
  check_alignment.call(el)
513
545
 
514
546
  check_rows = lambda do |t, type|
515
- t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}}
547
+ t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text }}
516
548
  end
517
549
  check_rows.call(el, 'td') ||
518
550
  (el.children.all? do |t|
519
551
  t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
520
552
  ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
521
- end && el.children.any? {|t| t.value == 'tbody'})
553
+ end && el.children.any? {|t| t.value == 'tbody' })
522
554
  end
523
555
 
524
556
  def convert_script(el)
@@ -534,7 +566,7 @@ module Kramdown
534
566
  end
535
567
 
536
568
  def handle_math_tag(el)
537
- set_basics(el, :math, :category => (el.attr['type'] =~ /mode=display/ ? :block : :span))
569
+ set_basics(el, :math, category: (el.attr['type'] =~ /mode=display/ ? :block : :span))
538
570
  el.value = el.children.shift.value.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1')
539
571
  el.attr.delete('type')
540
572
  end
@@ -546,15 +578,15 @@ module Kramdown
546
578
  # Parse the source string provided on initialization as HTML document.
547
579
  def parse
548
580
  @stack, @tree = [], @root
549
- @src = StringScanner.new(adapt_source(source))
581
+ @src = Kramdown::Utils::StringScanner.new(adapt_source(source))
550
582
 
551
583
  while true
552
- if result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/)
553
- @tree.children << Element.new(:xml_pi, result.strip, nil, :category => :block)
554
- elsif result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
584
+ if (result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/o))
585
+ @tree.children << Element.new(:xml_pi, result.strip, nil, category: :block)
586
+ elsif (result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/o))
555
587
  # ignore the doctype
556
- elsif result = @src.scan(/\s*#{HTML_COMMENT_RE}/)
557
- @tree.children << Element.new(:xml_comment, result.strip, nil, :category => :block)
588
+ elsif (result = @src.scan(/\s*#{HTML_COMMENT_RE}/o))
589
+ @tree.children << Element.new(:xml_comment, result.strip, nil, category: :block)
558
590
  else
559
591
  break
560
592
  end