gitdown 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (416) hide show
  1. data/AUTHORS +1 -0
  2. data/COPYING +24 -0
  3. data/GPL +674 -0
  4. data/README +43 -0
  5. data/Rakefile +370 -0
  6. data/VERSION +1 -0
  7. data/benchmark/benchmark.rb +34 -0
  8. data/benchmark/benchmark.sh +74 -0
  9. data/benchmark/generate_data.rb +119 -0
  10. data/benchmark/mdbasics.text +306 -0
  11. data/benchmark/mdsyntax.text +888 -0
  12. data/benchmark/testing.sh +9 -0
  13. data/benchmark/timing.sh +10 -0
  14. data/bin/kramdown +78 -0
  15. data/data/kramdown/document.html +18 -0
  16. data/data/kramdown/document.latex +43 -0
  17. data/doc/default.scss.css +530 -0
  18. data/doc/default.template +80 -0
  19. data/doc/documentation.page +71 -0
  20. data/doc/index.page +98 -0
  21. data/doc/installation.page +88 -0
  22. data/doc/links.markdown +6 -0
  23. data/doc/news.feed +10 -0
  24. data/doc/news.page +28 -0
  25. data/doc/quickref.page +585 -0
  26. data/doc/syntax.page +1644 -0
  27. data/doc/tests.page +52 -0
  28. data/doc/virtual +2 -0
  29. data/lib/kramdown.rb +23 -0
  30. data/lib/kramdown/compatibility.rb +35 -0
  31. data/lib/kramdown/converter.rb +41 -0
  32. data/lib/kramdown/converter/base.rb +169 -0
  33. data/lib/kramdown/converter/html.rb +410 -0
  34. data/lib/kramdown/converter/kramdown.rb +422 -0
  35. data/lib/kramdown/converter/latex.rb +607 -0
  36. data/lib/kramdown/converter/toc.rb +82 -0
  37. data/lib/kramdown/document.rb +117 -0
  38. data/lib/kramdown/element.rb +524 -0
  39. data/lib/kramdown/error.rb +30 -0
  40. data/lib/kramdown/options.rb +373 -0
  41. data/lib/kramdown/parser.rb +40 -0
  42. data/lib/kramdown/parser/base.rb +136 -0
  43. data/lib/kramdown/parser/github_markdown.rb +44 -0
  44. data/lib/kramdown/parser/github_markdown/github_codeblock.rb +44 -0
  45. data/lib/kramdown/parser/html.rb +570 -0
  46. data/lib/kramdown/parser/kramdown.rb +338 -0
  47. data/lib/kramdown/parser/kramdown/abbreviation.rb +71 -0
  48. data/lib/kramdown/parser/kramdown/autolink.rb +53 -0
  49. data/lib/kramdown/parser/kramdown/blank_line.rb +43 -0
  50. data/lib/kramdown/parser/kramdown/block_boundary.rb +46 -0
  51. data/lib/kramdown/parser/kramdown/blockquote.rb +51 -0
  52. data/lib/kramdown/parser/kramdown/codeblock.rb +63 -0
  53. data/lib/kramdown/parser/kramdown/codespan.rb +56 -0
  54. data/lib/kramdown/parser/kramdown/emphasis.rb +70 -0
  55. data/lib/kramdown/parser/kramdown/eob.rb +39 -0
  56. data/lib/kramdown/parser/kramdown/escaped_chars.rb +38 -0
  57. data/lib/kramdown/parser/kramdown/extensions.rb +204 -0
  58. data/lib/kramdown/parser/kramdown/footnote.rb +74 -0
  59. data/lib/kramdown/parser/kramdown/header.rb +68 -0
  60. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +39 -0
  61. data/lib/kramdown/parser/kramdown/html.rb +169 -0
  62. data/lib/kramdown/parser/kramdown/html_entity.rb +44 -0
  63. data/lib/kramdown/parser/kramdown/line_break.rb +38 -0
  64. data/lib/kramdown/parser/kramdown/link.rb +148 -0
  65. data/lib/kramdown/parser/kramdown/list.rb +240 -0
  66. data/lib/kramdown/parser/kramdown/math.rb +64 -0
  67. data/lib/kramdown/parser/kramdown/paragraph.rb +63 -0
  68. data/lib/kramdown/parser/kramdown/smart_quotes.rb +214 -0
  69. data/lib/kramdown/parser/kramdown/table.rb +178 -0
  70. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +52 -0
  71. data/lib/kramdown/parser/markdown.rb +69 -0
  72. data/lib/kramdown/utils.rb +37 -0
  73. data/lib/kramdown/utils/entities.rb +348 -0
  74. data/lib/kramdown/utils/html.rb +85 -0
  75. data/lib/kramdown/utils/ordered_hash.rb +100 -0
  76. data/lib/kramdown/version.rb +28 -0
  77. data/setup.rb +1585 -0
  78. data/test/run_tests.rb +59 -0
  79. data/test/test_files.rb +197 -0
  80. data/test/testcases/block/01_blank_line/spaces.html +1 -0
  81. data/test/testcases/block/01_blank_line/spaces.text +3 -0
  82. data/test/testcases/block/01_blank_line/tabs.html +1 -0
  83. data/test/testcases/block/01_blank_line/tabs.text +6 -0
  84. data/test/testcases/block/02_eob/beginning.html +1 -0
  85. data/test/testcases/block/02_eob/beginning.text +3 -0
  86. data/test/testcases/block/02_eob/end.html +1 -0
  87. data/test/testcases/block/02_eob/end.text +3 -0
  88. data/test/testcases/block/02_eob/middle.html +1 -0
  89. data/test/testcases/block/02_eob/middle.text +5 -0
  90. data/test/testcases/block/03_paragraph/indented.html +18 -0
  91. data/test/testcases/block/03_paragraph/indented.text +19 -0
  92. data/test/testcases/block/03_paragraph/no_newline_at_end.html +5 -0
  93. data/test/testcases/block/03_paragraph/no_newline_at_end.text +5 -0
  94. data/test/testcases/block/03_paragraph/one_para.html +1 -0
  95. data/test/testcases/block/03_paragraph/one_para.text +1 -0
  96. data/test/testcases/block/03_paragraph/two_para.html +4 -0
  97. data/test/testcases/block/03_paragraph/two_para.text +4 -0
  98. data/test/testcases/block/04_header/atx_header.html +37 -0
  99. data/test/testcases/block/04_header/atx_header.text +34 -0
  100. data/test/testcases/block/04_header/atx_header_no_newline_at_end.html +1 -0
  101. data/test/testcases/block/04_header/atx_header_no_newline_at_end.text +1 -0
  102. data/test/testcases/block/04_header/setext_header.html +30 -0
  103. data/test/testcases/block/04_header/setext_header.html.19 +30 -0
  104. data/test/testcases/block/04_header/setext_header.text +36 -0
  105. data/test/testcases/block/04_header/setext_header_no_newline_at_end.html +1 -0
  106. data/test/testcases/block/04_header/setext_header_no_newline_at_end.text +2 -0
  107. data/test/testcases/block/04_header/with_auto_id_prefix.html +3 -0
  108. data/test/testcases/block/04_header/with_auto_id_prefix.options +2 -0
  109. data/test/testcases/block/04_header/with_auto_id_prefix.text +3 -0
  110. data/test/testcases/block/04_header/with_auto_ids.html +17 -0
  111. data/test/testcases/block/04_header/with_auto_ids.options +1 -0
  112. data/test/testcases/block/04_header/with_auto_ids.text +19 -0
  113. data/test/testcases/block/05_blockquote/indented.html +25 -0
  114. data/test/testcases/block/05_blockquote/indented.text +14 -0
  115. data/test/testcases/block/05_blockquote/lazy.html +34 -0
  116. data/test/testcases/block/05_blockquote/lazy.text +20 -0
  117. data/test/testcases/block/05_blockquote/nested.html +10 -0
  118. data/test/testcases/block/05_blockquote/nested.text +6 -0
  119. data/test/testcases/block/05_blockquote/no_newline_at_end.html +4 -0
  120. data/test/testcases/block/05_blockquote/no_newline_at_end.text +2 -0
  121. data/test/testcases/block/05_blockquote/very_long_line.html +3 -0
  122. data/test/testcases/block/05_blockquote/very_long_line.text +1 -0
  123. data/test/testcases/block/05_blockquote/with_code_blocks.html +15 -0
  124. data/test/testcases/block/05_blockquote/with_code_blocks.text +11 -0
  125. data/test/testcases/block/06_codeblock/error.html +4 -0
  126. data/test/testcases/block/06_codeblock/error.text +4 -0
  127. data/test/testcases/block/06_codeblock/lazy.html +4 -0
  128. data/test/testcases/block/06_codeblock/lazy.text +5 -0
  129. data/test/testcases/block/06_codeblock/no_newline_at_end.html +2 -0
  130. data/test/testcases/block/06_codeblock/no_newline_at_end.text +1 -0
  131. data/test/testcases/block/06_codeblock/no_newline_at_end_1.html +2 -0
  132. data/test/testcases/block/06_codeblock/no_newline_at_end_1.text +2 -0
  133. data/test/testcases/block/06_codeblock/normal.html +13 -0
  134. data/test/testcases/block/06_codeblock/normal.text +10 -0
  135. data/test/testcases/block/06_codeblock/tilde_syntax.html +7 -0
  136. data/test/testcases/block/06_codeblock/tilde_syntax.text +9 -0
  137. data/test/testcases/block/06_codeblock/whitespace.html +3 -0
  138. data/test/testcases/block/06_codeblock/whitespace.text +3 -0
  139. data/test/testcases/block/06_codeblock/with_blank_line.html +13 -0
  140. data/test/testcases/block/06_codeblock/with_blank_line.text +12 -0
  141. data/test/testcases/block/06_codeblock/with_eob_marker.html +6 -0
  142. data/test/testcases/block/06_codeblock/with_eob_marker.text +5 -0
  143. data/test/testcases/block/06_codeblock/with_ial.html +6 -0
  144. data/test/testcases/block/06_codeblock/with_ial.text +5 -0
  145. data/test/testcases/block/07_horizontal_rule/error.html +7 -0
  146. data/test/testcases/block/07_horizontal_rule/error.html.19 +7 -0
  147. data/test/testcases/block/07_horizontal_rule/error.text +7 -0
  148. data/test/testcases/block/07_horizontal_rule/normal.html +17 -0
  149. data/test/testcases/block/07_horizontal_rule/normal.text +17 -0
  150. data/test/testcases/block/07_horizontal_rule/sepspaces.html +3 -0
  151. data/test/testcases/block/07_horizontal_rule/sepspaces.text +3 -0
  152. data/test/testcases/block/07_horizontal_rule/septabs.html +3 -0
  153. data/test/testcases/block/07_horizontal_rule/septabs.text +3 -0
  154. data/test/testcases/block/08_list/escaping.html +17 -0
  155. data/test/testcases/block/08_list/escaping.text +17 -0
  156. data/test/testcases/block/08_list/item_ial.html +10 -0
  157. data/test/testcases/block/08_list/item_ial.text +8 -0
  158. data/test/testcases/block/08_list/lazy.html +39 -0
  159. data/test/testcases/block/08_list/lazy.text +29 -0
  160. data/test/testcases/block/08_list/list_and_hr.html +9 -0
  161. data/test/testcases/block/08_list/list_and_hr.text +5 -0
  162. data/test/testcases/block/08_list/list_and_others.html +40 -0
  163. data/test/testcases/block/08_list/list_and_others.text +26 -0
  164. data/test/testcases/block/08_list/mixed.html +117 -0
  165. data/test/testcases/block/08_list/mixed.text +66 -0
  166. data/test/testcases/block/08_list/nested.html +17 -0
  167. data/test/testcases/block/08_list/nested.text +7 -0
  168. data/test/testcases/block/08_list/other_first_element.html +39 -0
  169. data/test/testcases/block/08_list/other_first_element.text +18 -0
  170. data/test/testcases/block/08_list/simple_ol.html +19 -0
  171. data/test/testcases/block/08_list/simple_ol.text +13 -0
  172. data/test/testcases/block/08_list/simple_ul.html +48 -0
  173. data/test/testcases/block/08_list/simple_ul.text +36 -0
  174. data/test/testcases/block/08_list/single_item.html +3 -0
  175. data/test/testcases/block/08_list/single_item.text +1 -0
  176. data/test/testcases/block/08_list/special_cases.html +55 -0
  177. data/test/testcases/block/08_list/special_cases.text +35 -0
  178. data/test/testcases/block/09_html/comment.html +18 -0
  179. data/test/testcases/block/09_html/comment.text +15 -0
  180. data/test/testcases/block/09_html/content_model/deflists.html +6 -0
  181. data/test/testcases/block/09_html/content_model/deflists.options +1 -0
  182. data/test/testcases/block/09_html/content_model/deflists.text +6 -0
  183. data/test/testcases/block/09_html/content_model/tables.html +14 -0
  184. data/test/testcases/block/09_html/content_model/tables.options +1 -0
  185. data/test/testcases/block/09_html/content_model/tables.text +14 -0
  186. data/test/testcases/block/09_html/html_and_codeblocks.html +15 -0
  187. data/test/testcases/block/09_html/html_and_codeblocks.options +1 -0
  188. data/test/testcases/block/09_html/html_and_codeblocks.text +13 -0
  189. data/test/testcases/block/09_html/html_and_headers.html +5 -0
  190. data/test/testcases/block/09_html/html_and_headers.text +6 -0
  191. data/test/testcases/block/09_html/html_to_native/code.html +10 -0
  192. data/test/testcases/block/09_html/html_to_native/code.text +9 -0
  193. data/test/testcases/block/09_html/html_to_native/comment.html +7 -0
  194. data/test/testcases/block/09_html/html_to_native/comment.text +8 -0
  195. data/test/testcases/block/09_html/html_to_native/emphasis.html +6 -0
  196. data/test/testcases/block/09_html/html_to_native/emphasis.text +6 -0
  197. data/test/testcases/block/09_html/html_to_native/entity.html +1 -0
  198. data/test/testcases/block/09_html/html_to_native/entity.text +1 -0
  199. data/test/testcases/block/09_html/html_to_native/header.html +6 -0
  200. data/test/testcases/block/09_html/html_to_native/header.options +2 -0
  201. data/test/testcases/block/09_html/html_to_native/header.text +6 -0
  202. data/test/testcases/block/09_html/html_to_native/list_dl.html +8 -0
  203. data/test/testcases/block/09_html/html_to_native/list_dl.text +8 -0
  204. data/test/testcases/block/09_html/html_to_native/list_ol.html +15 -0
  205. data/test/testcases/block/09_html/html_to_native/list_ol.text +17 -0
  206. data/test/testcases/block/09_html/html_to_native/list_ul.html +19 -0
  207. data/test/testcases/block/09_html/html_to_native/list_ul.text +22 -0
  208. data/test/testcases/block/09_html/html_to_native/options +1 -0
  209. data/test/testcases/block/09_html/html_to_native/paragraph.html +3 -0
  210. data/test/testcases/block/09_html/html_to_native/paragraph.text +4 -0
  211. data/test/testcases/block/09_html/html_to_native/table_normal.html +12 -0
  212. data/test/testcases/block/09_html/html_to_native/table_normal.text +12 -0
  213. data/test/testcases/block/09_html/html_to_native/table_simple.html +48 -0
  214. data/test/testcases/block/09_html/html_to_native/table_simple.text +56 -0
  215. data/test/testcases/block/09_html/html_to_native/typography.html +1 -0
  216. data/test/testcases/block/09_html/html_to_native/typography.html.19 +1 -0
  217. data/test/testcases/block/09_html/html_to_native/typography.text +1 -0
  218. data/test/testcases/block/09_html/invalid_html_1.html +5 -0
  219. data/test/testcases/block/09_html/invalid_html_1.text +5 -0
  220. data/test/testcases/block/09_html/invalid_html_2.html +5 -0
  221. data/test/testcases/block/09_html/invalid_html_2.text +5 -0
  222. data/test/testcases/block/09_html/markdown_attr.html +38 -0
  223. data/test/testcases/block/09_html/markdown_attr.text +38 -0
  224. data/test/testcases/block/09_html/not_parsed.html +24 -0
  225. data/test/testcases/block/09_html/not_parsed.text +24 -0
  226. data/test/testcases/block/09_html/parse_as_raw.html +35 -0
  227. data/test/testcases/block/09_html/parse_as_raw.htmlinput +34 -0
  228. data/test/testcases/block/09_html/parse_as_raw.options +1 -0
  229. data/test/testcases/block/09_html/parse_as_raw.text +33 -0
  230. data/test/testcases/block/09_html/parse_as_span.html +12 -0
  231. data/test/testcases/block/09_html/parse_as_span.htmlinput +12 -0
  232. data/test/testcases/block/09_html/parse_as_span.options +1 -0
  233. data/test/testcases/block/09_html/parse_as_span.text +9 -0
  234. data/test/testcases/block/09_html/parse_block_html.html +21 -0
  235. data/test/testcases/block/09_html/parse_block_html.options +1 -0
  236. data/test/testcases/block/09_html/parse_block_html.text +17 -0
  237. data/test/testcases/block/09_html/processing_instruction.html +13 -0
  238. data/test/testcases/block/09_html/processing_instruction.text +12 -0
  239. data/test/testcases/block/09_html/simple.html +64 -0
  240. data/test/testcases/block/09_html/simple.html.19 +64 -0
  241. data/test/testcases/block/09_html/simple.options +1 -0
  242. data/test/testcases/block/09_html/simple.text +59 -0
  243. data/test/testcases/block/10_ald/simple.html +2 -0
  244. data/test/testcases/block/10_ald/simple.text +8 -0
  245. data/test/testcases/block/11_ial/auto_id_and_ial.html +1 -0
  246. data/test/testcases/block/11_ial/auto_id_and_ial.options +1 -0
  247. data/test/testcases/block/11_ial/auto_id_and_ial.text +2 -0
  248. data/test/testcases/block/11_ial/nested.html +11 -0
  249. data/test/testcases/block/11_ial/nested.text +15 -0
  250. data/test/testcases/block/11_ial/simple.html +25 -0
  251. data/test/testcases/block/11_ial/simple.text +34 -0
  252. data/test/testcases/block/12_extension/comment.html +8 -0
  253. data/test/testcases/block/12_extension/comment.text +12 -0
  254. data/test/testcases/block/12_extension/ignored.html +8 -0
  255. data/test/testcases/block/12_extension/ignored.text +8 -0
  256. data/test/testcases/block/12_extension/nomarkdown.html +10 -0
  257. data/test/testcases/block/12_extension/nomarkdown.kramdown +20 -0
  258. data/test/testcases/block/12_extension/nomarkdown.latex +13 -0
  259. data/test/testcases/block/12_extension/nomarkdown.text +21 -0
  260. data/test/testcases/block/12_extension/options.html +21 -0
  261. data/test/testcases/block/12_extension/options.text +21 -0
  262. data/test/testcases/block/12_extension/options2.html +10 -0
  263. data/test/testcases/block/12_extension/options2.text +5 -0
  264. data/test/testcases/block/12_extension/options3.html +7 -0
  265. data/test/testcases/block/12_extension/options3.text +7 -0
  266. data/test/testcases/block/13_definition_list/definition_at_beginning.html +1 -0
  267. data/test/testcases/block/13_definition_list/definition_at_beginning.text +1 -0
  268. data/test/testcases/block/13_definition_list/item_ial.html +12 -0
  269. data/test/testcases/block/13_definition_list/item_ial.text +8 -0
  270. data/test/testcases/block/13_definition_list/multiple_terms.html +13 -0
  271. data/test/testcases/block/13_definition_list/multiple_terms.text +10 -0
  272. data/test/testcases/block/13_definition_list/no_def_list.html +2 -0
  273. data/test/testcases/block/13_definition_list/no_def_list.text +2 -0
  274. data/test/testcases/block/13_definition_list/para_wrapping.html +10 -0
  275. data/test/testcases/block/13_definition_list/para_wrapping.text +6 -0
  276. data/test/testcases/block/13_definition_list/separated_by_eob.html +8 -0
  277. data/test/testcases/block/13_definition_list/separated_by_eob.text +5 -0
  278. data/test/testcases/block/13_definition_list/simple.html +8 -0
  279. data/test/testcases/block/13_definition_list/simple.text +7 -0
  280. data/test/testcases/block/13_definition_list/styled_terms.html +4 -0
  281. data/test/testcases/block/13_definition_list/styled_terms.text +2 -0
  282. data/test/testcases/block/13_definition_list/too_much_space.html +3 -0
  283. data/test/testcases/block/13_definition_list/too_much_space.text +4 -0
  284. data/test/testcases/block/13_definition_list/with_blocks.html +38 -0
  285. data/test/testcases/block/13_definition_list/with_blocks.text +24 -0
  286. data/test/testcases/block/14_table/errors.html +8 -0
  287. data/test/testcases/block/14_table/errors.text +9 -0
  288. data/test/testcases/block/14_table/escaping.html +52 -0
  289. data/test/testcases/block/14_table/escaping.text +19 -0
  290. data/test/testcases/block/14_table/footer.html +65 -0
  291. data/test/testcases/block/14_table/footer.text +25 -0
  292. data/test/testcases/block/14_table/header.html +96 -0
  293. data/test/testcases/block/14_table/header.text +32 -0
  294. data/test/testcases/block/14_table/no_table.html +3 -0
  295. data/test/testcases/block/14_table/no_table.text +3 -0
  296. data/test/testcases/block/14_table/simple.html +177 -0
  297. data/test/testcases/block/14_table/simple.html.19 +177 -0
  298. data/test/testcases/block/14_table/simple.text +49 -0
  299. data/test/testcases/block/14_table/table_with_footnote.html +25 -0
  300. data/test/testcases/block/14_table/table_with_footnote.latex +11 -0
  301. data/test/testcases/block/14_table/table_with_footnote.text +6 -0
  302. data/test/testcases/block/15_math/normal.html +26 -0
  303. data/test/testcases/block/15_math/normal.text +28 -0
  304. data/test/testcases/block/16_toc/no_toc.html +33 -0
  305. data/test/testcases/block/16_toc/no_toc.options +1 -0
  306. data/test/testcases/block/16_toc/no_toc.text +16 -0
  307. data/test/testcases/block/16_toc/toc_levels.html +24 -0
  308. data/test/testcases/block/16_toc/toc_levels.options +1 -0
  309. data/test/testcases/block/16_toc/toc_levels.text +16 -0
  310. data/test/testcases/block/17_github_codeblock/backtick_syntax.html +7 -0
  311. data/test/testcases/block/17_github_codeblock/backtick_syntax.text +9 -0
  312. data/test/testcases/block/17_github_codeblock/error.html +4 -0
  313. data/test/testcases/block/17_github_codeblock/error.text +4 -0
  314. data/test/testcases/block/17_github_codeblock/no_newline_at_end.html +2 -0
  315. data/test/testcases/block/17_github_codeblock/no_newline_at_end.text +3 -0
  316. data/test/testcases/encoding.html +46 -0
  317. data/test/testcases/encoding.text +28 -0
  318. data/test/testcases/span/01_link/empty.html +5 -0
  319. data/test/testcases/span/01_link/empty.text +5 -0
  320. data/test/testcases/span/01_link/image_in_a.html +5 -0
  321. data/test/testcases/span/01_link/image_in_a.text +5 -0
  322. data/test/testcases/span/01_link/imagelinks.html +14 -0
  323. data/test/testcases/span/01_link/imagelinks.text +16 -0
  324. data/test/testcases/span/01_link/inline.html +46 -0
  325. data/test/testcases/span/01_link/inline.html.19 +46 -0
  326. data/test/testcases/span/01_link/inline.text +48 -0
  327. data/test/testcases/span/01_link/link_defs.html +9 -0
  328. data/test/testcases/span/01_link/link_defs.text +26 -0
  329. data/test/testcases/span/01_link/links_with_angle_brackets.html +3 -0
  330. data/test/testcases/span/01_link/links_with_angle_brackets.text +3 -0
  331. data/test/testcases/span/01_link/reference.html +36 -0
  332. data/test/testcases/span/01_link/reference.html.19 +36 -0
  333. data/test/testcases/span/01_link/reference.text +50 -0
  334. data/test/testcases/span/02_emphasis/empty.html +3 -0
  335. data/test/testcases/span/02_emphasis/empty.text +3 -0
  336. data/test/testcases/span/02_emphasis/errors.html +9 -0
  337. data/test/testcases/span/02_emphasis/errors.text +9 -0
  338. data/test/testcases/span/02_emphasis/nesting.html +38 -0
  339. data/test/testcases/span/02_emphasis/nesting.text +33 -0
  340. data/test/testcases/span/02_emphasis/normal.html +46 -0
  341. data/test/testcases/span/02_emphasis/normal.text +46 -0
  342. data/test/testcases/span/03_codespan/empty.html +5 -0
  343. data/test/testcases/span/03_codespan/empty.text +5 -0
  344. data/test/testcases/span/03_codespan/errors.html +1 -0
  345. data/test/testcases/span/03_codespan/errors.text +1 -0
  346. data/test/testcases/span/03_codespan/highlighting.html +1 -0
  347. data/test/testcases/span/03_codespan/highlighting.text +1 -0
  348. data/test/testcases/span/03_codespan/normal.html +16 -0
  349. data/test/testcases/span/03_codespan/normal.text +16 -0
  350. data/test/testcases/span/04_footnote/definitions.html +17 -0
  351. data/test/testcases/span/04_footnote/definitions.latex +17 -0
  352. data/test/testcases/span/04_footnote/definitions.text +24 -0
  353. data/test/testcases/span/04_footnote/footnote_nr.html +12 -0
  354. data/test/testcases/span/04_footnote/footnote_nr.latex +2 -0
  355. data/test/testcases/span/04_footnote/footnote_nr.options +1 -0
  356. data/test/testcases/span/04_footnote/footnote_nr.text +4 -0
  357. data/test/testcases/span/04_footnote/markers.html +46 -0
  358. data/test/testcases/span/04_footnote/markers.latex +23 -0
  359. data/test/testcases/span/04_footnote/markers.text +26 -0
  360. data/test/testcases/span/05_html/across_lines.html +1 -0
  361. data/test/testcases/span/05_html/across_lines.text +2 -0
  362. data/test/testcases/span/05_html/invalid.html +1 -0
  363. data/test/testcases/span/05_html/invalid.text +1 -0
  364. data/test/testcases/span/05_html/link_with_mailto.html +1 -0
  365. data/test/testcases/span/05_html/link_with_mailto.text +1 -0
  366. data/test/testcases/span/05_html/markdown_attr.html +6 -0
  367. data/test/testcases/span/05_html/markdown_attr.text +6 -0
  368. data/test/testcases/span/05_html/normal.html +34 -0
  369. data/test/testcases/span/05_html/normal.text +34 -0
  370. data/test/testcases/span/abbreviations/abbrev.html +8 -0
  371. data/test/testcases/span/abbreviations/abbrev.text +15 -0
  372. data/test/testcases/span/abbreviations/abbrev_defs.html +2 -0
  373. data/test/testcases/span/abbreviations/abbrev_defs.text +5 -0
  374. data/test/testcases/span/autolinks/url_links.html +12 -0
  375. data/test/testcases/span/autolinks/url_links.text +12 -0
  376. data/test/testcases/span/escaped_chars/normal.html +47 -0
  377. data/test/testcases/span/escaped_chars/normal.text +47 -0
  378. data/test/testcases/span/extension/comment.html +6 -0
  379. data/test/testcases/span/extension/comment.text +6 -0
  380. data/test/testcases/span/extension/ignored.html +1 -0
  381. data/test/testcases/span/extension/ignored.text +1 -0
  382. data/test/testcases/span/extension/nomarkdown.html +1 -0
  383. data/test/testcases/span/extension/nomarkdown.text +1 -0
  384. data/test/testcases/span/extension/options.html +1 -0
  385. data/test/testcases/span/extension/options.text +1 -0
  386. data/test/testcases/span/ial/simple.html +6 -0
  387. data/test/testcases/span/ial/simple.text +6 -0
  388. data/test/testcases/span/line_breaks/normal.html +11 -0
  389. data/test/testcases/span/line_breaks/normal.latex +12 -0
  390. data/test/testcases/span/line_breaks/normal.text +11 -0
  391. data/test/testcases/span/math/normal.html +5 -0
  392. data/test/testcases/span/math/normal.text +5 -0
  393. data/test/testcases/span/text_substitutions/entities.html +6 -0
  394. data/test/testcases/span/text_substitutions/entities.options +1 -0
  395. data/test/testcases/span/text_substitutions/entities.text +6 -0
  396. data/test/testcases/span/text_substitutions/entities_as_char.html +1 -0
  397. data/test/testcases/span/text_substitutions/entities_as_char.html.19 +1 -0
  398. data/test/testcases/span/text_substitutions/entities_as_char.options +1 -0
  399. data/test/testcases/span/text_substitutions/entities_as_char.text +1 -0
  400. data/test/testcases/span/text_substitutions/entities_as_input.html +1 -0
  401. data/test/testcases/span/text_substitutions/entities_as_input.options +1 -0
  402. data/test/testcases/span/text_substitutions/entities_as_input.text +1 -0
  403. data/test/testcases/span/text_substitutions/entities_numeric.html +1 -0
  404. data/test/testcases/span/text_substitutions/entities_numeric.options +1 -0
  405. data/test/testcases/span/text_substitutions/entities_numeric.text +1 -0
  406. data/test/testcases/span/text_substitutions/entities_symbolic.html +1 -0
  407. data/test/testcases/span/text_substitutions/entities_symbolic.options +1 -0
  408. data/test/testcases/span/text_substitutions/entities_symbolic.text +1 -0
  409. data/test/testcases/span/text_substitutions/greaterthan.html +1 -0
  410. data/test/testcases/span/text_substitutions/greaterthan.text +1 -0
  411. data/test/testcases/span/text_substitutions/lowerthan.html +1 -0
  412. data/test/testcases/span/text_substitutions/lowerthan.text +1 -0
  413. data/test/testcases/span/text_substitutions/typography.html +18 -0
  414. data/test/testcases/span/text_substitutions/typography.html.19 +18 -0
  415. data/test/testcases/span/text_substitutions/typography.text +18 -0
  416. metadata +817 -0
@@ -0,0 +1,44 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+ require 'kramdown/parser/kramdown'
23
+
24
module Kramdown
  module Parser
    # Used for parsing a document written in GitHub-flavoured Markdown.
    #
    # The class inherits from the Markdown parser and changes a single thing: the
    # +:github_codeblock+ block parser is put at the front of the list of block parsers that the
    # superclass has set up. The parse method belonging to that name is defined in
    # kramdown/parser/github_markdown/github_codeblock, which is required at the end of this file.
    class GithubMarkdown < Markdown
      # Set up the parser exactly like the superclass does and then register the additional
      # block parser.
      #
      # +source+ and +options+ are handed to the superclass unchanged.
      def initialize(source, options)
        super(source, options)
        # Prepend (rather than append) so the new entry comes before all inherited block parsers.
        @block_parsers.unshift(:github_codeblock)
      end
    end
  end
end
43
+
44
+ require 'kramdown/parser/github_markdown/github_codeblock'
@@ -0,0 +1,44 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ require 'kramdown/parser/kramdown/blank_line'
24
+ require 'kramdown/parser/kramdown/extensions'
25
+ require 'kramdown/parser/kramdown/eob'
26
+ require 'kramdown/parser/kramdown/paragraph'
27
+
28
+ module Kramdown
29
+ module Parser
30
+ class GithubMarkdown
31
+ GITHUB_CODEBLOCK_START = /^```/
32
+ GITHUB_CODEBLOCK_MATCH = /^```([^\s]*)?\s*?\n(.*?)^```\s*\n/m
33
+
34
# Parse the GitHub-style fenced codeblock (delimited by lines starting with ```) at the
# current location.
#
# On success the whole block is consumed from @src and a :codeblock element is appended to
# @tree.children. The element's value is the text between the two fences and its options hash
# carries the text that directly follows the opening fence under the "language" key (an empty
# string if nothing was given).
#
# Returns true if a codeblock was parsed. Returns false and leaves @src untouched if the
# opening fence has no matching closing fence; presumably the caller then tries the next
# block parser -- TODO confirm against the block parsing loop, which is not part of this file.
def parse_github_codeblock
  # This parser is registered with the opening-fence pattern only, so the complete pattern may
  # still fail to match. scan returns nil in that case and does not move the scan pointer.
  return false unless @src.scan(self.class::GITHUB_CODEBLOCK_MATCH)

  # The capture groups of the scan just performed can be read directly from the scanner, so
  # the matched text does not have to be matched a second time.
  @tree.children << new_block_el(:codeblock, @src[2], nil, {"language" => @src[1]})
  true
end
41
+ define_parser(:github_codeblock, GITHUB_CODEBLOCK_START)
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,570 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ require 'rexml/parsers/baseparser'
24
+ require 'strscan'
25
+
26
+ module Kramdown
27
+
28
+ module Parser
29
+
30
+ # Used for parsing a HTML document.
31
+ #
32
+ # The parsing code is in the Parser module that can also be used by other parsers.
33
+ class Html < Base
34
+
35
+ # Contains all constants that are used when parsing.
36
module Constants

  #:stopdoc:
  # The regular expressions below are modelled on the ones REXML uses, with small adaptations.
  uname = REXML::Parsers::BaseParser::UNAME_STR

  HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/m
  HTML_COMMENT_RE = /<!--(.*?)-->/m
  HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
  HTML_ATTRIBUTE_RE = /\s*(#{uname})\s*=\s*(["'])(.*?)\2/m
  HTML_TAG_RE = /<((?>#{uname}))\s*((?>\s+#{uname}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
  HTML_TAG_CLOSE_RE = /<\/(#{uname})\s*>/m
  HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/

  # Elements whose content is parsed as block level content.
  HTML_CONTENT_MODEL_BLOCK = %w{
    address applet article aside button blockquote body dd div dl fieldset figure figcaption
    footer form header hgroup iframe li map menu nav noscript object section td
  }
  # Elements whose content is parsed as span level content.
  HTML_CONTENT_MODEL_SPAN = %w{
    a abbr acronym b bdo big button cite caption del dfn dt em h1 h2 h3 h4 h5 h6 i ins kbd
    label legend optgroup p q rb rbc rp rt rtc ruby samp select small span strong sub sup
    summary th tt var
  }
  # Elements whose content is kept as is.
  #
  # The elements colgroup, table, tbody, thead, tfoot, tr, ul and ol are not listed anywhere
  # and therefore also end up as raw: they need child elements that cannot be expressed using
  # kramdown syntax.
  HTML_CONTENT_MODEL_RAW = %w{script style math option textarea pre code}

  # Maps an element name to its content model, answering :raw for every unlisted name. The
  # lists are applied in order, so a name appearing in several lists gets the model of the
  # last list that contains it.
  HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
  [
    [:block, HTML_CONTENT_MODEL_BLOCK],
    [:span, HTML_CONTENT_MODEL_SPAN],
    [:raw, HTML_CONTENT_MODEL_RAW]
  ].each do |model, names|
    names.each {|tag| HTML_CONTENT_MODEL[tag] = model}
  end

  # Some HTML elements like script belong to both categories (i.e. are valid in block and
  # span HTML) and therefore don't appear in the following two lists!
  HTML_SPAN_ELEMENTS = %w{
    a abbr acronym b big bdo br button cite code del dfn em i img input ins kbd label option
    q rb rbc rp rt rtc ruby samp select small span strong sub sup textarea tt var
  }
  HTML_BLOCK_ELEMENTS = %w{
    address article aside applet body button blockquote caption col colgroup dd div dl dt
    fieldset figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend
    menu li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul
  }
  # Elements that can never have any content.
  HTML_ELEMENTS_WITHOUT_BODY = %w{
    area base br col command embed hr img input keygen link meta param source track wbr
  }

end
73
+
74
+
75
+ # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
76
+ # functionality. The only thing that must be provided by the class are instance variable
77
+ # @stack for storing the needed state and @src (instance of StringScanner) for the actual
78
+ # parsing.
79
+ module Parser
80
+
81
+ include Constants
82
+
83
+ # Process the HTML start tag that has already been scanned/checked via @src.
84
+ #
85
+ # Does the common processing steps and then yields to the caller for further processing
86
+ # (first parameter is the created element, the second parameter is +true+ if the HTML
87
+ # element is already closed, ie. contains no body).
88
def handle_html_start_tag # :yields: el, closed
  # The capture groups of HTML_TAG_RE are still available on the scanner: 1 is the tag name,
  # 2 the attribute string and 4 the optional '/' of a self-closing tag.
  tag_name = @src[1].downcase
  self_closed = !@src[4].nil?
  attributes = Utils::OrderedHash.new
  @src[2].scan(HTML_ATTRIBUTE_RE).each do |key, _quote, value|
    attributes[key] = value
  end

  el = Element.new(:html_element, tag_name, attributes, :category => :block)
  @tree.children << el

  if !self_closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
    warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
    self_closed = true
  end

  # The content of script and style elements is consumed right here as raw text, so the
  # caller always sees these two elements as already closed.
  raw_content = (tag_name == 'script' || tag_name == 'style')
  handle_raw_html_tag(tag_name) if raw_content
  yield(el, raw_content || self_closed)
end
108
+
109
+ # Handle the raw HTML tag at the current position.
110
def handle_raw_html_tag(name)
  # Everything up to the end tag for +name+ (matched case-insensitively) is added as raw text
  # to the most recently added child of the current tree; the end tag itself is consumed.
  # If there is no end tag, the whole rest of the source is used and a warning is issued.
  curpos = @src.pos
  # The name is escaped since it may contain characters like '.' that are special in a
  # regexp and would otherwise let a differently named end tag terminate the content.
  if @src.scan_until(/(?=<\/#{Regexp.escape(name)}\s*>)/mi)
    add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
    @src.scan(HTML_TAG_CLOSE_RE)
  else
    add_text(@src.rest, @tree.children.last, :raw)
    @src.terminate
    warning("Found no end tag for '#{name}' - auto-closing it")
  end
end
121
+
122
+ HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
123
+
124
+ # Parse raw HTML from the current source position, storing the found elements in +el+.
125
+ # Parsing continues until one of the following criteria are fulfilled:
126
+ #
127
+ # - The end of the document is reached.
128
+ # - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
129
+ # element).
130
+ #
131
+ # When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
132
+ # providing the block given to this method.
133
def parse_raw_html(el, &block)
  # +el+ becomes the current tree while its content is parsed; the previous tree is restored
  # from the stack at the end.
  @stack.push(@tree)
  @tree = el

  finished = false
  until finished || @src.eos?
    leading_text = @src.scan_until(HTML_RAW_START)
    unless leading_text
      # Nothing tag-like is left: the rest of the source is plain text.
      add_text(@src.rest, @tree, :text)
      @src.terminate
      warning("Found no end tag for '#{@tree.value}' - auto-closing it") if @tree.type == :html_element
      break
    end

    add_text(leading_text, @tree, :text)
    if (comment = @src.scan(HTML_COMMENT_RE))
      @tree.children << Element.new(:xml_comment, comment, nil, :category => :block)
    elsif (instruction = @src.scan(HTML_INSTRUCTION_RE))
      @tree.children << Element.new(:xml_pi, instruction, nil, :category => :block)
    elsif @src.scan(HTML_TAG_RE)
      handle_html_start_tag(&block)
    elsif @src.scan(HTML_TAG_CLOSE_RE)
      closing_name = @src[1].downcase
      if @tree.value == closing_name
        finished = true
      else
        warning("Found invalidly used HTML closing tag for '#{closing_name}' - ignoring it")
      end
    else
      # Only looked like markup: keep the single character as text and continue after it.
      add_text(@src.getch, @tree, :text)
    end
  end

  @tree = @stack.pop
end
166
+
167
+ end
168
+
169
+
170
+ # Converts HTML elements to native elements if possible.
171
+ class ElementConverter
172
+
173
+ # :stopdoc:
174
+
175
+ include Constants
176
+ include ::Kramdown::Utils::Entities
177
+
178
+ REMOVE_TEXT_CHILDREN = %w{html head hgroup ol ul dl table colgroup tbody thead tfoot tr select optgroup}
179
+ WRAP_TEXT_CHILDREN = %w{body section nav article aside header footer address div li dd blockquote figure
180
+ figcaption fieldset form}
181
+ REMOVE_WHITESPACE_CHILDREN = %w{body section nav article aside header footer address
182
+ div li dd blockquote figure figcaption td th fieldset form}
183
+ STRIP_WHITESPACE = %w{address article aside blockquote body caption dd div dl dt fieldset figcaption form footer
184
+ header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
185
+ SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
186
+
187
# Create a converter that works on behalf of the given +root+ element (its options are
# consulted when converting code elements).
def initialize(root)
  @root = root
end
190
+
191
# Convenience method: create a converter for +root+ and process the element +el+ (by default
# the root element itself) with it.
def self.convert(root, el = root)
  converter = new(root)
  converter.process(el)
end
194
+
195
+ # Convert the element +el+ and its children.
196
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  case el.type
  when :xml_comment, :xml_pi
    # Comments and processing instructions only get a category, which is derived from the
    # content model of the (HTML equivalent of the) parent element.
    parent_tag = if parent.nil?
                   'div'
                 elsif parent.type == :html_element
                   parent.value
                 else
                   {:code_span => 'code', :code_block => 'pre', :header => 'h1'}[parent.type] ||
                     parent.type.to_s
                 end
    category = (HTML_CONTENT_MODEL[parent_tag] == :span ? :span : :block)
    el.options.replace({:category => category})
    return
  when :root
    el.children.each {|child| process(child)}
    remove_whitespace_children(el)
    return
  when :html_element
    # handled below
  else
    return
  end

  tag = el.value
  converter = "convert_#{tag}"
  # A dedicated convert_* method takes over the whole conversion of the element.
  return send(converter, el) if do_conversion && self.class.method_defined?(converter)

  remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(tag)

  if do_conversion && SIMPLE_ELEMENTS.include?(tag)
    # Simple elements map directly onto the native element type of the same name.
    set_basics(el, tag.intern)
    process_children(el, do_conversion, preserve_text)
  else
    process_html_element(el, do_conversion, preserve_text)
  end

  return unless do_conversion
  strip_whitespace(el) if STRIP_WHITESPACE.include?(tag)
  remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(tag)
  wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(tag)
end
241
+
242
# Process all children of +el+: a text child is replaced by the elements that #process_text
# creates for it, every other child is converted in place via #process.
def process_children(el, do_conversion = true, preserve_text = false)
  keep_whitespace = preserve_text || !do_conversion
  el.children.map! do |child|
    next process_text(child.value, keep_whitespace) if child.type == :text
    process(child, do_conversion, preserve_text, el)
    child
  end
  # Text children were turned into arrays of elements, so splice them into the list.
  el.children.flatten!
end
252
+
253
+ # Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
254
+ # entities in entity elements.
255
def process_text(raw, preserve = false)
  # Returns an array of :text, :entity, :smart_quote and :typographic_sym elements.
  #
  # Whitespace is compressed on a copy so that the string passed in by the caller (which may
  # be frozen) is never modified.
  raw = raw.gsub(/\s+/, ' ') unless preserve
  src = StringScanner.new(raw)
  # Built once instead of on every loop iteration.
  entity_ahead = /(?=#{HTML_ENTITY_RE})/
  result = []
  while !src.eos?
    if tmp = src.scan_until(entity_ahead)
      result << Element.new(:text, tmp)
      src.scan(HTML_ENTITY_RE)
      # Either the entity name, the decimal code point or the hexadecimal code point.
      val = src[1] || (src[2] && src[2].to_i) || src[3].hex
      result << if %w{lsquo rsquo ldquo rdquo}.include?(val)
                  Element.new(:smart_quote, val.intern)
                elsif %w{mdash ndash hellip laquo raquo}.include?(val)
                  Element.new(:typographic_sym, val.intern)
                else
                  begin
                    Element.new(:entity, entity(val), nil, :original => src.matched)
                  rescue ::Kramdown::Error
                    # Not a usable entity: emit just the ampersand as entity and rewind the
                    # scanner to right behind it so that the remainder is handled as text.
                    src.pos -= src.matched_size - 1
                    Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
                  end
                end
    else
      result << Element.new(:text, src.rest)
      src.terminate
    end
  end
  result
end
283
+
284
# Keep +el+ as HTML element: set its category and content model (always :raw when no
# conversion is done) and then process its children.
def process_html_element(el, do_conversion = true, preserve_text = false)
  category = HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block
  content_model = (do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw)
  el.options.replace(:category => category, :content_model => content_model)
  process_children(el, do_conversion, preserve_text)
end
289
+
290
# Delete all direct :text children of +el+ (the children array is modified in place).
def remove_text_children(el)
  el.children.delete_if do |child|
    child.type == :text
  end
end
293
+
294
# Wrap every run of consecutive children of +el+ that are text or not block level into a
# new transparent :p element; block level children are kept as they are.
def wrap_text_children(el)
  wrapped = []
  open_paragraph = nil
  el.children.each do |child|
    if Element.category(child) == :block && child.type != :text
      # A block level child ends the currently collected run.
      open_paragraph = nil
      wrapped << child
    else
      unless open_paragraph
        open_paragraph = Element.new(:p, nil, nil, :transparent => true)
        wrapped << open_paragraph
      end
      open_paragraph.children << child
    end
  end
  el.children = wrapped
end
312
+
313
# Strip leading whitespace from the first child and trailing whitespace from the last child
# of +el+, in place and only if the respective child is a :text element.
def strip_whitespace(el)
  return if el.children.empty?
  head = el.children.first
  tail = el.children.last
  head.value.lstrip! if head.type == :text
  tail.value.rstrip! if tail.type == :text
end
322
+
323
# Remove the whitespace-only :text children of +el+ that are the first or the last child or
# that sit directly between two block level children.
def remove_whitespace_children(el)
  siblings = el.children
  last_index = siblings.length - 1
  kept = []
  siblings.each_with_index do |child, idx|
    blank_text = child.type == :text && child.value.strip.empty?
    removable = blank_text &&
      (idx == 0 || idx == last_index ||
       (Element.category(siblings[idx - 1]) == :block && Element.category(siblings[idx + 1]) == :block))
    kept << child unless removable
  end
  el.children = kept
end
332
+
333
# Turn +el+ into an element of the given +type+: its options are replaced by +opts+ and its
# value is reset to +nil+.
def set_basics(el, type, opts = {})
  el.options.replace(opts)
  el.type = type
  el.value = nil
end
338
+
339
# Append the values of all :text elements found in the tree below (and including) +el+ to
# the string +raw+, in document order.
def extract_text(el, raw)
  if el.type == :text
    raw << el.value.to_s
  end
  el.children.each do |child|
    extract_text(child, raw)
  end
end
343
+
344
# Convert an 'a' element to a native link element, but only if it has an href attribute;
# otherwise it is kept as unconverted HTML element.
def convert_a(el)
  return process_html_element(el, false) unless el.attr['href']
  set_basics(el, :a)
  process_children(el)
end
352
+
353
+ EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}
354
# Convert an emphasis element to the native element given by EMPHASIS_TYPE_MAP. If the text
# content starts or ends with whitespace, the element is kept as unconverted HTML element.
def convert_em(el)
  content = ''
  extract_text(el, content)
  padded = content =~ /\A\s/ || content =~ /\s\z/
  return process_html_element(el, false) if padded
  set_basics(el, EMPHASIS_TYPE_MAP[el.value])
  process_children(el)
end
364
+ %w{b strong i}.each do |i|
365
+ alias_method("convert_#{i}".to_sym, :convert_em)
366
+ end
367
+
368
# Convert a heading element to a native :header element. The level is taken from the digit
# in the tag name and the plain text content is stored in the :raw_text option.
def convert_h1(el)
  level = el.value[1..1].to_i
  set_basics(el, :header, :level => level)
  raw_text = ''
  el.options[:raw_text] = raw_text
  extract_text(el, raw_text)
  process_children(el)
end
373
+ %w{h2 h3 h4 h5 h6}.each do |i|
374
+ alias_method("convert_#{i}".to_sym, :convert_h1)
375
+ end
376
+
377
# Convert a 'code' element to a native :codespan (any other element handled by this method
# becomes a :codeblock). This is only done if the whole text content can be expressed as one
# plain string; otherwise the element is kept as unconverted HTML element.
def convert_code(el)
  content = ''
  extract_text(el, content)
  parts = process_text(content, true)
  begin
    # Try to merge all parts into a single string, resolving entities to characters.
    merged = parts.inject('') do |buffer, part|
      case part.type
      when :text
        buffer << part.value
      when :entity
        if RUBY_VERSION >= '1.9'
          buffer << part.value.char.encode(@root.options[:encoding])
        elsif [60, 62, 34, 38].include?(part.value.code_point)
          buffer << part.value.code_point.chr
        end
      when :smart_quote, :typographic_sym
        buffer << entity(part.value.to_s).char.encode(@root.options[:encoding])
      else
        raise "Bug - please report"
      end
    end
    parts.clear
    parts << Element.new(:text, merged)
  rescue
    # Deliberately ignored: if merging fails, the parts are left as they were and the
    # element is kept as HTML element below.
  end

  if parts.length > 1 || parts.first.type != :text
    process_html_element(el, false, true)
  else
    set_basics(el, (el.value == 'code' ? :codespan : :codeblock))
    el.value = parts.first.value
    el.children.clear
  end
end
413
+ alias :convert_pre :convert_code
414
+
415
# Convert a 'table' element to a native :table element if it is a simple table (see
# #is_simple_table?); otherwise it is kept as unconverted HTML element.
#
# For a converted table the column alignment is stored in the :alignment option (and the
# text-align declaration removed from the style attributes of the cells), remaining HTML
# element children are dropped, header cells are turned into :td cells and rows that are
# direct children of the table are wrapped in a :tbody element.
def convert_table(el)
  if !is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  calc_alignment = lambda do |c|
    if c.type == :tr
      el.options[:alignment] = c.children.map do |td|
        style = td.attr['style']
        next :default unless style
        style.slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
        # nil if the style attribute has no (supported) text-align declaration; such cells
        # use the default alignment instead of raising an error.
        alignment = $1
        td.attr.delete('style') if style.strip.empty?
        alignment ? alignment.to_sym : :default
      end
    else
      c.children.each {|cc| calc_alignment.call(cc)}
    end
  end
  calc_alignment.call(el)
  el.children.delete_if {|c| c.type == :html_element}

  change_th_type = lambda do |c|
    if c.type == :th
      c.type = :td
    else
      c.children.each {|cc| change_th_type.call(cc)}
    end
  end
  change_th_type.call(el)

  # A table may have no children at all, so the first child has to be checked for nil.
  first_child = el.children.first
  if first_child && first_child.type == :tr
    tbody = Element.new(:tbody)
    tbody.children = el.children
    el.children = [tbody]
  end
end
457
+
458
+ def is_simple_table?(el)
459
+ only_phrasing_content = lambda do |c|
460
+ c.children.all? do |cc|
461
+ (cc.type == :text || !HTML_BLOCK_ELEMENTS.include?(cc.value)) && only_phrasing_content.call(cc)
462
+ end
463
+ end
464
+ check_cells = Proc.new do |c|
465
+ if c.value == 'th' || c.value == 'td'
466
+ return false if !only_phrasing_content.call(c)
467
+ else
468
+ c.children.each {|cc| check_cells.call(cc)}
469
+ end
470
+ end
471
+ check_cells.call(el)
472
+
473
+ nr_cells = 0
474
+ check_nr_cells = lambda do |t|
475
+ if t.value == 'tr'
476
+ count = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.length
477
+ if count != nr_cells
478
+ if nr_cells == 0
479
+ nr_cells = count
480
+ else
481
+ nr_cells = -1
482
+ break
483
+ end
484
+ end
485
+ else
486
+ t.children.each {|cc| check_nr_cells.call(cc)}
487
+ end
488
+ end
489
+ check_nr_cells.call(el)
490
+ return false if nr_cells == -1
491
+
492
+ alignment = nil
493
+ check_alignment = Proc.new do |t|
494
+ if t.value == 'tr'
495
+ cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.map do |cell|
496
+ md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
497
+ return false if md && (md[1] == 'justify' || md[1] == 'inherit')
498
+ md.nil? ? :default : md[1]
499
+ end
500
+ alignment = cur_alignment if alignment.nil?
501
+ return false if alignment != cur_alignment
502
+ else
503
+ t.children.each {|cc| check_alignment.call(cc)}
504
+ end
505
+ end
506
+ check_alignment.call(el)
507
+
508
+ check_rows = lambda do |t, type|
509
+ t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}}
510
+ end
511
+ check_rows.call(el, 'td') ||
512
+ (el.children.all? do |t|
513
+ t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
514
+ ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
515
+ end && el.children.any? {|t| t.value == 'tbody'})
516
+ end
517
+
518
# Convert a 'script' element: math scripts become native math elements, all other scripts
# are processed as normal HTML elements.
def convert_script(el)
  if is_math_tag?(el)
    handle_math_tag(el)
  else
    process_html_element(el)
  end
end
525
+
526
# Return a true value (the match position) if the type attribute of +el+ contains
# 'math/tex' as separate word, and +nil+ otherwise.
def is_math_tag?(el)
  /\bmath\/tex\b/ =~ el.attr['type'].to_s
end
529
+
530
# Turn the math script element +el+ into a native :math element: block level if the type
# attribute contains 'mode=display', span level otherwise. The value of the first child
# becomes the value of the element and the type attribute is removed.
def handle_math_tag(el)
  category = (el.attr['type'] =~ /mode=display/ ? :block : :span)
  set_basics(el, :math, :category => category)
  el.value = el.children.shift.value
  el.attr.delete('type')
end
535
+
536
+ end
537
+
538
+ include Parser
539
+
540
+ # Parse the source string provided on initialization as HTML document.
541
def parse
  @stack, @tree = [], @root
  @src = StringScanner.new(adapt_source(source))

  # Handle the document prolog: processing instructions and comments are added to the tree,
  # a doctype is skipped.
  loop do
    if (instruction = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/))
      @tree.children << Element.new(:xml_pi, instruction.strip, nil, :category => :block)
    elsif @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
      # ignore the doctype
    elsif (comment = @src.scan(/\s*#{HTML_COMMENT_RE}/))
      @tree.children << Element.new(:xml_comment, comment.strip, nil, :category => :block)
    else
      break
    end
  end

  # Every start tag that is not already closed gets its content parsed recursively.
  tag_handler = lambda do |c, closed|
    parse_raw_html(c, &tag_handler) if !closed
  end
  parse_raw_html(@tree, &tag_handler)

  ElementConverter.convert(@tree)
end
564
+
565
+ end
566
+
567
+ end
568
+
569
+ end
570
+