motion-kramdown 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +84 -0
  3. data/lib/kramdown/compatibility.rb +36 -0
  4. data/lib/kramdown/converter/base.rb +259 -0
  5. data/lib/kramdown/converter/html.rb +461 -0
  6. data/lib/kramdown/converter/kramdown.rb +423 -0
  7. data/lib/kramdown/converter/latex.rb +600 -0
  8. data/lib/kramdown/converter/math_engine/itex2mml.rb +39 -0
  9. data/lib/kramdown/converter/math_engine/mathjax.rb +33 -0
  10. data/lib/kramdown/converter/math_engine/ritex.rb +38 -0
  11. data/lib/kramdown/converter/pdf.rb +624 -0
  12. data/lib/kramdown/converter/remove_html_tags.rb +53 -0
  13. data/lib/kramdown/converter/syntax_highlighter/coderay.rb +78 -0
  14. data/lib/kramdown/converter/syntax_highlighter/rouge.rb +37 -0
  15. data/lib/kramdown/converter/toc.rb +69 -0
  16. data/lib/kramdown/converter.rb +69 -0
  17. data/lib/kramdown/document.rb +144 -0
  18. data/lib/kramdown/element.rb +515 -0
  19. data/lib/kramdown/error.rb +17 -0
  20. data/lib/kramdown/options.rb +584 -0
  21. data/lib/kramdown/parser/base.rb +130 -0
  22. data/lib/kramdown/parser/gfm.rb +55 -0
  23. data/lib/kramdown/parser/html.rb +575 -0
  24. data/lib/kramdown/parser/kramdown/abbreviation.rb +67 -0
  25. data/lib/kramdown/parser/kramdown/autolink.rb +37 -0
  26. data/lib/kramdown/parser/kramdown/blank_line.rb +30 -0
  27. data/lib/kramdown/parser/kramdown/block_boundary.rb +33 -0
  28. data/lib/kramdown/parser/kramdown/blockquote.rb +39 -0
  29. data/lib/kramdown/parser/kramdown/codeblock.rb +56 -0
  30. data/lib/kramdown/parser/kramdown/codespan.rb +44 -0
  31. data/lib/kramdown/parser/kramdown/emphasis.rb +61 -0
  32. data/lib/kramdown/parser/kramdown/eob.rb +26 -0
  33. data/lib/kramdown/parser/kramdown/escaped_chars.rb +25 -0
  34. data/lib/kramdown/parser/kramdown/extensions.rb +201 -0
  35. data/lib/kramdown/parser/kramdown/footnote.rb +56 -0
  36. data/lib/kramdown/parser/kramdown/header.rb +59 -0
  37. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +27 -0
  38. data/lib/kramdown/parser/kramdown/html.rb +160 -0
  39. data/lib/kramdown/parser/kramdown/html_entity.rb +33 -0
  40. data/lib/kramdown/parser/kramdown/line_break.rb +25 -0
  41. data/lib/kramdown/parser/kramdown/link.rb +139 -0
  42. data/lib/kramdown/parser/kramdown/list.rb +256 -0
  43. data/lib/kramdown/parser/kramdown/math.rb +54 -0
  44. data/lib/kramdown/parser/kramdown/paragraph.rb +54 -0
  45. data/lib/kramdown/parser/kramdown/smart_quotes.rb +174 -0
  46. data/lib/kramdown/parser/kramdown/table.rb +171 -0
  47. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +44 -0
  48. data/lib/kramdown/parser/kramdown.rb +359 -0
  49. data/lib/kramdown/parser/markdown.rb +56 -0
  50. data/lib/kramdown/parser.rb +27 -0
  51. data/lib/kramdown/utils/configurable.rb +44 -0
  52. data/lib/kramdown/utils/entities.rb +347 -0
  53. data/lib/kramdown/utils/html.rb +75 -0
  54. data/lib/kramdown/utils/ordered_hash.rb +87 -0
  55. data/lib/kramdown/utils/string_scanner.rb +74 -0
  56. data/lib/kramdown/utils/unidecoder.rb +51 -0
  57. data/lib/kramdown/utils.rb +58 -0
  58. data/lib/kramdown/version.rb +15 -0
  59. data/lib/kramdown.rb +10 -0
  60. data/lib/motion-kramdown.rb +47 -0
  61. data/lib/rubymotion/encodings.rb +37 -0
  62. data/lib/rubymotion/rexml_shim.rb +25 -0
  63. data/lib/rubymotion/set.rb +1349 -0
  64. data/lib/rubymotion/version.rb +6 -0
  65. data/spec/document_tree.rb +48 -0
  66. data/spec/gfm_to_html.rb +95 -0
  67. data/spec/helpers/it_behaves_like.rb +27 -0
  68. data/spec/helpers/option_file.rb +46 -0
  69. data/spec/helpers/spec_options.rb +37 -0
  70. data/spec/helpers/tidy.rb +12 -0
  71. data/spec/html_to_html.rb +40 -0
  72. data/spec/html_to_kramdown_to_html.rb +46 -0
  73. data/spec/kramdown_to_xxx.rb +40 -0
  74. data/spec/test_location.rb +203 -0
  75. data/spec/test_string_scanner_kramdown.rb +19 -0
  76. data/spec/text_to_kramdown_to_html.rb +52 -0
  77. data/spec/text_to_latex.rb +33 -0
  78. metadata +164 -0
@@ -0,0 +1,55 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # RM require 'kramdown/parser/kramdown'
4
+
5
+ module Kramdown
6
+ module Parser
7
# Parser for GitHub Flavored Markdown. It is the kramdown parser with the
# fenced code block and ATX header rules exchanged for their GFM variants and,
# when the :hard_wrap option is set, with every newline inside a text element
# turned into a hard line break.
class GFM < Kramdown::Parser::Kramdown

  # Set up the base parser, then exchange the block parsers that GFM
  # overrides, putting each replacement at the position of the original.
  def initialize(source, options)
    super
    @span_parsers.delete(:line_break) if @options[:hard_wrap]
    {:codeblock_fenced => :codeblock_fenced_gfm,
     :atx_header => :atx_header_gfm}.each do |current, replacement|
      i = @block_parsers.index(current)
      # If the parser to replace is not in the list there is no slot to reuse;
      # Array#insert would raise a TypeError for a nil index, so skip it.
      next if i.nil?
      @block_parsers.delete(current)
      @block_parsers.insert(i, replacement)
    end
  end

  # Run the normal parse and afterwards add the hard line breaks if the
  # :hard_wrap option is set.
  def parse
    super
    add_hard_line_breaks(@root) if @options[:hard_wrap]
  end

  # Recursively split every text child of +element+ that contains a newline
  # into one text element per line, separated by :br elements. HTML elements
  # are left untouched; all other elements are descended into.
  def add_hard_line_breaks(element)
    element.children.map! do |child|
      if child.type == :text && child.value =~ /\n/
        children = []
        lines = child.value.split(/\n/, -1)
        # No :br for a newline that merely ends the last text of a block element.
        omit_trailing_br = (Kramdown::Element.category(element) == :block && element.children[-1] == child &&
                            lines[-1].empty?)
        lines.each_with_index do |line, index|
          # Every line but the first keeps the newline that preceded it.
          children << Element.new(:text, (index > 0 ? "\n#{line}" : line))
          children << Element.new(:br) if index < lines.size - 2 ||
            (index == lines.size - 2 && !omit_trailing_br)
        end
        children
      elsif child.type == :html_element
        child
      else
        add_hard_line_breaks(child)
        child
      end
    end.flatten!
  end

  ATX_HEADER_START = /^\#{1,6}\s/
  define_parser(:atx_header_gfm, ATX_HEADER_START, nil, 'parse_atx_header')

  FENCED_CODEBLOCK_MATCH = /^(([~`]){3,})\s*?(\w+)?\s*?\n(.*?)^\1\2*\s*?\n/m
  define_parser(:codeblock_fenced_gfm, /^[~`]{3,}/, nil, 'parse_codeblock_fenced')

end
54
+ end
55
+ end
@@ -0,0 +1,575 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2014 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ # RM require 'rexml/parsers/baseparser'
11
+ # RM require 'strscan'
12
+ # RM require 'kramdown/utils'
13
+
14
+ module Kramdown
15
+
16
+ module Parser
17
+
18
+ # Used for parsing a HTML document.
19
+ #
20
+ # The parsing code is in the Parser module that can also be used by other parsers.
21
+ class Html < Base
22
+
23
# Regular expressions and element lists shared by the HTML parsing code.
module Constants

  #:stopdoc:
  # The following regexps are based on the ones used by REXML, with some slight modifications.
  HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
  HTML_COMMENT_RE = /<!--(.*?)-->/m
  HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
  HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(["'])(.*?)\2)?/m
  HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(["']).*?\3)?)*)\s*(\/)?>/m
  HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
  HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/

  HTML_CONTENT_MODEL_BLOCK = %w[address applet article aside blockquote body
     dd details div dl fieldset figure figcaption footer form header hgroup iframe li map menu nav
     noscript object section summary td]
  HTML_CONTENT_MODEL_SPAN  = %w[a abbr acronym b bdo big button cite caption del dfn dt em
     h1 h2 h3 h4 h5 h6 i ins label legend optgroup p q rb rbc
     rp rt rtc ruby select small span strong sub sup th tt]
  HTML_CONTENT_MODEL_RAW   = %w[script style math option textarea pre code kbd samp var]
  # The following elements are also parsed as raw since they need child elements that cannot
  # be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol

  # Maps a tag name to its content model; a tag that is not listed is stored
  # and reported as :raw on first lookup.
  HTML_CONTENT_MODEL = Hash.new {|hash, tag| hash[tag] = :raw}
  [[:block, HTML_CONTENT_MODEL_BLOCK],
   [:span, HTML_CONTENT_MODEL_SPAN],
   [:raw, HTML_CONTENT_MODEL_RAW]].each do |model, tags|
    tags.each {|tag| HTML_CONTENT_MODEL[tag] = model}
  end

  # Some HTML elements like script belong to both categories (i.e. are valid in block and
  # span HTML) and therefore don't appear in either list!
  # script, textarea
  HTML_SPAN_ELEMENTS = %w[a abbr acronym b big bdo br button cite code del dfn em i img input
                          ins kbd label option q rb rbc rp rt rtc ruby samp select small span
                          strong sub sup tt u var]
  HTML_BLOCK_ELEMENTS = %w[address article aside applet body blockquote caption col colgroup dd div dl dt fieldset
                           figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend menu
                           li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul]
  HTML_ELEMENTS_WITHOUT_BODY = %w[area base br col command embed hr img input keygen link meta param source track wbr]
end
62
+
63
+
64
+ # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
65
+ # functionality. The only thing that must be provided by the class are instance variable
66
+ # @stack for storing the needed state and @src (instance of StringScanner) for the actual
67
+ # parsing.
68
+ module Parser
69
+
70
+ include Constants
71
+
72
# Process the HTML start tag that has already been scanned/checked via @src.
#
# Builds the :html_element for the tag, appends it to the current tree and then
# yields to the caller for further processing. The block receives the created
# element, +true+ if the element is already closed (i.e. has no body) and a
# flag telling whether the body - and the end tag - still need to be handled
# in case closed=false.
def handle_html_start_tag(line = nil) # :yields: el, closed, handle_body
  tag_name = @src[1].downcase
  self_closed = !@src[4].nil?

  attributes = Utils::OrderedHash.new
  @src[2].scan(HTML_ATTRIBUTE_RE).each do |key, _quote, val|
    # An attribute without a value is stored with an empty string.
    attributes[key.downcase] = val || ""
  end

  el = Element.new(:html_element, tag_name, attributes, :category => :block)
  el.options[:location] = line if line
  @tree.children << el

  if !self_closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
    warning("The HTML tag '#{el.value}' on line #{line} cannot have any content - auto-closing it")
    self_closed = true
  end

  unless tag_name == 'script' || tag_name == 'style'
    return yield(el, self_closed, true)
  end

  # The body of script/style is consumed here as raw text, so the caller must
  # not handle it again.
  handle_raw_html_tag(tag_name)
  yield(el, false, false)
end
99
+
100
# Handle the raw HTML tag at the current position.
#
# Everything up to the closing tag for +name+ is added as :raw text to the
# last child of the current tree and the closing tag is consumed. If there is
# no closing tag, the rest of the source is taken as the body and a warning is
# issued.
def handle_raw_html_tag(name)
  curpos = @src.pos
  # The tag name is escaped because names may contain regexp metacharacters
  # such as '.', which must only match themselves in the end tag.
  if @src.scan_until(/(?=<\/#{Regexp.escape(name)}\s*>)/mi)
    add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
    @src.scan(HTML_TAG_CLOSE_RE)
  else
    add_text(@src.rest, @tree.children.last, :raw)
    @src.terminate
    warning("Found no end tag for '#{name}' - auto-closing it")
  end
end
112
+
113
+ HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
114
+
115
# Parse raw HTML from the current source position, storing the found elements in +el+.
# Parsing continues until one of the following criteria is fulfilled:
#
# - The end of the document is reached.
# - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
#   element).
#
# When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
# providing the block given to this method.
def parse_raw_html(el, &block)
  @stack.push(@tree)
  @tree = el

  done = false
  while !@src.eos? && !done
    if result = @src.scan_until(HTML_RAW_START)
      add_text(result, @tree, :text)
      line = @src.current_line_number
      if result = @src.scan(HTML_COMMENT_RE)
        @tree.children << Element.new(:xml_comment, result, nil, :category => :block, :location => line)
      elsif result = @src.scan(HTML_INSTRUCTION_RE)
        @tree.children << Element.new(:xml_pi, result, nil, :category => :block, :location => line)
      elsif @src.scan(HTML_TAG_RE)
        # Method#arity is negative when a method has optional arguments (-1 for
        # "line = nil"), so the absolute value is checked; comparing with 1
        # meant the line number was never passed on.
        if method(:handle_html_start_tag).arity.abs >= 1
          handle_html_start_tag(line, &block)
        else
          handle_html_start_tag(&block) # DEPRECATED: method needs to accept line number in 2.0
        end
      elsif @src.scan(HTML_TAG_CLOSE_RE)
        if @tree.value == @src[1].downcase
          done = true
        else
          warning("Found invalidly used HTML closing tag for '#{@src[1].downcase}' on line #{line} - ignoring it")
        end
      else
        # A '<' that starts nothing recognisable is plain text.
        add_text(@src.getch, @tree, :text)
      end
    else
      add_text(@src.rest, @tree, :text)
      @src.terminate
      warning("Found no end tag for '#{@tree.value}' on line #{@tree.options[:location]} - auto-closing it") if @tree.type == :html_element
      done = true
    end
  end

  @tree = @stack.pop
end
162
+
163
+ end
164
+
165
+
166
+ # Converts HTML elements to native elements if possible.
167
+ class ElementConverter
168
+
169
+ # :stopdoc:
170
+
171
+ include Constants
172
+ include ::Kramdown::Utils::Entities
173
+
174
+ REMOVE_TEXT_CHILDREN = %w{html head hgroup ol ul dl table colgroup tbody thead tfoot tr select optgroup}
175
+ WRAP_TEXT_CHILDREN = %w{body section nav article aside header footer address div li dd blockquote figure
176
+ figcaption fieldset form}
177
+ REMOVE_WHITESPACE_CHILDREN = %w{body section nav article aside header footer address
178
+ div li dd blockquote figure figcaption td th fieldset form}
179
+ STRIP_WHITESPACE = %w{address article aside blockquote body caption dd div dl dt fieldset figcaption form footer
180
+ header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
181
+ SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
182
+
183
# Create a converter for the element tree starting at +root+.
def initialize(root)
  @root = root
end

# Convenience entry point: build a converter for +root+ and process +el+
# (the root itself by default).
def self.convert(root, el = root)
  converter = new(root)
  converter.process(el)
end
190
+
191
# Convert the element +el+ and its children.
#
# XML comments and processing instructions only get a category derived from
# +parent+, the root element has its children processed, and HTML elements are
# either handed to a matching convert_* method or processed generically. All
# other element types are left alone.
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  kind = el.type
  if kind == :xml_comment || kind == :xml_pi
    # Name of the HTML tag the parent corresponds to; it decides whether the
    # comment/instruction is span or block level.
    ptype = if parent.nil?
              'div'
            elsif parent.type == :html_element
              parent.value
            else
              {:code_span => 'code', :code_block => 'pre', :header => 'h1'}.fetch(parent.type) { parent.type.to_s }
            end
    el.options.replace({:category => (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block)})
    return
  elsif kind == :root
    el.children.each {|child| process(child)}
    remove_whitespace_children(el)
    return
  elsif kind != :html_element
    return
  end

  converter = "convert_#{el.value}"
  return send(converter, el) if do_conversion && self.class.method_defined?(converter)

  # Remember the tag name: set_basics below resets el.value.
  type = el.value
  remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(type)

  if do_conversion && SIMPLE_ELEMENTS.include?(type)
    set_basics(el, type.intern)
    process_children(el, do_conversion, preserve_text)
  else
    process_html_element(el, do_conversion, preserve_text)
  end

  return unless do_conversion
  strip_whitespace(el) if STRIP_WHITESPACE.include?(type)
  remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(type)
  wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(type)
end
237
+
238
# Process all children of +el+ in place: a text child is replaced by the
# elements returned from #process_text, every other child is converted via
# #process and kept.
def process_children(el, do_conversion = true, preserve_text = false)
  keep_whitespace = preserve_text || !do_conversion
  el.children.map! do |child|
    next process_text(child.value, keep_whitespace) if child.type == :text
    process(child, do_conversion, preserve_text, el)
    child
  end.flatten!
end
248
+
249
# Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
# entities into entity elements.
#
# Returns an array of :text, :entity, :smart_quote and :typographic_sym
# elements. Note that +raw+ itself is modified when whitespace is compressed.
def process_text(raw, preserve = false)
  raw.gsub!(/\s+/, ' ') unless preserve
  scanner = Kramdown::Utils::StringScanner.new(raw)
  entity_ahead = /(?=#{HTML_ENTITY_RE})/
  elements = []
  until scanner.eos?
    leading = scanner.scan_until(entity_ahead)
    unless leading
      # No further entity: the remainder is plain text.
      elements << Element.new(:text, scanner.rest)
      scanner.terminate
      next
    end

    elements << Element.new(:text, leading)
    scanner.scan(HTML_ENTITY_RE)
    # Named entity, decimal code point or hexadecimal code point.
    val = scanner[1] || (scanner[2] && scanner[2].to_i) || scanner[3].hex
    elements << if %w{lsquo rsquo ldquo rdquo}.include?(val)
                  Element.new(:smart_quote, val.intern)
                elsif %w{mdash ndash hellip laquo raquo}.include?(val)
                  Element.new(:typographic_sym, val.intern)
                else
                  begin
                    Element.new(:entity, entity(val), nil, :original => scanner.matched)
                  rescue ::Kramdown::Error
                    # Unknown entity: emit only the ampersand and rescan the
                    # text that follows it.
                    scanner.pos -= scanner.matched_size - 1
                    Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
                  end
                end
  end
  elements
end
279
+
280
# Keep +el+ as HTML element: set its category and content model (always :raw
# when no conversion is done) and then process its children.
def process_html_element(el, do_conversion = true, preserve_text = false)
  category = HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block
  content_model = do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw
  el.options.replace(:category => category, :content_model => content_model)
  process_children(el, do_conversion, preserve_text)
end
285
+
286
# Delete every direct text child of +el+ in place.
def remove_text_children(el)
  el.children.delete_if do |child|
    child.type == :text
  end
end
289
+
290
# Wrap each run of consecutive text/non-block children of +el+ in a transparent
# paragraph element; block children are kept as they are.
def wrap_text_children(el)
  wrapped = []
  open_paragraph = nil
  el.children.each do |child|
    if Element.category(child) == :block && child.type != :text
      wrapped << child
      # A block element ends the current run.
      open_paragraph = nil
    else
      if open_paragraph.nil?
        open_paragraph = Element.new(:p, nil, nil, :transparent => true)
        wrapped << open_paragraph
      end
      open_paragraph.children << child
    end
  end
  el.children = wrapped
end
308
+
309
# Strip leading whitespace from the first child and trailing whitespace from
# the last child of +el+, provided they are text elements. The text values are
# modified in place.
def strip_whitespace(el)
  children = el.children
  return if children.empty?
  leading, trailing = children.first, children.last
  leading.value.lstrip! if leading.type == :text
  trailing.value.rstrip! if trailing.type == :text
end
318
+
319
# Remove whitespace-only text children of +el+ that are the first child, the
# last child or located between two block-level elements.
def remove_whitespace_children(el)
  children = el.children
  last_index = children.length - 1
  kept = []
  children.each_with_index do |child, idx|
    removable = child.type == :text && child.value.strip.empty? &&
      (idx == 0 || idx == last_index ||
       (Element.category(children[idx - 1]) == :block && Element.category(children[idx + 1]) == :block))
    kept << child unless removable
  end
  el.children = kept
end
328
+
329
# Turn +el+ into an element of the given +type+: its options are replaced by
# +opts+ and its value is reset to +nil+.
def set_basics(el, type, opts = {})
  el.options.replace(opts)
  el.type = type
  el.value = nil
end
334
+
335
# Append the values of all text elements found in +el+ and its descendants to
# the string +raw+.
def extract_text(el, raw)
  if el.type == :text
    raw << el.value.to_s
  end
  el.children.each do |child|
    extract_text(child, raw)
  end
end
339
+
340
# A textarea stays an HTML element; its text is processed with whitespace
# preserved.
def convert_textarea(el)
  do_conversion = true
  preserve_text = true
  process_html_element(el, do_conversion, preserve_text)
end
343
+
344
# Convert an anchor into a native :a element if it has an href attribute;
# otherwise keep it as unconverted HTML element.
def convert_a(el)
  unless el.attr['href']
    return process_html_element(el, false)
  end
  set_basics(el, :a)
  process_children(el)
end
352
+
353
# Native element type for each emphasis-like HTML tag.
EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}

# Convert em/i/strong/b into the native emphasis element, unless the contained
# text starts or ends with whitespace; in that case the element is kept as
# unconverted HTML element.
def convert_em(el)
  text = ''
  extract_text(el, text)
  padded = text =~ /\A\s/ || text =~ /\s\z/
  return process_html_element(el, false) if padded
  set_basics(el, EMPHASIS_TYPE_MAP[el.value])
  process_children(el)
end
%w{b strong i}.each do |tag|
  alias_method("convert_#{tag}".to_sym, :convert_em)
end
367
+
368
# Convert h1..h6 into a native :header element. The level is taken from the
# digit of the tag name and the plain text of the header is stored in the
# :raw_text option.
def convert_h1(el)
  level = el.value[1..1].to_i
  set_basics(el, :header, :level => level)
  raw_text = el.options[:raw_text] = ''
  extract_text(el, raw_text)
  process_children(el)
end
(2..6).each do |level|
  alias_method("convert_h#{level}".to_sym, :convert_h1)
end
376
+
377
# Convert code (and, via alias, pre) into a native :codespan/:codeblock element
# when its whole content can be reduced to a single string; otherwise the
# element is kept as unconverted HTML element with whitespace preserved.
def convert_code(el)
  raw = ''
  extract_text(el, raw)
  result = process_text(raw, true)
  begin
    str = ''
    result.each do |c|
      case c.type
      when :text
        str << c.value
      when :entity
        # <, >, " and & are inserted literally, other entities as character.
        if [60, 62, 34, 38].include?(c.value.code_point)
          str << c.value.code_point.chr
        else
          str << c.value.char
        end
      when :smart_quote, :typographic_sym
        str << entity(c.value.to_s).char
      else
        raise "Bug - please report"
      end
    end
    result.clear
    result << Element.new(:text, str)
  rescue
    # Any failure leaves +result+ as it is, which selects the HTML fallback below.
  end

  if result.length <= 1 && result.first.type == :text
    if el.value == 'code'
      set_basics(el, :codespan)
      el.attr['class'].gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '') if el.attr['class']
    else
      set_basics(el, :codeblock)
      # A single nested code element may carry the language class; put it first.
      if el.children.size == 1 && el.children.first.value == 'code'
        value = (el.children.first.attr['class'] || '').scan(/\blanguage-\w+\b/).first
        el.attr['class'] = "#{value} #{el.attr['class']}".rstrip if value
      end
    end
    el.value = result.first.value
    el.children.clear
  else
    process_html_element(el, false, true)
  end
end
alias :convert_pre :convert_code
419
+
420
# Convert a table into a native :table element if it is a simple table (see
# #is_simple_table?); otherwise keep it as unconverted HTML element.
#
# For a converted table the column alignment is taken from the text-align
# styles of the cells of a row, all th cells become td cells and rows that are
# direct children of the table are wrapped in a tbody element.
def convert_table(el)
  # A table without any element children (e.g. "<table></table>") passes the
  # simple-table checks vacuously but leaves nothing to convert, which used to
  # end in a NoMethodError on nil below; keep such a table as HTML instead.
  has_rows = el.children.any? {|c| c.type != :text}
  if !has_rows || !is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  calc_alignment = lambda do |c|
    if c.type == :tr
      el.options[:alignment] = c.children.map do |td|
        if td.attr['style']
          # slice! removes the text-align declaration and sets $1 to its value.
          td.attr['style'].slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
          td.attr.delete('style') if td.attr['style'].strip.empty?
          $1 ? $1.to_sym : :default
        else
          :default
        end
      end
    else
      c.children.each {|cc| calc_alignment.call(cc)}
    end
  end
  calc_alignment.call(el)
  el.children.delete_if {|c| c.type == :html_element}

  change_th_type = lambda do |c|
    if c.type == :th
      c.type = :td
    else
      c.children.each {|cc| change_th_type.call(cc)}
    end
  end
  change_th_type.call(el)

  first_child = el.children.first
  if first_child && first_child.type == :tr
    tbody = Element.new(:tbody)
    tbody.children = el.children
    el.children = [tbody]
  end
end
462
+
463
# Check whether the HTML table +el+ can be represented as native table:
#
# - no cell contains block-level HTML elements,
# - all rows have the same number of cells,
# - all rows use the same cell alignment and none uses justify/inherit,
# - the table consists either of rows with td cells only or of thead (th
#   cells), tbody and tfoot (td cells) sections with at least one tbody.
def is_simple_table?(el)
  cell = lambda {|node| node.value == 'th' || node.value == 'td'}

  only_phrasing_content = lambda do |node|
    node.children.all? do |child|
      (child.type == :text || !HTML_BLOCK_ELEMENTS.include?(child.value)) && only_phrasing_content.call(child)
    end
  end

  # Proc.new (not lambda) on purpose: +return+ leaves is_simple_table? itself.
  check_cells = Proc.new do |node|
    if cell.call(node)
      return false unless only_phrasing_content.call(node)
    else
      node.children.each {|child| check_cells.call(child)}
    end
  end
  check_cells.call(el)

  # nr_cells holds the cell count of the rows seen so far, -1 after a mismatch.
  nr_cells = 0
  check_nr_cells = lambda do |node|
    if node.value != 'tr'
      node.children.each {|child| check_nr_cells.call(child)}
    else
      count = node.children.count {|child| cell.call(child)}
      nr_cells = (nr_cells == 0 ? count : -1) if count != nr_cells
    end
  end
  check_nr_cells.call(el)
  return false if nr_cells == -1

  alignment = nil
  # Proc.new again so that +return+ ends the whole check.
  check_alignment = Proc.new do |node|
    if node.value == 'tr'
      cur_alignment = node.children.select {|child| cell.call(child)}.map do |c|
        md = /text-align:\s+(center|left|right|justify|inherit)/.match(c.attr['style'].to_s)
        return false if md && %w{justify inherit}.include?(md[1])
        md.nil? ? :default : md[1]
      end
      alignment = cur_alignment if alignment.nil?
      return false if alignment != cur_alignment
    else
      node.children.each {|child| check_alignment.call(child)}
    end
  end
  check_alignment.call(el)

  check_rows = lambda do |section, type|
    section.children.all? do |r|
      (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}
    end
  end
  check_rows.call(el, 'td') ||
    (el.children.all? do |t|
       t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
         ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
     end && el.children.any? {|t| t.value == 'tbody'})
end
522
+
523
# A script element holding math is turned into a math element; every other
# script element is kept as HTML element.
def convert_script(el)
  if is_math_tag?(el)
    handle_math_tag(el)
  else
    process_html_element(el)
  end
end
530
+
531
# Truthy if the type attribute of +el+ contains "math/tex".
def is_math_tag?(el)
  mime_type = el.attr['type'].to_s
  mime_type =~ /\bmath\/tex\b/
end
534
+
535
# Turn the math script element +el+ into a :math element. It is block level if
# the type attribute contains "mode=display", span level otherwise. The value
# is the text of the first child with an optional (commented) CDATA wrapper
# removed; the type attribute is dropped.
def handle_math_tag(el)
  set_basics(el, :math, :category => (el.attr['type'] =~ /mode=display/ ? :block : :span))
  body = el.children.shift
  # An element without any child has an empty body; calling #value on nil
  # would raise a NoMethodError.
  el.value = body ? body.value.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1') : ''
  el.attr.delete('type')
end
540
+
541
+ end
542
+
543
+ include Parser
544
+
545
# Parse the source string provided on initialization as HTML document.
#
# Leading processing instructions and comments are added to the root element,
# a doctype is skipped, the remainder is parsed as raw HTML and finally the
# resulting tree is run through the ElementConverter.
def parse
  @stack, @tree = [], @root
  @src = Kramdown::Utils::StringScanner.new(adapt_source(source))

  prolog_done = false
  until prolog_done
    if (instruction = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/))
      @tree.children << Element.new(:xml_pi, instruction.strip, nil, :category => :block)
    elsif @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
      # ignore the doctype
    elsif (comment = @src.scan(/\s*#{HTML_COMMENT_RE}/))
      @tree.children << Element.new(:xml_comment, comment.strip, nil, :category => :block)
    else
      prolog_done = true
    end
  end

  # Recurse into every element that is not closed and whose body still has to
  # be handled.
  tag_handler = lambda do |c, closed, handle_body|
    parse_raw_html(c, &tag_handler) if !closed && handle_body
  end
  parse_raw_html(@tree, &tag_handler)

  ElementConverter.convert(@tree)
end
569
+
570
+ end
571
+
572
+ end
573
+
574
+ end
575
+
@@ -0,0 +1,67 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2014 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ module Kramdown
11
+ module Parser
12
+ class Kramdown
13
+
14
+ ABBREV_DEFINITION_START = /^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/
15
+
16
# Parse the abbreviation definition at the current location.
#
# The definition is stored in the :abbrev_defs option of the root element
# (an existing definition with the same ID is overwritten with a warning) and
# an :eob element is added to the tree.
def parse_abbrev_definition
  start_line_number = @src.current_line_number
  @src.pos += @src.matched_size
  abbrev_id, abbrev_text = @src[1], @src[2]
  abbrev_text.strip!

  definitions = @root.options[:abbrev_defs]
  if definitions[abbrev_id]
    warning("Duplicate abbreviation ID '#{abbrev_id}' on line #{start_line_number} - overwriting")
  end
  definitions[abbrev_id] = abbrev_text

  @tree.children << Element.new(:eob, :abbrev_def)
  true
end
define_parser(:abbrev_definition, ABBREV_DEFINITION_START)
28
+
29
# Replace the abbreviation text with elements.
#
# Every text element below +el+ that contains a defined abbreviation is split
# into :text and :abbreviation elements. +regexps+ is only passed on recursive
# calls: its first entry matches any abbreviation, its last entry finds the
# position in front of an abbreviation that stands on word boundaries.
def replace_abbreviations(el, regexps = nil)
  return if @root.options[:abbrev_defs].empty?
  if !regexps
    # Longest abbreviations first so that they win over shorter ones.
    sorted_abbrevs = @root.options[:abbrev_defs].keys.sort {|a,b| b.length <=> a.length}
    regexps = [Regexp.union(*sorted_abbrevs.map {|k| /#{Regexp.escape(k)}/})]
    regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
  end
  abbrev_re, boundary_re = regexps.first, regexps.last

  el.children.map! do |child|
    if child.type != :text
      replace_abbreviations(child, regexps)
      child
    elsif child.value !~ abbrev_re
      child
    else
      parts = []
      strscan = Kramdown::Utils::StringScanner.new(child.value, child.options[:location])
      text_lineno = strscan.current_line_number
      while (leading = strscan.scan_until(boundary_re))
        abbr_lineno = strscan.current_line_number
        # Succeeds directly at the begin of a line or if the abbreviation
        # itself starts with a \W character.
        abbr = strscan.scan(abbrev_re)
        if abbr.nil?
          # Otherwise the separating character belongs to the preceding text.
          leading << strscan.scan(/\W|^/)
          abbr = strscan.scan(abbrev_re)
        end
        parts << Element.new(:text, leading, nil, :location => text_lineno)
        parts << Element.new(:abbreviation, abbr, nil, :location => abbr_lineno)
        text_lineno = strscan.current_line_number
      end
      parts << Element.new(:text, strscan.rest, nil, :location => text_lineno)
    end
  end.flatten!
end
64
+
65
+ end
66
+ end
67
+ end