motion-kramdown 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +84 -0
  3. data/lib/kramdown/compatibility.rb +36 -0
  4. data/lib/kramdown/converter/base.rb +259 -0
  5. data/lib/kramdown/converter/html.rb +461 -0
  6. data/lib/kramdown/converter/kramdown.rb +423 -0
  7. data/lib/kramdown/converter/latex.rb +600 -0
  8. data/lib/kramdown/converter/math_engine/itex2mml.rb +39 -0
  9. data/lib/kramdown/converter/math_engine/mathjax.rb +33 -0
  10. data/lib/kramdown/converter/math_engine/ritex.rb +38 -0
  11. data/lib/kramdown/converter/pdf.rb +624 -0
  12. data/lib/kramdown/converter/remove_html_tags.rb +53 -0
  13. data/lib/kramdown/converter/syntax_highlighter/coderay.rb +78 -0
  14. data/lib/kramdown/converter/syntax_highlighter/rouge.rb +37 -0
  15. data/lib/kramdown/converter/toc.rb +69 -0
  16. data/lib/kramdown/converter.rb +69 -0
  17. data/lib/kramdown/document.rb +144 -0
  18. data/lib/kramdown/element.rb +515 -0
  19. data/lib/kramdown/error.rb +17 -0
  20. data/lib/kramdown/options.rb +584 -0
  21. data/lib/kramdown/parser/base.rb +130 -0
  22. data/lib/kramdown/parser/gfm.rb +55 -0
  23. data/lib/kramdown/parser/html.rb +575 -0
  24. data/lib/kramdown/parser/kramdown/abbreviation.rb +67 -0
  25. data/lib/kramdown/parser/kramdown/autolink.rb +37 -0
  26. data/lib/kramdown/parser/kramdown/blank_line.rb +30 -0
  27. data/lib/kramdown/parser/kramdown/block_boundary.rb +33 -0
  28. data/lib/kramdown/parser/kramdown/blockquote.rb +39 -0
  29. data/lib/kramdown/parser/kramdown/codeblock.rb +56 -0
  30. data/lib/kramdown/parser/kramdown/codespan.rb +44 -0
  31. data/lib/kramdown/parser/kramdown/emphasis.rb +61 -0
  32. data/lib/kramdown/parser/kramdown/eob.rb +26 -0
  33. data/lib/kramdown/parser/kramdown/escaped_chars.rb +25 -0
  34. data/lib/kramdown/parser/kramdown/extensions.rb +201 -0
  35. data/lib/kramdown/parser/kramdown/footnote.rb +56 -0
  36. data/lib/kramdown/parser/kramdown/header.rb +59 -0
  37. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +27 -0
  38. data/lib/kramdown/parser/kramdown/html.rb +160 -0
  39. data/lib/kramdown/parser/kramdown/html_entity.rb +33 -0
  40. data/lib/kramdown/parser/kramdown/line_break.rb +25 -0
  41. data/lib/kramdown/parser/kramdown/link.rb +139 -0
  42. data/lib/kramdown/parser/kramdown/list.rb +256 -0
  43. data/lib/kramdown/parser/kramdown/math.rb +54 -0
  44. data/lib/kramdown/parser/kramdown/paragraph.rb +54 -0
  45. data/lib/kramdown/parser/kramdown/smart_quotes.rb +174 -0
  46. data/lib/kramdown/parser/kramdown/table.rb +171 -0
  47. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +44 -0
  48. data/lib/kramdown/parser/kramdown.rb +359 -0
  49. data/lib/kramdown/parser/markdown.rb +56 -0
  50. data/lib/kramdown/parser.rb +27 -0
  51. data/lib/kramdown/utils/configurable.rb +44 -0
  52. data/lib/kramdown/utils/entities.rb +347 -0
  53. data/lib/kramdown/utils/html.rb +75 -0
  54. data/lib/kramdown/utils/ordered_hash.rb +87 -0
  55. data/lib/kramdown/utils/string_scanner.rb +74 -0
  56. data/lib/kramdown/utils/unidecoder.rb +51 -0
  57. data/lib/kramdown/utils.rb +58 -0
  58. data/lib/kramdown/version.rb +15 -0
  59. data/lib/kramdown.rb +10 -0
  60. data/lib/motion-kramdown.rb +47 -0
  61. data/lib/rubymotion/encodings.rb +37 -0
  62. data/lib/rubymotion/rexml_shim.rb +25 -0
  63. data/lib/rubymotion/set.rb +1349 -0
  64. data/lib/rubymotion/version.rb +6 -0
  65. data/spec/document_tree.rb +48 -0
  66. data/spec/gfm_to_html.rb +95 -0
  67. data/spec/helpers/it_behaves_like.rb +27 -0
  68. data/spec/helpers/option_file.rb +46 -0
  69. data/spec/helpers/spec_options.rb +37 -0
  70. data/spec/helpers/tidy.rb +12 -0
  71. data/spec/html_to_html.rb +40 -0
  72. data/spec/html_to_kramdown_to_html.rb +46 -0
  73. data/spec/kramdown_to_xxx.rb +40 -0
  74. data/spec/test_location.rb +203 -0
  75. data/spec/test_string_scanner_kramdown.rb +19 -0
  76. data/spec/text_to_kramdown_to_html.rb +52 -0
  77. data/spec/text_to_latex.rb +33 -0
  78. metadata +164 -0
@@ -0,0 +1,55 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # RM require 'kramdown/parser/kramdown'
4
+
5
+ module Kramdown
6
+ module Parser
7
+ class GFM < Kramdown::Parser::Kramdown
8
+
9
+ def initialize(source, options)
10
+ super
11
+ @span_parsers.delete(:line_break) if @options[:hard_wrap]
12
+ {:codeblock_fenced => :codeblock_fenced_gfm,
13
+ :atx_header => :atx_header_gfm}.each do |current, replacement|
14
+ i = @block_parsers.index(current)
15
+ @block_parsers.delete(current)
16
+ @block_parsers.insert(i, replacement)
17
+ end
18
+ end
19
+
20
+ def parse
21
+ super
22
+ add_hard_line_breaks(@root) if @options[:hard_wrap]
23
+ end
24
+
25
+ def add_hard_line_breaks(element)
26
+ element.children.map! do |child|
27
+ if child.type == :text && child.value =~ /\n/
28
+ children = []
29
+ lines = child.value.split(/\n/, -1)
30
+ omit_trailing_br = (Kramdown::Element.category(element) == :block && element.children[-1] == child &&
31
+ lines[-1].empty?)
32
+ lines.each_with_index do |line, index|
33
+ children << Element.new(:text, (index > 0 ? "\n#{line}" : line))
34
+ children << Element.new(:br) if index < lines.size - 2 ||
35
+ (index == lines.size - 2 && !omit_trailing_br)
36
+ end
37
+ children
38
+ elsif child.type == :html_element
39
+ child
40
+ else
41
+ add_hard_line_breaks(child)
42
+ child
43
+ end
44
+ end.flatten!
45
+ end
46
+
47
+ ATX_HEADER_START = /^\#{1,6}\s/
48
+ define_parser(:atx_header_gfm, ATX_HEADER_START, nil, 'parse_atx_header')
49
+
50
+ FENCED_CODEBLOCK_MATCH = /^(([~`]){3,})\s*?(\w+)?\s*?\n(.*?)^\1\2*\s*?\n/m
51
+ define_parser(:codeblock_fenced_gfm, /^[~`]{3,}/, nil, 'parse_codeblock_fenced')
52
+
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,575 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2014 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ # RM require 'rexml/parsers/baseparser'
11
+ # RM require 'strscan'
12
+ # RM require 'kramdown/utils'
13
+
14
+ module Kramdown
15
+
16
+ module Parser
17
+
18
+ # Used for parsing a HTML document.
19
+ #
20
+ # The parsing code is in the Parser module that can also be used by other parsers.
21
+ class Html < Base
22
+
23
+ # Contains all constants that are used when parsing.
24
+ module Constants
25
+
26
+ #:stopdoc:
27
+ # The following regexps are based on the ones used by REXML, with some slight modifications.
28
+ HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
29
+ HTML_COMMENT_RE = /<!--(.*?)-->/m
30
+ HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
31
+ HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(["'])(.*?)\2)?/m
32
+ HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(["']).*?\3)?)*)\s*(\/)?>/m
33
+ HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
34
+ HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
35
+
36
+ HTML_CONTENT_MODEL_BLOCK = %w{address applet article aside blockquote body
37
+ dd details div dl fieldset figure figcaption footer form header hgroup iframe li map menu nav
38
+ noscript object section summary td}
39
+ HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym b bdo big button cite caption del dfn dt em
40
+ h1 h2 h3 h4 h5 h6 i ins label legend optgroup p q rb rbc
41
+ rp rt rtc ruby select small span strong sub sup th tt}
42
+ HTML_CONTENT_MODEL_RAW = %w{script style math option textarea pre code kbd samp var}
43
+ # The following elements are also parsed as raw since they need child elements that cannot
44
+ # be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol
45
+
46
+ HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
47
+ HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
48
+ HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span}
49
+ HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw}
50
+
51
+ # Some HTML elements like script belong to both categories (i.e. are valid in block and
52
+ # span HTML) and don't appear therefore!
53
+ # script, textarea
54
+ HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
55
+ ins kbd label option q rb rbc rp rt rtc ruby samp select small span
56
+ strong sub sup tt u var}
57
+ HTML_BLOCK_ELEMENTS = %w{address article aside applet body blockquote caption col colgroup dd div dl dt fieldset
58
+ figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend menu
59
+ li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul}
60
+ HTML_ELEMENTS_WITHOUT_BODY = %w{area base br col command embed hr img input keygen link meta param source track wbr}
61
+ end
62
+
63
+
64
+ # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
65
+ # functionality. The only thing that must be provided by the class are instance variable
66
+ # @stack for storing the needed state and @src (instance of StringScanner) for the actual
67
+ # parsing.
68
+ module Parser
69
+
70
+ include Constants
71
+
72
+ # Process the HTML start tag that has already been scanned/checked via @src.
73
+ #
74
+ # Does the common processing steps and then yields to the caller for further processing
75
+ # (first parameter is the created element; the second parameter is +true+ if the HTML
76
+ # element is already closed, ie. contains no body; the third parameter specifies whether the
77
+ # body - and the end tag - need to be handled in case closed=false).
78
+ def handle_html_start_tag(line = nil) # :yields: el, closed, handle_body
79
+ name = @src[1].downcase
80
+ closed = !@src[4].nil?
81
+ attrs = Utils::OrderedHash.new
82
+ @src[2].scan(HTML_ATTRIBUTE_RE).each {|attr,sep,val| attrs[attr.downcase] = val || ""}
83
+
84
+ el = Element.new(:html_element, name, attrs, :category => :block)
85
+ el.options[:location] = line if line
86
+ @tree.children << el
87
+
88
+ if !closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
89
+ warning("The HTML tag '#{el.value}' on line #{line} cannot have any content - auto-closing it")
90
+ closed = true
91
+ end
92
+ if name == 'script' || name == 'style'
93
+ handle_raw_html_tag(name)
94
+ yield(el, false, false)
95
+ else
96
+ yield(el, closed, true)
97
+ end
98
+ end
99
+
100
+ # Handle the raw HTML tag at the current position.
101
+ def handle_raw_html_tag(name)
102
+ curpos = @src.pos
103
+ if @src.scan_until(/(?=<\/#{name}\s*>)/mi)
104
+ add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
105
+ @src.scan(HTML_TAG_CLOSE_RE)
106
+ else
107
+ add_text(@src.rest, @tree.children.last, :raw)
108
+ @src.terminate
109
+ warning("Found no end tag for '#{name}' - auto-closing it")
110
+ end
111
+ end
112
+
113
+ HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
114
+
115
+ # Parse raw HTML from the current source position, storing the found elements in +el+.
116
+ # Parsing continues until one of the following criteria is fulfilled:
117
+ #
118
+ # - The end of the document is reached.
119
+ # - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
120
+ # element).
121
+ #
122
+ # When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
123
+ # providing the block given to this method.
124
# Parse raw HTML starting at the current position of @src and append the resulting elements to
# +el+. Parsing stops at the end of the document or when the closing tag matching +el+ is found.
#
# Start tags are delegated to #handle_html_start_tag together with the given block.
def parse_raw_html(el, &block)
  # Make +el+ the current tree for the duration of this call; the previous tree is restored below.
  @stack.push(@tree)
  @tree = el

  done = false
  while !@src.eos? && !done
    if result = @src.scan_until(HTML_RAW_START)
      # Everything in front of the next potential tag/comment/instruction is plain text.
      add_text(result, @tree, :text)
      line = @src.current_line_number
      if result = @src.scan(HTML_COMMENT_RE)
        @tree.children << Element.new(:xml_comment, result, nil, :category => :block, :location => line)
      elsif result = @src.scan(HTML_INSTRUCTION_RE)
        @tree.children << Element.new(:xml_pi, result, nil, :category => :block, :location => line)
      elsif @src.scan(HTML_TAG_RE)
        # Forward the line number to every implementation that can take an argument. A method
        # declared as `handle_html_start_tag(line = nil)` reports an arity of -1, so comparing
        # against exactly 1 would never pass the line to it; only a zero-arity (legacy) override
        # is called without the line.
        if method(:handle_html_start_tag).arity.abs >= 1
          handle_html_start_tag(line, &block)
        else
          handle_html_start_tag(&block) # DEPRECATED: method needs to accept line number in 2.0
        end
      elsif @src.scan(HTML_TAG_CLOSE_RE)
        if @tree.value == @src[1].downcase
          done = true
        else
          warning("Found invalidly used HTML closing tag for '#{@src[1].downcase}' on line #{line} - ignoring it")
        end
      else
        # HTML_RAW_START matched but nothing recognizable follows: consume one character as text.
        add_text(@src.getch, @tree, :text)
      end
    else
      # No further markup: the rest of the source is text and the element is auto-closed.
      add_text(@src.rest, @tree, :text)
      @src.terminate
      warning("Found no end tag for '#{@tree.value}' on line #{@tree.options[:location]} - auto-closing it") if @tree.type == :html_element
      done = true
    end
  end

  @tree = @stack.pop
end
162
+
163
+ end
164
+
165
+
166
+ # Converts HTML elements to native elements if possible.
167
+ class ElementConverter
168
+
169
+ # :stopdoc:
170
+
171
+ include Constants
172
+ include ::Kramdown::Utils::Entities
173
+
174
+ REMOVE_TEXT_CHILDREN = %w{html head hgroup ol ul dl table colgroup tbody thead tfoot tr select optgroup}
175
+ WRAP_TEXT_CHILDREN = %w{body section nav article aside header footer address div li dd blockquote figure
176
+ figcaption fieldset form}
177
+ REMOVE_WHITESPACE_CHILDREN = %w{body section nav article aside header footer address
178
+ div li dd blockquote figure figcaption td th fieldset form}
179
+ STRIP_WHITESPACE = %w{address article aside blockquote body caption dd div dl dt fieldset figcaption form footer
180
+ header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
181
+ SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
182
+
183
+ def initialize(root)
184
+ @root = root
185
+ end
186
+
187
+ def self.convert(root, el = root)
188
+ new(root).process(el)
189
+ end
190
+
191
+ # Convert the element +el+ and its children.
192
+ def process(el, do_conversion = true, preserve_text = false, parent = nil)
193
+ case el.type
194
+ when :xml_comment, :xml_pi
195
+ ptype = if parent.nil?
196
+ 'div'
197
+ else
198
+ case parent.type
199
+ when :html_element then parent.value
200
+ when :code_span then 'code'
201
+ when :code_block then 'pre'
202
+ when :header then 'h1'
203
+ else parent.type.to_s
204
+ end
205
+ end
206
+ el.options.replace({:category => (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block)})
207
+ return
208
+ when :html_element
209
+ when :root
210
+ el.children.each {|c| process(c)}
211
+ remove_whitespace_children(el)
212
+ return
213
+ else return
214
+ end
215
+
216
+ mname = "convert_#{el.value}"
217
+ if do_conversion && self.class.method_defined?(mname)
218
+ send(mname, el)
219
+ else
220
+ type = el.value
221
+ remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(type)
222
+
223
+ if do_conversion && SIMPLE_ELEMENTS.include?(type)
224
+ set_basics(el, type.intern)
225
+ process_children(el, do_conversion, preserve_text)
226
+ else
227
+ process_html_element(el, do_conversion, preserve_text)
228
+ end
229
+
230
+ if do_conversion
231
+ strip_whitespace(el) if STRIP_WHITESPACE.include?(type)
232
+ remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(type)
233
+ wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(type)
234
+ end
235
+ end
236
+ end
237
+
238
+ def process_children(el, do_conversion = true, preserve_text = false)
239
+ el.children.map! do |c|
240
+ if c.type == :text
241
+ process_text(c.value, preserve_text || !do_conversion)
242
+ else
243
+ process(c, do_conversion, preserve_text, el)
244
+ c
245
+ end
246
+ end.flatten!
247
+ end
248
+
249
+ # Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
250
+ # entities into entity elements.
251
# Split the HTML text +raw+ into :text, :entity, :smart_quote and :typographic_sym elements.
#
# raw::      the text to process; it is not modified.
# preserve:: when +false+, every run of whitespace is collapsed into a single space first.
#
# Returns an array of elements.
def process_text(raw, preserve = false)
  # Collapse whitespace on a copy so the caller's string (possibly frozen) is left untouched.
  raw = raw.gsub(/\s+/, ' ') unless preserve
  src = Kramdown::Utils::StringScanner.new(raw)
  result = []
  until src.eos?
    if tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/)
      result << Element.new(:text, tmp)
      src.scan(HTML_ENTITY_RE)
      # Named entity (String), decimal code point or hexadecimal code point (Integer).
      val = src[1] || (src[2] && src[2].to_i) || src[3].hex
      result << if %w{lsquo rsquo ldquo rdquo}.include?(val)
                  Element.new(:smart_quote, val.intern)
                elsif %w{mdash ndash hellip laquo raquo}.include?(val)
                  Element.new(:typographic_sym, val.intern)
                else
                  begin
                    Element.new(:entity, entity(val), nil, :original => src.matched)
                  rescue ::Kramdown::Error
                    # Unknown entity: rewind to just after the '&', emit it as an ampersand
                    # entity and let the remainder be scanned as ordinary text.
                    src.pos -= src.matched_size - 1
                    Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
                  end
                end
    else
      result << Element.new(:text, src.rest)
      src.terminate
    end
  end
  result
end
279
+
280
+ def process_html_element(el, do_conversion = true, preserve_text = false)
281
+ el.options.replace(:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
282
+ :content_model => (do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw))
283
+ process_children(el, do_conversion, preserve_text)
284
+ end
285
+
286
+ def remove_text_children(el)
287
+ el.children.delete_if {|c| c.type == :text}
288
+ end
289
+
290
# Wrap consecutive runs of non-block children (and text children) of +el+ in transparent :p
# elements; children whose category is :block are kept as they are. Replaces +el.children+ with
# the regrouped list.
def wrap_text_children(el)
  wrapped = []
  last_is_p = false
  el.children.each do |c|
    if Element.category(c) != :block || c.type == :text
      # Open a new transparent paragraph unless the previous child already went into one.
      unless last_is_p
        wrapped << Element.new(:p, nil, nil, :transparent => true)
        last_is_p = true
      end
      wrapped.last.children << c
    else
      wrapped << c
      last_is_p = false
    end
  end
  el.children = wrapped
end
308
+
309
+ def strip_whitespace(el)
310
+ return if el.children.empty?
311
+ if el.children.first.type == :text
312
+ el.children.first.value.lstrip!
313
+ end
314
+ if el.children.last.type == :text
315
+ el.children.last.value.rstrip!
316
+ end
317
+ end
318
+
319
+ def remove_whitespace_children(el)
320
+ i = -1
321
+ el.children = el.children.reject do |c|
322
+ i += 1
323
+ c.type == :text && c.value.strip.empty? &&
324
+ (i == 0 || i == el.children.length - 1 || (Element.category(el.children[i-1]) == :block &&
325
+ Element.category(el.children[i+1]) == :block))
326
+ end
327
+ end
328
+
329
+ def set_basics(el, type, opts = {})
330
+ el.type = type
331
+ el.options.replace(opts)
332
+ el.value = nil
333
+ end
334
+
335
+ def extract_text(el, raw)
336
+ raw << el.value.to_s if el.type == :text
337
+ el.children.each {|c| extract_text(c, raw)}
338
+ end
339
+
340
+ def convert_textarea(el)
341
+ process_html_element(el, true, true)
342
+ end
343
+
344
+ def convert_a(el)
345
+ if el.attr['href']
346
+ set_basics(el, :a)
347
+ process_children(el)
348
+ else
349
+ process_html_element(el, false)
350
+ end
351
+ end
352
+
353
+ EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}
354
+ def convert_em(el)
355
+ text = ''
356
+ extract_text(el, text)
357
+ if text =~ /\A\s/ || text =~ /\s\z/
358
+ process_html_element(el, false)
359
+ else
360
+ set_basics(el, EMPHASIS_TYPE_MAP[el.value])
361
+ process_children(el)
362
+ end
363
+ end
364
+ %w{b strong i}.each do |i|
365
+ alias_method("convert_#{i}".to_sym, :convert_em)
366
+ end
367
+
368
+ def convert_h1(el)
369
+ set_basics(el, :header, :level => el.value[1..1].to_i)
370
+ extract_text(el, el.options[:raw_text] = '')
371
+ process_children(el)
372
+ end
373
+ %w{h2 h3 h4 h5 h6}.each do |i|
374
+ alias_method("convert_#{i}".to_sym, :convert_h1)
375
+ end
376
+
377
+ def convert_code(el)
378
+ raw = ''
379
+ extract_text(el, raw)
380
+ result = process_text(raw, true)
381
+ begin
382
+ str = result.inject('') do |mem, c|
383
+ if c.type == :text
384
+ mem << c.value
385
+ elsif c.type == :entity
386
+ if [60, 62, 34, 38].include?(c.value.code_point)
387
+ mem << c.value.code_point.chr
388
+ else
389
+ mem << c.value.char
390
+ end
391
+ elsif c.type == :smart_quote || c.type == :typographic_sym
392
+ mem << entity(c.value.to_s).char
393
+ else
394
+ raise "Bug - please report"
395
+ end
396
+ end
397
+ result.clear
398
+ result << Element.new(:text, str)
399
+ rescue
400
+ end
401
+ if result.length > 1 || result.first.type != :text
402
+ process_html_element(el, false, true)
403
+ else
404
+ if el.value == 'code'
405
+ set_basics(el, :codespan)
406
+ el.attr['class'].gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '') if el.attr['class']
407
+ else
408
+ set_basics(el, :codeblock)
409
+ if el.children.size == 1 && el.children.first.value == 'code'
410
+ value = (el.children.first.attr['class'] || '').scan(/\blanguage-\w+\b/).first
411
+ el.attr['class'] = "#{value} #{el.attr['class']}".rstrip if value
412
+ end
413
+ end
414
+ el.value = result.first.value
415
+ el.children.clear
416
+ end
417
+ end
418
+ alias :convert_pre :convert_code
419
+
420
# Convert the HTML table +el+ into a native :table element when #is_simple_table? accepts it;
# otherwise keep it as an unconverted HTML element.
def convert_table(el)
  unless is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  # Derive the column alignment from the text-align style of the cells of each row (the last
  # visited row wins) and strip that declaration from the cells.
  calc_alignment = lambda do |c|
    if c.type == :tr
      el.options[:alignment] = c.children.map do |td|
        if td.attr['style']
          td.attr['style'].slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
          td.attr.delete('style') if td.attr['style'].strip.empty?
          $1 ? $1.to_sym : :default
        else
          :default
        end
      end
    else
      c.children.each {|cc| calc_alignment.call(cc)}
    end
  end
  calc_alignment.call(el)
  el.children.delete_if {|c| c.type == :html_element}

  # Header cells are represented as :td elements as well.
  change_th_type = lambda do |c|
    if c.type == :th
      c.type = :td
    else
      c.children.each {|cc| change_th_type.call(cc)}
    end
  end
  change_th_type.call(el)

  # Rows directly below the table get an explicit :tbody wrapper. A table without any children
  # has no first child, so guard against nil instead of raising NoMethodError.
  first_child = el.children.first
  if first_child && first_child.type == :tr
    tbody = Element.new(:tbody)
    tbody.children = el.children
    el.children = [tbody]
  end
end
462
+
463
+ def is_simple_table?(el)
464
+ only_phrasing_content = lambda do |c|
465
+ c.children.all? do |cc|
466
+ (cc.type == :text || !HTML_BLOCK_ELEMENTS.include?(cc.value)) && only_phrasing_content.call(cc)
467
+ end
468
+ end
469
+ check_cells = Proc.new do |c|
470
+ if c.value == 'th' || c.value == 'td'
471
+ return false if !only_phrasing_content.call(c)
472
+ else
473
+ c.children.each {|cc| check_cells.call(cc)}
474
+ end
475
+ end
476
+ check_cells.call(el)
477
+
478
+ nr_cells = 0
479
+ check_nr_cells = lambda do |t|
480
+ if t.value == 'tr'
481
+ count = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.length
482
+ if count != nr_cells
483
+ if nr_cells == 0
484
+ nr_cells = count
485
+ else
486
+ nr_cells = -1
487
+ break
488
+ end
489
+ end
490
+ else
491
+ t.children.each {|cc| check_nr_cells.call(cc)}
492
+ end
493
+ end
494
+ check_nr_cells.call(el)
495
+ return false if nr_cells == -1
496
+
497
+ alignment = nil
498
+ check_alignment = Proc.new do |t|
499
+ if t.value == 'tr'
500
+ cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.map do |cell|
501
+ md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
502
+ return false if md && (md[1] == 'justify' || md[1] == 'inherit')
503
+ md.nil? ? :default : md[1]
504
+ end
505
+ alignment = cur_alignment if alignment.nil?
506
+ return false if alignment != cur_alignment
507
+ else
508
+ t.children.each {|cc| check_alignment.call(cc)}
509
+ end
510
+ end
511
+ check_alignment.call(el)
512
+
513
+ check_rows = lambda do |t, type|
514
+ t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}}
515
+ end
516
+ check_rows.call(el, 'td') ||
517
+ (el.children.all? do |t|
518
+ t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
519
+ ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
520
+ end && el.children.any? {|t| t.value == 'tbody'})
521
+ end
522
+
523
+ def convert_script(el)
524
+ if !is_math_tag?(el)
525
+ process_html_element(el)
526
+ else
527
+ handle_math_tag(el)
528
+ end
529
+ end
530
+
531
+ def is_math_tag?(el)
532
+ el.attr['type'].to_s =~ /\bmath\/tex\b/
533
+ end
534
+
535
# Turn the math script element +el+ into a :math element. The category is :block when the type
# attribute contains "mode=display" and :span otherwise; the value is the text of the first
# child with an optional (commented) CDATA wrapper removed.
def handle_math_tag(el)
  set_basics(el, :math, :category => (el.attr['type'] =~ /mode=display/ ? :block : :span))
  # An element without any child (e.g. an empty script body) yields an empty formula instead of
  # raising NoMethodError on nil.
  first_child = el.children.shift
  source = first_child ? first_child.value : ''
  el.value = source.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1')
  el.attr.delete('type')
end
540
+
541
+ end
542
+
543
+ include Parser
544
+
545
+ # Parse the source string provided on initialization as HTML document.
546
+ def parse
547
+ @stack, @tree = [], @root
548
+ @src = Kramdown::Utils::StringScanner.new(adapt_source(source))
549
+
550
+ while true
551
+ if result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/)
552
+ @tree.children << Element.new(:xml_pi, result.strip, nil, :category => :block)
553
+ elsif result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
554
+ # ignore the doctype
555
+ elsif result = @src.scan(/\s*#{HTML_COMMENT_RE}/)
556
+ @tree.children << Element.new(:xml_comment, result.strip, nil, :category => :block)
557
+ else
558
+ break
559
+ end
560
+ end
561
+
562
+ tag_handler = lambda do |c, closed, handle_body|
563
+ parse_raw_html(c, &tag_handler) if !closed && handle_body
564
+ end
565
+ parse_raw_html(@tree, &tag_handler)
566
+
567
+ ElementConverter.convert(@tree)
568
+ end
569
+
570
+ end
571
+
572
+ end
573
+
574
+ end
575
+
@@ -0,0 +1,67 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009-2014 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown which is licensed under the MIT.
7
+ #++
8
+ #
9
+
10
+ module Kramdown
11
+ module Parser
12
+ class Kramdown
13
+
14
+ ABBREV_DEFINITION_START = /^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/
15
+
16
+ # Parse the abbreviation definition at the current location.
17
+ def parse_abbrev_definition
18
+ start_line_number = @src.current_line_number
19
+ @src.pos += @src.matched_size
20
+ abbrev_id, abbrev_text = @src[1], @src[2]
21
+ abbrev_text.strip!
22
+ warning("Duplicate abbreviation ID '#{abbrev_id}' on line #{start_line_number} - overwriting") if @root.options[:abbrev_defs][abbrev_id]
23
+ @root.options[:abbrev_defs][abbrev_id] = abbrev_text
24
+ @tree.children << Element.new(:eob, :abbrev_def)
25
+ true
26
+ end
27
+ define_parser(:abbrev_definition, ABBREV_DEFINITION_START)
28
+
29
+ # Replace the abbreviation text with elements.
30
+ def replace_abbreviations(el, regexps = nil)
31
+ return if @root.options[:abbrev_defs].empty?
32
+ if !regexps
33
+ sorted_abbrevs = @root.options[:abbrev_defs].keys.sort {|a,b| b.length <=> a.length}
34
+ regexps = [Regexp.union(*sorted_abbrevs.map {|k| /#{Regexp.escape(k)}/})]
35
+ regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
36
+ end
37
+ el.children.map! do |child|
38
+ if child.type == :text
39
+ if child.value =~ regexps.first
40
+ result = []
41
+ strscan = Kramdown::Utils::StringScanner.new(child.value, child.options[:location])
42
+ text_lineno = strscan.current_line_number
43
+ while temp = strscan.scan_until(regexps.last)
44
+ abbr_lineno = strscan.current_line_number
45
+ abbr = strscan.scan(regexps.first) # begin of line case of abbr with \W char as first one
46
+ if abbr.nil?
47
+ temp << strscan.scan(/\W|^/)
48
+ abbr = strscan.scan(regexps.first)
49
+ end
50
+ result << Element.new(:text, temp, nil, :location => text_lineno)
51
+ result << Element.new(:abbreviation, abbr, nil, :location => abbr_lineno)
52
+ text_lineno = strscan.current_line_number
53
+ end
54
+ result << Element.new(:text, strscan.rest, nil, :location => text_lineno)
55
+ else
56
+ child
57
+ end
58
+ else
59
+ replace_abbreviations(child, regexps)
60
+ child
61
+ end
62
+ end.flatten!
63
+ end
64
+
65
+ end
66
+ end
67
+ end