motion-kramdown 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +84 -0
- data/lib/kramdown/compatibility.rb +36 -0
- data/lib/kramdown/converter/base.rb +259 -0
- data/lib/kramdown/converter/html.rb +461 -0
- data/lib/kramdown/converter/kramdown.rb +423 -0
- data/lib/kramdown/converter/latex.rb +600 -0
- data/lib/kramdown/converter/math_engine/itex2mml.rb +39 -0
- data/lib/kramdown/converter/math_engine/mathjax.rb +33 -0
- data/lib/kramdown/converter/math_engine/ritex.rb +38 -0
- data/lib/kramdown/converter/pdf.rb +624 -0
- data/lib/kramdown/converter/remove_html_tags.rb +53 -0
- data/lib/kramdown/converter/syntax_highlighter/coderay.rb +78 -0
- data/lib/kramdown/converter/syntax_highlighter/rouge.rb +37 -0
- data/lib/kramdown/converter/toc.rb +69 -0
- data/lib/kramdown/converter.rb +69 -0
- data/lib/kramdown/document.rb +144 -0
- data/lib/kramdown/element.rb +515 -0
- data/lib/kramdown/error.rb +17 -0
- data/lib/kramdown/options.rb +584 -0
- data/lib/kramdown/parser/base.rb +130 -0
- data/lib/kramdown/parser/gfm.rb +55 -0
- data/lib/kramdown/parser/html.rb +575 -0
- data/lib/kramdown/parser/kramdown/abbreviation.rb +67 -0
- data/lib/kramdown/parser/kramdown/autolink.rb +37 -0
- data/lib/kramdown/parser/kramdown/blank_line.rb +30 -0
- data/lib/kramdown/parser/kramdown/block_boundary.rb +33 -0
- data/lib/kramdown/parser/kramdown/blockquote.rb +39 -0
- data/lib/kramdown/parser/kramdown/codeblock.rb +56 -0
- data/lib/kramdown/parser/kramdown/codespan.rb +44 -0
- data/lib/kramdown/parser/kramdown/emphasis.rb +61 -0
- data/lib/kramdown/parser/kramdown/eob.rb +26 -0
- data/lib/kramdown/parser/kramdown/escaped_chars.rb +25 -0
- data/lib/kramdown/parser/kramdown/extensions.rb +201 -0
- data/lib/kramdown/parser/kramdown/footnote.rb +56 -0
- data/lib/kramdown/parser/kramdown/header.rb +59 -0
- data/lib/kramdown/parser/kramdown/horizontal_rule.rb +27 -0
- data/lib/kramdown/parser/kramdown/html.rb +160 -0
- data/lib/kramdown/parser/kramdown/html_entity.rb +33 -0
- data/lib/kramdown/parser/kramdown/line_break.rb +25 -0
- data/lib/kramdown/parser/kramdown/link.rb +139 -0
- data/lib/kramdown/parser/kramdown/list.rb +256 -0
- data/lib/kramdown/parser/kramdown/math.rb +54 -0
- data/lib/kramdown/parser/kramdown/paragraph.rb +54 -0
- data/lib/kramdown/parser/kramdown/smart_quotes.rb +174 -0
- data/lib/kramdown/parser/kramdown/table.rb +171 -0
- data/lib/kramdown/parser/kramdown/typographic_symbol.rb +44 -0
- data/lib/kramdown/parser/kramdown.rb +359 -0
- data/lib/kramdown/parser/markdown.rb +56 -0
- data/lib/kramdown/parser.rb +27 -0
- data/lib/kramdown/utils/configurable.rb +44 -0
- data/lib/kramdown/utils/entities.rb +347 -0
- data/lib/kramdown/utils/html.rb +75 -0
- data/lib/kramdown/utils/ordered_hash.rb +87 -0
- data/lib/kramdown/utils/string_scanner.rb +74 -0
- data/lib/kramdown/utils/unidecoder.rb +51 -0
- data/lib/kramdown/utils.rb +58 -0
- data/lib/kramdown/version.rb +15 -0
- data/lib/kramdown.rb +10 -0
- data/lib/motion-kramdown.rb +47 -0
- data/lib/rubymotion/encodings.rb +37 -0
- data/lib/rubymotion/rexml_shim.rb +25 -0
- data/lib/rubymotion/set.rb +1349 -0
- data/lib/rubymotion/version.rb +6 -0
- data/spec/document_tree.rb +48 -0
- data/spec/gfm_to_html.rb +95 -0
- data/spec/helpers/it_behaves_like.rb +27 -0
- data/spec/helpers/option_file.rb +46 -0
- data/spec/helpers/spec_options.rb +37 -0
- data/spec/helpers/tidy.rb +12 -0
- data/spec/html_to_html.rb +40 -0
- data/spec/html_to_kramdown_to_html.rb +46 -0
- data/spec/kramdown_to_xxx.rb +40 -0
- data/spec/test_location.rb +203 -0
- data/spec/test_string_scanner_kramdown.rb +19 -0
- data/spec/text_to_kramdown_to_html.rb +52 -0
- data/spec/text_to_latex.rb +33 -0
- metadata +164 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# RM require 'kramdown/parser/kramdown'
|
4
|
+
|
5
|
+
module Kramdown
  module Parser

    # GFM parser built on top of the kramdown parser.
    #
    # It replaces the stock fenced code block and ATX header block parsers with the GFM variants
    # defined at the bottom of this class and, when the +hard_wrap+ option is set, converts
    # newlines inside text elements into explicit line break elements.
    class GFM < Kramdown::Parser::Kramdown

      # Set up the parser and swap in the GFM specific block parsers at the positions of the
      # parsers they replace.
      def initialize(source, options)
        super
        @span_parsers.delete(:line_break) if @options[:hard_wrap]

        replacements = {:codeblock_fenced => :codeblock_fenced_gfm,
                        :atx_header => :atx_header_gfm}
        replacements.each do |stock, gfm|
          slot = @block_parsers.index(stock)
          @block_parsers.delete(stock)
          @block_parsers.insert(slot, gfm)
        end
      end

      # Run the inherited parsing and afterwards insert hard line breaks if +hard_wrap+ is set.
      def parse
        super
        add_hard_line_breaks(@root) if @options[:hard_wrap]
      end

      # Walk the children of +element+ and split every text child that contains a newline into
      # text elements separated by :br elements. HTML elements are left untouched, all other
      # children are processed recursively.
      def add_hard_line_breaks(element)
        element.children.map! do |child|
          if child.type == :text && child.value =~ /\n/
            pieces = []
            lines = child.value.split(/\n/, -1)
            final = lines.size - 1
            # No break is emitted for a newline that merely terminates the last text of a block.
            omit_trailing_br = (Kramdown::Element.category(element) == :block &&
                                element.children[-1] == child && lines[-1].empty?)
            lines.each_with_index do |line, index|
              pieces << Element.new(:text, (index > 0 ? "\n#{line}" : line))
              next if index >= final
              next if index == final - 1 && omit_trailing_br
              pieces << Element.new(:br)
            end
            pieces
          elsif child.type == :html_element
            child
          else
            add_hard_line_breaks(child)
            child
          end
        end.flatten!
      end

      ATX_HEADER_START = /^\#{1,6}\s/
      define_parser(:atx_header_gfm, ATX_HEADER_START, nil, 'parse_atx_header')

      FENCED_CODEBLOCK_MATCH = /^(([~`]){3,})\s*?(\w+)?\s*?\n(.*?)^\1\2*\s*?\n/m
      define_parser(:codeblock_fenced_gfm, /^[~`]{3,}/, nil, 'parse_codeblock_fenced')

    end
  end
end
|
@@ -0,0 +1,575 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009-2014 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown which is licensed under the MIT.
|
7
|
+
#++
|
8
|
+
#
|
9
|
+
|
10
|
+
# RM require 'rexml/parsers/baseparser'
|
11
|
+
# RM require 'strscan'
|
12
|
+
# RM require 'kramdown/utils'
|
13
|
+
|
14
|
+
module Kramdown
|
15
|
+
|
16
|
+
module Parser
|
17
|
+
|
18
|
+
# Used for parsing a HTML document.
|
19
|
+
#
|
20
|
+
# The parsing code is in the Parser module that can also be used by other parsers.
|
21
|
+
class Html < Base
|
22
|
+
|
23
|
+
# Contains all constants that are used when parsing.
|
24
|
+
module Constants
|
25
|
+
|
26
|
+
#:stopdoc:
|
27
|
+
# The following regexps are based on the ones used by REXML, with some slight modifications.
|
28
|
+
HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/im
|
29
|
+
HTML_COMMENT_RE = /<!--(.*?)-->/m
|
30
|
+
HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
|
31
|
+
HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})(?:\s*=\s*(["'])(.*?)\2)?/m
|
32
|
+
HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}(?:\s*=\s*(["']).*?\3)?)*)\s*(\/)?>/m
|
33
|
+
HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
|
34
|
+
HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
|
35
|
+
|
36
|
+
HTML_CONTENT_MODEL_BLOCK = %w{address applet article aside blockquote body
|
37
|
+
dd details div dl fieldset figure figcaption footer form header hgroup iframe li map menu nav
|
38
|
+
noscript object section summary td}
|
39
|
+
HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym b bdo big button cite caption del dfn dt em
|
40
|
+
h1 h2 h3 h4 h5 h6 i ins label legend optgroup p q rb rbc
|
41
|
+
rp rt rtc ruby select small span strong sub sup th tt}
|
42
|
+
HTML_CONTENT_MODEL_RAW = %w{script style math option textarea pre code kbd samp var}
|
43
|
+
# The following elements are also parsed as raw since they need child elements that cannot
|
44
|
+
# be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol
|
45
|
+
|
46
|
+
HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
|
47
|
+
HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
|
48
|
+
HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span}
|
49
|
+
HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw}
|
50
|
+
|
51
|
+
# Some HTML elements like script belong to both categories (i.e. are valid in block and
|
52
|
+
# span HTML) and don't appear therefore!
|
53
|
+
# script, textarea
|
54
|
+
HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
|
55
|
+
ins kbd label option q rb rbc rp rt rtc ruby samp select small span
|
56
|
+
strong sub sup tt u var}
|
57
|
+
HTML_BLOCK_ELEMENTS = %w{address article aside applet body blockquote caption col colgroup dd div dl dt fieldset
|
58
|
+
figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend menu
|
59
|
+
li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul}
|
60
|
+
HTML_ELEMENTS_WITHOUT_BODY = %w{area base br col command embed hr img input keygen link meta param source track wbr}
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
# Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
|
65
|
+
# functionality. The only thing that must be provided by the class are instance variable
|
66
|
+
# @stack for storing the needed state and @src (instance of StringScanner) for the actual
|
67
|
+
# parsing.
|
68
|
+
module Parser
|
69
|
+
|
70
|
+
include Constants
|
71
|
+
|
72
|
+
# Process the HTML start tag that has already been scanned/checked via @src.
|
73
|
+
#
|
74
|
+
# Does the common processing steps and then yields to the caller for further processing
|
75
|
+
# (first parameter is the created element; the second parameter is +true+ if the HTML
|
76
|
+
# element is already closed, ie. contains no body; the third parameter specifies whether the
|
77
|
+
# body - and the end tag - need to be handled in case closed=false).
|
78
|
+
def handle_html_start_tag(line = nil) # :yields: el, closed, handle_body
  tag_name = @src[1].downcase
  self_closed = !@src[4].nil?

  # Attribute names are stored lower-cased; an attribute without a value maps to "".
  attributes = Utils::OrderedHash.new
  @src[2].scan(HTML_ATTRIBUTE_RE).each do |attr_name, _quote, attr_value|
    attributes[attr_name.downcase] = attr_value || ""
  end

  el = Element.new(:html_element, tag_name, attributes, :category => :block)
  el.options[:location] = line if line
  @tree.children << el

  if HTML_ELEMENTS_WITHOUT_BODY.include?(el.value) && !self_closed
    warning("The HTML tag '#{el.value}' on line #{line} cannot have any content - auto-closing it")
    self_closed = true
  end

  if %w{script style}.include?(tag_name)
    # The body of these elements is consumed verbatim here, so the caller must not parse it.
    handle_raw_html_tag(tag_name)
    yield(el, false, false)
  else
    yield(el, self_closed, true)
  end
end
|
99
|
+
|
100
|
+
# Handle the raw HTML tag at the current position.
|
101
|
+
def handle_raw_html_tag(name)
  start_pos = @src.pos
  target = @tree.children.last

  unless @src.scan_until(/(?=<\/#{name}\s*>)/mi)
    # No closing tag: everything up to the end of the source becomes the raw body.
    add_text(@src.rest, target, :raw)
    @src.terminate
    return warning("Found no end tag for '#{name}' - auto-closing it")
  end

  add_text(extract_string(start_pos...@src.pos, @src), target, :raw)
  @src.scan(HTML_TAG_CLOSE_RE)
end
|
112
|
+
|
113
|
+
HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
|
114
|
+
|
115
|
+
# Parse raw HTML from the current source position, storing the found elements in +el+.
|
116
|
+
# Parsing continues until one of the following criteria are fulfilled:
|
117
|
+
#
|
118
|
+
# - The end of the document is reached.
|
119
|
+
# - The matching end tag for the element +el+ is found (only used if +el+ is an HTML
|
120
|
+
# element).
|
121
|
+
#
|
122
|
+
# When an HTML start tag is found, processing is deferred to #handle_html_start_tag,
|
123
|
+
# providing the block given to this method.
|
124
|
+
def parse_raw_html(el, &block)
  # Make +el+ the current tree for the duration of this call; the previous one is restored at
  # the end.
  @stack.push(@tree)
  @tree = el

  done = false
  while !@src.eos? && !done
    if result = @src.scan_until(HTML_RAW_START)
      add_text(result, @tree, :text)
      line = @src.current_line_number
      if result = @src.scan(HTML_COMMENT_RE)
        @tree.children << Element.new(:xml_comment, result, nil, :category => :block, :location => line)
      elsif result = @src.scan(HTML_INSTRUCTION_RE)
        @tree.children << Element.new(:xml_pi, result, nil, :category => :block, :location => line)
      elsif @src.scan(HTML_TAG_RE)
        # Method#arity is negative when a method has optional parameters (-1 for
        # "line = nil"), so the absolute value has to be checked; comparing with 1 would
        # never forward the line number to a handler that declares the parameter as optional.
        if method(:handle_html_start_tag).arity.abs >= 1
          handle_html_start_tag(line, &block)
        else
          handle_html_start_tag(&block) # DEPRECATED: method needs to accept line number in 2.0
        end
      elsif @src.scan(HTML_TAG_CLOSE_RE)
        if @tree.value == @src[1].downcase
          done = true
        else
          warning("Found invalidly used HTML closing tag for '#{@src[1].downcase}' on line #{line} - ignoring it")
        end
      else
        # Something that only looks like markup: consume one character as text and go on.
        add_text(@src.getch, @tree, :text)
      end
    else
      # No further markup: the rest of the source is plain text.
      add_text(@src.rest, @tree, :text)
      @src.terminate
      warning("Found no end tag for '#{@tree.value}' on line #{@tree.options[:location]} - auto-closing it") if @tree.type == :html_element
      done = true
    end
  end

  @tree = @stack.pop
end
|
162
|
+
|
163
|
+
end
|
164
|
+
|
165
|
+
|
166
|
+
# Converts HTML elements to native elements if possible.
|
167
|
+
class ElementConverter
|
168
|
+
|
169
|
+
# :stopdoc:
|
170
|
+
|
171
|
+
include Constants
|
172
|
+
include ::Kramdown::Utils::Entities
|
173
|
+
|
174
|
+
REMOVE_TEXT_CHILDREN = %w{html head hgroup ol ul dl table colgroup tbody thead tfoot tr select optgroup}
|
175
|
+
WRAP_TEXT_CHILDREN = %w{body section nav article aside header footer address div li dd blockquote figure
|
176
|
+
figcaption fieldset form}
|
177
|
+
REMOVE_WHITESPACE_CHILDREN = %w{body section nav article aside header footer address
|
178
|
+
div li dd blockquote figure figcaption td th fieldset form}
|
179
|
+
STRIP_WHITESPACE = %w{address article aside blockquote body caption dd div dl dt fieldset figcaption form footer
|
180
|
+
header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
|
181
|
+
SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
|
182
|
+
|
183
|
+
# Create a converter and remember +root+ in @root.
def initialize(root)
  @root = root
end
|
186
|
+
|
187
|
+
# Convenience entry point: build a converter for +root+ and process +el+ (by default the
# root itself) with it.
def self.convert(root, el = root)
  converter = new(root)
  converter.process(el)
end
|
190
|
+
|
191
|
+
# Convert the element +el+ and its children.
|
192
|
+
def process(el, do_conversion = true, preserve_text = false, parent = nil)
  case el.type
  when :html_element
    # Handled after the case statement.
  when :xml_comment, :xml_pi
    # Comments and processing instructions get the span/block category of the element they
    # appear in; without a parent they are treated like the content of a div.
    context_tag =
      if parent.nil?
        'div'
      elsif parent.type == :html_element
        parent.value
      else
        {:code_span => 'code', :code_block => 'pre', :header => 'h1'}.fetch(parent.type) { parent.type.to_s }
      end
    category = (HTML_CONTENT_MODEL[context_tag] == :span ? :span : :block)
    el.options.replace({:category => category})
    return
  when :root
    el.children.each {|child| process(child)}
    remove_whitespace_children(el)
    return
  else
    return
  end

  # The tag name has to be read now because some of the helpers below reset el.value.
  tag = el.value
  converter = "convert_#{tag}"
  if do_conversion && self.class.method_defined?(converter)
    send(converter, el)
  else
    remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(tag)

    if do_conversion && SIMPLE_ELEMENTS.include?(tag)
      set_basics(el, tag.intern)
      process_children(el, do_conversion, preserve_text)
    else
      process_html_element(el, do_conversion, preserve_text)
    end

    if do_conversion
      strip_whitespace(el) if STRIP_WHITESPACE.include?(tag)
      remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(tag)
      wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(tag)
    end
  end
end
|
237
|
+
|
238
|
+
# Process all children of +el+: a text child is replaced by the elements that #process_text
# returns for it, every other child is converted in place via #process.
def process_children(el, do_conversion = true, preserve_text = false)
  keep_whitespace = preserve_text || !do_conversion
  el.children.map! do |child|
    next process_text(child.value, keep_whitespace) if child.type == :text
    process(child, do_conversion, preserve_text, el)
    child
  end
  # Text children were replaced by arrays of elements, so the list has to be flattened.
  el.children.flatten!
end
|
248
|
+
|
249
|
+
# Process the HTML text +raw+: compress whitespace (if +preserve+ is +false+) and convert
|
250
|
+
# entities in entity elements.
|
251
|
+
def process_text(raw, preserve = false)
  # Collapse whitespace on a copy: the caller's string must not be modified (and may be
  # frozen).
  raw = raw.gsub(/\s+/, ' ') unless preserve
  src = Kramdown::Utils::StringScanner.new(raw)
  result = []
  while !src.eos?
    if tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/)
      # +tmp+ is the (possibly empty) text in front of the next entity.
      result << Element.new(:text, tmp)
      src.scan(HTML_ENTITY_RE)
      # Named entity, decimal code point or hexadecimal code point.
      val = src[1] || (src[2] && src[2].to_i) || src[3].hex
      result << if %w{lsquo rsquo ldquo rdquo}.include?(val)
                  Element.new(:smart_quote, val.intern)
                elsif %w{mdash ndash hellip laquo raquo}.include?(val)
                  Element.new(:typographic_sym, val.intern)
                else
                  begin
                    Element.new(:entity, entity(val), nil, :original => src.matched)
                  rescue ::Kramdown::Error
                    # The entity lookup failed: emit an ampersand entity and continue
                    # scanning directly after the '&'.
                    src.pos -= src.matched_size - 1
                    Element.new(:entity, ::Kramdown::Utils::Entities.entity('amp'))
                  end
                end
    else
      result << Element.new(:text, src.rest)
      src.terminate
    end
  end
  result
end
|
279
|
+
|
280
|
+
# Keep +el+ as an HTML element: reset its options to the category and content model of its
# tag (the content model is always :raw when nothing is converted) and process its children.
def process_html_element(el, do_conversion = true, preserve_text = false)
  tag = el.value
  category = HTML_SPAN_ELEMENTS.include?(tag) ? :span : :block
  content_model = (do_conversion ? HTML_CONTENT_MODEL[tag] : :raw)
  el.options.replace(:category => category, :content_model => content_model)
  process_children(el, do_conversion, preserve_text)
end
|
285
|
+
|
286
|
+
# Delete all direct children of +el+ that are text elements.
def remove_text_children(el)
  el.children.delete_if do |child|
    child.type == :text
  end
end
|
289
|
+
|
290
|
+
# Group every run of consecutive text/non-block children of +el+ into a transparent :p
# element; block level children are kept as they are.
def wrap_text_children(el)
  wrapped = []
  open_paragraph = nil
  el.children.each do |child|
    if Element.category(child) == :block && child.type != :text
      wrapped << child
      # A block level child ends the current run.
      open_paragraph = nil
    else
      unless open_paragraph
        open_paragraph = Element.new(:p, nil, nil, :transparent => true)
        wrapped << open_paragraph
      end
      open_paragraph.children << child
    end
  end
  el.children = wrapped
end
|
308
|
+
|
309
|
+
# Strip leading whitespace from the first child and trailing whitespace from the last child
# of +el+ (in place), provided that the respective child is a text element.
def strip_whitespace(el)
  return if el.children.empty?

  first_child = el.children.first
  last_child = el.children.last
  first_child.value.lstrip! if first_child.type == :text
  last_child.value.rstrip! if last_child.type == :text
end
|
318
|
+
|
319
|
+
# Drop whitespace-only text children of +el+ that are the first or last child or that sit
# between two block level elements.
def remove_whitespace_children(el)
  siblings = el.children
  final = siblings.length - 1
  kept = []
  siblings.each_with_index do |child, index|
    removable = child.type == :text && child.value.strip.empty? &&
      (index == 0 || index == final ||
       (Element.category(siblings[index - 1]) == :block &&
        Element.category(siblings[index + 1]) == :block))
    kept << child unless removable
  end
  el.children = kept
end
|
328
|
+
|
329
|
+
# Turn +el+ into an element of the given +type+: its options are replaced by +opts+ and its
# value is reset to +nil+.
def set_basics(el, type, opts = {})
  el.type = type
  el.options.replace(opts)
  el.value = nil
end
|
334
|
+
|
335
|
+
# Append the values of all text elements found in +el+ and its descendants (in document
# order) to the string +raw+.
def extract_text(el, raw)
  raw << el.value.to_s if el.type == :text
  el.children.each do |child|
    extract_text(child, raw)
  end
end
|
339
|
+
|
340
|
+
# A textarea stays an HTML element; its text is processed with whitespace preserved.
def convert_textarea(el)
  do_conversion = true
  preserve_text = true
  process_html_element(el, do_conversion, preserve_text)
end
|
343
|
+
|
344
|
+
# Convert an anchor to a native :a element if it has an href attribute; otherwise keep it
# as an unconverted HTML element.
def convert_a(el)
  return process_html_element(el, false) unless el.attr['href']

  set_basics(el, :a)
  process_children(el)
end
|
352
|
+
|
353
|
+
# Maps the emphasis tags onto the native element type they are converted to.
EMPHASIS_TYPE_MAP = {'em' => :em, 'i' => :em, 'strong' => :strong, 'b' => :strong}

# Convert an emphasis tag to the native element given by EMPHASIS_TYPE_MAP. If the contained
# text starts or ends with whitespace, the element is kept as an unconverted HTML element.
def convert_em(el)
  text = ''
  extract_text(el, text)
  if text =~ /\A\s|\s\z/
    process_html_element(el, false)
  else
    set_basics(el, EMPHASIS_TYPE_MAP[el.value])
    process_children(el)
  end
end
%w{b strong i}.each do |tag|
  alias_method("convert_#{tag}".to_sym, :convert_em)
end
|
367
|
+
|
368
|
+
# Convert a heading tag to a native :header element. The level is the digit of the tag name
# and the plain text of the heading is stored in the :raw_text option.
def convert_h1(el)
  # The level must be read first since set_basics resets the element's value.
  level = el.value[1..1].to_i
  set_basics(el, :header, :level => level)

  raw_text = ''
  el.options[:raw_text] = raw_text
  extract_text(el, raw_text)
  process_children(el)
end
%w{h2 h3 h4 h5 h6}.each do |tag|
  alias_method("convert_#{tag}".to_sym, :convert_h1)
end
|
376
|
+
|
377
|
+
# Convert a code (or, via the alias, pre) tag to a native :codespan or :codeblock element if
# its whole content can be represented as one string; otherwise it is kept as an HTML element.
def convert_code(el)
  raw = ''
  extract_text(el, raw)
  result = process_text(raw, true)

  begin
    # Try to merge all parts into a single string.
    decoded = ''
    result.each do |part|
      decoded << case part.type
                 when :text
                   part.value
                 when :entity
                   # Code points 60, 62, 34 and 38 are appended via Integer#chr, all other
                   # entities via their +char+.
                   if [60, 62, 34, 38].include?(part.value.code_point)
                     part.value.code_point.chr
                   else
                     part.value.char
                   end
                 when :smart_quote, :typographic_sym
                   entity(part.value.to_s).char
                 else
                   raise "Bug - please report"
                 end
    end
    result.clear
    result << Element.new(:text, decoded)
  rescue
    # Deliberately ignored: +result+ is left as it was, which keeps the element as HTML below.
  end

  if result.length > 1 || result.first.type != :text
    process_html_element(el, false, true)
  else
    if el.value == 'code'
      set_basics(el, :codespan)
      el.attr['class'].gsub!(/\s+\bhighlighter-\w+\b|\bhighlighter-\w+\b\s*/, '') if el.attr['class']
    else
      set_basics(el, :codeblock)
      only_child = el.children.first
      if el.children.size == 1 && only_child.value == 'code'
        # Carry the language class of the single nested code element over to the code block.
        language = (only_child.attr['class'] || '').scan(/\blanguage-\w+\b/).first
        el.attr['class'] = "#{language} #{el.attr['class']}".rstrip if language
      end
    end
    el.value = result.first.value
    el.children.clear
  end
end
alias :convert_pre :convert_code
|
419
|
+
|
420
|
+
# Convert a table tag to a native :table element if it is a simple table (see
# #is_simple_table?); otherwise it is kept as an unconverted HTML element.
#
# The column alignment is taken from the text-align style of the cells (which is removed from
# the cells), children that are still HTML elements are dropped, th cells are turned into td
# cells and rows that are direct children of the table are wrapped in a tbody element.
def convert_table(el)
  if !is_simple_table?(el)
    process_html_element(el, false)
    return
  end
  remove_text_children(el)
  process_children(el)
  set_basics(el, :table)

  calc_alignment = lambda do |c|
    if c.type == :tr
      el.options[:alignment] = c.children.map do |td|
        style = td.attr['style']
        if style
          style.slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
          alignment = $1
          td.attr.delete('style') if style.strip.empty?
          alignment ? alignment.to_sym : :default
        else
          :default
        end
      end
    else
      c.children.each {|cc| calc_alignment.call(cc)}
    end
  end
  calc_alignment.call(el)
  el.children.delete_if {|c| c.type == :html_element}

  change_th_type = lambda do |c|
    if c.type == :th
      c.type = :td
    else
      c.children.each {|cc| change_th_type.call(cc)}
    end
  end
  change_th_type.call(el)

  # The table may have no children left at this point (e.g. an empty table), so the first
  # child has to be checked for nil before its type is inspected.
  first_child = el.children.first
  if first_child && first_child.type == :tr
    tbody = Element.new(:tbody)
    tbody.children = el.children
    el.children = [tbody]
  end
end
|
462
|
+
|
463
|
+
# Return +true+ if the HTML table +el+ is simple enough to be converted to a native table:
#
# - cells contain no block level HTML elements,
# - the table has at least one cell and all rows have the same number of cells,
# - all rows use the same text alignments and none of them is justify or inherit,
# - the table either consists only of rows with td cells or of thead (th cells) and
#   tfoot/tbody (td cells) sections with at least one tbody.
def is_simple_table?(el)
  only_phrasing_content = lambda do |c|
    c.children.all? do |cc|
      (cc.type == :text || !HTML_BLOCK_ELEMENTS.include?(cc.value)) && only_phrasing_content.call(cc)
    end
  end
  # This is a Proc on purpose: "return false" leaves the whole method, not just the block.
  check_cells = Proc.new do |c|
    if c.value == 'th' || c.value == 'td'
      return false if !only_phrasing_content.call(c)
    else
      c.children.each {|cc| check_cells.call(cc)}
    end
  end
  check_cells.call(el)

  # nr_cells is 0 as long as no row with cells was seen and -1 once two rows differ.
  nr_cells = 0
  check_nr_cells = lambda do |t|
    if t.value == 'tr'
      count = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.length
      if count != nr_cells
        if nr_cells == 0
          nr_cells = count
        else
          nr_cells = -1
          break
        end
      end
    else
      t.children.each {|cc| check_nr_cells.call(cc)}
    end
  end
  check_nr_cells.call(el)
  # A table without any cell cannot be represented as a native table either.
  return false if nr_cells == -1 || nr_cells == 0

  alignment = nil
  # Again a Proc so that "return false" leaves the method.
  check_alignment = Proc.new do |t|
    if t.value == 'tr'
      cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.map do |cell|
        md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
        return false if md && (md[1] == 'justify' || md[1] == 'inherit')
        md.nil? ? :default : md[1]
      end
      alignment = cur_alignment if alignment.nil?
      return false if alignment != cur_alignment
    else
      t.children.each {|cc| check_alignment.call(cc)}
    end
  end
  check_alignment.call(el)

  check_rows = lambda do |t, type|
    t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}}
  end
  check_rows.call(el, 'td') ||
    (el.children.all? do |t|
       t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
         ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
     end && el.children.any? {|t| t.value == 'tbody'})
end
|
522
|
+
|
523
|
+
# Convert a script tag: math scripts become native math elements, everything else is handled
# as a normal HTML element.
def convert_script(el)
  if is_math_tag?(el)
    handle_math_tag(el)
  else
    process_html_element(el)
  end
end
|
530
|
+
|
531
|
+
# Return a true value (the match position) if the type attribute of +el+ contains math/tex,
# otherwise +nil+.
def is_math_tag?(el)
  script_type = el.attr['type'].to_s
  script_type =~ /\bmath\/tex\b/
end
|
534
|
+
|
535
|
+
# Turn the math script element +el+ into a native :math element. It becomes a block element
# if the type attribute contains mode=display and a span element otherwise. The value is the
# text of the first child with a surrounding CDATA wrapper (optionally guarded by %) removed.
def handle_math_tag(el)
  # Must be evaluated before set_basics is called.
  display_mode = el.attr['type'] =~ /mode=display/
  set_basics(el, :math, :category => (display_mode ? :block : :span))

  # An element without any child (e.g. an empty script) results in an empty formula instead
  # of a NoMethodError on nil.
  body = el.children.shift
  source = (body ? body.value : '')
  el.value = source.sub(/\A(?:%\s*)?<!\[CDATA\[\n?(.*?)(?:\s%)?\]\]>\z/m, '\1')
  el.attr.delete('type')
end
|
540
|
+
|
541
|
+
end
|
542
|
+
|
543
|
+
include Parser
|
544
|
+
|
545
|
+
# Parse the source string provided on initialization as HTML document.
|
546
|
+
def parse
  @stack, @tree = [], @root
  @src = Kramdown::Utils::StringScanner.new(adapt_source(source))

  # Consume the prolog: any sequence of processing instructions, doctypes and comments.
  instruction_re = /\s*#{HTML_INSTRUCTION_RE}/
  doctype_re = /\s*#{HTML_DOCTYPE_RE}/
  comment_re = /\s*#{HTML_COMMENT_RE}/
  loop do
    if instruction = @src.scan(instruction_re)
      @tree.children << Element.new(:xml_pi, instruction.strip, nil, :category => :block)
    elsif @src.scan(doctype_re)
      # ignore the doctype
    elsif comment = @src.scan(comment_re)
      @tree.children << Element.new(:xml_comment, comment.strip, nil, :category => :block)
    else
      break
    end
  end

  # Every start tag that still needs its body handled is parsed recursively.
  tag_handler = lambda do |c, closed, handle_body|
    parse_raw_html(c, &tag_handler) if !closed && handle_body
  end
  parse_raw_html(@tree, &tag_handler)

  ElementConverter.convert(@tree)
end
|
569
|
+
|
570
|
+
end
|
571
|
+
|
572
|
+
end
|
573
|
+
|
574
|
+
end
|
575
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009-2014 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown which is licensed under the MIT.
|
7
|
+
#++
|
8
|
+
#
|
9
|
+
|
10
|
+
module Kramdown
  module Parser
    class Kramdown

      ABBREV_DEFINITION_START = /^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/

      # Parse the abbreviation definition at the current location and store it in the
      # :abbrev_defs option of the root element. An already existing definition with the same
      # ID is overwritten (with a warning).
      def parse_abbrev_definition
        start_line_number = @src.current_line_number
        @src.pos += @src.matched_size
        abbrev_id = @src[1]
        abbrev_text = @src[2].strip
        definitions = @root.options[:abbrev_defs]
        if definitions[abbrev_id]
          warning("Duplicate abbreviation ID '#{abbrev_id}' on line #{start_line_number} - overwriting")
        end
        definitions[abbrev_id] = abbrev_text
        @tree.children << Element.new(:eob, :abbrev_def)
        true
      end
      define_parser(:abbrev_definition, ABBREV_DEFINITION_START)

      # Replace every occurrence of a defined abbreviation in the text elements below +el+ with
      # an :abbreviation element.
      #
      # +regexps+ is only passed on recursive calls. Its first entry matches any abbreviation
      # (longer abbreviations first), its last entry finds the position in front of an
      # abbreviation that stands on word boundaries.
      def replace_abbreviations(el, regexps = nil)
        return if @root.options[:abbrev_defs].empty?

        unless regexps
          sorted_abbrevs = @root.options[:abbrev_defs].keys.sort {|a,b| b.length <=> a.length}
          regexps = [Regexp.union(*sorted_abbrevs.map {|k| /#{Regexp.escape(k)}/})]
          regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
        end
        abbrev_re = regexps.first
        boundary_re = regexps.last

        el.children.map! do |child|
          unless child.type == :text
            replace_abbreviations(child, regexps)
            next child
          end
          next child unless child.value =~ abbrev_re

          pieces = []
          scanner = Kramdown::Utils::StringScanner.new(child.value, child.options[:location])
          text_lineno = scanner.current_line_number
          while text = scanner.scan_until(boundary_re)
            abbr_lineno = scanner.current_line_number
            abbr = scanner.scan(abbrev_re) # begin of line case of abbr with \W char as first one
            if abbr.nil?
              # The abbreviation is preceded by a non-word character that belongs to the text.
              text << scanner.scan(/\W|^/)
              abbr = scanner.scan(abbrev_re)
            end
            pieces << Element.new(:text, text, nil, :location => text_lineno)
            pieces << Element.new(:abbreviation, abbr, nil, :location => abbr_lineno)
            text_lineno = scanner.current_line_number
          end
          pieces << Element.new(:text, scanner.rest, nil, :location => text_lineno)
        end.flatten!
      end

    end
  end
end
|