reverse_markdown 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/reverse_markdown.rb +214 -0
- data/test/test.rb +72 -0
- metadata +47 -0
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
# reverse markdown for ruby
|
4
|
+
# author: JO
|
5
|
+
# e-mail: xijo@gmx.de
|
6
|
+
# date: 14.7.2009
|
7
|
+
# version: 0.1
|
8
|
+
# license: GPL
|
9
|
+
# taken from https://github.com/xijo/reverse-markdown/raw/master/reverse_markdown.rb
|
10
|
+
|
11
|
+
# TODO
|
12
|
+
# - ol numbering is buggy (the counter restarts in nested lists); this does not affect how the Markdown renders
|
13
|
+
# -
|
14
|
+
|
15
|
+
# Converts an HTML fragment into Markdown by walking a REXML document tree.
#
# Links and images are emitted in Markdown "reference style": the text gets a
# numeric reference (e.g. "[text][1] ") and the URLs are listed at the bottom
# of the output.
#
# State is kept in instance variables and is NOT reset by #parse_string, so
# parsing a second string with the same instance appends to the previous
# output and link list. Use a fresh instance per document.
class ReverseMarkdown

  # Sets up the conversion state:
  # - @li_counter: running number for li tags inside an ordered list (ol)
  # - @links:      collected href/src values, written out by #insert_links
  # - @output:     the Markdown text built so far
  # - @indent:     current nesting depth of lists (one space per level)
  # - @errors:     messages about tags that have no Markdown mapping
  def initialize
    @li_counter = 0
    @links = []
    @output = ""
    @indent = 0
    @errors = []
  end

  # Parses an HTML string and returns the Markdown accumulated in @output.
  #
  # The input is wrapped in a <root> element so that REXML gets a single
  # well-formed document root; :root itself produces no Markdown. After all
  # elements are processed the reference-style link list is appended.
  #
  # string - HTML fragment; must be well-formed XML for REXML to accept it.
  def parse_string(string)
    doc = REXML::Document.new("<root>\n#{string}\n</root>")
    parse_element(doc.root, :none)
    insert_links
    @output
  end

  # Recursively converts an element and its children, appending to @output.
  #
  # 1.  indent list items according to the current nesting depth
  # 2.  append the Markdown opening for this element
  # 3a. if the element holds a single text node, append it via #text_node
  # 3b. if the element has child elements, walk all children (text or element)
  # 4.  append the Markdown ending for this element
  #
  # element - node responding to name, children, has_text?, has_elements?
  # parent  - Symbol name of the enclosing element (:none for the root)
  def parse_element(element, parent)
    name = element.name.to_sym

    # 1.
    @output << (" " * @indent) if name.eql?(:li)
    # 2.
    @output << opening(element, parent)

    # 3a.
    if element.has_text? && element.children.size < 2
      @output << text_node(element, parent)
    end

    # 3b.
    if element.has_elements?
      # Exact name match: an unanchored /(ul|ol)/ would also hit e.g. "col".
      nested_list = %w[ul ol].include?(element.name) && parent.eql?(:li)
      @indent += 1 if nested_list

      element.children.each do |child|
        if child.node_type.eql?(:element)
          parse_element(child, name)
        elsif parent.eql?(:blockquote)
          # continuation lines inside a block quote get the '>' marker
          @output << child.to_s.gsub("\n ", "\n>")
        else
          @output << child.to_s
        end
      end

      @indent -= 1 if nested_list
    end

    # 4.
    @output << ending(element, parent)
  end

  # Returns the Markdown that opens the given element. The parent matters for
  # li (bullet vs. number) and code (inline vs. block).
  #
  # Tags without a mapping are recorded in @errors and produce "".
  def opening(type, parent)
    case type.name.to_sym
    when :h1, :h2, :h3, :h4, :h5, :h6
      "#{'#' * type.name[1, 1].to_i} "
    when :li
      parent.eql?(:ul) ? " - " : " #{@li_counter += 1}. "
    when :ol
      @li_counter = 0
      ""
    when :ul, :p, :pre, :root
      # no opening markup; listed so they are not reported as unknown
      ""
    when :em
      "*"
    when :strong
      "**"
    when :blockquote
      # Drop the leading newline/whitespace after <blockquote>. Only text
      # nodes carry a writable value; an empty blockquote or one starting
      # with an element is left untouched, as is real leading text.
      first = type.children.first
      first.value = first.value.to_s.lstrip if first.respond_to?(:value=)
      "> "
    when :code
      # Markdown code blocks are indented by four spaces
      parent.eql?(:pre) ? "    " : "`"
    when :a
      "["
    when :img
      "!["
    when :hr
      "----------\n\n"
    else
      @errors << "unknown start tag: #{type.name}"
      ""
    end
  end

  # Returns the Markdown that closes the given element, mirroring #opening.
  #
  # For a and img the target URL is pushed onto @links and the returned text
  # carries its 1-based reference number.
  def ending(type, parent)
    case type.name.to_sym
    when :h1, :h2, :h3, :h4, :h5, :h6
      " #{'#' * type.name[1, 1].to_i}\n\n"
    when :p
      parent.eql?(:root) ? "\n\n" : "\n"
    when :ol, :ul
      parent.eql?(:li) ? "" : "\n"
    when :em
      "*"
    when :strong
      "**"
    when :code
      parent.eql?(:pre) ? "" : "`"
    when :a
      @links << type.attribute('href').to_s
      "][#{@links.size}] "
    when :img
      @links << type.attribute('src').to_s
      "#{type.attribute('alt')}][#{@links.size}] "
    when :li, :blockquote, :pre, :hr, :root
      # no closing markup; listed so they are not reported as unknown
      ""
    else
      @errors << " unknown end tag: #{type.name}"
      ""
    end
  end

  # Returns the text content of an element that holds only text.
  #
  # - code inside pre: continuation lines are indented by four spaces and a
  #   trailing newline is added
  # - direct child of a blockquote: continuation lines get a leading '>'
  # - anything else: the text unchanged
  #
  # Always returns a String (non-mutating gsub is used because gsub! returns
  # nil when nothing was replaced).
  def text_node(element, parent)
    text = element.text.to_s
    if element.name.to_sym.eql?(:code) && parent.eql?(:pre)
      text.gsub("\n", "\n    ") << "\n"
    elsif parent.eql?(:blockquote)
      text.gsub("\n ", "\n>")
    else
      text
    end
  end

  # Appends a blank line and one " [n]: url" line per collected link to
  # @output, numbering from 1 in collection order. Returns @output.
  def insert_links
    @output << "\n"
    @links.each_with_index do |link, index|
      @output << " [#{index + 1}]: #{link}\n"
    end
    @output
  end

  # Prints every message collected in @errors, one per line.
  def print_errors
    @errors.each { |error| puts error }
  end

  # Benchmarks #parse_string on the given string, n times.
  # The conversion state is reset before the run and after every iteration so
  # each pass starts from an empty output.
  def speed_benchmark(string, n)
    require 'benchmark' # loaded lazily; only needed for benchmarking
    initialize
    Benchmark.bm(15) do |test|
      test.report("reverse markdown:") do
        n.times do
          parse_string(string)
          initialize
        end
      end
    end
  end

end
|
data/test/test.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env ruby

require './lib/reverse_markdown'

# Sample HTML fed to the converter. It mixes bare text, headings, code blocks,
# block quotes, links, images, nested lists and a horizontal rule.
sample_html = <<-HTML
This text, though not within an element, should also be shown.

<h2>heading 1.1</h2>

<p>text *italic* and **bold**.</p>

<pre><code>text *italic* and **bold**.
sdfsdff
sdfsd
sdf sdfsdf
</code></pre>

<blockquote>
<p>text <em>italic</em> and <strong>bold</strong>. sdfsdff
sdfsd sdf sdfsdf</p>
</blockquote>

<p>asdasd <code>sdfsdfsdf</code> asdad <a href="http://www.bla.de">link text</a></p>

<p><a href="http://www.bla.de">link <strong>text</strong></a></p>

<ol>
<li>List item</li>
<li>List <em>item</em>
<ol><li>List item</li>
<li>dsfdsf
<ul><li>dfwe</li>
<li>dsfsdfsdf</li></ul></li>
<li>lidsf <img src="http://www.dfgdfg.de/dsf.jpe" alt="item" title="" /></li></ol></li>
<li>sdfsdfsdf
<ul><li>sdfsdfsdf</li>
<li>sdfsdfsdf <strong>sdfsdf</strong></li></ul></li>
</ol>

<blockquote>
<p>Lorem ipsum dolor sit amet, consetetur
voluptua. At vero eos et accusam et
justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata
sanctus est Lorem ipsum dolor sit
amet. <em>italic</em></p>
</blockquote>

<hr />

<blockquote>
<p>Lorem ipsum dolor sit amet, consetetur
sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et
dolore magna aliquyam erat, sed</p>
</blockquote>

This should also be shown, even if it's not wrapped in an element.

<p>nur ein text! nur eine maschine!</p>

This text should not be invisible!
HTML

# Convert the sample and print the resulting Markdown.
converter = ReverseMarkdown.new

puts converter.parse_string(sample_html)

# Uncomment to list tags the converter did not recognise:
#converter.print_errors

# Uncomment to time 100 conversions of the sample:
#converter.speed_benchmark(sample_html, 100)
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: reverse_markdown
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- James Robertson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-09 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description:
|
15
|
+
email:
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/reverse_markdown.rb
|
21
|
+
- test/test.rb
|
22
|
+
homepage:
|
23
|
+
licenses: []
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 1.8.23
|
43
|
+
signing_key:
|
44
|
+
specification_version: 3
|
45
|
+
summary: reverse_markdown
|
46
|
+
test_files:
|
47
|
+
- test/test.rb
|