reverse_markdown 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/reverse_markdown.rb +214 -0
  2. data/test/test.rb +72 -0
  3. metadata +47 -0
@@ -0,0 +1,214 @@
1
+ require 'rexml/document'
2
+
3
+ # reverse markdown for ruby
4
+ # author: JO
5
+ # e-mail: xijo@gmx.de
6
+ # date: 14.7.2009
7
+ # version: 0.1
8
+ # license: GPL
9
+ # taken from https://github.com/xijo/reverse-markdown/raw/master/reverse_markdown.rb
10
+
11
+ # TODO
12
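+ # - ol numbering is buggy: @li_counter is reset by every <ol> and not restored after a nested list; harmless in practice, because Markdown renumbers ordered lists itself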
13
+ # -
14
+
15
# Converts an HTML fragment into Markdown by walking a REXML document tree
# and appending the markdown for every node to an internal output string.
class ReverseMarkdown

  # Number of '#' characters used for each heading tag.
  HEADING_LEVELS = { :h1 => 1, :h2 => 2, :h3 => 3, :h4 => 4, :h5 => 5, :h6 => 6 }.freeze

  # Indentation that marks a markdown code block (four spaces).
  CODE_BLOCK_INDENT = "    ".freeze

  # set basic variables:
  # - @li_counter: numbering list item (li) tags in an ordered list (ol)
  # - @links: hold the links for adding them to the bottom of the @output
  #   this means 'reference style', please take a look at http://daringfireball.net/projects/markdown/syntax#link
  # - @output: fancy markdown code in here!
  # - @indent: control indentation level for nested lists
  # - @errors: appearing errors, like unknown tags, go into this array
  def initialize
    @li_counter = 0
    @links = []
    @output = ""
    @indent = 0
    @errors = []
  end

  # Invokes the HTML parsing by using a string. Returns the markdown code in @output.
  # To guarantee well-formed xml for REXML a <root> element will be added, but has no effect.
  # After parsing all elements, the 'reference style'-links will be inserted.
  def parse_string(string)
    doc = REXML::Document.new("<root>\n#{string}\n</root>")
    parse_element(doc.root, :none)
    insert_links
    @output
  end

  # Parsing an element and its children (recursive) and writing its markdown code to @output
  # 1. do indent for nested list items
  # 2. add the markdown opening tag for this element
  # 3a. if element only contains text, handle it like a text node
  # 3b. if element is a container handle its children, which may be text- or element nodes
  # 4. finally add the markdown ending tag for this element
  #
  # element is the node to convert, parent is the tag name (symbol) of its parent.
  def parse_element(element, parent)
    name = element.name.to_sym
    # 1.
    @output << (" " * @indent) if name.eql?(:li)
    # 2.
    @output << opening(element, parent)

    # 3a.
    if element.has_text? && element.children.size < 2
      @output << text_node(element, parent)
    end

    # 3b.
    if element.has_elements?
      # a list directly inside a list item is one level deeper; match the tag
      # name exactly so unrelated tags containing "ul"/"ol" are not indented
      nested_list = (name.eql?(:ul) || name.eql?(:ol)) && parent.eql?(:li)
      @indent += 1 if nested_list

      element.children.each do |child|
        if child.node_type.eql?(:element)
          parse_element(child, name)
        elsif parent.eql?(:blockquote)
          @output << child.to_s.gsub("\n ", "\n>")
        else
          @output << child.to_s
        end
      end

      @indent -= 1 if nested_list
    end

    # 4.
    @output << ending(element, parent)
  end

  # Returns opening markdown tag for the element. Its parent matters sometimes!
  # Tags without a mapping are recorded in @errors and produce an empty string.
  def opening(type, parent)
    name = type.name.to_sym
    return "#{heading_marks(name)} " if HEADING_LEVELS.key?(name)

    case name
    when :li
      if parent.eql?(:ul)
        " - "
      else
        @li_counter += 1
        " #{@li_counter}. "
      end
    when :ol
      @li_counter = 0
      ""
    when :ul, :p, :pre, :root
      # no opening markup; listed here so they are not reported as unknown
      ""
    when :em
      "*"
    when :strong
      "**"
    when :blockquote
      # remove leading whitespace/newline, but only if the blockquote really
      # starts with a text node (it may be empty or start with an element)
      first = type.children.first
      first.value = first.value.to_s.lstrip if first && first.node_type.eql?(:text)
      "> "
    when :code
      parent.eql?(:pre) ? CODE_BLOCK_INDENT : "`"
    when :a
      "["
    when :img
      "!["
    when :hr
      "----------\n\n"
    else
      @errors << "unknown start tag: "+type.name.to_s
      ""
    end
  end

  # Returns the closing markdown tag, like opening().
  # Links (a) and images (img) register their target in @links and emit the
  # matching reference number.
  def ending(type, parent)
    name = type.name.to_sym
    return " #{heading_marks(name)}\n\n" if HEADING_LEVELS.key?(name)

    case name
    when :p
      parent.eql?(:root) ? "\n\n" : "\n"
    when :ol, :ul
      parent.eql?(:li) ? "" : "\n"
    when :em
      "*"
    when :strong
      "**"
    when :li, :blockquote, :pre, :root
      ""
    when :code
      parent.eql?(:pre) ? "" : "`"
    when :a
      @links << type.attribute('href').to_s
      "][#{@links.size}] "
    when :img
      @links << type.attribute('src').to_s
      "#{type.attribute('alt')}][#{@links.size}] "
    else
      @errors << " unknown end tag: "+type.name.to_s
      ""
    end
  end

  # Return the content of element, which should be just text.
  # If it is a code block, indent every line by 4 spaces.
  # For block quotation add a leading '>'
  def text_node(element, parent)
    text = element.text.to_s
    if element.name.to_sym.eql?(:code) && parent.eql?(:pre)
      text.gsub("\n", "\n" + CODE_BLOCK_INDENT) << "\n"
    elsif parent.eql?(:blockquote)
      # non-destructive gsub: gsub! would return nil when nothing matches
      text.gsub("\n ", "\n>")
    else
      text
    end
  end

  # Insert the mentioned reference style links. Returns @output.
  def insert_links
    @output << "\n"
    @links.each_with_index do |link, index|
      @output << " [#{index + 1}]: #{link}\n"
    end
    @output
  end

  # Print out all errors, that occurred and have been written to @errors.
  def print_errors
    @errors.each do |error|
      puts error
    end
  end

  # Perform a benchmark on a given string n-times.
  # The converter state is reset before the run and after every iteration.
  def speed_benchmark(string, n)
    require 'benchmark'
    initialize
    Benchmark.bm(15) do |test|
      test.report("reverse markdown:") do
        n.times do
          parse_string(string)
          initialize
        end
      end
    end
  end

  private

  # Returns the run of '#' characters for a heading tag name (:h1 .. :h6).
  def heading_marks(name)
    "#" * HEADING_LEVELS[name]
  end

end
@@ -0,0 +1,72 @@
1
#!/usr/bin/env ruby

# Smoke test for ReverseMarkdown: converts a sample HTML fragment and prints
# the generated markdown to stdout for visual inspection.

# Load the library relative to this file, so the script works from any
# working directory (the gem ships lib/reverse_markdown.rb and test/test.rb).
require_relative '../lib/reverse_markdown'

# Example HTML Code for parsing
example = <<-EOF
This text, though not within an element, should also be shown.

<h2>heading 1.1</h2>

<p>text *italic* and **bold**.</p>

<pre><code>text *italic* and **bold**.
sdfsdff
sdfsd
sdf sdfsdf
</code></pre>

<blockquote>
<p>text <em>italic</em> and <strong>bold</strong>. sdfsdff
sdfsd sdf sdfsdf</p>
</blockquote>

<p>asdasd <code>sdfsdfsdf</code> asdad <a href="http://www.bla.de">link text</a></p>

<p><a href="http://www.bla.de">link <strong>text</strong></a></p>

<ol>
<li>List item</li>
<li>List <em>item</em>
<ol><li>List item</li>
<li>dsfdsf
<ul><li>dfwe</li>
<li>dsfsdfsdf</li></ul></li>
<li>lidsf <img src="http://www.dfgdfg.de/dsf.jpe" alt="item" title="" /></li></ol></li>
<li>sdfsdfsdf
<ul><li>sdfsdfsdf</li>
<li>sdfsdfsdf <strong>sdfsdf</strong></li></ul></li>
</ol>

<blockquote>
<p>Lorem ipsum dolor sit amet, consetetur
voluptua. At vero eos et accusam et
justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata
sanctus est Lorem ipsum dolor sit
amet. <em>italic</em></p>
</blockquote>

<hr />

<blockquote>
<p>Lorem ipsum dolor sit amet, consetetur
sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et
dolore magna aliquyam erat, sed</p>
</blockquote>

This should also be shown, even if it's not wrapped in an element.

<p>nur ein text! nur eine maschine!</p>

This text should not be invisible!
EOF

r = ReverseMarkdown.new

puts r.parse_string(example)

# Uncomment to list tags the converter did not recognise.
#r.print_errors

# Uncomment to time 100 conversions of the example.
#r.speed_benchmark(example, 100)
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reverse_markdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - James Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-09 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email:
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/reverse_markdown.rb
21
+ - test/test.rb
22
+ homepage:
23
+ licenses: []
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 1.8.23
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: reverse_markdown
46
+ test_files:
47
+ - test/test.rb