reverse_markdown 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/reverse_markdown.rb +214 -0
  2. data/test/test.rb +72 -0
  3. metadata +47 -0
@@ -0,0 +1,214 @@
1
+ require 'rexml/document'
2
+
3
+ # reverse markdown for ruby
4
+ # author: JO
5
+ # e-mail: xijo@gmx.de
6
+ # date: 14.7.2009
7
+ # version: 0.1
8
+ # license: GPL
9
+ # taken from https://github.com/xijo/reverse-markdown/raw/master/reverse_markdown.rb
10
+
11
+ # TODO
12
+ # - ol numbering is buggy, in fact doesn't matter for markdown code
13
+ # -
14
+
15
# Converts an HTML fragment into Markdown by walking its REXML element tree.
#
# Usage:
#   markdown = ReverseMarkdown.new.parse_string("<h1>Title</h1>")
#
# Output, collected links and errors accumulate on the instance, so use a
# fresh instance for every document that should be converted independently.
class ReverseMarkdown

  # Tag names of list containers. Anchored so that unrelated tag names which
  # merely contain "ul" or "ol" are not treated as lists.
  LIST_CONTAINER = /\A(?:ul|ol)\z/

  # Sets up the conversion state:
  # - @li_counter:  number of the current list item (li) inside an ordered list (ol)
  # - @li_counters: counters of the enclosing ordered lists, saved while a nested ol is processed
  # - @links:       link targets, appended to the bottom of @output in 'reference style',
  #                 see http://daringfireball.net/projects/markdown/syntax#link
  # - @output:      the generated markdown code
  # - @indent:      indentation level for nested lists
  # - @errors:      problems found while parsing, e.g. unknown tags
  def initialize
    reset_state
  end

  # Converts the HTML in +string+ and returns the markdown code (@output).
  # To guarantee well-formed XML for REXML the input is wrapped in a <root>
  # element, which produces no output of its own.
  # After all elements are parsed, the 'reference style' links are appended.
  # Raises whatever REXML raises when the input is not well-formed.
  def parse_string(string)
    doc = REXML::Document.new("<root>\n#{string}\n</root>")
    parse_element(doc.root, :none)
    insert_links
    @output
  end

  # Parses an element and its children (recursively) and appends the markdown
  # code to @output. +parent+ is the tag name of the enclosing element as a symbol.
  # 1.  indent nested list items
  # 2.  add the markdown opening tag for this element
  # 3a. if the element only contains text, handle it like a text node
  # 3b. if the element is a container, handle its children, which may be text or element nodes
  # 4.  finally add the markdown ending tag for this element
  def parse_element(element, parent)
    name = element.name.to_sym
    # 1.
    @output << (" " * @indent) if name.eql?(:li)
    # 2.
    @output << opening(element, parent)

    # 3a.
    if element.has_text? && element.children.size < 2
      @output << text_node(element, parent)
    end

    # 3b.
    if element.has_elements?
      # a list directly inside a list item is a nested list: indent its items
      nested_list = parent.eql?(:li) && element.name =~ LIST_CONTAINER
      @indent += 1 if nested_list

      element.children.each do |child|
        if child.node_type.eql?(:element)
          parse_element(child, name)
        elsif parent.eql?(:blockquote)
          @output << child.to_s.gsub("\n ", "\n>")
        else
          @output << child.to_s
        end
      end

      @indent -= 1 if nested_list
    end

    # 4.
    @output << ending(element, parent)
  end

  # Returns the opening markdown tag for the element +type+.
  # Its +parent+ (tag name as symbol) matters sometimes!
  # Tags without a markdown equivalent are recorded in @errors and yield "".
  def opening(type, parent)
    case type.name.to_sym
    when :h1 then "# "
    when :h2 then "## "
    when :h3 then "### "
    when :h4 then "#### "
    when :h5 then "##### "
    when :h6 then "###### "
    when :li
      parent.eql?(:ul) ? " - " : " #{@li_counter += 1}. "
    when :ol
      # remember the numbering of the enclosing list, it is restored in ending()
      @li_counters.push(@li_counter)
      @li_counter = 0
      ""
    when :ul, :p, :pre, :root
      # containers without an opening marker of their own
      ""
    when :em
      "*"
    when :strong
      "**"
    when :blockquote
      # Remove the leading newline/whitespace. Only text nodes are touched, so a
      # quote that starts with an element (or is empty) no longer fails, and a
      # quote that starts with real text keeps that text.
      first = type.children.first
      first.value = first.value.sub(/\A\s+/, "") if first.is_a?(REXML::Text)
      "> "
    when :code
      # a markdown code block is indented by 4 spaces
      parent.eql?(:pre) ? "    " : "`"
    when :a
      "["
    when :img
      "!["
    when :hr
      "----------\n\n"
    else
      @errors << "unknown start tag: "+type.name.to_s
      ""
    end
  end

  # Returns the closing markdown tag for the element +type+, like opening().
  # Links and images register their target in @links and emit a reference to it.
  def ending(type, parent)
    case type.name.to_sym
    when :h1 then " #\n\n"
    when :h2 then " ##\n\n"
    when :h3 then " ###\n\n"
    when :h4 then " ####\n\n"
    when :h5 then " #####\n\n"
    when :h6 then " ######\n\n"
    when :p
      parent.eql?(:root) ? "\n\n" : "\n"
    when :ol
      # continue the numbering of the enclosing ordered list, if any
      @li_counter = @li_counters.pop || 0
      parent.eql?(:li) ? "" : "\n"
    when :ul
      parent.eql?(:li) ? "" : "\n"
    when :em
      "*"
    when :strong
      "**"
    when :li, :blockquote, :pre, :root
      ""
    when :code
      parent.eql?(:pre) ? "" : "`"
    when :a
      @links << type.attribute('href').to_s
      "][#{@links.size}] "
    when :img
      @links << type.attribute('src').to_s
      "#{type.attribute('alt')}][#{@links.size}] "
    else
      @errors << " unknown end tag: "+type.name.to_s
      ""
    end
  end

  # Returns the content of +element+, which should be just text.
  # For a code block every line is indented by 4 spaces.
  # For a block quotation continuation lines get a leading '>'.
  # Always returns a String, also when nothing needs to be replaced.
  def text_node(element, parent)
    text = element.text.to_s
    if element.name.to_sym.eql?(:code) && parent.eql?(:pre)
      text.gsub("\n", "\n    ") << "\n"
    elsif parent.eql?(:blockquote)
      # non-bang gsub: gsub! would return nil when no line break is present
      text.gsub("\n ", "\n>")
    else
      text
    end
  end

  # Appends the collected reference style link definitions to @output,
  # numbered in the order the links appeared. Returns @output.
  def insert_links
    @output << "\n"
    @links.each_with_index do |link, index|
      @output << " [#{index + 1}]: #{link}\n"
    end
    @output
  end

  # Prints all errors that occurred and have been written to @errors.
  def print_errors
    @errors.each do |error|
      puts error
    end
  end

  # Performs a benchmark: converts +string+ +n+ times, resetting the
  # conversion state before the first and after every run.
  def speed_benchmark(string, n)
    require 'benchmark' # loaded lazily, only needed for benchmarking
    reset_state
    Benchmark.bm(15) do |test|
      test.report("reverse markdown:") do
        n.times do
          parse_string(string)
          reset_state
        end
      end
    end
  end

  private

  # Puts all conversion state back to its initial, empty values.
  def reset_state
    @li_counter = 0
    @li_counters = []
    @links = []
    @output = ""
    @indent = 0
    @errors = []
  end

end
@@ -0,0 +1,72 @@
1
#!/usr/bin/env ruby

# Smoke test for ReverseMarkdown: converts the example HTML below and prints
# the resulting markdown to stdout. There are no assertions; inspect the output.

# Resolved relative to this file, so the script can be started from any
# working directory (not only from the gem root).
require_relative '../lib/reverse_markdown'

# Example HTML Code for parsing
example = <<-EOF
This text, though not within an element, should also be shown.

<h2>heading 1.1</h2>

<p>text *italic* and **bold**.</p>

<pre><code>text *italic* and **bold**.
sdfsdff
sdfsd
sdf sdfsdf
</code></pre>

<blockquote>
<p>text <em>italic</em> and <strong>bold</strong>. sdfsdff
sdfsd sdf sdfsdf</p>
</blockquote>

<p>asdasd <code>sdfsdfsdf</code> asdad <a href="http://www.bla.de">link text</a></p>

<p><a href="http://www.bla.de">link <strong>text</strong></a></p>

<ol>
<li>List item</li>
<li>List <em>item</em>
<ol><li>List item</li>
<li>dsfdsf
<ul><li>dfwe</li>
<li>dsfsdfsdf</li></ul></li>
<li>lidsf <img src="http://www.dfgdfg.de/dsf.jpe" alt="item" title="" /></li></ol></li>
<li>sdfsdfsdf
<ul><li>sdfsdfsdf</li>
<li>sdfsdfsdf <strong>sdfsdf</strong></li></ul></li>
</ol>

<blockquote>
<p>Lorem ipsum dolor sit amet, consetetur
voluptua. At vero eos et accusam et
justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata
sanctus est Lorem ipsum dolor sit
amet. <em>italic</em></p>
</blockquote>

<hr />

<blockquote>
<p>Lorem ipsum dolor sit amet, consetetur
sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et
dolore magna aliquyam erat, sed</p>
</blockquote>

This should also be shown, even if it's not wrapped in an element.

<p>nur ein text! nur eine maschine!</p>

This text should not be invisible!
EOF

r = ReverseMarkdown.new

puts r.parse_string(example)

# Uncomment to list tags the converter did not recognise:
#r.print_errors

# Uncomment to time 100 conversions of the example:
#r.speed_benchmark(example, 100)
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reverse_markdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - James Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-09 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email:
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/reverse_markdown.rb
21
+ - test/test.rb
22
+ homepage:
23
+ licenses: []
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 1.8.23
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: reverse_markdown
46
+ test_files:
47
+ - test/test.rb