reverse-markdown 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/reverse-markdown.rb +201 -0
  2. metadata +56 -0
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: reverse-markdown.rb
4
+
5
+ # it is forked from https://github.com/xijo/reverse-markdown/blob/master/reverse_markdown.rb
6
+ require 'rexml/document'
7
+ require 'benchmark'
8
+ include REXML
9
+ include Benchmark
10
+
11
# Converts a (well-formed) HTML fragment into Markdown.
#
# Links and images are emitted in Markdown 'reference style': the target is
# replaced by a numeric reference in the text and the numbered targets are
# listed at the bottom of the output, see
# http://daringfireball.net/projects/markdown/syntax#link
#
# The converter is stateful: output, links and errors accumulate across calls
# to parse_string on the same instance. Use a fresh instance per document.
class ReverseMarkdown

  # Tag names that open a list; used to detect nested lists.
  LIST_TAGS = [:ul, :ol].freeze

  # One nesting level of list indentation (two spaces).
  INDENT_UNIT = "  ".freeze

  # Markdown code blocks are marked by indenting every line by four spaces.
  CODE_INDENT = "    ".freeze

  # Errors collected while parsing (e.g. unknown tags), in order of appearance.
  attr_reader :errors

  # Set the basic variables:
  # - @li_counter: numbering of list item (li) tags in an ordered list (ol)
  # - @links:      link targets, appended to the bottom of @output
  # - @output:     the generated markdown code
  # - @indent:     indentation level for nested lists
  # - @errors:     errors such as unknown tags go into this array
  def initialize
    reset_state
  end

  # Invokes the HTML parsing by using a string. Returns the markdown code in @output.
  # To guarantee well-formed XML for REXML the string is wrapped in a <root>
  # element, which itself produces no output.
  # After parsing all elements, the 'reference style' links are inserted.
  def parse_string(string)
    doc = REXML::Document.new("<root>\n#{string}\n</root>")
    doc.root.elements.each { |element| parse_element(element, :root) }
    insert_links
    @output
  end

  # Parses an element and its children (recursively) and writes the markdown code to @output.
  # 1.  indent nested list items
  # 2.  add the markdown opening tag for this element
  # 3a. if the element only contains text, handle it like a text node
  # 3b. if the element is a container, handle its children (text or element nodes)
  # 4.  finally add the markdown ending tag for this element
  #
  # +parent+ is the tag name of the enclosing element as a symbol (:root at top level).
  def parse_element(element, parent)
    name = element.name.to_sym
    # 1.
    @output << indent if name == :li
    # 2.
    @output << opening(element, parent)

    # 3a.
    if element.has_text? && element.children.size < 2
      @output << text_node(element, parent)
    end

    # 3b.
    if element.has_elements?
      # A list directly inside a list item is one level deeper for all its children.
      nested_list = LIST_TAGS.include?(name) && parent == :li
      @indent += 1 if nested_list

      element.children.each do |child|
        if child.node_type == :element
          parse_element(child, name)
        elsif parent == :blockquote
          @output << child.to_s.gsub("\n ", "\n>")
        else
          @output << child.to_s
        end
      end

      @indent -= 1 if nested_list
    end

    # 4.
    @output << ending(element, parent)
  end

  # Returns the opening markdown tag for the element. Its parent matters sometimes!
  # Unknown tags yield an empty string and are recorded in @errors.
  def opening(type, parent)
    case type.name.to_sym
    when :h1
      "# "
    when :h2
      "## "
    when :li
      parent == :ul ? " - " : " #{@li_counter += 1}. "
    when :ol
      # every ordered list starts counting from one again
      @li_counter = 0
      ""
    when :ul, :p, :pre
      ""
    when :em
      "*"
    when :strong
      "**"
    when :blockquote
      # Remove the leading newline/whitespace, but only when the first child
      # really is a text node; an element child has no value to blank out.
      first = type.children.first
      first.value = first.value.lstrip if first.is_a?(REXML::Text)
      "> "
    when :code
      parent == :pre ? CODE_INDENT : "`"
    when :a
      "["
    when :img
      "!["
    when :hr
      "----------\n\n"
    else
      @errors << "unknown start tag: "+type.name.to_s
      ""
    end
  end

  # Returns the closing markdown tag, like opening().
  # For links and images the target is stored in @links and referenced by number.
  def ending(type, parent)
    case type.name.to_sym
    when :h1
      " #\n\n"
    when :h2
      " ##\n\n"
    when :p
      parent == :root ? "\n\n" : "\n"
    when :ol, :ul
      parent == :li ? "" : "\n"
    when :em
      "*"
    when :strong
      "**"
    when :li, :blockquote, :hr, :pre
      ""
    when :code
      parent == :pre ? "" : "`"
    when :a
      @links << type.attribute('href').to_s
      "][#{@links.size}] "
    when :img
      @links << type.attribute('src').to_s
      "#{type.attribute('alt')}][#{@links.size}] "
    else
      @errors << " unknown end tag: "+type.name.to_s
      ""
    end
  end

  # Returns the indentation for the current list nesting level:
  # two spaces, @indent times.
  def indent
    INDENT_UNIT * @indent
  end

  # Returns the content of the element, which should be just text.
  # If it is a code block, every line is indented by four spaces.
  # For block quotations continuation lines get a leading '>'.
  def text_node(element, parent)
    text = element.text.to_s
    if element.name.to_sym == :code && parent == :pre
      "#{text.gsub("\n", "\n#{CODE_INDENT}")}\n"
    elsif parent == :blockquote
      # non-destructive gsub: gsub! returns nil when nothing was replaced
      text.gsub("\n ", "\n>")
    else
      text
    end
  end

  # Inserts the collected reference style links at the end of @output.
  def insert_links
    @output << "\n"
    @links.each_with_index do |link, index|
      @output << " [#{index + 1}]: #{link}\n"
    end
  end

  # Prints out all errors that occurred and have been written to @errors.
  def print_errors
    @errors.each { |error| puts error }
  end

  # Performs a benchmark, converting the given string n times.
  # The converter state is reset before the run and after every conversion.
  def speed_benchmark(string, n)
    reset_state
    Benchmark.bm(15) do |test|
      test.report("reverse markdown:") do
        n.times do
          parse_string(string)
          reset_state
        end
      end
    end
  end

  private

  # Puts the converter back into its pristine state.
  def reset_state
    @li_counter = 0
    @links = []
    @output = ""
    @indent = 0
    @errors = []
  end

end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reverse-markdown
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - James Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-08-17 00:00:00 +01:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description:
18
+ email:
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - lib/reverse-markdown.rb
27
+ has_rdoc: true
28
+ homepage:
29
+ licenses: []
30
+
31
+ post_install_message:
32
+ rdoc_options: []
33
+
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ requirements: []
49
+
50
+ rubyforge_project:
51
+ rubygems_version: 1.5.2
52
+ signing_key:
53
+ specification_version: 3
54
+ summary: reverse-markdown
55
+ test_files: []
56
+