reverse-markdown 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/reverse-markdown.rb +201 -0
  2. metadata +56 -0
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: reverse-markdown.rb
4
+
5
+ # it is forked from https://github.com/xijo/reverse-markdown/blob/master/reverse_markdown.rb
6
+ require 'rexml/document'
7
+ require 'benchmark'
8
+ include REXML
9
+ include Benchmark
10
+
11
# Converts a fragment of well-formed (X)HTML into Markdown.
#
# The fragment is parsed with REXML, walked recursively, and the resulting
# Markdown is accumulated in an internal buffer. Links and images are emitted
# in "reference style" (see
# http://daringfireball.net/projects/markdown/syntax#link): the text carries a
# numeric reference and the URLs are listed at the bottom of the output.
#
# State is NOT reset between calls to #parse_string; use a fresh instance per
# document unless accumulation is wanted.
class ReverseMarkdown

  # Tag names that form a list container (used to detect nested lists).
  LIST_TAGS = %w[ul ol].freeze

  # Messages collected for tags the converter does not know how to render.
  attr_reader :errors

  # Sets up the converter state:
  # - @li_counter: numbering of list items (li) inside an ordered list (ol)
  # - @links:      URLs collected for the reference-style link list
  # - @output:     the generated Markdown
  # - @indent:     nesting level used to indent nested list items
  # - @errors:     problems encountered, such as unknown tags
  def initialize
    reset_state
  end

  # Parses an HTML string and returns the Markdown accumulated in @output.
  # To guarantee a single root node for REXML the input is wrapped in a
  # <root> element, which has no effect on the output. After all elements
  # have been converted, the reference-style link list is appended.
  #
  # Raises REXML::ParseException if the input is not well-formed XML.
  def parse_string(string)
    doc = REXML::Document.new("<root>\n#{string}\n</root>")
    doc.root.elements.each { |element| parse_element(element, :root) }
    insert_links
    @output
  end

  # Converts an element and, recursively, its children, appending to @output.
  # 1.  indent nested list items
  # 2.  add the Markdown opening for this element
  # 3a. an element holding a single text node is rendered via #text_node
  # 3b. an element containing child elements has every child rendered in
  #     document order (text nodes verbatim, elements recursively)
  # 4.  add the Markdown ending for this element
  #
  # +parent+ is the tag name of the enclosing element as a Symbol.
  def parse_element(element, parent)
    name = element.name.to_sym

    # 1.
    @output << indent if name.eql?(:li)
    # 2.
    @output << opening(element, parent)

    # 3a.
    if element.has_text? && element.children.size < 2
      @output << text_node(element, parent)
    end

    # 3b.
    if element.has_elements?
      # Exact tag-name match: a list directly inside a list item is nested.
      nested_list = LIST_TAGS.include?(element.name) && parent.eql?(:li)
      @indent += 1 if nested_list

      element.children.each do |child|
        if child.node_type.eql?(:element)
          parse_element(child, name)
        elsif parent.eql?(:blockquote)
          @output << child.to_s.gsub("\n ", "\n>")
        else
          @output << child.to_s
        end
      end

      @indent -= 1 if nested_list
    end

    # 4.
    @output << ending(element, parent)
  end

  # Returns the Markdown that opens +type+ (a REXML element). The parent tag
  # matters for list items and code. Unknown tags are recorded in @errors and
  # produce an empty string.
  def opening(type, parent)
    case type.name.to_sym
    when :h1
      "# "
    when :h2
      "## "
    when :li
      parent.eql?(:ul) ? " - " : " #{@li_counter += 1}. "
    when :ol
      # every ordered list starts numbering from 1 again
      @li_counter = 0
      ""
    when :ul, :p, :pre
      ""
    when :em
      "*"
    when :strong
      "**"
    when :blockquote
      # Drop only the leading whitespace/newline of the quote. The first
      # child may be an element or missing, and real text must be kept.
      first = type.children.first
      first.value = first.value.lstrip if first.is_a?(REXML::Text)
      "> "
    when :code
      # Markdown code blocks are indented by four spaces.
      parent.eql?(:pre) ? "    " : "`"
    when :a
      "["
    when :img
      "!["
    when :hr
      "----------\n\n"
    else
      @errors << "unknown start tag: " + type.name.to_s
      ""
    end
  end

  # Returns the Markdown that closes +type+, mirroring #opening. Links and
  # images register their URL in @links and emit the reference number.
  def ending(type, parent)
    case type.name.to_sym
    when :h1
      " #\n\n"
    when :h2
      " ##\n\n"
    when :p
      parent.eql?(:root) ? "\n\n" : "\n"
    when :ol, :ul
      parent.eql?(:li) ? "" : "\n"
    when :em
      "*"
    when :strong
      "**"
    when :li, :blockquote, :hr, :pre
      ""
    when :code
      parent.eql?(:pre) ? "" : "`"
    when :a
      @links << type.attribute('href').to_s
      "][#{@links.size}] "
    when :img
      @links << type.attribute('src').to_s
      "#{type.attribute('alt')}][#{@links.size}] "
    else
      @errors << " unknown end tag: " + type.name.to_s
      ""
    end
  end

  # Returns the indentation for a list item: two spaces per nesting level.
  def indent
    "  " * @indent
  end

  # Returns the text content of +element+, which should contain only text.
  # Inside <pre><code> every line is indented by four spaces (the first line
  # gets its indent from #opening) and a newline is appended. Inside a
  # blockquote continuation lines get a leading '>'.
  def text_node(element, parent)
    text = element.text.to_s
    if element.name.to_sym.eql?(:code) && parent.eql?(:pre)
      text.gsub("\n", "\n    ") << "\n"
    elsif parent.eql?(:blockquote)
      # non-destructive gsub: gsub! would return nil when nothing matches
      text.gsub("\n ", "\n>")
    else
      text
    end
  end

  # Appends the collected reference-style links to @output, numbered from 1.
  def insert_links
    @output << "\n"
    @links.each_with_index do |link, index|
      @output << " [#{index + 1}]: #{link}\n"
    end
  end

  # Prints every error that occurred and was recorded in @errors.
  def print_errors
    @errors.each { |error| puts error }
  end

  # Benchmarks converting +string+ +n+ times, resetting the converter state
  # before the run and after every conversion.
  def speed_benchmark(string, n)
    reset_state
    Benchmark.bm(15) do |test|
      test.report("reverse markdown:") do
        n.times do
          parse_string(string)
          reset_state
        end
      end
    end
  end

  private

  # Puts the converter back into its pristine, empty state.
  def reset_state
    @li_counter = 0
    @links = []
    @output = +""
    @indent = 0
    @errors = []
  end

end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reverse-markdown
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - James Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-08-17 00:00:00 +01:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description:
18
+ email:
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - lib/reverse-markdown.rb
27
+ has_rdoc: true
28
+ homepage:
29
+ licenses: []
30
+
31
+ post_install_message:
32
+ rdoc_options: []
33
+
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ requirements: []
49
+
50
+ rubyforge_project:
51
+ rubygems_version: 1.5.2
52
+ signing_key:
53
+ specification_version: 3
54
+ summary: reverse-markdown
55
+ test_files: []
56
+