markitdown 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in markitdown.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Christopher Petersen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,112 @@
1
+ # Markitdown
2
+
3
+ Markitdown is a Ruby library that converts HTML to Markdown. It's powered by Nokogiri. It supports:
4
+
5
+ * Ordered and unordered lists
6
+ * Nested lists
7
+ * Blockquotes
8
+ * Lists (and nested lists) inside of blockquotes
9
+ * Images
10
+ * Links
11
+
12
+ As well as other tags.
13
+
14
+ ## Installation
15
+
16
+ Add this line to your application's Gemfile:
17
+
18
+ gem 'markitdown'
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install markitdown
27
+
28
+ ## Usage
29
+
30
+ To convert HTML to Markdown:
31
+
32
+ ```ruby
33
+ Markitdown.from_html(html)
34
+ ```
35
+
36
+ `Markitdown` uses Nokogiri internally. If you already have a Nokogiri node, you can pass it to `from_nokogiri` directly:
37
+
38
+ ```ruby
39
+ Markitdown.from_nokogiri(nokogiri_node)
40
+ ```
41
+
42
+ ## Example
43
+
44
+ From the specs:
45
+
46
+ ### HTML
47
+ ```html
48
+ <html>
49
+ <head>
50
+ <title>Test Document</title>
51
+ </head>
52
+ <body>
53
+ <h1>Main Header</h1>
54
+ <p>
55
+ This <em>is</em> a <b>test</b>. It includes a <a href="http://www.google.com">link</a> as well as an image <img src="https://www.google.com/images/srpr/logo3w.png" alt="Google Logo" />
56
+ <ul>
57
+ <li>bullet 1</li>
58
+ <li>bullet 2</li>
59
+ <li>bullet 3</li>
60
+ </ul>
61
+ </p>
62
+ <hr/>
63
+ <h2>Subheader</h2>
64
+ <p>
65
+ This is paragraph two.
66
+ <ol>
67
+ <li>bullet 1</li>
68
+ <ul>
69
+ <li>Sub-bullet 1 <a href="http://github.com">Nested link</a>.</li>
70
+ </ul>
71
+ <li>bullet 2</li>
72
+ <li>bullet 3</li>
73
+ </ol>
74
+ </p>
75
+ </body>
76
+ </html>
77
+ ```
78
+
79
+ Gets converted to the following Markdown:
80
+
81
+ ```md
82
+
83
+
84
+ # Main Header
85
+
86
+ This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
87
+
88
+ * bullet 1
89
+ * bullet 2
90
+ * bullet 3
91
+
92
+ ***
93
+
94
+ ## Subheader
95
+
96
+ This is paragraph two.
97
+
98
+ 1. bullet 1
99
+ * Sub-bullet 1 [Nested link](http://github.com).
100
+ 1. bullet 2
101
+ 1. bullet 3
102
+
103
+
104
+ ```
105
+
106
+ ## Contributing
107
+
108
+ 1. Fork it
109
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
110
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
111
+ 4. Push to the branch (`git push origin my-new-feature`)
112
+ 5. Create new Pull Request
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task :test => :spec
8
+ task :default => :spec
@@ -0,0 +1,166 @@
1
+ require "markitdown/version"
2
+ require "nokogiri"
3
+
4
# Converts HTML into Markdown by walking a Nokogiri node tree.
module Markitdown
  # Tags whose whole subtree is left out of the output.
  SKIPPED_TAGS = %w[head title style].freeze

  # Markdown marker written in front of each heading level.
  HEADING_MARKERS = {
    "h1" => "# ",
    "h2" => "## ",
    "h3" => "### ",
    "h4" => "#### ",
    "h5" => "##### ",
    "h6" => "###### "
  }.freeze

  # Tags that open a list.
  LIST_TAGS = %w[ol ul].freeze

  # Ancestor tags that make an enclosed list count as nested.
  NESTING_TAGS = %w[ul ol blockquote].freeze

  # Parses +html+ with Nokogiri's XML parser and converts the document's root
  # element to Markdown.
  #
  # @param html [String] markup to convert
  # @return [String] the Markdown text ("" when the input has no root element)
  def self.from_html(html)
    from_nokogiri(Nokogiri::XML(html).root)
  end

  # Converts an already parsed node (and its subtree) to Markdown.
  #
  # @param node [#name, #children, #attributes, #text, nil] node to convert
  # @return [String] the Markdown text; "" when +node+ is nil
  def self.from_nokogiri(node)
    # A document without a root element yields nil; there is nothing to render.
    return "" if node.nil?

    parse_node(node).flatten.compact.join
      .gsub(/\n\s+\n/, "\n\n")   # blank out lines that hold nothing but whitespace
      .gsub(/\n{2,}/, "\n\n")    # collapse any run of two or more newlines down to two
      .gsub(/\t+/, " ")          # divs and spans are padded with tabs; nested ones end up as a single space
      .gsub(/ ([\.\?])/, '\1')   # drop the space before a period or question mark (e.g. after a link)
  end

  # NOTE: the helpers below are internal. They are defined with `def self.`,
  # which a bare `private` keyword does not affect, so they stay callable from
  # outside the module exactly as before.

  # Recursively renders +node+ and its children.
  #
  # @param node [#name, #children, #attributes, #text] node to render
  # @param states [Array<String>] downcased tag names of the enclosing nodes,
  #   nearest first; the node's own tag is pushed for the duration of the call
  #   and popped again before returning
  # @return [Array] nested array of String/nil fragments, meant to be
  #   flattened, compacted and joined by the caller
  def self.parse_node(node, states = [])
    tag = node.name.downcase
    # Skip ignored subtrees before touching +states+ so the stack never leaks.
    return [] if SKIPPED_TAGS.include?(tag)

    states.unshift(tag)
    pre = prefix(states)
    results = []
    after = nil
    strip_content = false

    case tag
    when "div", "span"
      results << "\t"
      after = "\t"
    when "p"
      results << newline(pre, nil, 2)
      after = newline(pre, nil, 2)
    when *HEADING_MARKERS.keys
      results << newline(pre, nil, 2)
      results << HEADING_MARKERS[tag]
      after = newline(pre, nil, 2)
    when "hr"
      results << newline(pre, nil, 2)
      results << "***"
      results << newline(pre, nil, 2)
    when "br"
      results << newline(pre, nil, 2)
    when "em", "i"
      results << " *"
      after = "* "
    when "strong", "b"
      results << " **"
      after = "** "
    when "blockquote"
      results << pre
      after = "\n"
    when "ol", "ul"
      # Only a top-level list gets surrounding line breaks; nested lists are
      # laid out by their items alone.
      unless nested_list?(states)
        results << newline(pre, nil)
        after = "\n"
      end
    when "li"
      results << "\n"
      results << pre
    when "a"
      results << " ["
      # An anchor may lack href; emit an empty target, like img does for src.
      href = node.attributes["href"]
      after = "](#{href ? href.value : ''}) "
      strip_content = true
    when "img"
      results << " !["
      results << node.attributes["alt"].value if node.attributes["alt"]
      results << "]("
      results << node.attributes["src"].value if node.attributes["src"]
      results << ") "
    when "text"
      results << node.text.strip.gsub("\n", "").gsub(/ {2,}/, " ")
    end

    node.children.each do |child|
      contents = parse_node(child, states)
      # Link text is joined and trimmed per child so it sits flush in the brackets.
      contents = contents.flatten.compact.join.strip if strip_content
      results << contents
    end

    results << after
    states.shift
    results
  end

  # Tells whether the current node sits inside a list or blockquote.
  #
  # @param states [Array<String>] tag stack, current node first
  # @return [Boolean] true when any entry after the first is ul, ol or blockquote
  def self.nested_list?(states)
    states.drop(1).any? { |state| NESTING_TAGS.include?(state) }
  end

  # Builds +count+ line endings, each preceded by the prefix and line content.
  #
  # @param pre [Array<String>] prefix fragments for the current nesting
  # @param line [String, nil] content placed between the prefix and the newline
  # @param count [Integer] how many lines to produce
  # @return [Array] +count+ repetitions of pre, line, "\n"
  def self.newline(pre, line, count = 1)
    Array.new(count) { [pre, line, "\n"] }.flatten(1)
  end

  # Computes the line prefix (blockquote marker, list padding, list bullet)
  # for the node on top of +states+.
  #
  # @param states [Array<String>] tag stack, current node first
  # @return [Array<String>] prefix fragments, outermost first
  def self.prefix(states)
    markers = []
    states.each_with_index do |state, depth|
      markers.unshift(" > ") if state == "blockquote"
      # The current node itself (depth 0) never contributes list markup.
      next if depth.zero? || !LIST_TAGS.include?(state)

      if depth > 1
        # A list further up the stack contributes padding only.
        markers.unshift(" ")
      elsif states.first == "li"
        # The direct parent list decides the bullet of a list item.
        markers.unshift(state == "ol" ? " 1. " : " * ")
      end
    end
    markers
  end
end
@@ -0,0 +1,3 @@
1
module Markitdown
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,21 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for markitdown. The version is read from
# lib/markitdown/version.rb so it is declared in one place only.
require File.expand_path('../lib/markitdown/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "markitdown"
  gem.version       = Markitdown::VERSION
  gem.authors       = ["Christopher Petersen"]
  gem.email         = ["christopher.petersen@gmail.com"]
  gem.description   = %q{A small library that uses Nokogiri to parse an HTML file and produce Markdown}
  gem.summary       = %q{Converts HTML to Markdown}
  gem.homepage      = ""
  # The bundled LICENSE file is the MIT license; declare it in the gem metadata.
  gem.license       = "MIT"

  gem.add_dependency('nokogiri')
  gem.add_development_dependency('rake')
  gem.add_development_dependency('rspec')

  # Split on newlines explicitly: `$\` is nil by default, which makes
  # String#split fall back to splitting on any whitespace and would break
  # paths that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,29 @@
1
+ <html>
2
+ <head>
3
+ <title>Test Document</title>
4
+ </head>
5
+ <body>
6
+ <h1>Main Header</h1>
7
+ <p>
8
+ This <em>is</em> a <b>test</b>. It includes a <a href="http://www.google.com">link</a> as well as an image <img src="https://www.google.com/images/srpr/logo3w.png" alt="Google Logo" />
9
+ <ul>
10
+ <li>bullet 1</li>
11
+ <li>bullet 2</li>
12
+ <li>bullet 3</li>
13
+ </ul>
14
+ </p>
15
+ <hr/>
16
+ <h2>Subheader</h2>
17
+ <p>
18
+ This is paragraph two.
19
+ <ol>
20
+ <li>bullet 1</li>
21
+ <ul>
22
+ <li>Sub-bullet 1 <a href="http://github.com">Nested link</a>.</li>
23
+ </ul>
24
+ <li>bullet 2</li>
25
+ <li>bullet 3</li>
26
+ </ol>
27
+ </p>
28
+ </body>
29
+ </html>
@@ -0,0 +1,32 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "When parsing a document" do
5
+ let(:html) { File.read("spec/doc.html") }
6
+
7
+ it "should produce valid markdown" do
8
+ Markitdown.from_html(html).should == "
9
+
10
+ # Main Header
11
+
12
+ This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
13
+
14
+ * bullet 1
15
+ * bullet 2
16
+ * bullet 3
17
+
18
+ ***
19
+
20
+ ## Subheader
21
+
22
+ This is paragraph two.
23
+
24
+ 1. bullet 1
25
+ * Sub-bullet 1 [Nested link](http://github.com).
26
+ 1. bullet 2
27
+ 1. bullet 3
28
+
29
+ "
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,130 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "when parsing nested ordered lists" do
5
+ let(:html) { "
6
+ <ol>
7
+ <li>line 1.1</li>
8
+ <ol>
9
+ <li>line 2.1</li>
10
+ <li>line 2.2</li>
11
+ <ol>
12
+ <li>line 3.1</li>
13
+ <li>line 3.2</li>
14
+ </ol>
15
+ </ol>
16
+ <li>line 1.2</li>
17
+ </ol>"
18
+ }
19
+ it "should return valid markdown" do
20
+ Markitdown.from_html(html).should == "
21
+
22
+ 1. line 1.1
23
+ 1. line 2.1
24
+ 1. line 2.2
25
+ 1. line 3.1
26
+ 1. line 3.2
27
+ 1. line 1.2
28
+ "
29
+ end
30
+ end
31
+
32
+ context "when parsing nested unordered lists" do
33
+ let(:html) { "
34
+ <ul>
35
+ <li>line 1.1</li>
36
+ <ul>
37
+ <li>line 2.1</li>
38
+ <li>line 2.2</li>
39
+ <ul>
40
+ <li>line 3.1</li>
41
+ <li>line 3.2</li>
42
+ </ul>
43
+ </ul>
44
+ <li>line 1.2</li>
45
+ </ul>"
46
+ }
47
+ it "should return valid markdown" do
48
+ Markitdown.from_html(html).should == "
49
+
50
+ * line 1.1
51
+ * line 2.1
52
+ * line 2.2
53
+ * line 3.1
54
+ * line 3.2
55
+ * line 1.2
56
+ "
57
+ end
58
+ end
59
+
60
+ context "when parsing nested ordered and unordered lists" do
61
+ let(:html) { "
62
+ <ul>
63
+ <li>line 1.1</li>
64
+ <ol>
65
+ <li>line 2.1</li>
66
+ <li>line 2.2</li>
67
+ <ul>
68
+ <li>line 3.1</li>
69
+ <li>line 3.2</li>
70
+ </ul>
71
+ </ol>
72
+ <li>line 1.2</li>
73
+ </ul>"
74
+ }
75
+ it "should return valid markdown" do
76
+ Markitdown.from_html(html).should == "
77
+
78
+ * line 1.1
79
+ 1. line 2.1
80
+ 1. line 2.2
81
+ * line 3.1
82
+ * line 3.2
83
+ * line 1.2
84
+ "
85
+ end
86
+ end
87
+
88
+ context "when parsing an unordered list nested under a blockquote" do
89
+ let(:html) { "
90
+ <blockquote>
91
+ This is a quote with a list
92
+ <ul>
93
+ <li>item 1</li>
94
+ <li>item 2</li>
95
+ </ul>
96
+ </blockquote>" }
97
+ it "should return valid markdown" do
98
+ Markitdown.from_html(html).should ==
99
+ " > This is a quote with a list
100
+ > * item 1
101
+ > * item 2
102
+ "
103
+ end
104
+ end
105
+
106
+
107
+ context "when parsing nested lists with links nested under a blockquote" do
108
+ let(:html) { "
109
+ <blockquote>
110
+ This is a quote with a list
111
+ <ul>
112
+ <li>item <a href='http://www.google.com'>1.1</a></li>
113
+ <ol>
114
+ <li>item <a href='http://www.google.com'>2.1</a></li>
115
+ <li>item 2.2</li>
116
+ </ol>
117
+ <li>item 1.2</li>
118
+ </ul>
119
+ </blockquote>" }
120
+ it "should return valid markdown" do
121
+ Markitdown.from_html(html).should ==
122
+ " > This is a quote with a list
123
+ > * item [1.1](http://www.google.com)
124
+ > 1. item [2.1](http://www.google.com)
125
+ > 1. item 2.2
126
+ > * item 1.2
127
+ "
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,209 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "When parsing a paragraph" do
5
+ let(:html) { "<p>This is a paragraph</p>" }
6
+
7
+ it "should return valid markdown" do
8
+ Markitdown.from_html(html).should == "\n\nThis is a paragraph\n\n"
9
+ end
10
+ end
11
+
12
+ context "When parsing an H1" do
13
+ let(:html) { "<h1>This is a test</h1>" }
14
+
15
+ it "should return valid markdown" do
16
+ Markitdown.from_html(html).should == "\n\n# This is a test\n\n"
17
+ end
18
+ end
19
+
20
+ context "When parsing an H2" do
21
+ let(:html) { "<h2>This is a test</h2>" }
22
+
23
+ it "should return valid markdown" do
24
+ Markitdown.from_html(html).should == "\n\n## This is a test\n\n"
25
+ end
26
+ end
27
+
28
+ context "When parsing an H3" do
29
+ let(:html) { "<h3>This is a test</h3>" }
30
+
31
+ it "should return valid markdown" do
32
+ Markitdown.from_html(html).should == "\n\n### This is a test\n\n"
33
+ end
34
+ end
35
+
36
+ context "When parsing an H4" do
37
+ let(:html) { "<h4>This is a test</h4>" }
38
+
39
+ it "should return valid markdown" do
40
+ Markitdown.from_html(html).should == "\n\n#### This is a test\n\n"
41
+ end
42
+ end
43
+
44
+ context "When parsing an H5" do
45
+ let(:html) { "<h5>This is a test</h5>" }
46
+
47
+ it "should return valid markdown" do
48
+ Markitdown.from_html(html).should == "\n\n##### This is a test\n\n"
49
+ end
50
+ end
51
+
52
+ context "When parsing an H6" do
53
+ let(:html) { "<h6>This is a test</h6>" }
54
+
55
+ it "should return valid markdown" do
56
+ Markitdown.from_html(html).should == "\n\n###### This is a test\n\n"
57
+ end
58
+ end
59
+
60
+ context "When parsing an HR" do
61
+ let(:html) { "<hr/>" }
62
+
63
+ it "should return valid markdown" do
64
+ Markitdown.from_html(html).should == "\n\n***\n\n"
65
+ end
66
+ end
67
+
68
+ context "When parsing an BR" do
69
+ let(:html) { "<br/>" }
70
+
71
+ it "should return valid markdown" do
72
+ Markitdown.from_html(html).should == "\n\n"
73
+ end
74
+ end
75
+
76
+ context "When parsing an EM element" do
77
+ let(:html) { "<em>emphasis added</em>" }
78
+
79
+ it "should return valid markdown" do
80
+ Markitdown.from_html(html).should == " *emphasis added* "
81
+ end
82
+ end
83
+
84
+ context "When parsing an italicized element" do
85
+ let(:html) { "<i>italics added</i>" }
86
+
87
+ it "should return valid markdown" do
88
+ Markitdown.from_html(html).should == " *italics added* "
89
+ end
90
+ end
91
+
92
+ context "When parsing a strong element" do
93
+ let(:html) { "<strong>strong added</strong>" }
94
+
95
+ it "should return valid markdown" do
96
+ Markitdown.from_html(html).should == " **strong added** "
97
+ end
98
+ end
99
+
100
+ context "When parsing a bold element" do
101
+ let(:html) { "<b>bold added</b>" }
102
+
103
+ it "should return valid markdown" do
104
+ Markitdown.from_html(html).should == " **bold added** "
105
+ end
106
+ end
107
+
108
+ context "When parsing a bold element that's followed by a punctuation" do
109
+ let(:html) { "<html><b>bold added</b>.</html>" }
110
+
111
+ it "should return valid markdown without a space" do
112
+ Markitdown.from_html(html).should == " **bold added**."
113
+ end
114
+ end
115
+
116
+ context "When parsing a em element that's followed by a punctuation" do
117
+ let(:html) { "<html><em>emphasis added</em>?</html>" }
118
+
119
+ it "should return valid markdown without a space" do
120
+ Markitdown.from_html(html).should == " *emphasis added*?"
121
+ end
122
+ end
123
+
124
+ context "When parsing an OL" do
125
+ let(:html) { "<ol>
126
+ <li>first bullet</li>
127
+ <li>second bullet</li>
128
+ <li>third bullet</li>
129
+ </ol>"
130
+ }
131
+ it "should return valid markdown" do
132
+ Markitdown.from_html(html).should == "
133
+
134
+ 1. first bullet
135
+ 1. second bullet
136
+ 1. third bullet
137
+ "
138
+ end
139
+ end
140
+
141
+ context "When parsing an UL" do
142
+ let(:html) { "<ul>
143
+ <li>first bullet</li>
144
+ <li>second bullet</li>
145
+ <li>third bullet</li>
146
+ </ul>"
147
+ }
148
+ it "should return valid markdown" do
149
+ Markitdown.from_html(html).should == "
150
+
151
+ * first bullet
152
+ * second bullet
153
+ * third bullet
154
+ "
155
+ end
156
+ end
157
+
158
# A link is rendered as [text](href), padded with a space on each side.
context "When parsing a link" do
  # The fixture closes the anchor with </a>; the previous </strong> was a
  # mismatched tag that only passed because the parser recovers from it.
  let(:html) { "<a href='http://www.google.com'>this is a link</a>" }

  it "should return valid markdown" do
    Markitdown.from_html(html).should == " [this is a link](http://www.google.com) "
  end
end
165
+
166
+ context "When parsing an image" do
167
+ let(:html) { "<img src='https://www.google.com/images/srpr/logo3w.png' alt='Google Logo'>" }
168
+
169
+ it "should return valid markdown" do
170
+ Markitdown.from_html(html).should == " ![Google Logo](https://www.google.com/images/srpr/logo3w.png) "
171
+ end
172
+ end
173
+
174
+ context "When parsing an image without an alt tag" do
175
+ let(:html) { "<img src='https://www.google.com/images/srpr/logo3w.png'>" }
176
+
177
+ it "should return valid markdown" do
178
+ Markitdown.from_html(html).should == " ![](https://www.google.com/images/srpr/logo3w.png) "
179
+ end
180
+ end
181
+
182
+ context "When parsing a style block" do
183
+ let(:html) { "<style>div.whatever { font-weight: bold; }</style>" }
184
+
185
+ it "should ignore it" do
186
+ Markitdown.from_html(html).should == ""
187
+ end
188
+ end
189
+
190
+ context "When parsing a blockquote" do
191
+ let(:html) { "<blockquote>this is a block quote</blockquote>" }
192
+
193
+ it "should return valid markdown" do
194
+ Markitdown.from_html(html).should == " > this is a block quote\n"
195
+ end
196
+ end
197
+
198
+ context "When parsing a multi line blockquote" do
199
+ let(:html) { "<blockquote>
200
+ line 1
201
+ line 2
202
+ line 3
203
+ </blockquote>" }
204
+
205
+ it "should return valid markdown" do
206
+ Markitdown.from_html(html).should == " > line 1 line 2 line 3\n"
207
+ end
208
+ end
209
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markitdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Christopher Petersen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: A small library that uses Nokogiri to parse an HTML file and produce
63
+ Markdown
64
+ email:
65
+ - christopher.petersen@gmail.com
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - .gitignore
71
+ - Gemfile
72
+ - LICENSE
73
+ - README.md
74
+ - Rakefile
75
+ - lib/markitdown.rb
76
+ - lib/markitdown/version.rb
77
+ - markitdown.gemspec
78
+ - spec/doc.html
79
+ - spec/doc_spec.rb
80
+ - spec/nesting_spec.rb
81
+ - spec/tag_spec.rb
82
+ homepage: ''
83
+ licenses: []
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ segments:
95
+ - 0
96
+ hash: 4314622301527767866
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ segments:
104
+ - 0
105
+ hash: 4314622301527767866
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 1.8.24
109
+ signing_key:
110
+ specification_version: 3
111
+ summary: Converts HTML to Markdown
112
+ test_files:
113
+ - spec/doc.html
114
+ - spec/doc_spec.rb
115
+ - spec/nesting_spec.rb
116
+ - spec/tag_spec.rb