markitdown 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org' # the only gem source declared for this bundle
2
+
3
+ # Specify your gem's dependencies in markitdown.gemspec
4
+ gemspec # dependencies are declared in markitdown.gemspec rather than listed here
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Christopher Petersen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,112 @@
1
+ # Markitdown
2
+
3
+ Markitdown is a Ruby library that converts HTML to Markdown. It's powered by Nokogiri. It supports:
4
+
5
+ * Ordered and unordered lists
6
+ * Nested lists
7
+ * Blockquotes
8
+ * Lists (and nested lists) inside of blockquotes
9
+ * Images
10
+ * Links
11
+
12
+ It also handles other common tags, such as headings, paragraphs, emphasis, line breaks and horizontal rules.
13
+
14
+ ## Installation
15
+
16
+ Add this line to your application's Gemfile:
17
+
18
+ gem 'markitdown'
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install markitdown
27
+
28
+ ## Usage
29
+
30
+ To convert HTML to Markdown:
31
+
32
+ ```ruby
33
+ Markitdown.from_html(html)
34
+ ```
35
+
36
+ ```Markitdown``` uses Nokogiri internally. If you already have a Nokogiri node, you can pass it straight to ```from_nokogiri```:
37
+
38
+ ```ruby
39
+ Markitdown.from_nokogiri(nokogiri_node)
40
+ ```
41
+
42
+ ## Example
43
+
44
+ From the specs:
45
+
46
+ ### HTML
47
+ ```html
48
+ <html>
49
+ <head>
50
+ <title>Test Document</title>
51
+ </head>
52
+ <body>
53
+ <h1>Main Header</h1>
54
+ <p>
55
+ This <em>is</em> a <b>test</b>. It includes a <a href="http://www.google.com">link</a> as well as an image <img src="https://www.google.com/images/srpr/logo3w.png" alt="Google Logo" />
56
+ <ul>
57
+ <li>bullet 1</li>
58
+ <li>bullet 2</li>
59
+ <li>bullet 3</li>
60
+ </ul>
61
+ </p>
62
+ <hr/>
63
+ <h2>Subheader</h2>
64
+ <p>
65
+ This is paragraph two.
66
+ <ol>
67
+ <li>bullet 1</li>
68
+ <ul>
69
+ <li>Sub-bullet 1 <a href="http://github.com">Nested link</a>.</li>
70
+ </ul>
71
+ <li>bullet 2</li>
72
+ <li>bullet 3</li>
73
+ </ol>
74
+ </p>
75
+ </body>
76
+ </html>
77
+ ```
78
+
79
+ Gets converted to the following Markdown:
80
+
81
+ ```md
82
+
83
+
84
+ # Main Header
85
+
86
+ This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
87
+
88
+ * bullet 1
89
+ * bullet 2
90
+ * bullet 3
91
+
92
+ ***
93
+
94
+ ## Subheader
95
+
96
+ This is paragraph two.
97
+
98
+ 1. bullet 1
99
+ * Sub-bullet 1 [Nested link](http://github.com).
100
+ 1. bullet 2
101
+ 1. bullet 3
102
+
103
+
104
+ ```
105
+
106
+ ## Contributing
107
+
108
+ 1. Fork it
109
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
110
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
111
+ 4. Push to the branch (`git push origin my-new-feature`)
112
+ 5. Create new Pull Request
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks" # presumably supplies Bundler's gem packaging tasks — confirm in the Bundler docs
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec) # registers the RSpec rake task under the name :spec
6
+
7
+ task :test => :spec # `rake test` has :spec as its prerequisite
8
+ task :default => :spec # a bare `rake` has :spec as its prerequisite too
@@ -0,0 +1,166 @@
1
+ require "markitdown/version"
2
+ require "nokogiri"
3
+
4
# Converts HTML into Markdown by walking a Nokogiri node tree.
#
# +from_html+ and +from_nokogiri+ are the entry points. The remaining
# singleton methods are implementation helpers. The original file put a bare
# +private+ in front of them, but +private+ does not apply to +def self.+
# methods, so they have always been publicly callable; they stay that way
# here so existing callers keep working.
module Markitdown
  # Elements that are dropped together with everything inside them.
  SKIPPED_TAGS = ["head", "title", "style"].freeze

  # Ancestors that make +nested_list?+ report a list as nested.
  LIST_CONTAINERS = ["ul", "ol", "blockquote"].freeze

  # Converts an HTML string to Markdown.
  #
  # html - String of markup; it is parsed with Nokogiri::XML.
  #
  # Returns the Markdown String ("" when parsing yields no root node).
  def self.from_html(html)
    from_nokogiri(Nokogiri::XML(html).root)
  end

  # Converts an already-parsed node (and its subtree) to Markdown.
  #
  # node - a Nokogiri node, or any object answering +name+, +children+,
  #        +attributes+ and +text+ the same way. May be nil.
  #
  # Returns the Markdown String; a nil node gives "".
  def self.from_nokogiri(node)
    return "" if node.nil?

    markdown = parse_node(node).flatten.compact.join
    # Remove lines with nothing but space characters.
    markdown = markdown.gsub(/\n\s+\n/, "\n\n")
    # Collapse any series of more than 2 new lines down to 2.
    markdown = markdown.gsub(/\n{2,}/, "\n\n")
    # Collapse consecutive tabs down to a single space. Tabs pad divs and
    # spans, so multiple nested spans and divs end up surrounded by a
    # single space.
    markdown = markdown.gsub(/\t+/, " ")
    # Remove a space before a period or question mark. Things like links get
    # surrounded by spaces; at the end of a sentence this keeps the
    # punctuation attached.
    markdown.gsub(/ ([\.\?])/, '\1')
  end

  # Renders one node and, recursively, its children.
  #
  # node   - the node to render.
  # states - names of the enclosing nodes, innermost first. The array is
  #          read but never modified.
  #
  # Returns a (possibly nested) Array of Strings and nils; callers are
  # expected to flatten, compact and join it.
  def self.parse_node(node, states=[])
    tag = node.name.downcase
    # Decide to skip before building the ancestor path, so a skipped element
    # can never leave its name behind and shift the positions that +prefix+
    # relies on for the nodes that follow it.
    return [] if SKIPPED_TAGS.include?(tag)

    path = [tag] + states
    pre = prefix(path)
    results = []
    after = nil
    strip_content = false

    # Match on the lower-cased name so the branches agree with +path+.
    case tag
    when "div", "span"
      results << "\t"
      after = "\t"
    when "p"
      results << newline(pre, nil, 2)
      after = newline(pre, nil, 2)
    when "h1", "h2", "h3", "h4", "h5", "h6"
      level = tag[1, 1].to_i
      results << newline(pre, nil, 2)
      results << ("#" * level + " ")
      after = newline(pre, nil, 2)
    when "hr"
      results << newline(pre, nil, 2)
      results << "***"
      results << newline(pre, nil, 2)
    when "br"
      results << newline(pre, nil, 2)
    when "em", "i"
      results << " *"
      after = "* "
    when "strong", "b"
      results << " **"
      after = "** "
    when "blockquote"
      results << pre
      after = "\n"
    when "ol", "ul"
      # Only an outermost list gets surrounding line breaks.
      unless nested_list?(path)
        results << newline(pre, nil)
        after = "\n"
      end
    when "li"
      results << "\n"
      results << pre
    when "a"
      results << " ["
      # A missing href yields an empty target instead of raising.
      after = ["](#{attribute_value(node, "href")}) "]
      strip_content = true
    when "img"
      alt = attribute_value(node, "alt")
      src = attribute_value(node, "src")
      results << " !["
      results << alt if alt
      results << "]("
      results << src if src
      results << ") "
    when "text"
      results << node.text.strip.gsub("\n","").gsub(/ {2,}/," ")
    end

    node.children.each do |child|
      contents = parse_node(child, path)
      # Link text is flattened and trimmed so it sits tight in the brackets.
      contents = contents.flatten.compact.join.strip if strip_content
      results << contents
    end

    results << after
    results
  end

  # Tells whether the current node sits inside a list or blockquote.
  #
  # states - node names, innermost first; the first entry is the node itself
  #          and is ignored.
  #
  # Returns true when any later entry is "ul", "ol" or "blockquote".
  def self.nested_list?(states)
    states.drop(1).any? { |state| LIST_CONTAINERS.include?(state) }
  end

  # Builds +count+ line breaks, each preceded by the prefix and the line.
  #
  # pre   - prefix pieces, as returned by +prefix+.
  # line  - content placed before each line break (may be nil).
  # count - how many times to repeat the group (default 1).
  #
  # Returns an Array repeating pre, line, "\n" that many times.
  def self.newline(pre, line, count=1)
    pieces = []
    count.times { pieces.concat([pre, line, "\n"]) }
    pieces
  end

  # Works out the line prefix (quote marker, indentation, bullet) for a node.
  #
  # states - node names, innermost first, starting with the node itself.
  #
  # Returns an Array of prefix Strings, outermost ancestor's piece first.
  def self.prefix(states)
    parts = []
    states.each_with_index do |state, depth|
      # A blockquote contributes its marker at any depth, including 0.
      parts.unshift(" > ") if state == "blockquote"

      if depth == 1
        # The direct parent picks the bullet, and only for list items.
        if states.first == "li"
          parts.unshift(" 1. ") if state == "ol"
          parts.unshift(" * ") if state == "ul"
        end
      elsif depth > 1
        # Every list further out adds one level of indentation.
        # NOTE(review): this indent literal is copied as it appears in the
        # listing, which may have collapsed a run of spaces — confirm the
        # intended width against the released gem.
        parts.unshift(" ") if state == "ol" || state == "ul"
      end
    end
    parts
  end

  # Reads an attribute value without assuming the attribute exists.
  #
  # Returns the attribute's value, or nil when the node lacks the attribute.
  def self.attribute_value(node, name)
    attribute = node.attributes[name]
    attribute ? attribute.value : nil
  end
  private_class_method :attribute_value
end
@@ -0,0 +1,3 @@
1
+ module Markitdown # same namespace that lib/markitdown.rb fills with the converter
2
+ VERSION = "0.0.1" # release number; markitdown.gemspec assigns it to gem.version
3
+ end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/markitdown/version', __FILE__) # loads Markitdown::VERSION by path relative to this file
3
+
4
+ Gem::Specification.new do |gem| # package metadata for the markitdown gem
5
+ gem.authors = ["Christopher Petersen"]
6
+ gem.email = ["christopher.petersen@gmail.com"]
7
+ gem.description = %q{A small library that uses Nokogiri to parse an HTML file and produce Markdown}
8
+ gem.summary = %q{Converts HTML to Markdown}
9
+ gem.homepage = "" # NOTE(review): homepage is blank — fill in the project URL if there is one
10
+
11
+ gem.add_dependency('nokogiri') # runtime dependency, no version constraint given
12
+ gem.add_development_dependency('rake')
13
+ gem.add_development_dependency('rspec')
14
+
15
+ gem.files = `git ls-files`.split($\) # shells out to git; the file list is whatever `git ls-files` prints
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } # basenames of any listed files under bin/
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) # listed files under test/, spec/ or features/
18
+ gem.name = "markitdown"
19
+ gem.require_paths = ["lib"]
20
+ gem.version = Markitdown::VERSION # constant comes from the version file required above
21
+ end
@@ -0,0 +1,29 @@
1
+ <html>
2
+ <head>
3
+ <title>Test Document</title>
4
+ </head>
5
+ <body>
6
+ <h1>Main Header</h1>
7
+ <p>
8
+ This <em>is</em> a <b>test</b>. It includes a <a href="http://www.google.com">link</a> as well as an image <img src="https://www.google.com/images/srpr/logo3w.png" alt="Google Logo" />
9
+ <ul>
10
+ <li>bullet 1</li>
11
+ <li>bullet 2</li>
12
+ <li>bullet 3</li>
13
+ </ul>
14
+ </p>
15
+ <hr/>
16
+ <h2>Subheader</h2>
17
+ <p>
18
+ This is paragraph two.
19
+ <ol>
20
+ <li>bullet 1</li>
21
+ <ul>
22
+ <li>Sub-bullet 1 <a href="http://github.com">Nested link</a>.</li>
23
+ </ul>
24
+ <li>bullet 2</li>
25
+ <li>bullet 3</li>
26
+ </ol>
27
+ </p>
28
+ </body>
29
+ </html>
@@ -0,0 +1,32 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "When parsing a document" do
5
+ let(:html) { File.read("spec/doc.html") }
6
+
7
+ it "should produce valid markdown" do
8
+ Markitdown.from_html(html).should == "
9
+
10
+ # Main Header
11
+
12
+ This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
13
+
14
+ * bullet 1
15
+ * bullet 2
16
+ * bullet 3
17
+
18
+ ***
19
+
20
+ ## Subheader
21
+
22
+ This is paragraph two.
23
+
24
+ 1. bullet 1
25
+ * Sub-bullet 1 [Nested link](http://github.com).
26
+ 1. bullet 2
27
+ 1. bullet 3
28
+
29
+ "
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,130 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "when parsing nested ordered lists" do
5
+ let(:html) { "
6
+ <ol>
7
+ <li>line 1.1</li>
8
+ <ol>
9
+ <li>line 2.1</li>
10
+ <li>line 2.2</li>
11
+ <ol>
12
+ <li>line 3.1</li>
13
+ <li>line 3.2</li>
14
+ </ol>
15
+ </ol>
16
+ <li>line 1.2</li>
17
+ </ol>"
18
+ }
19
+ it "should return valid markdown" do
20
+ Markitdown.from_html(html).should == "
21
+
22
+ 1. line 1.1
23
+ 1. line 2.1
24
+ 1. line 2.2
25
+ 1. line 3.1
26
+ 1. line 3.2
27
+ 1. line 1.2
28
+ "
29
+ end
30
+ end
31
+
32
+ context "when parsing nested unordered lists" do
33
+ let(:html) { "
34
+ <ul>
35
+ <li>line 1.1</li>
36
+ <ul>
37
+ <li>line 2.1</li>
38
+ <li>line 2.2</li>
39
+ <ul>
40
+ <li>line 3.1</li>
41
+ <li>line 3.2</li>
42
+ </ul>
43
+ </ul>
44
+ <li>line 1.2</li>
45
+ </ul>"
46
+ }
47
+ it "should return valid markdown" do
48
+ Markitdown.from_html(html).should == "
49
+
50
+ * line 1.1
51
+ * line 2.1
52
+ * line 2.2
53
+ * line 3.1
54
+ * line 3.2
55
+ * line 1.2
56
+ "
57
+ end
58
+ end
59
+
60
+ context "when parsing nested ordered and unordered lists" do
61
+ let(:html) { "
62
+ <ul>
63
+ <li>line 1.1</li>
64
+ <ol>
65
+ <li>line 2.1</li>
66
+ <li>line 2.2</li>
67
+ <ul>
68
+ <li>line 3.1</li>
69
+ <li>line 3.2</li>
70
+ </ul>
71
+ </ol>
72
+ <li>line 1.2</li>
73
+ </ul>"
74
+ }
75
+ it "should return valid markdown" do
76
+ Markitdown.from_html(html).should == "
77
+
78
+ * line 1.1
79
+ 1. line 2.1
80
+ 1. line 2.2
81
+ * line 3.1
82
+ * line 3.2
83
+ * line 1.2
84
+ "
85
+ end
86
+ end
87
+
88
+ context "when parsing an unordered list nested under a blockquote" do
89
+ let(:html) { "
90
+ <blockquote>
91
+ This is a quote with a list
92
+ <ul>
93
+ <li>item 1</li>
94
+ <li>item 2</li>
95
+ </ul>
96
+ </blockquote>" }
97
+ it "should return valid markdown" do
98
+ Markitdown.from_html(html).should ==
99
+ " > This is a quote with a list
100
+ > * item 1
101
+ > * item 2
102
+ "
103
+ end
104
+ end
105
+
106
+
107
+ context "when parsing nested lists with links nested under a blockquote" do
108
+ let(:html) { "
109
+ <blockquote>
110
+ This is a quote with a list
111
+ <ul>
112
+ <li>item <a href='http://www.google.com'>1.1</a></li>
113
+ <ol>
114
+ <li>item <a href='http://www.google.com'>2.1</a></li>
115
+ <li>item 2.2</li>
116
+ </ol>
117
+ <li>item 1.2</li>
118
+ </ul>
119
+ </blockquote>" }
120
+ it "should return valid markdown" do
121
+ Markitdown.from_html(html).should ==
122
+ " > This is a quote with a list
123
+ > * item [1.1](http://www.google.com)
124
+ > 1. item [2.1](http://www.google.com)
125
+ > 1. item 2.2
126
+ > * item 1.2
127
+ "
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,209 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "When parsing a paragraph" do
5
+ let(:html) { "<p>This is a paragraph</p>" }
6
+
7
+ it "should return valid markdown" do
8
+ Markitdown.from_html(html).should == "\n\nThis is a paragraph\n\n"
9
+ end
10
+ end
11
+
12
+ context "When parsing an H1" do
13
+ let(:html) { "<h1>This is a test</h1>" }
14
+
15
+ it "should return valid markdown" do
16
+ Markitdown.from_html(html).should == "\n\n# This is a test\n\n"
17
+ end
18
+ end
19
+
20
+ context "When parsing an H2" do
21
+ let(:html) { "<h2>This is a test</h2>" }
22
+
23
+ it "should return valid markdown" do
24
+ Markitdown.from_html(html).should == "\n\n## This is a test\n\n"
25
+ end
26
+ end
27
+
28
+ context "When parsing an H3" do
29
+ let(:html) { "<h3>This is a test</h3>" }
30
+
31
+ it "should return valid markdown" do
32
+ Markitdown.from_html(html).should == "\n\n### This is a test\n\n"
33
+ end
34
+ end
35
+
36
+ context "When parsing an H4" do
37
+ let(:html) { "<h4>This is a test</h4>" }
38
+
39
+ it "should return valid markdown" do
40
+ Markitdown.from_html(html).should == "\n\n#### This is a test\n\n"
41
+ end
42
+ end
43
+
44
+ context "When parsing an H5" do
45
+ let(:html) { "<h5>This is a test</h5>" }
46
+
47
+ it "should return valid markdown" do
48
+ Markitdown.from_html(html).should == "\n\n##### This is a test\n\n"
49
+ end
50
+ end
51
+
52
+ context "When parsing an H6" do
53
+ let(:html) { "<h6>This is a test</h6>" }
54
+
55
+ it "should return valid markdown" do
56
+ Markitdown.from_html(html).should == "\n\n###### This is a test\n\n"
57
+ end
58
+ end
59
+
60
+ context "When parsing an HR" do
61
+ let(:html) { "<hr/>" }
62
+
63
+ it "should return valid markdown" do
64
+ Markitdown.from_html(html).should == "\n\n***\n\n"
65
+ end
66
+ end
67
+
68
+ context "When parsing an BR" do
69
+ let(:html) { "<br/>" }
70
+
71
+ it "should return valid markdown" do
72
+ Markitdown.from_html(html).should == "\n\n"
73
+ end
74
+ end
75
+
76
+ context "When parsing an EM element" do
77
+ let(:html) { "<em>emphasis added</em>" }
78
+
79
+ it "should return valid markdown" do
80
+ Markitdown.from_html(html).should == " *emphasis added* "
81
+ end
82
+ end
83
+
84
+ context "When parsing an italicized element" do
85
+ let(:html) { "<i>italics added</i>" }
86
+
87
+ it "should return valid markdown" do
88
+ Markitdown.from_html(html).should == " *italics added* "
89
+ end
90
+ end
91
+
92
+ context "When parsing a strong element" do
93
+ let(:html) { "<strong>strong added</strong>" }
94
+
95
+ it "should return valid markdown" do
96
+ Markitdown.from_html(html).should == " **strong added** "
97
+ end
98
+ end
99
+
100
+ context "When parsing a bold element" do
101
+ let(:html) { "<b>bold added</b>" }
102
+
103
+ it "should return valid markdown" do
104
+ Markitdown.from_html(html).should == " **bold added** "
105
+ end
106
+ end
107
+
108
+ context "When parsing a bold element that's followed by a punctuation" do
109
+ let(:html) { "<html><b>bold added</b>.</html>" }
110
+
111
+ it "should return valid markdown without a space" do
112
+ Markitdown.from_html(html).should == " **bold added**."
113
+ end
114
+ end
115
+
116
+ context "When parsing a em element that's followed by a punctuation" do
117
+ let(:html) { "<html><em>emphasis added</em>?</html>" }
118
+
119
+ it "should return valid markdown without a space" do
120
+ Markitdown.from_html(html).should == " *emphasis added*?"
121
+ end
122
+ end
123
+
124
+ context "When parsing an OL" do
125
+ let(:html) { "<ol>
126
+ <li>first bullet</li>
127
+ <li>second bullet</li>
128
+ <li>third bullet</li>
129
+ </ol>"
130
+ }
131
+ it "should return valid markdown" do
132
+ Markitdown.from_html(html).should == "
133
+
134
+ 1. first bullet
135
+ 1. second bullet
136
+ 1. third bullet
137
+ "
138
+ end
139
+ end
140
+
141
+ context "When parsing an UL" do
142
+ let(:html) { "<ul>
143
+ <li>first bullet</li>
144
+ <li>second bullet</li>
145
+ <li>third bullet</li>
146
+ </ul>"
147
+ }
148
+ it "should return valid markdown" do
149
+ Markitdown.from_html(html).should == "
150
+
151
+ * first bullet
152
+ * second bullet
153
+ * third bullet
154
+ "
155
+ end
156
+ end
157
+
158
context "When parsing a link" do
  # The fixture is a well-formed anchor. It used to be closed with </strong>,
  # a mismatched end tag that left this example dependent on how the parser
  # recovers from malformed markup rather than on the link conversion itself.
  let(:html) { "<a href='http://www.google.com'>this is a link</a>" }

  it "should return valid markdown" do
    Markitdown.from_html(html).should == " [this is a link](http://www.google.com) "
  end
end
165
+
166
+ context "When parsing an image" do
167
+ let(:html) { "<img src='https://www.google.com/images/srpr/logo3w.png' alt='Google Logo'>" }
168
+
169
+ it "should return valid markdown" do
170
+ Markitdown.from_html(html).should == " ![Google Logo](https://www.google.com/images/srpr/logo3w.png) "
171
+ end
172
+ end
173
+
174
+ context "When parsing an image without an alt tag" do
175
+ let(:html) { "<img src='https://www.google.com/images/srpr/logo3w.png'>" }
176
+
177
+ it "should return valid markdown" do
178
+ Markitdown.from_html(html).should == " ![](https://www.google.com/images/srpr/logo3w.png) "
179
+ end
180
+ end
181
+
182
+ context "When parsing a style block" do
183
+ let(:html) { "<style>div.whatever { font-weight: bold; }</style>" }
184
+
185
+ it "should ignore it" do
186
+ Markitdown.from_html(html).should == ""
187
+ end
188
+ end
189
+
190
+ context "When parsing a blockquote" do
191
+ let(:html) { "<blockquote>this is a block quote</blockquote>" }
192
+
193
+ it "should return valid markdown" do
194
+ Markitdown.from_html(html).should == " > this is a block quote\n"
195
+ end
196
+ end
197
+
198
+ context "When parsing a multi line blockquote" do
199
+ let(:html) { "<blockquote>
200
+ line 1
201
+ line 2
202
+ line 3
203
+ </blockquote>" }
204
+
205
+ it "should return valid markdown" do
206
+ Markitdown.from_html(html).should == " > line 1 line 2 line 3\n"
207
+ end
208
+ end
209
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markitdown
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Christopher Petersen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: A small library that uses Nokogiri to parse an HTML file and produce
63
+ Markdown
64
+ email:
65
+ - christopher.petersen@gmail.com
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - .gitignore
71
+ - Gemfile
72
+ - LICENSE
73
+ - README.md
74
+ - Rakefile
75
+ - lib/markitdown.rb
76
+ - lib/markitdown/version.rb
77
+ - markitdown.gemspec
78
+ - spec/doc.html
79
+ - spec/doc_spec.rb
80
+ - spec/nesting_spec.rb
81
+ - spec/tag_spec.rb
82
+ homepage: ''
83
+ licenses: []
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ segments:
95
+ - 0
96
+ hash: 4314622301527767866
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ segments:
104
+ - 0
105
+ hash: 4314622301527767866
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 1.8.24
109
+ signing_key:
110
+ specification_version: 3
111
+ summary: Converts HTML to Markdown
112
+ test_files:
113
+ - spec/doc.html
114
+ - spec/doc_spec.rb
115
+ - spec/nesting_spec.rb
116
+ - spec/tag_spec.rb