upmark 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/LICENSE.md +20 -0
- data/README.md +19 -0
- data/Rakefile +19 -0
- data/bin/upmark +14 -0
- data/lib/core_ext/array.rb +9 -0
- data/lib/upmark/parser/xml.rb +76 -0
- data/lib/upmark/transform/markdown.rb +84 -0
- data/lib/upmark/transform/preprocess.rb +25 -0
- data/lib/upmark/version.rb +3 -0
- data/lib/upmark.rb +28 -0
- data/spec/acceptance/upmark_spec.rb +79 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/unit/lib/upmark/parser/xml_spec.rb +219 -0
- data/spec/unit/lib/upmark/transform/markdown_spec.rb +98 -0
- data/upmark.gemspec +26 -0
- metadata +90 -0
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/Gemfile
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 The Conversation Media Group
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Upmark
|
2
|
+
|
3
|
+
An HTML to Markdown converter.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
gem install upmark
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
require "upmark"
|
12
|
+
|
13
|
+
html = %q{<p>messenger <strong>bag</strong> skateboard</p>}
|
14
|
+
markdown = Upmark.convert(html)
|
15
|
+
puts markdown
|
16
|
+
|
17
|
+
## License
|
18
|
+
|
19
|
+
Upmark is Copyright (c) 2011 The Conversation Media Group and distributed under the MIT license.
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
namespace :spec do
  # One RSpec rake task per suite; each suite lives in ./spec/<suite>/.
  {
    acceptance: "Run acceptance specs",
    unit: "Run unit specs"
  }.each do |suite, description|
    desc description
    RSpec::Core::RakeTask.new(suite) do |t|
      t.pattern = "./spec/#{suite}/**/*_spec.rb"
    end
  end

  desc "Run unit and acceptance specs"
  task all: [:"spec:unit", :"spec:acceptance"]
end

# Running plain `rake` executes every suite.
task default: :"spec:all"
|
data/bin/upmark
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
module Upmark
  module Parser
    # Parslet grammar that parses an XML document into an abstract syntax
    # tree (AST).
    #
    # The rules loosely follow the XML specification:
    #   http://www.w3.org/TR/2000/REC-xml-20001006
    #
    class XML < Parslet::Parser
      root(:content)

      # Zero or more elements and text runs, in any order.
      rule(:content) do
        node = element.as(:element) | text.as(:text)
        node.repeat(0)
      end

      # Either a start tag / content / end tag triple, or a self-closing tag.
      rule(:element) do
        paired = start_tag.as(:start_tag) >> content.as(:content) >> end_tag.as(:end_tag)
        paired | empty_tag.as(:empty_tag)
      end

      # One or more characters that are not angle brackets.
      rule(:text) { match(/[^<>]/).repeat(1) }

      # <name attr="value" ...>
      rule(:start_tag) do
        str('<') >> name.as(:name) >> attribute_list.as(:attributes) >> space? >> str('>')
      end

      # </name>
      rule(:end_tag) do
        str('</') >> name.as(:name) >> space? >> str('>')
      end

      # <name attr="value" ... />
      rule(:empty_tag) do
        str('<') >> name.as(:name) >> attribute_list.as(:attributes) >> space? >> str('/>')
      end

      # Whitespace-separated attributes following a tag name (possibly none).
      rule(:attribute_list) { (space >> attribute).repeat }

      # A letter, underscore or colon followed by word characters, colons,
      # dots or dashes.
      rule(:name) do
        first_char = match(/[a-zA-Z_:]/)
        first_char >> match(/[\w:\.-]/).repeat
      end

      # name="value" or name='value'; the quotes must match.
      rule(:attribute) do
        double_quoted = str('"') >> attribute_value.as(:value) >> str('"')
        single_quoted = str("'") >> attribute_value.as(:value) >> str("'")

        name.as(:name) >> str('=') >> (double_quoted | single_quoted)
      end

      # Any run of characters excluding quotes, '<' and '&'.
      rule(:attribute_value) do
        (match(/['"]/).absent? >> match(/[^<&]/)).repeat
      end

      rule(:space)  { match(/\s/).repeat(1) }
      rule(:space?) { space.maybe }
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Upmark
  module Transform
    # The Markdown transform class.
    #
    # Transforms an abstract syntax tree (AST) into a Markdown document.
    #
    # Rules written with a one-argument block receive the captured bindings
    # as a hash and call the class-level helpers defined below.
    #
    class Markdown < Parslet::Transform
      # Builds the pattern matching a preprocessed element with the given
      # tag name. The element's attributes are captured as :attributes and
      # its already-transformed children as :values.
      def self.tag(tag_name)
        tag_name = tag_name.to_s.downcase
        {
          tag: {name: tag_name, attributes: subtree(:attributes)},
          content: sequence(:values)
        }
      end

      # Converts a list of {name: ..., value: ...} attribute nodes into a
      # hash keyed by the attribute name as a symbol.
      def self.map_attributes_subtree(ast)
        ast.each_with_object({}) do |attribute, hash|
          hash[attribute[:name].to_sym] = attribute[:value]
        end
      end

      # Formats the parenthesised part of a Markdown link or image. The
      # quoted title is optional in Markdown, so it is only emitted when the
      # element actually carried a title attribute.
      def self.link_destination(href, title)
        title.nil? ? href.to_s : %Q{#{href} "#{title}"}
      end

      rule(element: subtree(:values)) { values }

      rule(text: simple(:value)) { value.to_s }

      rule(tag(:p))      { "#{values.join}\n\n" }
      rule(tag(:strong)) { "**#{values.join}**" }
      rule(tag(:em))     { "*#{values.join}*" }
      rule(tag(:li))     { "#{values.join}" }
      rule(tag(:h1))     { "# #{values.join}" }
      rule(tag(:h2))     { "## #{values.join}" }
      rule(tag(:h3))     { "### #{values.join}" }
      rule(tag(:br))     { "\n" }

      # Whitespace-only children (the text between <li> elements) are
      # dropped so that only real list items are rendered.
      rule(tag(:ul)) do |dictionary|
        items = dictionary[:values].reject {|value| value.strip == "" }
        items.map {|value| "* #{value}\n" }
      end

      rule(tag(:ol)) do |dictionary|
        items = dictionary[:values].reject {|value| value.strip == "" }
        items.each_with_index.map {|value, i| "#{i + 1}. #{value}\n" }
      end

      rule(tag(:a)) do |dictionary|
        attributes = map_attributes_subtree(dictionary[:attributes])
        href       = attributes[:href]
        title      = attributes[:title]
        values     = dictionary[:values].join

        %Q{[#{values}](#{link_destination(href, title)})}
      end

      rule(tag(:img)) do |dictionary|
        attributes = map_attributes_subtree(dictionary[:attributes])
        href       = attributes[:src]
        title      = attributes[:title]
        alt_text   = attributes[:alt]

        %Q{![#{alt_text}](#{link_destination(href, title)})}
      end

      # Catch-all rule to pass all tags through.
      rule(
        tag: {name: simple(:tag_name), attributes: subtree(:attributes)},
        content: sequence(:values)
      ) do |dictionary|
        attributes = map_attributes_subtree(dictionary[:attributes])
        values     = dictionary[:values].join
        tag_name   = dictionary[:tag_name]

        attributes_list =
          if attributes.any?
            " " + attributes.map {|name, value| %Q{#{name}="#{value}"} }.join(" ")
          else
            ""
          end

        %Q{<#{tag_name}#{attributes_list}>#{values}</#{tag_name}>}
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Upmark
  module Transform
    # Normalises the parser output before the Markdown transform runs:
    # both paired and self-closing elements are rewritten into a single
    # {tag: ..., content: ...} shape.
    class Preprocess < Parslet::Transform
      # Paired element: the start and end tag must share the same name
      # (both are bound to :tag_name). The end tag is dropped.
      rule(
        start_tag: {name: simple(:tag_name), attributes: subtree(:attributes)},
        end_tag:   {name: simple(:tag_name)},
        content:   subtree(:values)
      ) do |captured|
        {
          tag: {name: captured[:tag_name], attributes: captured[:attributes]},
          content: captured[:values]
        }
      end

      # Self-closing element: treated as a tag with no content.
      rule(
        empty_tag: {name: simple(:tag_name), attributes: subtree(:attributes)}
      ) do |captured|
        {
          tag: {name: captured[:tag_name], attributes: captured[:attributes]},
          content: []
        }
      end
    end
  end
end
|
data/lib/upmark.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "parslet"
|
2
|
+
|
3
|
+
require "core_ext/array"
|
4
|
+
|
5
|
+
require "upmark/parser/xml"
|
6
|
+
require "upmark/transform/markdown"
|
7
|
+
require "upmark/transform/preprocess"
|
8
|
+
require "upmark/version"
|
9
|
+
|
10
|
+
module Upmark
  # Converts an HTML string to Markdown.
  #
  # The input is parsed into an AST, normalised by the Preprocess transform
  # and then rendered by the Markdown transform.
  def self.convert(html)
    tree = Parser::XML.new.parse(html.strip)
    tree = Transform::Preprocess.new.apply(tree)
    output = Transform::Markdown.new.apply(tree)

    # The result is either a String or an Array.
    output = output.join if output.is_a?(Array)

    # Any more than two consecutive newline characters is superfluous.
    output.gsub(/\n(\s*\n)+/, "\n\n").strip
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# End-to-end check: a document exercising every supported element is
# converted and compared against the expected Markdown.
describe Upmark, ".convert" do
  subject { Upmark.convert(html) }

  let(:html) { <<-HTML.strip }
<h1>messenger bag skateboard</h1>

<p>messenger bag skateboard</p>

<p>messenger <em>bag</em> <strong>skateboard</strong></p>

<p><a href="http://helvetica.com/" title="art party organic">messenger <strong>bag</strong> skateboard</a></p>

<img src="http://helvetica.com/image.gif" title="art party organic" alt="messenger bag skateboard" />

<ul>
<li>messenger</li>
<li><strong>bag</strong></li>
<li>skateboard</li>
</ul>

<ul>
<li><p>messenger</p></li>
<li><p><strong>bag</strong></p></li>
<li><p>skateboard</p></li>
</ul>

<ol>
<li>messenger</li>
<li><strong>bag</strong></li>
<li>skateboard</li>
</ol>

<ol>
<li><p>messenger</p></li>
<li><p><strong>bag</strong></p></li>
<li><p>skateboard</p></li>
</ol>

<div>messenger <strong>bag</strong> skateboard</div>
<div id="tofu" class="art party">messenger <strong>bag</strong> skateboard</div>
  HTML

  let(:expected_markdown) { <<-MD.strip }
# messenger bag skateboard

messenger bag skateboard

messenger *bag* **skateboard**

[messenger **bag** skateboard](http://helvetica.com/ "art party organic")

![messenger bag skateboard](http://helvetica.com/image.gif "art party organic")

* messenger
* **bag**
* skateboard

* messenger

* **bag**

* skateboard

1. messenger
2. **bag**
3. skateboard

1. messenger

2. **bag**

3. skateboard

<div>messenger **bag** skateboard</div>
<div id="tofu" class="art party">messenger **bag** skateboard</div>
  MD

  it { should == expected_markdown }
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Upmark::Parser::XML do
  let(:parser) { Upmark::Parser::XML.new }

  # Expected AST for a text run.
  def text_node(value)
    {text: value}
  end

  # Expected AST for a paired element with the given children.
  def element_node(tag_name, children, attributes = [])
    {
      element: {
        start_tag: {name: tag_name, attributes: attributes},
        end_tag:   {name: tag_name},
        content:   children
      }
    }
  end

  # Expected AST for a <p> holding a single text run.
  def paragraph(value)
    element_node("p", [text_node(value)])
  end

  context "#content" do
    subject { parser.content }

    [
      "",
      "messenger bag skateboard",
      "<p>messenger bag skateboard</p>",
      "messenger <p>bag</p> skateboard",
      "<p>messenger</p><p>bag</p><p>skateboard</p>",
      "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>",
      "<p>messenger <strong>bag</strong> skateboard</p>"
    ].each do |input|
      it { should parse input }
    end
  end

  context "#element" do
    subject { parser.element }

    [
      "<p></p>",
      "<p>messenger bag skateboard</p>",
      %q{<tofu art="party" />}
    ].each do |input|
      it { should parse input }
    end

    [
      "<p>",
      "<p>messenger bag skateboard",
      "messenger bag skateboard</p>",
      "<p>messenger bag skateboard<p>"
    ].each do |input|
      it { should_not parse input }
    end
  end

  context "#text" do
    subject { parser.text }

    it { should parse "messenger bag skateboard" }

    ["<p>messenger bag skateboard</p>", ""].each do |input|
      it { should_not parse input }
    end
  end

  context "#start_tag" do
    subject { parser.start_tag }

    [
      %q{<tofu art="party">},
      %q{<tofu art="party" synth="letterpress">},
      "<tofu>"
    ].each do |input|
      it { should parse input }
    end

    ["</tofu>", "<tofu", "tofu>"].each do |input|
      it { should_not parse input }
    end
  end

  context "#end_tag" do
    subject { parser.end_tag }

    it { should parse "</tofu>" }

    ["<tofu>", "<tofu", "/tofu>"].each do |input|
      it { should_not parse input }
    end
  end

  context "#empty_tag" do
    subject { parser.empty_tag }

    [
      %q{<tofu art="party" />},
      %q{<tofu art="party" synth="letterpress" />}
    ].each do |input|
      it { should parse input }
    end

    ["<tofu>", "</tofu>", "<tofu", "/tofu>"].each do |input|
      it { should_not parse input }
    end
  end

  context "#name" do
    subject { parser.name }

    ["p", "h1"].each do |input|
      it { should parse input }
    end

    ["1h", "h 1"].each do |input|
      it { should_not parse input }
    end
  end

  context "#attribute" do
    subject { parser.attribute }

    [
      %q{art="party organic"},
      %q{art='party organic'}
    ].each do |input|
      it { should parse input }
    end

    [
      "art",
      "art=",
      "art=party",
      %q{="party organic"},
      %q{art="party organic'},
      %q{art='party organic"}
    ].each do |input|
      it { should_not parse input }
    end
  end

  context "#parse" do
    subject { parser.parse(html) }

    context "single element" do
      let(:html) { "<p>messenger</p>" }

      it { should == [paragraph("messenger")] }
    end

    context "single element with attributes" do
      let(:html) { %q{<a href="http://helvetica.com/" title="art party organic">messenger bag skateboard</a>} }

      it do
        link_attributes = [
          {name: "href",  value: "http://helvetica.com/"},
          {name: "title", value: "art party organic"}
        ]

        should == [
          element_node("a", [text_node("messenger bag skateboard")], link_attributes)
        ]
      end
    end

    context "multiple inline elements" do
      let(:html) { "<p>messenger</p><p>bag</p><p>skateboard</p>" }

      it do
        should == [
          paragraph("messenger"),
          paragraph("bag"),
          paragraph("skateboard")
        ]
      end
    end

    context "multiple elements" do
      let(:html) { "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>" }

      it do
        should == [
          paragraph("messenger"),
          text_node("\n"),
          paragraph("bag"),
          text_node("\n"),
          paragraph("skateboard")
        ]
      end
    end

    context "nested elements" do
      let(:html) { "<p>messenger <strong>bag</strong> skateboard</p>" }

      it do
        should == [
          element_node("p", [
            text_node("messenger "),
            element_node("strong", [text_node("bag")]),
            text_node(" skateboard")
          ])
        ]
      end
    end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Upmark::Transform::Markdown do
  let(:transform) { Upmark::Transform::Markdown.new }

  # Preprocessed AST node for an element with the given tag name.
  def element_node(tag_name, children, attributes = [])
    {
      element: {
        tag: {name: tag_name, attributes: attributes},
        content: children
      }
    }
  end

  # Preprocessed AST node for a <p> holding a single text run.
  def paragraph(value)
    element_node("p", [{text: value}])
  end

  context "#apply" do
    subject { transform.apply(ast) }

    context "<p>" do
      context "single element" do
        let(:ast) { [paragraph("messenger bag skateboard")] }

        it { should == ["messenger bag skateboard\n\n"] }
      end

      context "multiple elements" do
        let(:ast) do
          ["messenger", "bag", "skateboard"].map { |word| paragraph(word) }
        end

        it { should == ["messenger\n\n", "bag\n\n", "skateboard\n\n"] }
      end
    end

    context "<a>" do
      context "single element" do
        let(:ast) do
          link_attributes = [
            {name: "href",  value: "http://helvetica.com/"},
            {name: "title", value: "art party organic"}
          ]

          [element_node("a", [{text: "messenger bag skateboard"}], link_attributes)]
        end

        it { should == [%q{[messenger bag skateboard](http://helvetica.com/ "art party organic")}] }
      end
    end

    context "<img>" do
      context "empty element" do
        let(:ast) do
          image_attributes = [
            {name: "src",   value: "http://helvetica.com/image.gif"},
            {name: "title", value: "art party organic"},
            {name: "alt",   value: "messenger bag skateboard"}
          ]

          [element_node("img", [], image_attributes)]
        end

        it { should == [%q{![messenger bag skateboard](http://helvetica.com/image.gif "art party organic")}] }
      end
    end
  end
end
|
data/upmark.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Make lib/ loadable so the version constant can be required below.
lib_path = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_path)

require "upmark/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name        = "upmark"
  spec.version     = Upmark::VERSION
  spec.authors     = ["Josh Bassett", "Gus Gollings"]
  spec.email       = "dev@theconversation.edu.au"
  spec.homepage    = "https://github.com/conversation/upmark"
  spec.summary     = %q{A HTML to Markdown converter.}
  spec.description = %q{Upmark has the skills to convert your HTML to Markdown.}

  spec.rubyforge_project = "upmark"

  # Packaged files are whatever git currently tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_development_dependency "rspec"

  spec.add_runtime_dependency "parslet"
end
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: upmark
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Josh Bassett
|
9
|
+
- Gus Gollings
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2011-09-23 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rspec
|
17
|
+
requirement: &70160857809560 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *70160857809560
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: parslet
|
28
|
+
requirement: &70160857808520 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *70160857808520
|
37
|
+
description: Upmark has the skills to convert your HTML to Markdown.
|
38
|
+
email: dev@theconversation.edu.au
|
39
|
+
executables:
|
40
|
+
- upmark
|
41
|
+
extensions: []
|
42
|
+
extra_rdoc_files: []
|
43
|
+
files:
|
44
|
+
- .gitignore
|
45
|
+
- .rspec
|
46
|
+
- Gemfile
|
47
|
+
- LICENSE.md
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- bin/upmark
|
51
|
+
- lib/core_ext/array.rb
|
52
|
+
- lib/upmark.rb
|
53
|
+
- lib/upmark/parser/xml.rb
|
54
|
+
- lib/upmark/transform/markdown.rb
|
55
|
+
- lib/upmark/transform/preprocess.rb
|
56
|
+
- lib/upmark/version.rb
|
57
|
+
- spec/acceptance/upmark_spec.rb
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
- spec/unit/lib/upmark/parser/xml_spec.rb
|
60
|
+
- spec/unit/lib/upmark/transform/markdown_spec.rb
|
61
|
+
- upmark.gemspec
|
62
|
+
homepage: https://github.com/conversation/upmark
|
63
|
+
licenses: []
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ! '>='
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
requirements: []
|
81
|
+
rubyforge_project: upmark
|
82
|
+
rubygems_version: 1.8.10
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: A HTML to Markdown converter.
|
86
|
+
test_files:
|
87
|
+
- spec/acceptance/upmark_spec.rb
|
88
|
+
- spec/spec_helper.rb
|
89
|
+
- spec/unit/lib/upmark/parser/xml_spec.rb
|
90
|
+
- spec/unit/lib/upmark/transform/markdown_spec.rb
|