upmark 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -2
- data/Gemfile.lock +34 -0
- data/{LICENSE.md → LICENSE} +0 -0
- data/lib/upmark.rb +3 -3
- data/lib/upmark/transform/markdown.rb +2 -4
- data/lib/upmark/transform/normalise.rb +39 -0
- data/lib/upmark/transform/preprocess.rb +10 -26
- data/lib/upmark/version.rb +1 -1
- data/spec/acceptance/upmark_spec.rb +23 -40
- data/spec/spec_helper.rb +4 -0
- data/upmark.gemspec +5 -5
- metadata +22 -9
- data/lib/upmark/transform/pass_through.rb +0 -20
data/.gitignore
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
upmark (0.1.1)
|
5
|
+
parslet
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
blankslate (2.1.2.4)
|
11
|
+
diff-lcs (1.1.3)
|
12
|
+
multi_json (1.0.3)
|
13
|
+
parslet (1.2.1)
|
14
|
+
blankslate (~> 2.0)
|
15
|
+
rspec (2.6.0)
|
16
|
+
rspec-core (~> 2.6.0)
|
17
|
+
rspec-expectations (~> 2.6.0)
|
18
|
+
rspec-mocks (~> 2.6.0)
|
19
|
+
rspec-core (2.6.4)
|
20
|
+
rspec-expectations (2.6.0)
|
21
|
+
diff-lcs (~> 1.1.2)
|
22
|
+
rspec-mocks (2.6.0)
|
23
|
+
simplecov (0.5.3)
|
24
|
+
multi_json (~> 1.0.3)
|
25
|
+
simplecov-html (~> 0.5.3)
|
26
|
+
simplecov-html (0.5.3)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
rspec
|
33
|
+
simplecov
|
34
|
+
upmark!
|
data/{LICENSE.md → LICENSE}
RENAMED
File without changes
|
data/lib/upmark.rb
CHANGED
@@ -5,20 +5,20 @@ require "core_ext/array"
|
|
5
5
|
require "upmark/parser/xml"
|
6
6
|
require 'upmark/transform_helpers'
|
7
7
|
require "upmark/transform/markdown"
|
8
|
-
require "upmark/transform/pass_through"
|
8
|
+
require "upmark/transform/normalise"
|
9
9
|
require "upmark/transform/preprocess"
|
10
10
|
require "upmark/version"
|
11
11
|
|
12
12
|
module Upmark
|
13
13
|
def self.convert(html)
|
14
14
|
xml = Parser::XML.new
|
15
|
+
normalise = Transform::Normalise.new
|
15
16
|
preprocess = Transform::Preprocess.new
|
16
|
-
pass_through = Transform::PassThrough.new
|
17
17
|
markdown = Transform::Markdown.new
|
18
18
|
|
19
19
|
ast = xml.parse(html.strip)
|
20
|
+
ast = normalise.apply(ast)
|
20
21
|
ast = preprocess.apply(ast)
|
21
|
-
ast = pass_through.apply(ast)
|
22
22
|
ast = markdown.apply(ast)
|
23
23
|
|
24
24
|
# The result is either a String or an Array.
|
@@ -1,9 +1,7 @@
|
|
1
1
|
module Upmark
|
2
2
|
module Transform
|
3
|
-
#
|
4
|
-
#
|
5
|
-
# Transforms an abstract syntax tree (AST) into a Markdown document.
|
6
|
-
#
|
3
|
+
# A transform class which converts an abstract syntax tree (AST) into
|
4
|
+
# a Markdown document.
|
7
5
|
class Markdown < Parslet::Transform
|
8
6
|
include TransformHelpers
|
9
7
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Upmark
|
2
|
+
module Transform
|
3
|
+
# A transform class which normalises start/end/empty tags into the
|
4
|
+
# same structure.
|
5
|
+
class Normalise < Parslet::Transform
|
6
|
+
rule(
|
7
|
+
element: {
|
8
|
+
start_tag: {name: simple(:name), attributes: subtree(:attributes)},
|
9
|
+
end_tag: {name: simple(:name)},
|
10
|
+
children: subtree(:children)
|
11
|
+
}
|
12
|
+
) do
|
13
|
+
{
|
14
|
+
element: {
|
15
|
+
name: name,
|
16
|
+
attributes: attributes,
|
17
|
+
children: children,
|
18
|
+
ignore: false
|
19
|
+
}
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
rule(
|
24
|
+
element: {
|
25
|
+
empty_tag: {name: simple(:name), attributes: subtree(:attributes)}
|
26
|
+
}
|
27
|
+
) do
|
28
|
+
{
|
29
|
+
element: {
|
30
|
+
name: name,
|
31
|
+
attributes: attributes,
|
32
|
+
children: [],
|
33
|
+
ignore: false
|
34
|
+
}
|
35
|
+
}
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -1,35 +1,19 @@
|
|
1
|
+
require "upmark/transform/ignore"
|
2
|
+
|
1
3
|
module Upmark
|
2
4
|
module Transform
|
3
|
-
# A
|
5
|
+
# A transform class which marks block-level elements as ignored.
|
6
|
+
# i.e. These elements should not be converted to Markdown.
|
4
7
|
class Preprocess < Parslet::Transform
|
5
|
-
|
6
|
-
element: {
|
7
|
-
start_tag: {name: simple(:name), attributes: subtree(:attributes)},
|
8
|
-
end_tag: {name: simple(:name)},
|
9
|
-
children: subtree(:children)
|
10
|
-
}
|
11
|
-
) do
|
12
|
-
{
|
13
|
-
element: {
|
14
|
-
name: name,
|
15
|
-
attributes: attributes,
|
16
|
-
children: children,
|
17
|
-
ignore: false
|
18
|
-
}
|
19
|
-
}
|
20
|
-
end
|
8
|
+
include TransformHelpers
|
21
9
|
|
22
|
-
|
23
|
-
element: {
|
24
|
-
empty_tag: {name: simple(:name), attributes: subtree(:attributes)}
|
25
|
-
}
|
26
|
-
) do
|
10
|
+
element(:div, :table, :pre) do |element|
|
27
11
|
{
|
28
12
|
element: {
|
29
|
-
name: name,
|
30
|
-
attributes: attributes,
|
31
|
-
children: [],
|
32
|
-
ignore:
|
13
|
+
name: element[:name],
|
14
|
+
attributes: element[:attributes],
|
15
|
+
children: Ignore.new.apply(element[:children]),
|
16
|
+
ignore: true
|
33
17
|
}
|
34
18
|
}
|
35
19
|
end
|
data/lib/upmark/version.rb
CHANGED
@@ -109,20 +109,18 @@ organic
|
|
109
109
|
MD
|
110
110
|
end
|
111
111
|
|
112
|
-
context "
|
113
|
-
|
112
|
+
context "block-level elements" do
|
113
|
+
context "<div>" do
|
114
|
+
let(:html) { <<-HTML.strip }
|
114
115
|
<div>messenger <strong>bag</strong> skateboard</div>
|
115
116
|
<div id="tofu" class="art party">messenger <strong>bag</strong> skateboard</div>
|
116
|
-
|
117
|
+
HTML
|
117
118
|
|
118
|
-
|
119
|
-
|
120
|
-
<div id="tofu" class="art party">messenger <strong>bag</strong> skateboard</div>
|
121
|
-
MD
|
122
|
-
end
|
119
|
+
it { should == html }
|
120
|
+
end
|
123
121
|
|
124
|
-
|
125
|
-
|
122
|
+
context "<table>" do
|
123
|
+
let(:html) { <<-HTML.strip }
|
126
124
|
<table>
|
127
125
|
<tr>
|
128
126
|
<td>messenger</td>
|
@@ -134,48 +132,33 @@ organic
|
|
134
132
|
<td>skateboard</td>
|
135
133
|
</tr>
|
136
134
|
</table>
|
137
|
-
|
135
|
+
HTML
|
138
136
|
|
139
|
-
|
140
|
-
|
141
|
-
<tr>
|
142
|
-
<td>messenger</td>
|
143
|
-
</tr>
|
144
|
-
<tr>
|
145
|
-
<td><strong>bag</strong></td>
|
146
|
-
</tr>
|
147
|
-
<tr>
|
148
|
-
<td>skateboard</td>
|
149
|
-
</tr>
|
150
|
-
</table>
|
151
|
-
MD
|
152
|
-
end
|
137
|
+
it { should == html }
|
138
|
+
end
|
153
139
|
|
154
|
-
|
155
|
-
|
140
|
+
context "<pre>" do
|
141
|
+
let(:html) { <<-HTML.strip }
|
156
142
|
<pre>
|
157
143
|
<code>
|
158
144
|
messenger bag skateboard
|
159
145
|
</code>
|
160
146
|
</pre>
|
161
|
-
|
147
|
+
HTML
|
162
148
|
|
163
|
-
|
164
|
-
|
165
|
-
<code>
|
166
|
-
messenger bag skateboard
|
167
|
-
</code>
|
168
|
-
</pre>
|
169
|
-
MD
|
149
|
+
it { should == html }
|
150
|
+
end
|
170
151
|
end
|
171
152
|
|
172
|
-
context "
|
173
|
-
|
153
|
+
context "span-level elements" do
|
154
|
+
context "<span>" do
|
155
|
+
let(:html) { <<-HTML.strip }
|
174
156
|
<span>messenger <strong>bag</strong> skateboard</span>
|
175
|
-
|
157
|
+
HTML
|
176
158
|
|
177
|
-
|
159
|
+
it { should == <<-MD.strip }
|
178
160
|
<span>messenger **bag** skateboard</span>
|
179
|
-
|
161
|
+
MD
|
162
|
+
end
|
180
163
|
end
|
181
164
|
end
|
data/spec/spec_helper.rb
CHANGED
data/upmark.gemspec
CHANGED
@@ -9,18 +9,18 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.version = Upmark::VERSION
|
10
10
|
s.authors = ["Josh Bassett", "Gus Gollings"]
|
11
11
|
s.email = "dev@theconversation.edu.au"
|
12
|
-
s.homepage = "
|
12
|
+
s.homepage = "http://github.com/conversation/upmark"
|
13
13
|
s.summary = %q{A HTML to Markdown converter.}
|
14
14
|
s.description = %q{Upmark has the skills to convert your HTML to Markdown.}
|
15
15
|
|
16
16
|
s.rubyforge_project = "upmark"
|
17
17
|
|
18
|
-
s.files
|
19
|
-
s.test_files
|
20
|
-
s.executables
|
21
|
-
s.require_paths = ["lib"]
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map {|f| File.basename(f) }
|
22
21
|
|
23
22
|
s.add_development_dependency "rspec"
|
23
|
+
s.add_development_dependency "simplecov"
|
24
24
|
|
25
25
|
s.add_runtime_dependency "parslet"
|
26
26
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: upmark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2011-09-
|
13
|
+
date: 2011-09-26 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
17
|
-
requirement: &
|
17
|
+
requirement: &70363456228720 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,21 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70363456228720
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: simplecov
|
28
|
+
requirement: &70363456228280 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *70363456228280
|
26
37
|
- !ruby/object:Gem::Dependency
|
27
38
|
name: parslet
|
28
|
-
requirement: &
|
39
|
+
requirement: &70363456227680 !ruby/object:Gem::Requirement
|
29
40
|
none: false
|
30
41
|
requirements:
|
31
42
|
- - ! '>='
|
@@ -33,7 +44,7 @@ dependencies:
|
|
33
44
|
version: '0'
|
34
45
|
type: :runtime
|
35
46
|
prerelease: false
|
36
|
-
version_requirements: *
|
47
|
+
version_requirements: *70363456227680
|
37
48
|
description: Upmark has the skills to convert your HTML to Markdown.
|
38
49
|
email: dev@theconversation.edu.au
|
39
50
|
executables:
|
@@ -44,16 +55,18 @@ files:
|
|
44
55
|
- .gitignore
|
45
56
|
- .rspec
|
46
57
|
- Gemfile
|
47
|
-
- LICENSE.md
|
58
|
+
- Gemfile.lock
|
59
|
+
- LICENSE
|
48
60
|
- README.md
|
49
61
|
- Rakefile
|
50
62
|
- bin/upmark
|
63
|
+
- coverage/.gitkeep
|
51
64
|
- lib/core_ext/array.rb
|
52
65
|
- lib/upmark.rb
|
53
66
|
- lib/upmark/parser/xml.rb
|
54
67
|
- lib/upmark/transform/ignore.rb
|
55
68
|
- lib/upmark/transform/markdown.rb
|
56
|
-
- lib/upmark/transform/pass_through.rb
|
69
|
+
- lib/upmark/transform/normalise.rb
|
57
70
|
- lib/upmark/transform/preprocess.rb
|
58
71
|
- lib/upmark/transform_helpers.rb
|
59
72
|
- lib/upmark/version.rb
|
@@ -62,7 +75,7 @@ files:
|
|
62
75
|
- spec/unit/lib/upmark/parser/xml_spec.rb
|
63
76
|
- spec/unit/lib/upmark/transform/markdown_spec.rb
|
64
77
|
- upmark.gemspec
|
65
|
-
homepage:
|
78
|
+
homepage: http://github.com/conversation/upmark
|
66
79
|
licenses: []
|
67
80
|
post_install_message:
|
68
81
|
rdoc_options: []
|
@@ -1,20 +0,0 @@
|
|
1
|
-
require "upmark/transform/ignore"
|
2
|
-
|
3
|
-
module Upmark
|
4
|
-
module Transform
|
5
|
-
class PassThrough < Parslet::Transform
|
6
|
-
include TransformHelpers
|
7
|
-
|
8
|
-
element(:div, :table, :pre) do |element|
|
9
|
-
{
|
10
|
-
element: {
|
11
|
-
name: element[:name],
|
12
|
-
attributes: element[:attributes],
|
13
|
-
children: Ignore.new.apply(element[:children]),
|
14
|
-
ignore: true
|
15
|
-
}
|
16
|
-
}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|