upmark 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -2
- data/Gemfile.lock +34 -0
- data/{LICENSE.md → LICENSE} +0 -0
- data/lib/upmark.rb +3 -3
- data/lib/upmark/transform/markdown.rb +2 -4
- data/lib/upmark/transform/normalise.rb +39 -0
- data/lib/upmark/transform/preprocess.rb +10 -26
- data/lib/upmark/version.rb +1 -1
- data/spec/acceptance/upmark_spec.rb +23 -40
- data/spec/spec_helper.rb +4 -0
- data/upmark.gemspec +5 -5
- metadata +22 -9
- data/lib/upmark/transform/pass_through.rb +0 -20
data/.gitignore
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
upmark (0.1.1)
|
5
|
+
parslet
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
blankslate (2.1.2.4)
|
11
|
+
diff-lcs (1.1.3)
|
12
|
+
multi_json (1.0.3)
|
13
|
+
parslet (1.2.1)
|
14
|
+
blankslate (~> 2.0)
|
15
|
+
rspec (2.6.0)
|
16
|
+
rspec-core (~> 2.6.0)
|
17
|
+
rspec-expectations (~> 2.6.0)
|
18
|
+
rspec-mocks (~> 2.6.0)
|
19
|
+
rspec-core (2.6.4)
|
20
|
+
rspec-expectations (2.6.0)
|
21
|
+
diff-lcs (~> 1.1.2)
|
22
|
+
rspec-mocks (2.6.0)
|
23
|
+
simplecov (0.5.3)
|
24
|
+
multi_json (~> 1.0.3)
|
25
|
+
simplecov-html (~> 0.5.3)
|
26
|
+
simplecov-html (0.5.3)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
rspec
|
33
|
+
simplecov
|
34
|
+
upmark!
|
data/{LICENSE.md → LICENSE}
RENAMED
File without changes
|
data/lib/upmark.rb
CHANGED
@@ -5,20 +5,20 @@ require "core_ext/array"
|
|
5
5
|
require "upmark/parser/xml"
|
6
6
|
require 'upmark/transform_helpers'
|
7
7
|
require "upmark/transform/markdown"
|
8
|
-
require "upmark/transform/pass_through"
|
8
|
+
require "upmark/transform/normalise"
|
9
9
|
require "upmark/transform/preprocess"
|
10
10
|
require "upmark/version"
|
11
11
|
|
12
12
|
module Upmark
|
13
13
|
def self.convert(html)
|
14
14
|
xml = Parser::XML.new
|
15
|
+
normalise = Transform::Normalise.new
|
15
16
|
preprocess = Transform::Preprocess.new
|
16
|
-
pass_through = Transform::PassThrough.new
|
17
17
|
markdown = Transform::Markdown.new
|
18
18
|
|
19
19
|
ast = xml.parse(html.strip)
|
20
|
+
ast = normalise.apply(ast)
|
20
21
|
ast = preprocess.apply(ast)
|
21
|
-
ast = pass_through.apply(ast)
|
22
22
|
ast = markdown.apply(ast)
|
23
23
|
|
24
24
|
# The result is either a String or an Array.
|
@@ -1,9 +1,7 @@
|
|
1
1
|
module Upmark
|
2
2
|
module Transform
|
3
|
-
#
|
4
|
-
#
|
5
|
-
# Transforms an abstract syntax tree (AST) into a Markdown document.
|
6
|
-
#
|
3
|
+
# A transform class which converts an abstract syntax tree (AST) into
|
4
|
+
# a Markdown document.
|
7
5
|
class Markdown < Parslet::Transform
|
8
6
|
include TransformHelpers
|
9
7
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Upmark
|
2
|
+
module Transform
|
3
|
+
# A transform class which normalises start/end/empty tags into the
|
4
|
+
# same structure.
|
5
|
+
class Normalise < Parslet::Transform
|
6
|
+
rule(
|
7
|
+
element: {
|
8
|
+
start_tag: {name: simple(:name), attributes: subtree(:attributes)},
|
9
|
+
end_tag: {name: simple(:name)},
|
10
|
+
children: subtree(:children)
|
11
|
+
}
|
12
|
+
) do
|
13
|
+
{
|
14
|
+
element: {
|
15
|
+
name: name,
|
16
|
+
attributes: attributes,
|
17
|
+
children: children,
|
18
|
+
ignore: false
|
19
|
+
}
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
rule(
|
24
|
+
element: {
|
25
|
+
empty_tag: {name: simple(:name), attributes: subtree(:attributes)}
|
26
|
+
}
|
27
|
+
) do
|
28
|
+
{
|
29
|
+
element: {
|
30
|
+
name: name,
|
31
|
+
attributes: attributes,
|
32
|
+
children: [],
|
33
|
+
ignore: false
|
34
|
+
}
|
35
|
+
}
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -1,35 +1,19 @@
|
|
1
|
+
require "upmark/transform/ignore"
|
2
|
+
|
1
3
|
module Upmark
|
2
4
|
module Transform
|
3
|
-
# A
|
5
|
+
# A transform class which marks block-level elements as ignored.
|
6
|
+
# i.e. These elements should not be converted to Markdown.
|
4
7
|
class Preprocess < Parslet::Transform
|
5
|
-
|
6
|
-
element: {
|
7
|
-
start_tag: {name: simple(:name), attributes: subtree(:attributes)},
|
8
|
-
end_tag: {name: simple(:name)},
|
9
|
-
children: subtree(:children)
|
10
|
-
}
|
11
|
-
) do
|
12
|
-
{
|
13
|
-
element: {
|
14
|
-
name: name,
|
15
|
-
attributes: attributes,
|
16
|
-
children: children,
|
17
|
-
ignore: false
|
18
|
-
}
|
19
|
-
}
|
20
|
-
end
|
8
|
+
include TransformHelpers
|
21
9
|
|
22
|
-
|
23
|
-
element: {
|
24
|
-
empty_tag: {name: simple(:name), attributes: subtree(:attributes)}
|
25
|
-
}
|
26
|
-
) do
|
10
|
+
element(:div, :table, :pre) do |element|
|
27
11
|
{
|
28
12
|
element: {
|
29
|
-
name: name,
|
30
|
-
attributes: attributes,
|
31
|
-
children: [],
|
32
|
-
ignore: false
|
13
|
+
name: element[:name],
|
14
|
+
attributes: element[:attributes],
|
15
|
+
children: Ignore.new.apply(element[:children]),
|
16
|
+
ignore: true
|
33
17
|
}
|
34
18
|
}
|
35
19
|
end
|
data/lib/upmark/version.rb
CHANGED
@@ -109,20 +109,18 @@ organic
|
|
109
109
|
MD
|
110
110
|
end
|
111
111
|
|
112
|
-
context "
|
113
|
-
|
112
|
+
context "block-level elements" do
|
113
|
+
context "<div>" do
|
114
|
+
let(:html) { <<-HTML.strip }
|
114
115
|
<div>messenger <strong>bag</strong> skateboard</div>
|
115
116
|
<div id="tofu" class="art party">messenger <strong>bag</strong> skateboard</div>
|
116
|
-
|
117
|
+
HTML
|
117
118
|
|
118
|
-
|
119
|
-
|
120
|
-
<div id="tofu" class="art party">messenger <strong>bag</strong> skateboard</div>
|
121
|
-
MD
|
122
|
-
end
|
119
|
+
it { should == html }
|
120
|
+
end
|
123
121
|
|
124
|
-
|
125
|
-
|
122
|
+
context "<table>" do
|
123
|
+
let(:html) { <<-HTML.strip }
|
126
124
|
<table>
|
127
125
|
<tr>
|
128
126
|
<td>messenger</td>
|
@@ -134,48 +132,33 @@ organic
|
|
134
132
|
<td>skateboard</td>
|
135
133
|
</tr>
|
136
134
|
</table>
|
137
|
-
|
135
|
+
HTML
|
138
136
|
|
139
|
-
|
140
|
-
|
141
|
-
<tr>
|
142
|
-
<td>messenger</td>
|
143
|
-
</tr>
|
144
|
-
<tr>
|
145
|
-
<td><strong>bag</strong></td>
|
146
|
-
</tr>
|
147
|
-
<tr>
|
148
|
-
<td>skateboard</td>
|
149
|
-
</tr>
|
150
|
-
</table>
|
151
|
-
MD
|
152
|
-
end
|
137
|
+
it { should == html }
|
138
|
+
end
|
153
139
|
|
154
|
-
|
155
|
-
|
140
|
+
context "<pre>" do
|
141
|
+
let(:html) { <<-HTML.strip }
|
156
142
|
<pre>
|
157
143
|
<code>
|
158
144
|
messenger bag skateboard
|
159
145
|
</code>
|
160
146
|
</pre>
|
161
|
-
|
147
|
+
HTML
|
162
148
|
|
163
|
-
|
164
|
-
|
165
|
-
<code>
|
166
|
-
messenger bag skateboard
|
167
|
-
</code>
|
168
|
-
</pre>
|
169
|
-
MD
|
149
|
+
it { should == html }
|
150
|
+
end
|
170
151
|
end
|
171
152
|
|
172
|
-
context "
|
173
|
-
|
153
|
+
context "span-level elements" do
|
154
|
+
context "<span>" do
|
155
|
+
let(:html) { <<-HTML.strip }
|
174
156
|
<span>messenger <strong>bag</strong> skateboard</span>
|
175
|
-
|
157
|
+
HTML
|
176
158
|
|
177
|
-
|
159
|
+
it { should == <<-MD.strip }
|
178
160
|
<span>messenger **bag** skateboard</span>
|
179
|
-
|
161
|
+
MD
|
162
|
+
end
|
180
163
|
end
|
181
164
|
end
|
data/spec/spec_helper.rb
CHANGED
data/upmark.gemspec
CHANGED
@@ -9,18 +9,18 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.version = Upmark::VERSION
|
10
10
|
s.authors = ["Josh Bassett", "Gus Gollings"]
|
11
11
|
s.email = "dev@theconversation.edu.au"
|
12
|
-
s.homepage = "
|
12
|
+
s.homepage = "http://github.com/conversation/upmark"
|
13
13
|
s.summary = %q{A HTML to Markdown converter.}
|
14
14
|
s.description = %q{Upmark has the skills to convert your HTML to Markdown.}
|
15
15
|
|
16
16
|
s.rubyforge_project = "upmark"
|
17
17
|
|
18
|
-
s.files
|
19
|
-
s.test_files
|
20
|
-
s.executables
|
21
|
-
s.require_paths = ["lib"]
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map {|f| File.basename(f) }
|
22
21
|
|
23
22
|
s.add_development_dependency "rspec"
|
23
|
+
s.add_development_dependency "simplecov"
|
24
24
|
|
25
25
|
s.add_runtime_dependency "parslet"
|
26
26
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: upmark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2011-09-
|
13
|
+
date: 2011-09-26 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
17
|
-
requirement: &
|
17
|
+
requirement: &70363456228720 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,21 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70363456228720
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: simplecov
|
28
|
+
requirement: &70363456228280 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *70363456228280
|
26
37
|
- !ruby/object:Gem::Dependency
|
27
38
|
name: parslet
|
28
|
-
requirement: &
|
39
|
+
requirement: &70363456227680 !ruby/object:Gem::Requirement
|
29
40
|
none: false
|
30
41
|
requirements:
|
31
42
|
- - ! '>='
|
@@ -33,7 +44,7 @@ dependencies:
|
|
33
44
|
version: '0'
|
34
45
|
type: :runtime
|
35
46
|
prerelease: false
|
36
|
-
version_requirements: *
|
47
|
+
version_requirements: *70363456227680
|
37
48
|
description: Upmark has the skills to convert your HTML to Markdown.
|
38
49
|
email: dev@theconversation.edu.au
|
39
50
|
executables:
|
@@ -44,16 +55,18 @@ files:
|
|
44
55
|
- .gitignore
|
45
56
|
- .rspec
|
46
57
|
- Gemfile
|
47
|
-
- LICENSE.md
|
58
|
+
- Gemfile.lock
|
59
|
+
- LICENSE
|
48
60
|
- README.md
|
49
61
|
- Rakefile
|
50
62
|
- bin/upmark
|
63
|
+
- coverage/.gitkeep
|
51
64
|
- lib/core_ext/array.rb
|
52
65
|
- lib/upmark.rb
|
53
66
|
- lib/upmark/parser/xml.rb
|
54
67
|
- lib/upmark/transform/ignore.rb
|
55
68
|
- lib/upmark/transform/markdown.rb
|
56
|
-
- lib/upmark/transform/pass_through.rb
|
69
|
+
- lib/upmark/transform/normalise.rb
|
57
70
|
- lib/upmark/transform/preprocess.rb
|
58
71
|
- lib/upmark/transform_helpers.rb
|
59
72
|
- lib/upmark/version.rb
|
@@ -62,7 +75,7 @@ files:
|
|
62
75
|
- spec/unit/lib/upmark/parser/xml_spec.rb
|
63
76
|
- spec/unit/lib/upmark/transform/markdown_spec.rb
|
64
77
|
- upmark.gemspec
|
65
|
-
homepage:
|
78
|
+
homepage: http://github.com/conversation/upmark
|
66
79
|
licenses: []
|
67
80
|
post_install_message:
|
68
81
|
rdoc_options: []
|
@@ -1,20 +0,0 @@
|
|
1
|
-
require "upmark/transform/ignore"
|
2
|
-
|
3
|
-
module Upmark
|
4
|
-
module Transform
|
5
|
-
class PassThrough < Parslet::Transform
|
6
|
-
include TransformHelpers
|
7
|
-
|
8
|
-
element(:div, :table, :pre) do |element|
|
9
|
-
{
|
10
|
-
element: {
|
11
|
-
name: element[:name],
|
12
|
-
attributes: element[:attributes],
|
13
|
-
children: Ignore.new.apply(element[:children]),
|
14
|
-
ignore: true
|
15
|
-
}
|
16
|
-
}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|