upmark 0.2.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/upmark.rb +12 -3
- data/lib/upmark/errors.rb +12 -1
- data/lib/upmark/parser/xml.rb +33 -20
- data/lib/upmark/transform/markdown.rb +43 -30
- data/lib/upmark/transform/normalise.rb +30 -1
- data/lib/upmark/transform/preprocess.rb +32 -1
- data/spec/acceptance/upmark_spec.rb +161 -54
- data/spec/spec_helper.rb +8 -0
- data/spec/unit/lib/upmark/parser/xml_spec.rb +224 -89
- data/spec/unit/lib/upmark/transform/markdown_spec.rb +89 -17
- metadata +10 -17
data/spec/unit/lib/upmark/parser/xml_spec.rb
CHANGED
@@ -1,104 +1,227 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
describe Upmark::Parser::XML do
|
1
|
+
RSpec.describe Upmark::Parser::XML do
|
4
2
|
let(:parser) { Upmark::Parser::XML.new }
|
5
3
|
|
6
4
|
context "#node" do
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
it
|
11
|
-
|
12
|
-
|
13
|
-
it
|
14
|
-
|
15
|
-
|
5
|
+
it 'will parse ""' do
|
6
|
+
expect(parser.node).to parse ""
|
7
|
+
end
|
8
|
+
it 'will parse "messenger bag skateboard"' do
|
9
|
+
expect(parser.node).to parse "messenger bag skateboard"
|
10
|
+
end
|
11
|
+
it 'will parse html br tags' do
|
12
|
+
expect(parser.node).to parse '<p>One<br>Two</p>'
|
13
|
+
end
|
14
|
+
it 'will parse "<p>messenger bag skateboard</p>"' do
|
15
|
+
expect(
|
16
|
+
parser.node
|
17
|
+
).to parse "<p>messenger bag skateboard</p>"
|
18
|
+
end
|
19
|
+
it 'will parse "messenger <p>bag</p> skateboard"' do
|
20
|
+
expect(
|
21
|
+
parser.node
|
22
|
+
).to parse "messenger <p>bag</p> skateboard"
|
23
|
+
end
|
24
|
+
it 'will parse "<p>messenger</p><p>bag</p><p>skateboard</p>"' do
|
25
|
+
expect(
|
26
|
+
parser.node
|
27
|
+
).to parse "<p>messenger</p><p>bag</p><p>skateboard</p>"
|
28
|
+
end
|
29
|
+
it 'will parse "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>"' do
|
30
|
+
expect(
|
31
|
+
parser.node
|
32
|
+
).to parse "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>"
|
33
|
+
end
|
34
|
+
it 'will parse "<p>messenger <strong>bag</strong> skateboard</p>"' do
|
35
|
+
expect(
|
36
|
+
parser.node
|
37
|
+
).to parse "<p>messenger <strong>bag</strong> skateboard</p>"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "#empty_element" do
|
42
|
+
it 'will parse <p> </p>' do
|
43
|
+
expect(parser.empty_element).to parse '<p> </p>'
|
44
|
+
end
|
16
45
|
end
|
17
46
|
|
18
47
|
context "#element" do
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
it
|
23
|
-
|
24
|
-
|
25
|
-
it
|
26
|
-
|
27
|
-
|
48
|
+
it 'will parse "<p></p>"' do
|
49
|
+
expect(parser.element).to parse "<p></p>"
|
50
|
+
end
|
51
|
+
it 'will parse "<p>messenger bag skateboard</p>"' do
|
52
|
+
expect(parser.element).to parse "<p>messenger bag skateboard</p>"
|
53
|
+
end
|
54
|
+
it 'will parse "<p>Some<br>Text</p>"' do
|
55
|
+
expect(parser.element).to parse "<p>Some<br>Text</p>"
|
56
|
+
end
|
57
|
+
it 'will parse %q{<tofu art="party" />}' do
|
58
|
+
expect(parser.element).to parse %q{<tofu art="party" />}
|
59
|
+
end
|
60
|
+
it 'will not parse "<p>"' do
|
61
|
+
expect(parser.element).to_not parse "<p>"
|
62
|
+
end
|
63
|
+
it 'will not parse "<p>messenger bag skateboard"' do
|
64
|
+
expect(parser.element).to_not parse "<p>messenger bag skateboard"
|
65
|
+
end
|
66
|
+
it 'will not parse "messenger bag skateboard</p>"' do
|
67
|
+
expect(parser.element).to_not parse "messenger bag skateboard</p>"
|
68
|
+
end
|
69
|
+
it 'will not parse "<p>messenger bag skateboard<p>"' do
|
70
|
+
expect(parser.element).to_not parse "<p>messenger bag skateboard<p>"
|
71
|
+
end
|
28
72
|
end
|
29
73
|
|
30
74
|
context "#text" do
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
it
|
35
|
-
|
75
|
+
it 'will parse "messenger bag skateboard"' do
|
76
|
+
expect(parser.text).to parse "messenger bag skateboard"
|
77
|
+
end
|
78
|
+
it 'will not parse "<p>messenger bag skateboard</p>"' do
|
79
|
+
expect(parser.text).to_not parse "<p>messenger bag skateboard</p>"
|
80
|
+
end
|
81
|
+
it 'will not parse " "' do
|
82
|
+
expect(parser.text).to_not parse " "
|
83
|
+
end
|
84
|
+
it 'will not parse ""' do
|
85
|
+
expect(parser.text).to_not parse ""
|
86
|
+
end
|
36
87
|
end
|
37
88
|
|
38
89
|
context "#start_tag" do
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
it
|
43
|
-
|
44
|
-
|
45
|
-
it
|
46
|
-
|
90
|
+
it 'will parse %q{<tofu art="party">}' do
|
91
|
+
expect(parser.start_tag).to parse %q{<tofu art="party">}
|
92
|
+
end
|
93
|
+
it 'will parse %q{<tofu art="party" synth="letterpress">}' do
|
94
|
+
expect(parser.start_tag).to parse %q{<tofu art="party" synth="letterpress">}
|
95
|
+
end
|
96
|
+
it 'will parse "<tofu>"' do
|
97
|
+
expect(parser.start_tag).to parse "<tofu>"
|
98
|
+
end
|
99
|
+
it 'will not parse "</tofu>"' do
|
100
|
+
expect(parser.start_tag).to_not parse "</tofu>"
|
101
|
+
end
|
102
|
+
it 'will not parse "<tofu"' do
|
103
|
+
expect(parser.start_tag).to_not parse "<tofu"
|
104
|
+
end
|
105
|
+
it 'will not parse "tofu>"' do
|
106
|
+
expect(parser.start_tag).to_not parse "tofu>"
|
107
|
+
end
|
47
108
|
end
|
48
109
|
|
49
110
|
context "#end_tag" do
|
50
|
-
|
111
|
+
it 'will parse "</tofu>"' do
|
112
|
+
expect(parser.end_tag).to parse "</tofu>"
|
113
|
+
end
|
114
|
+
it 'will not parse "<tofu>"' do
|
115
|
+
expect(parser.end_tag).to_not parse "<tofu>"
|
116
|
+
end
|
117
|
+
it 'will not parse "<tofu"' do
|
118
|
+
expect(parser.end_tag).to_not parse "<tofu"
|
119
|
+
end
|
120
|
+
it 'will not parse "/tofu>"' do
|
121
|
+
expect(parser.end_tag).to_not parse "/tofu>"
|
122
|
+
end
|
123
|
+
end
|
51
124
|
|
52
|
-
|
53
|
-
it
|
54
|
-
|
55
|
-
|
125
|
+
context "#empty_br" do
|
126
|
+
it 'will parse html br tags' do
|
127
|
+
expect(parser.empty_br).to parse '<br>'
|
128
|
+
end
|
56
129
|
end
|
57
130
|
|
58
131
|
context "#empty_tag" do
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
it
|
63
|
-
|
64
|
-
|
65
|
-
it
|
66
|
-
|
67
|
-
|
132
|
+
it 'will parse %q{<tofu />}' do
|
133
|
+
expect(parser.empty_tag).to parse %q{<tofu />}
|
134
|
+
end
|
135
|
+
it 'will parse %q{<tofu art="party" />}' do
|
136
|
+
expect(parser.empty_tag).to parse %q{<tofu art="party" />}
|
137
|
+
end
|
138
|
+
it 'will parse %q{<tofu art="party" synth="letterpress" />}' do
|
139
|
+
expect(parser.empty_tag).to parse %q{<tofu art="party" synth="letterpress" />}
|
140
|
+
end
|
141
|
+
it 'will not parse "<tofu>"' do
|
142
|
+
expect(parser.empty_tag).to_not parse "<tofu>"
|
143
|
+
end
|
144
|
+
it 'will not parse "</tofu>"' do
|
145
|
+
expect(parser.empty_tag).to_not parse "</tofu>"
|
146
|
+
end
|
147
|
+
it 'will not parse "<tofu"' do
|
148
|
+
expect(parser.empty_tag).to_not parse "<tofu"
|
149
|
+
end
|
150
|
+
it 'will not parse "/tofu>"' do
|
151
|
+
expect(parser.empty_tag).to_not parse "/tofu>"
|
152
|
+
end
|
68
153
|
end
|
69
154
|
|
70
155
|
context "#name" do
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
it
|
75
|
-
|
76
|
-
|
156
|
+
it 'will parse "p"' do
|
157
|
+
expect(parser.name).to parse "p"
|
158
|
+
end
|
159
|
+
it 'will parse "h1"' do
|
160
|
+
expect(parser.name).to parse "h1"
|
161
|
+
end
|
162
|
+
it 'will not parse "1h"' do
|
163
|
+
expect(parser.name).to_not parse "1h"
|
164
|
+
end
|
165
|
+
it 'will not parse "h 1"' do
|
166
|
+
expect(parser.name).to_not parse "h 1"
|
167
|
+
end
|
77
168
|
end
|
78
169
|
|
79
170
|
context "#attribute" do
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
it
|
84
|
-
|
85
|
-
|
86
|
-
it
|
87
|
-
|
88
|
-
|
89
|
-
it
|
90
|
-
|
91
|
-
|
171
|
+
it 'will parse %q{art="party organic"}' do
|
172
|
+
expect(parser.attribute).to parse %q{art="party organic"}
|
173
|
+
end
|
174
|
+
it 'will parse %q{art=\'party organic\'}' do
|
175
|
+
expect(parser.attribute).to parse %q{art='party organic'}
|
176
|
+
end
|
177
|
+
it 'will parse %q{art="party\'organic"}' do
|
178
|
+
expect(parser.attribute).to parse %q{art="party'organic"}
|
179
|
+
end
|
180
|
+
it 'will parse %q{art=\'party"organic\'}' do
|
181
|
+
expect(parser.attribute).to parse %q{art='party"organic'}
|
182
|
+
end
|
183
|
+
it 'will not parse "art"' do
|
184
|
+
expect(parser.attribute).to_not parse "art"
|
185
|
+
end
|
186
|
+
it 'will not parse "art="' do
|
187
|
+
expect(parser.attribute).to_not parse "art="
|
188
|
+
end
|
189
|
+
it 'will not parse "art=party"' do
|
190
|
+
expect(parser.attribute).to_not parse "art=party"
|
191
|
+
end
|
192
|
+
it 'will not parse %q{="party organic"}' do
|
193
|
+
expect(parser.attribute).to_not parse %q{="party organic"}
|
194
|
+
end
|
195
|
+
it 'will not parse %q{art="party organic\'}' do
|
196
|
+
expect(parser.attribute).to_not parse %q{art="party organic'}
|
197
|
+
end
|
198
|
+
it 'will not parse %q{art=\'party organic"}' do
|
199
|
+
expect(parser.attribute).to_not parse %q{art='party organic"}
|
200
|
+
end
|
92
201
|
end
|
93
202
|
|
94
203
|
context "#parse" do
|
95
|
-
|
204
|
+
RSpec::Matchers.define :convert do |html|
|
205
|
+
match do |parser|
|
206
|
+
@actual = parser.parse(html)
|
207
|
+
@actual == @expected
|
208
|
+
end
|
96
209
|
|
97
|
-
|
98
|
-
|
210
|
+
chain :to do |ast|
|
211
|
+
@expected = ast
|
212
|
+
end
|
213
|
+
attr_reader :expected
|
214
|
+
|
215
|
+
failure_message do
|
216
|
+
%Q{expected "#{html}" to parse to "#{@expected.inspect}" but was #{@result.inspect}}
|
217
|
+
end
|
99
218
|
|
100
|
-
|
101
|
-
|
219
|
+
diffable
|
220
|
+
end
|
221
|
+
|
222
|
+
context "single tag" do
|
223
|
+
it 'is parsed as a single element' do
|
224
|
+
expect(parser).to convert("<p>messenger</p>").to([
|
102
225
|
{
|
103
226
|
element: {
|
104
227
|
start_tag: {name: "p", attributes: []},
|
@@ -106,29 +229,41 @@ describe Upmark::Parser::XML do
|
|
106
229
|
children: [{text: "messenger"}]
|
107
230
|
}
|
108
231
|
}
|
109
|
-
]
|
232
|
+
])
|
233
|
+
end
|
234
|
+
|
235
|
+
it 'will ignore empty text tags' do
|
236
|
+
expect(parser).to convert('<p> </p>').to(
|
237
|
+
[
|
238
|
+
{
|
239
|
+
empty:
|
240
|
+
{
|
241
|
+
start_tag: { name: "p", attributes: [] },
|
242
|
+
end_tag: { name: "p" },
|
243
|
+
}
|
244
|
+
}
|
245
|
+
]
|
246
|
+
)
|
110
247
|
end
|
111
248
|
end
|
112
249
|
|
113
250
|
context "empty tag" do
|
114
|
-
|
115
|
-
|
116
|
-
it do
|
117
|
-
should == [
|
251
|
+
it 'is parsed an empty_tag element' do
|
252
|
+
expect(parser).to convert("<br />").to([
|
118
253
|
{
|
119
254
|
element: {
|
120
255
|
empty_tag: {name: "br", attributes: []}
|
121
256
|
}
|
122
257
|
}
|
123
|
-
]
|
258
|
+
])
|
124
259
|
end
|
125
260
|
end
|
126
261
|
|
127
262
|
context "single tag with attributes" do
|
128
263
|
let(:html) { %q{<a href="http://helvetica.com/" title="art party organic">messenger bag skateboard</a>} }
|
129
264
|
|
130
|
-
it do
|
131
|
-
|
265
|
+
it 'is parsed an element with an attribute subtree' do
|
266
|
+
expect(parser).to convert(html).to([
|
132
267
|
{
|
133
268
|
element: {
|
134
269
|
start_tag: {
|
@@ -142,15 +277,15 @@ describe Upmark::Parser::XML do
|
|
142
277
|
children: [{text: "messenger bag skateboard"}]
|
143
278
|
}
|
144
279
|
}
|
145
|
-
]
|
280
|
+
])
|
146
281
|
end
|
147
282
|
end
|
148
283
|
|
149
284
|
context "multiple inline tags" do
|
150
285
|
let(:html) { "<p>messenger</p><p>bag</p><p>skateboard</p>" }
|
151
286
|
|
152
|
-
it do
|
153
|
-
|
287
|
+
it 'converts to multiple elements' do
|
288
|
+
expect(parser).to convert(html).to([
|
154
289
|
{
|
155
290
|
element: {
|
156
291
|
start_tag: {name: "p", attributes: []},
|
@@ -170,15 +305,15 @@ describe Upmark::Parser::XML do
|
|
170
305
|
children: [{text: "skateboard"}]
|
171
306
|
}
|
172
307
|
}
|
173
|
-
]
|
308
|
+
])
|
174
309
|
end
|
175
310
|
end
|
176
311
|
|
177
312
|
context "multiple tags" do
|
178
313
|
let(:html) { "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>" }
|
179
314
|
|
180
|
-
it do
|
181
|
-
|
315
|
+
it 'converts to multiple elements' do
|
316
|
+
expect(parser).to convert(html).to([
|
182
317
|
{
|
183
318
|
element: {
|
184
319
|
start_tag: {name: "p", attributes: []},
|
@@ -202,15 +337,15 @@ describe Upmark::Parser::XML do
|
|
202
337
|
children: [{text: "skateboard"}]
|
203
338
|
}
|
204
339
|
}
|
205
|
-
]
|
340
|
+
])
|
206
341
|
end
|
207
342
|
end
|
208
343
|
|
209
344
|
context "nested tags" do
|
210
345
|
let(:html) { "<p>messenger <strong>bag</strong> skateboard</p>" }
|
211
346
|
|
212
|
-
it do
|
213
|
-
|
347
|
+
it 'converts to multiple nested elements' do
|
348
|
+
expect(parser).to convert(html).to([
|
214
349
|
{
|
215
350
|
element: {
|
216
351
|
start_tag: {name: "p", attributes: []},
|
@@ -230,7 +365,7 @@ describe Upmark::Parser::XML do
|
|
230
365
|
]
|
231
366
|
}
|
232
367
|
}
|
233
|
-
]
|
368
|
+
])
|
234
369
|
end
|
235
370
|
end
|
236
371
|
end
|
data/spec/unit/lib/upmark/transform/markdown_spec.rb
CHANGED
@@ -1,10 +1,18 @@
|
|
1
|
-
|
1
|
+
RSpec.describe Upmark::Transform::Markdown do
|
2
|
+
def transform(ast)
|
3
|
+
Upmark::Transform::Markdown.new.apply(ast)
|
4
|
+
end
|
2
5
|
|
3
|
-
|
4
|
-
let(:transform) { Upmark::Transform::Markdown.new }
|
6
|
+
let(:transformed_ast) { transform(ast) }
|
5
7
|
|
6
8
|
context "#apply" do
|
7
|
-
|
9
|
+
context '<br>' do
|
10
|
+
let(:ast) { [{ element: { name: 'br' }}] }
|
11
|
+
|
12
|
+
it 'will transform to markdown' do
|
13
|
+
expect(transformed_ast).to eq ["\n"]
|
14
|
+
end
|
15
|
+
end
|
8
16
|
|
9
17
|
context "<p>" do
|
10
18
|
context "single tag" do
|
@@ -21,7 +29,11 @@ describe Upmark::Transform::Markdown do
|
|
21
29
|
]
|
22
30
|
end
|
23
31
|
|
24
|
-
it
|
32
|
+
it 'transforms to markdown' do
|
33
|
+
expect(
|
34
|
+
transformed_ast
|
35
|
+
).to eq(["messenger bag skateboard\n\n"])
|
36
|
+
end
|
25
37
|
end
|
26
38
|
|
27
39
|
context "multiple tags" do
|
@@ -52,21 +64,31 @@ describe Upmark::Transform::Markdown do
|
|
52
64
|
]
|
53
65
|
end
|
54
66
|
|
55
|
-
it
|
67
|
+
it 'transforms to markdown' do
|
68
|
+
expect(
|
69
|
+
transformed_ast
|
70
|
+
).to eq(["messenger\n\n", "bag\n\n", "skateboard\n\n"])
|
71
|
+
end
|
56
72
|
end
|
57
73
|
end
|
58
74
|
|
59
75
|
context "<a>" do
|
60
76
|
context "single tag" do
|
61
77
|
let(:ast) do
|
78
|
+
a_tag(
|
79
|
+
href: "http://helvetica.com/",
|
80
|
+
title: "art party organic",
|
81
|
+
)
|
82
|
+
end
|
83
|
+
|
84
|
+
def a_tag(attributes)
|
62
85
|
[
|
63
86
|
{
|
64
87
|
element: {
|
65
88
|
name: "a",
|
66
|
-
attributes:
|
67
|
-
{name:
|
68
|
-
|
69
|
-
],
|
89
|
+
attributes: attributes.map do |key, value|
|
90
|
+
{ name: key.to_s, value: value }
|
91
|
+
end,
|
70
92
|
children: [{text: "messenger bag skateboard"}],
|
71
93
|
ignore: false
|
72
94
|
}
|
@@ -74,22 +96,50 @@ describe Upmark::Transform::Markdown do
|
|
74
96
|
]
|
75
97
|
end
|
76
98
|
|
77
|
-
it
|
99
|
+
it 'transforms to markdown' do
|
100
|
+
expect(
|
101
|
+
transformed_ast
|
102
|
+
).to eq([%q{[messenger bag skateboard](http://helvetica.com/ "art party organic")}])
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'transforms mailto to markdown' do
|
106
|
+
expect(
|
107
|
+
transform a_tag(href: 'mailto:a@example.com', title: 'Some Path')
|
108
|
+
).to eq([%q{[messenger bag skateboard](mailto:a@example.com "Some Path")}])
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'strips local urls to their text' do
|
112
|
+
expect(
|
113
|
+
transform a_tag(href: 'file://some/path', title: 'Some Path')
|
114
|
+
).to eq ['messenger bag skateboard']
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'strips relative urls to their alt text' do
|
118
|
+
expect(
|
119
|
+
transform a_tag(src: 'some/path', title: 'Some Path')
|
120
|
+
).to eq ['messenger bag skateboard']
|
121
|
+
end
|
78
122
|
end
|
79
123
|
end
|
80
124
|
|
81
125
|
context "<img>" do
|
82
126
|
context "empty tag" do
|
83
127
|
let(:ast) do
|
128
|
+
img(
|
129
|
+
src: "http://helvetica.com/image.gif",
|
130
|
+
title: "art party organic",
|
131
|
+
alt: "messenger bag skateboard",
|
132
|
+
)
|
133
|
+
end
|
134
|
+
|
135
|
+
def img(attributes)
|
84
136
|
[
|
85
137
|
{
|
86
138
|
element: {
|
87
139
|
name: "img",
|
88
|
-
attributes:
|
89
|
-
{name:
|
90
|
-
|
91
|
-
{name: "alt", value: "messenger bag skateboard"}
|
92
|
-
],
|
140
|
+
attributes: attributes.map do |key, value|
|
141
|
+
{ name: key.to_s, value: value }
|
142
|
+
end,
|
93
143
|
children: [],
|
94
144
|
ignore: false
|
95
145
|
}
|
@@ -97,7 +147,29 @@ describe Upmark::Transform::Markdown do
|
|
97
147
|
]
|
98
148
|
end
|
99
149
|
|
100
|
-
it
|
150
|
+
it 'transforms to markdown' do
|
151
|
+
expect(
|
152
|
+
transformed_ast
|
153
|
+
).to eq([%q{![messenger bag skateboard](http://helvetica.com/image.gif "art party organic")}])
|
154
|
+
end
|
155
|
+
|
156
|
+
it 'strips file urls to their alt text or title' do
|
157
|
+
expect(
|
158
|
+
transform img(src: 'file://some/path', alt: 'Some', title: 'Path')
|
159
|
+
).to eq ['Some']
|
160
|
+
expect(
|
161
|
+
transform img(src: 'file://some/path', title: 'Some Path')
|
162
|
+
).to eq ['Some Path']
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'strips relative urls to their alt text' do
|
166
|
+
expect(
|
167
|
+
transform img(src: 'some/path', alt: 'Some', title: 'Path')
|
168
|
+
).to eq ['Some']
|
169
|
+
expect(
|
170
|
+
transform img(src: 'some/path', title: 'Some Path')
|
171
|
+
).to eq ['Some Path']
|
172
|
+
end
|
101
173
|
end
|
102
174
|
end
|
103
175
|
end
|