upmark 0.2.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/upmark.rb +12 -3
- data/lib/upmark/errors.rb +12 -1
- data/lib/upmark/parser/xml.rb +33 -20
- data/lib/upmark/transform/markdown.rb +43 -30
- data/lib/upmark/transform/normalise.rb +30 -1
- data/lib/upmark/transform/preprocess.rb +32 -1
- data/spec/acceptance/upmark_spec.rb +161 -54
- data/spec/spec_helper.rb +8 -0
- data/spec/unit/lib/upmark/parser/xml_spec.rb +224 -89
- data/spec/unit/lib/upmark/transform/markdown_spec.rb +89 -17
- metadata +10 -17
data/spec/unit/lib/upmark/parser/xml_spec.rb
CHANGED
@@ -1,104 +1,227 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
describe Upmark::Parser::XML do
|
1
|
+
RSpec.describe Upmark::Parser::XML do
|
4
2
|
let(:parser) { Upmark::Parser::XML.new }
|
5
3
|
|
6
4
|
context "#node" do
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
it
|
11
|
-
|
12
|
-
|
13
|
-
it
|
14
|
-
|
15
|
-
|
5
|
+
it 'will parse ""' do
|
6
|
+
expect(parser.node).to parse ""
|
7
|
+
end
|
8
|
+
it 'will parse "messenger bag skateboard"' do
|
9
|
+
expect(parser.node).to parse "messenger bag skateboard"
|
10
|
+
end
|
11
|
+
it 'will parse html br tags' do
|
12
|
+
expect(parser.node).to parse '<p>One<br>Two</p>'
|
13
|
+
end
|
14
|
+
it 'will parse "<p>messenger bag skateboard</p>"' do
|
15
|
+
expect(
|
16
|
+
parser.node
|
17
|
+
).to parse "<p>messenger bag skateboard</p>"
|
18
|
+
end
|
19
|
+
it 'will parse "messenger <p>bag</p> skateboard"' do
|
20
|
+
expect(
|
21
|
+
parser.node
|
22
|
+
).to parse "messenger <p>bag</p> skateboard"
|
23
|
+
end
|
24
|
+
it 'will parse "<p>messenger</p><p>bag</p><p>skateboard</p>"' do
|
25
|
+
expect(
|
26
|
+
parser.node
|
27
|
+
).to parse "<p>messenger</p><p>bag</p><p>skateboard</p>"
|
28
|
+
end
|
29
|
+
it 'will parse "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>"' do
|
30
|
+
expect(
|
31
|
+
parser.node
|
32
|
+
).to parse "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>"
|
33
|
+
end
|
34
|
+
it 'will parse "<p>messenger <strong>bag</strong> skateboard</p>"' do
|
35
|
+
expect(
|
36
|
+
parser.node
|
37
|
+
).to parse "<p>messenger <strong>bag</strong> skateboard</p>"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "#empty_element" do
|
42
|
+
it 'will parse <p> </p>' do
|
43
|
+
expect(parser.empty_element).to parse '<p> </p>'
|
44
|
+
end
|
16
45
|
end
|
17
46
|
|
18
47
|
context "#element" do
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
it
|
23
|
-
|
24
|
-
|
25
|
-
it
|
26
|
-
|
27
|
-
|
48
|
+
it 'will parse "<p></p>"' do
|
49
|
+
expect(parser.element).to parse "<p></p>"
|
50
|
+
end
|
51
|
+
it 'will parse "<p>messenger bag skateboard</p>"' do
|
52
|
+
expect(parser.element).to parse "<p>messenger bag skateboard</p>"
|
53
|
+
end
|
54
|
+
it 'will parse "<p>Some<br>Text</p>"' do
|
55
|
+
expect(parser.element).to parse "<p>Some<br>Text</p>"
|
56
|
+
end
|
57
|
+
it 'will parse %q{<tofu art="party" />}' do
|
58
|
+
expect(parser.element).to parse %q{<tofu art="party" />}
|
59
|
+
end
|
60
|
+
it 'will not parse "<p>"' do
|
61
|
+
expect(parser.element).to_not parse "<p>"
|
62
|
+
end
|
63
|
+
it 'will not parse "<p>messenger bag skateboard"' do
|
64
|
+
expect(parser.element).to_not parse "<p>messenger bag skateboard"
|
65
|
+
end
|
66
|
+
it 'will not parse "messenger bag skateboard</p>"' do
|
67
|
+
expect(parser.element).to_not parse "messenger bag skateboard</p>"
|
68
|
+
end
|
69
|
+
it 'will not parse "<p>messenger bag skateboard<p>"' do
|
70
|
+
expect(parser.element).to_not parse "<p>messenger bag skateboard<p>"
|
71
|
+
end
|
28
72
|
end
|
29
73
|
|
30
74
|
context "#text" do
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
it
|
35
|
-
|
75
|
+
it 'will parse "messenger bag skateboard"' do
|
76
|
+
expect(parser.text).to parse "messenger bag skateboard"
|
77
|
+
end
|
78
|
+
it 'will not parse "<p>messenger bag skateboard</p>"' do
|
79
|
+
expect(parser.text).to_not parse "<p>messenger bag skateboard</p>"
|
80
|
+
end
|
81
|
+
it 'will not parse " "' do
|
82
|
+
expect(parser.text).to_not parse " "
|
83
|
+
end
|
84
|
+
it 'will not parse ""' do
|
85
|
+
expect(parser.text).to_not parse ""
|
86
|
+
end
|
36
87
|
end
|
37
88
|
|
38
89
|
context "#start_tag" do
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
it
|
43
|
-
|
44
|
-
|
45
|
-
it
|
46
|
-
|
90
|
+
it 'will parse %q{<tofu art="party">}' do
|
91
|
+
expect(parser.start_tag).to parse %q{<tofu art="party">}
|
92
|
+
end
|
93
|
+
it 'will parse %q{<tofu art="party" synth="letterpress">}' do
|
94
|
+
expect(parser.start_tag).to parse %q{<tofu art="party" synth="letterpress">}
|
95
|
+
end
|
96
|
+
it 'will parse "<tofu>"' do
|
97
|
+
expect(parser.start_tag).to parse "<tofu>"
|
98
|
+
end
|
99
|
+
it 'will not parse "</tofu>"' do
|
100
|
+
expect(parser.start_tag).to_not parse "</tofu>"
|
101
|
+
end
|
102
|
+
it 'will not parse "<tofu"' do
|
103
|
+
expect(parser.start_tag).to_not parse "<tofu"
|
104
|
+
end
|
105
|
+
it 'will not parse "tofu>"' do
|
106
|
+
expect(parser.start_tag).to_not parse "tofu>"
|
107
|
+
end
|
47
108
|
end
|
48
109
|
|
49
110
|
context "#end_tag" do
|
50
|
-
|
111
|
+
it 'will parse "</tofu>"' do
|
112
|
+
expect(parser.end_tag).to parse "</tofu>"
|
113
|
+
end
|
114
|
+
it 'will not parse "<tofu>"' do
|
115
|
+
expect(parser.end_tag).to_not parse "<tofu>"
|
116
|
+
end
|
117
|
+
it 'will not parse "<tofu"' do
|
118
|
+
expect(parser.end_tag).to_not parse "<tofu"
|
119
|
+
end
|
120
|
+
it 'will not parse "/tofu>"' do
|
121
|
+
expect(parser.end_tag).to_not parse "/tofu>"
|
122
|
+
end
|
123
|
+
end
|
51
124
|
|
52
|
-
|
53
|
-
it
|
54
|
-
|
55
|
-
|
125
|
+
context "#empty_br" do
|
126
|
+
it 'will parse html br tags' do
|
127
|
+
expect(parser.empty_br).to parse '<br>'
|
128
|
+
end
|
56
129
|
end
|
57
130
|
|
58
131
|
context "#empty_tag" do
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
it
|
63
|
-
|
64
|
-
|
65
|
-
it
|
66
|
-
|
67
|
-
|
132
|
+
it 'will parse %q{<tofu />}' do
|
133
|
+
expect(parser.empty_tag).to parse %q{<tofu />}
|
134
|
+
end
|
135
|
+
it 'will parse %q{<tofu art="party" />}' do
|
136
|
+
expect(parser.empty_tag).to parse %q{<tofu art="party" />}
|
137
|
+
end
|
138
|
+
it 'will parse %q{<tofu art="party" synth="letterpress" />}' do
|
139
|
+
expect(parser.empty_tag).to parse %q{<tofu art="party" synth="letterpress" />}
|
140
|
+
end
|
141
|
+
it 'will not parse "<tofu>"' do
|
142
|
+
expect(parser.empty_tag).to_not parse "<tofu>"
|
143
|
+
end
|
144
|
+
it 'will not parse "</tofu>"' do
|
145
|
+
expect(parser.empty_tag).to_not parse "</tofu>"
|
146
|
+
end
|
147
|
+
it 'will not parse "<tofu"' do
|
148
|
+
expect(parser.empty_tag).to_not parse "<tofu"
|
149
|
+
end
|
150
|
+
it 'will not parse "/tofu>"' do
|
151
|
+
expect(parser.empty_tag).to_not parse "/tofu>"
|
152
|
+
end
|
68
153
|
end
|
69
154
|
|
70
155
|
context "#name" do
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
it
|
75
|
-
|
76
|
-
|
156
|
+
it 'will parse "p"' do
|
157
|
+
expect(parser.name).to parse "p"
|
158
|
+
end
|
159
|
+
it 'will parse "h1"' do
|
160
|
+
expect(parser.name).to parse "h1"
|
161
|
+
end
|
162
|
+
it 'will not parse "1h"' do
|
163
|
+
expect(parser.name).to_not parse "1h"
|
164
|
+
end
|
165
|
+
it 'will not parse "h 1"' do
|
166
|
+
expect(parser.name).to_not parse "h 1"
|
167
|
+
end
|
77
168
|
end
|
78
169
|
|
79
170
|
context "#attribute" do
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
it
|
84
|
-
|
85
|
-
|
86
|
-
it
|
87
|
-
|
88
|
-
|
89
|
-
it
|
90
|
-
|
91
|
-
|
171
|
+
it 'will parse %q{art="party organic"}' do
|
172
|
+
expect(parser.attribute).to parse %q{art="party organic"}
|
173
|
+
end
|
174
|
+
it 'will parse %q{art=\'party organic\'}' do
|
175
|
+
expect(parser.attribute).to parse %q{art='party organic'}
|
176
|
+
end
|
177
|
+
it 'will parse %q{art="party\'organic"}' do
|
178
|
+
expect(parser.attribute).to parse %q{art="party'organic"}
|
179
|
+
end
|
180
|
+
it 'will parse %q{art=\'party"organic\'}' do
|
181
|
+
expect(parser.attribute).to parse %q{art='party"organic'}
|
182
|
+
end
|
183
|
+
it 'will not parse "art"' do
|
184
|
+
expect(parser.attribute).to_not parse "art"
|
185
|
+
end
|
186
|
+
it 'will not parse "art="' do
|
187
|
+
expect(parser.attribute).to_not parse "art="
|
188
|
+
end
|
189
|
+
it 'will not parse "art=party"' do
|
190
|
+
expect(parser.attribute).to_not parse "art=party"
|
191
|
+
end
|
192
|
+
it 'will not parse %q{="party organic"}' do
|
193
|
+
expect(parser.attribute).to_not parse %q{="party organic"}
|
194
|
+
end
|
195
|
+
it 'will not parse %q{art="party organic\'}' do
|
196
|
+
expect(parser.attribute).to_not parse %q{art="party organic'}
|
197
|
+
end
|
198
|
+
it 'will not parse %q{art=\'party organic"}' do
|
199
|
+
expect(parser.attribute).to_not parse %q{art='party organic"}
|
200
|
+
end
|
92
201
|
end
|
93
202
|
|
94
203
|
context "#parse" do
|
95
|
-
|
204
|
+
RSpec::Matchers.define :convert do |html|
|
205
|
+
match do |parser|
|
206
|
+
@actual = parser.parse(html)
|
207
|
+
@actual == @expected
|
208
|
+
end
|
96
209
|
|
97
|
-
|
98
|
-
|
210
|
+
chain :to do |ast|
|
211
|
+
@expected = ast
|
212
|
+
end
|
213
|
+
attr_reader :expected
|
214
|
+
|
215
|
+
failure_message do
|
216
|
+
%Q{expected "#{html}" to parse to "#{@expected.inspect}" but was #{@result.inspect}}
|
217
|
+
end
|
99
218
|
|
100
|
-
|
101
|
-
|
219
|
+
diffable
|
220
|
+
end
|
221
|
+
|
222
|
+
context "single tag" do
|
223
|
+
it 'is parsed as a single element' do
|
224
|
+
expect(parser).to convert("<p>messenger</p>").to([
|
102
225
|
{
|
103
226
|
element: {
|
104
227
|
start_tag: {name: "p", attributes: []},
|
@@ -106,29 +229,41 @@ describe Upmark::Parser::XML do
|
|
106
229
|
children: [{text: "messenger"}]
|
107
230
|
}
|
108
231
|
}
|
109
|
-
]
|
232
|
+
])
|
233
|
+
end
|
234
|
+
|
235
|
+
it 'will ignore empty text tags' do
|
236
|
+
expect(parser).to convert('<p> </p>').to(
|
237
|
+
[
|
238
|
+
{
|
239
|
+
empty:
|
240
|
+
{
|
241
|
+
start_tag: { name: "p", attributes: [] },
|
242
|
+
end_tag: { name: "p" },
|
243
|
+
}
|
244
|
+
}
|
245
|
+
]
|
246
|
+
)
|
110
247
|
end
|
111
248
|
end
|
112
249
|
|
113
250
|
context "empty tag" do
|
114
|
-
|
115
|
-
|
116
|
-
it do
|
117
|
-
should == [
|
251
|
+
it 'is parsed an empty_tag element' do
|
252
|
+
expect(parser).to convert("<br />").to([
|
118
253
|
{
|
119
254
|
element: {
|
120
255
|
empty_tag: {name: "br", attributes: []}
|
121
256
|
}
|
122
257
|
}
|
123
|
-
]
|
258
|
+
])
|
124
259
|
end
|
125
260
|
end
|
126
261
|
|
127
262
|
context "single tag with attributes" do
|
128
263
|
let(:html) { %q{<a href="http://helvetica.com/" title="art party organic">messenger bag skateboard</a>} }
|
129
264
|
|
130
|
-
it do
|
131
|
-
|
265
|
+
it 'is parsed an element with an attribute subtree' do
|
266
|
+
expect(parser).to convert(html).to([
|
132
267
|
{
|
133
268
|
element: {
|
134
269
|
start_tag: {
|
@@ -142,15 +277,15 @@ describe Upmark::Parser::XML do
|
|
142
277
|
children: [{text: "messenger bag skateboard"}]
|
143
278
|
}
|
144
279
|
}
|
145
|
-
]
|
280
|
+
])
|
146
281
|
end
|
147
282
|
end
|
148
283
|
|
149
284
|
context "multiple inline tags" do
|
150
285
|
let(:html) { "<p>messenger</p><p>bag</p><p>skateboard</p>" }
|
151
286
|
|
152
|
-
it do
|
153
|
-
|
287
|
+
it 'converts to multiple elements' do
|
288
|
+
expect(parser).to convert(html).to([
|
154
289
|
{
|
155
290
|
element: {
|
156
291
|
start_tag: {name: "p", attributes: []},
|
@@ -170,15 +305,15 @@ describe Upmark::Parser::XML do
|
|
170
305
|
children: [{text: "skateboard"}]
|
171
306
|
}
|
172
307
|
}
|
173
|
-
]
|
308
|
+
])
|
174
309
|
end
|
175
310
|
end
|
176
311
|
|
177
312
|
context "multiple tags" do
|
178
313
|
let(:html) { "<p>messenger</p>\n<p>bag</p>\n<p>skateboard</p>" }
|
179
314
|
|
180
|
-
it do
|
181
|
-
|
315
|
+
it 'converts to multiple elements' do
|
316
|
+
expect(parser).to convert(html).to([
|
182
317
|
{
|
183
318
|
element: {
|
184
319
|
start_tag: {name: "p", attributes: []},
|
@@ -202,15 +337,15 @@ describe Upmark::Parser::XML do
|
|
202
337
|
children: [{text: "skateboard"}]
|
203
338
|
}
|
204
339
|
}
|
205
|
-
]
|
340
|
+
])
|
206
341
|
end
|
207
342
|
end
|
208
343
|
|
209
344
|
context "nested tags" do
|
210
345
|
let(:html) { "<p>messenger <strong>bag</strong> skateboard</p>" }
|
211
346
|
|
212
|
-
it do
|
213
|
-
|
347
|
+
it 'converts to multiple nested elements' do
|
348
|
+
expect(parser).to convert(html).to([
|
214
349
|
{
|
215
350
|
element: {
|
216
351
|
start_tag: {name: "p", attributes: []},
|
@@ -230,7 +365,7 @@ describe Upmark::Parser::XML do
|
|
230
365
|
]
|
231
366
|
}
|
232
367
|
}
|
233
|
-
]
|
368
|
+
])
|
234
369
|
end
|
235
370
|
end
|
236
371
|
end
|
data/spec/unit/lib/upmark/transform/markdown_spec.rb
CHANGED
@@ -1,10 +1,18 @@
|
|
1
|
-
|
1
|
+
RSpec.describe Upmark::Transform::Markdown do
|
2
|
+
def transform(ast)
|
3
|
+
Upmark::Transform::Markdown.new.apply(ast)
|
4
|
+
end
|
2
5
|
|
3
|
-
|
4
|
-
let(:transform) { Upmark::Transform::Markdown.new }
|
6
|
+
let(:transformed_ast) { transform(ast) }
|
5
7
|
|
6
8
|
context "#apply" do
|
7
|
-
|
9
|
+
context '<br>' do
|
10
|
+
let(:ast) { [{ element: { name: 'br' }}] }
|
11
|
+
|
12
|
+
it 'will transform to markdown' do
|
13
|
+
expect(transformed_ast).to eq ["\n"]
|
14
|
+
end
|
15
|
+
end
|
8
16
|
|
9
17
|
context "<p>" do
|
10
18
|
context "single tag" do
|
@@ -21,7 +29,11 @@ describe Upmark::Transform::Markdown do
|
|
21
29
|
]
|
22
30
|
end
|
23
31
|
|
24
|
-
it
|
32
|
+
it 'transforms to markdown' do
|
33
|
+
expect(
|
34
|
+
transformed_ast
|
35
|
+
).to eq(["messenger bag skateboard\n\n"])
|
36
|
+
end
|
25
37
|
end
|
26
38
|
|
27
39
|
context "multiple tags" do
|
@@ -52,21 +64,31 @@ describe Upmark::Transform::Markdown do
|
|
52
64
|
]
|
53
65
|
end
|
54
66
|
|
55
|
-
it
|
67
|
+
it 'transforms to markdown' do
|
68
|
+
expect(
|
69
|
+
transformed_ast
|
70
|
+
).to eq(["messenger\n\n", "bag\n\n", "skateboard\n\n"])
|
71
|
+
end
|
56
72
|
end
|
57
73
|
end
|
58
74
|
|
59
75
|
context "<a>" do
|
60
76
|
context "single tag" do
|
61
77
|
let(:ast) do
|
78
|
+
a_tag(
|
79
|
+
href: "http://helvetica.com/",
|
80
|
+
title: "art party organic",
|
81
|
+
)
|
82
|
+
end
|
83
|
+
|
84
|
+
def a_tag(attributes)
|
62
85
|
[
|
63
86
|
{
|
64
87
|
element: {
|
65
88
|
name: "a",
|
66
|
-
attributes:
|
67
|
-
{name:
|
68
|
-
|
69
|
-
],
|
89
|
+
attributes: attributes.map do |key, value|
|
90
|
+
{ name: key.to_s, value: value }
|
91
|
+
end,
|
70
92
|
children: [{text: "messenger bag skateboard"}],
|
71
93
|
ignore: false
|
72
94
|
}
|
@@ -74,22 +96,50 @@ describe Upmark::Transform::Markdown do
|
|
74
96
|
]
|
75
97
|
end
|
76
98
|
|
77
|
-
it
|
99
|
+
it 'transforms to markdown' do
|
100
|
+
expect(
|
101
|
+
transformed_ast
|
102
|
+
).to eq([%q{[messenger bag skateboard](http://helvetica.com/ "art party organic")}])
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'transforms mailto to markdown' do
|
106
|
+
expect(
|
107
|
+
transform a_tag(href: 'mailto:a@example.com', title: 'Some Path')
|
108
|
+
).to eq([%q{[messenger bag skateboard](mailto:a@example.com "Some Path")}])
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'strips local urls to their text' do
|
112
|
+
expect(
|
113
|
+
transform a_tag(href: 'file://some/path', title: 'Some Path')
|
114
|
+
).to eq ['messenger bag skateboard']
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'strips relative urls to their alt text' do
|
118
|
+
expect(
|
119
|
+
transform a_tag(src: 'some/path', title: 'Some Path')
|
120
|
+
).to eq ['messenger bag skateboard']
|
121
|
+
end
|
78
122
|
end
|
79
123
|
end
|
80
124
|
|
81
125
|
context "<img>" do
|
82
126
|
context "empty tag" do
|
83
127
|
let(:ast) do
|
128
|
+
img(
|
129
|
+
src: "http://helvetica.com/image.gif",
|
130
|
+
title: "art party organic",
|
131
|
+
alt: "messenger bag skateboard",
|
132
|
+
)
|
133
|
+
end
|
134
|
+
|
135
|
+
def img(attributes)
|
84
136
|
[
|
85
137
|
{
|
86
138
|
element: {
|
87
139
|
name: "img",
|
88
|
-
attributes:
|
89
|
-
{name:
|
90
|
-
|
91
|
-
{name: "alt", value: "messenger bag skateboard"}
|
92
|
-
],
|
140
|
+
attributes: attributes.map do |key, value|
|
141
|
+
{ name: key.to_s, value: value }
|
142
|
+
end,
|
93
143
|
children: [],
|
94
144
|
ignore: false
|
95
145
|
}
|
@@ -97,7 +147,29 @@ describe Upmark::Transform::Markdown do
|
|
97
147
|
]
|
98
148
|
end
|
99
149
|
|
100
|
-
it
|
150
|
+
it 'transforms to markdown' do
|
151
|
+
expect(
|
152
|
+
transformed_ast
|
153
|
+
).to eq([%q{![messenger bag skateboard](http://helvetica.com/image.gif "art party organic")}])
|
154
|
+
end
|
155
|
+
|
156
|
+
it 'strips file urls to their alt text or title' do
|
157
|
+
expect(
|
158
|
+
transform img(src: 'file://some/path', alt: 'Some', title: 'Path')
|
159
|
+
).to eq ['Some']
|
160
|
+
expect(
|
161
|
+
transform img(src: 'file://some/path', title: 'Some Path')
|
162
|
+
).to eq ['Some Path']
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'strips relative urls to their alt text' do
|
166
|
+
expect(
|
167
|
+
transform img(src: 'some/path', alt: 'Some', title: 'Path')
|
168
|
+
).to eq ['Some']
|
169
|
+
expect(
|
170
|
+
transform img(src: 'some/path', title: 'Some Path')
|
171
|
+
).to eq ['Some Path']
|
172
|
+
end
|
101
173
|
end
|
102
174
|
end
|
103
175
|
end
|