sumitup 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/sumitup/parser.rb +99 -71
- data/spec/fixtures/basic.html +36 -0
- data/spec/fixtures/justin.html +113 -0
- data/spec/fixtures/wikipedia.html +95 -0
- data/spec/spec_helper.rb +1 -1
- data/spec/sumitup/parser_spec.rb +79 -154
- data/sumitup.gemspec +5 -2
- metadata +7 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.2
|
data/lib/sumitup/parser.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
module Sumitup
|
2
2
|
class Parser
|
3
3
|
|
4
|
-
IMAGE_WIDTH_LIMIT = 230
|
5
|
-
|
6
4
|
attr_accessor :word_count, :max_words
|
7
|
-
attr_accessor :image_count, :image_width_limit, :max_images
|
5
|
+
attr_accessor :image_count, :image_width_limit, :max_images, :min_image_size
|
8
6
|
attr_accessor :elements, :attributes, :protocols, :remove_contents
|
9
7
|
attr_accessor :omission
|
10
8
|
|
@@ -12,16 +10,18 @@ module Sumitup
|
|
12
10
|
|
13
11
|
self.omission = options[:omission] || ''
|
14
12
|
|
15
|
-
self.word_count =
|
13
|
+
self.word_count = 0
|
16
14
|
self.max_words = options[:max_words] || 100
|
17
15
|
|
18
|
-
self.image_count =
|
16
|
+
self.image_count = 0
|
17
|
+
self.min_image_size = options[:min_image_size] || 40
|
19
18
|
self.image_width_limit = options[:image_width_limit] || 230
|
20
|
-
self.max_images = options[:max_images] ||
|
21
|
-
|
19
|
+
self.max_images = options[:max_images] || 1
|
20
|
+
|
21
|
+
# White listed elements
|
22
22
|
self.elements = options[:elements] || %w(
|
23
|
-
a abbr b blockquote
|
24
|
-
|
23
|
+
a abbr b blockquote cite code dfn em i kbd mark q samp small s strike strong sub sup time u var
|
24
|
+
br dd dl dt li ol p pre ul img span
|
25
25
|
)
|
26
26
|
|
27
27
|
self.attributes = options[:attributes] || {
|
@@ -43,90 +43,118 @@ module Sumitup
|
|
43
43
|
# Removes disallowed html and generates a summary.
#
# Parameters:
# html - The HTML source to summarize; nil or '' returns ''
# max  - Optional word limit, passed on to summarize_fragment
#
# Returns the summary as an HTML string.
#
# word_count and image_count are reset before parsing so that one parser
# instance can summarize several documents; without the reset the totals
# of an earlier call would count against the limits of the next one.
def summarize(html, max = nil)
  return '' if is_blank?(html)

  self.word_count = 0
  self.image_count = 0

  # Parse a copy so the caller's string is never touched
  unclean = Nokogiri::HTML::DocumentFragment.parse(html.dup)
  summarize_fragment(unclean, max).to_html
end
|
49
|
+
|
50
|
+
# Runs Sanitize.clean_node! over +node+ with this parser's elements,
# attributes, protocols and remove_contents settings plus the word and
# image transformers, then hands the result to summarize_node.
#
# Parameters:
# node - The node to sanitize and summarize
# max  - Optional word limit, passed on to summarize_node
#
# Returns whatever summarize_node returns.
def summarize_fragment(node, max = nil)
  sanitize_options = {
    :elements        => elements,
    :attributes      => attributes,
    :protocols       => protocols,
    :remove_contents => remove_contents,
    :transformers    => [word_transformer, image_transformer]
  }

  summarize_node(Sanitize.clean_node!(node, sanitize_options), max)
end
|
59
|
+
|
60
|
+
# Walks +node+ depth first and trims its text nodes so that the running
# word_count stays within the limit.
#
# Parameters:
# node - The node to trim. It must respond to children and text?; text
#        nodes must also respond to inner_text, content= and remove
# max  - Word limit; falls back to max_words when nil
#
# Returns the node that was passed in.
def summarize_node(node, max = nil)
  max ||= max_words

  # summarize all children of the node
  node.children.each { |child| summarize_node(child, max) }

  return node unless node.text?

  remaining = max - word_count
  if remaining <= 0
    # The budget is used up: drop the text rather than asking snippet
    # for zero words and assigning its result as content
    node.remove
  else
    # if the text of the current node makes us go over then truncate it
    result, count = snippet(node.inner_text, remaining)
    self.word_count += count
    # to_s keeps content a String even if snippet hands back nil
    node.content = result.to_s
  end

  node
end
|
81
|
+
|
82
|
+
# Truncates text at a word boundary.
#
# Parameters:
# text - The text to truncate; nil or '' yields ['', 0]
# max  - The maximum number of words to keep; zero or less keeps nothing
#
# Returns a two element array: the kept words joined by single spaces, and
# the number of words kept. The first element is always a String. The
# earlier strip!-based version returned nil there whenever no word was
# kept, because strip! returns nil when it has nothing to strip.
def snippet(text, max)
  return ['', 0] if text.nil? || text.empty?

  # Clamp so a negative limit behaves like zero instead of raising
  words = text.split.first([max, 0].max)
  [words.join(' '), words.size]
end
|
97
|
+
|
98
|
+
# Reports whether +text+ has no content: true for nil and for any value
# whose empty? is true. A whitespace-only string is not considered blank.
def is_blank?(text)
  return true if text.nil?

  text.empty?
end
|
57
101
|
|
58
|
-
def
|
102
|
+
# Builds the transformer that removes hidden and empty elements.
#
# Returns a lambda that takes the transformer env hash and reads its
# :node and :node_name entries. Non-element nodes are left alone. An
# element is removed when its inline style sets display to none, or when
# it has no text and no children and is neither img nor br. The lambda
# itself always returns nil.
def word_transformer
  lambda do |env|
    node = env[:node]
    return unless node.element?

    # Remove nodes with display none. CSS is case-insensitive, so
    # "DISPLAY: NONE" has to match as well
    style = node['style']
    hidden = style && style =~ /display\s*:\s*none/i

    # Remove empty nodes; img and br are empty by nature and stay
    empty = !hidden && node.text.empty? && node.children.empty? &&
            !['img', 'br'].include?(env[:node_name])

    node.remove if hidden || empty
    nil
  end
end
|
81
|
-
|
124
|
+
|
82
125
|
# Builds the transformer that limits and resizes images.
#
# Returns a lambda that takes the transformer env hash and reads its
# :node and :node_name entries. Only img nodes are touched:
# - once max_images images have been kept, every further img is removed
# - an img whose width attribute is not greater than min_image_size is removed
# - a kept img increments image_count; when it has no width or is wider
#   than image_width_limit its width is set to image_width_limit and its
#   height attribute is dropped
# The lambda always returns nil, so no node or string leaks back to the
# caller as a transformer result.
def image_transformer
  me = self
  lambda do |env|
    node = env[:node]
    return unless env[:node_name] == 'img'

    # Keeping one more image would go over the limit
    if me.image_count + 1 > me.max_images
      node.remove
      return
    end

    # nil when there is no width attribute. to_i cannot raise here: a
    # non-numeric or missing value simply becomes 0, so no rescue is needed
    width_attr = node.attributes['width']
    width = width_attr && width_attr.value.to_i

    # Images with a declared width at or below the minimum are dropped
    if width && width <= me.min_image_size
      node.remove
      return
    end

    me.image_count += 1
    if width.nil? || width > me.image_width_limit
      node['width'] = me.image_width_limit.to_s
      height_attr = node.attributes['height']
      height_attr.remove if height_attr
    end

    nil
  end
end
|
98
158
|
|
99
|
-
def empty_transformer
|
100
|
-
lambda do |env|
|
101
|
-
node = env[:node]
|
102
|
-
if node.text.empty? && node.children.empty? && !['img', 'br'].include?(env[:node_name])
|
103
|
-
node.remove
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def no_display_transformer
|
109
|
-
lambda do |env|
|
110
|
-
node = env[:node]
|
111
|
-
if node['style'] && node['style'] =~ /display\s*:\s*none/
|
112
|
-
node.remove
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
# Truncates text at a word boundry
|
118
|
-
# Parameters:
|
119
|
-
# text - The text to truncate
|
120
|
-
# wordcount - The number of words
|
121
|
-
# omission - Text to add when the text is truncated ie 'read more' or '...
|
122
|
-
def snippet(text, wordcount, omission)
|
123
|
-
return '' if is_blank?(text)
|
124
|
-
text.split[0..(wordcount-1)].join(" ") + (text.split.size > wordcount ? " " + omission : "")
|
125
|
-
end
|
126
|
-
|
127
|
-
def is_blank?(text)
|
128
|
-
text.nil? || text.empty?
|
129
|
-
end
|
130
|
-
|
131
159
|
end
|
132
160
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
<div class="entry">
|
2
|
+
<style type="text/css">
|
3
|
+
.item {
|
4
|
+
float: left;
|
5
|
+
margin-top: 10px;
|
6
|
+
text-align: center;
|
7
|
+
width: 33%; }
|
8
|
+
.img {
|
9
|
+
border: 2px solid #cfcfcf;
|
10
|
+
}
|
11
|
+
.caption {
|
12
|
+
margin-left: 0;
|
13
|
+
}
|
14
|
+
</style>
|
15
|
+
<p style="display:none;">Can't see this!</p>
|
16
|
+
<p></p>
|
17
|
+
<p>
|
18
|
+
Lorem <strong>ipsum</strong> dolor sit <blockquote>amet</blockquote>, consectetur adipiscing elit. Cras eleifend ornare laoreet. Nulla rutrum tristique nibh accumsan ornare. Phasellus varius aliquet tortor quis feugiat. Morbi ultrices mauris eu metus hendrerit id laoreet risus auctor. In et nisi mi. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nulla facilisi. Nunc malesuada faucibus cursus. Aenean metus erat, ullamcorper in pellentesque sed, fermentum nec erat.
|
19
|
+
Vestibulum id augue dolor. Quisque non neque sit amet orci dictum pharetra nec in odio. Nunc et tortor elit. Proin porttitor pharetra sollicitudin. Duis in gravida nisi. Etiam convallis gravida scelerisque. Pellentesque porta rhoncus nunc ac vestibulum. Etiam varius sodales nunc id dictum. Proin faucibus gravida sagittis. Nunc ut velit lacus. Phasellus vehicula porta eleifend. Cras rutrum nunc in sem egestas hendrerit. Vivamus ante libero, accumsan sed eleifend vel, egestas eget erat. Aenean sodales, nibh at facilisis cursus, turpis quam mollis magna, tempus eleifend orci nunc eu lorem. Pellentesque velit felis, suscipit sed commodo tempor, gravida vel urna. Quisque adipiscing euismod consectetur.
|
20
|
+
</p>
|
21
|
+
<img src="http://www.example.com/small.jpg" width="15" />
|
22
|
+
<img src="http://www.example.com/big.jpg" width="600" height="600" />
|
23
|
+
<img src="http://www.example.com/nowidth.jpg" />
|
24
|
+
<!-- An html comment -->
|
25
|
+
<dl class="item">
|
26
|
+
<dt class="icon">
|
27
|
+
<a href="http://www.example.com/a_post" title="post">
|
28
|
+
<img src="http://www.example.com/photo.jpg" width="150" height="150" title="" alt="">
|
29
|
+
</a>
|
30
|
+
</dt>
|
31
|
+
<dd class="caption">
|
32
|
+
A Picture
|
33
|
+
</dd>
|
34
|
+
</dl>
|
35
|
+
<br />
|
36
|
+
</div>
|
@@ -0,0 +1,113 @@
|
|
1
|
+
<div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
|
2
|
+
<p style="display:none;">Can't see this!</p>
|
3
|
+
<p></p>
|
4
|
+
<p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
|
5
|
+
bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
|
6
|
+
ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
|
7
|
+
The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
|
8
|
+
<p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
|
9
|
+
uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
|
10
|
+
While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
|
11
|
+
healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
|
12
|
+
me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
|
13
|
+
<p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
|
14
|
+
youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
|
15
|
+
something so you can't see it." She's 4 so suck it up.</p>
|
16
|
+
<img src="http://www.example.com/test.jpg" width="600" height="600" />
|
17
|
+
<img src="http://www.example.com/nowidth.jpg" />
|
18
|
+
<p>
|
19
|
+
<style type="text/css">
|
20
|
+
.gallery {
|
21
|
+
margin: auto;
|
22
|
+
}
|
23
|
+
.gallery-item {
|
24
|
+
float: left;
|
25
|
+
margin-top: 10px;
|
26
|
+
text-align: center;
|
27
|
+
width: 33%; }
|
28
|
+
.gallery img {
|
29
|
+
border: 2px solid #cfcfcf;
|
30
|
+
}
|
31
|
+
.gallery-caption {
|
32
|
+
margin-left: 0;
|
33
|
+
}
|
34
|
+
</style>
|
35
|
+
<!-- see gallery_shortcode() in wp-includes/media.php -->
|
36
|
+
</p><div class="gallery"><dl class="gallery-item">
|
37
|
+
<dt class="gallery-icon">
|
38
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
|
39
|
+
</dt>
|
40
|
+
<dd class="gallery-caption">
|
41
|
+
Flesh always loses against asphalt
|
42
|
+
</dd></dl><dl class="gallery-item">
|
43
|
+
<dt class="gallery-icon">
|
44
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
|
45
|
+
</dt>
|
46
|
+
<dd class="gallery-caption">
|
47
|
+
My leg is straight so it is harder to see, but if I bend it you can see the tendons
|
48
|
+
</dd></dl><dl class="gallery-item">
|
49
|
+
<dt class="gallery-icon">
|
50
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
|
51
|
+
</dt>
|
52
|
+
<dd class="gallery-caption">
|
53
|
+
Drugs make you happy
|
54
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
55
|
+
<dt class="gallery-icon">
|
56
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
|
57
|
+
</dt>
|
58
|
+
<dd class="gallery-caption">
|
59
|
+
Joel stuck around to offer moral support
|
60
|
+
</dd></dl><dl class="gallery-item">
|
61
|
+
<dt class="gallery-icon">
|
62
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
|
63
|
+
</dt>
|
64
|
+
<dd class="gallery-caption">
|
65
|
+
After they cleaned it up
|
66
|
+
</dd></dl><dl class="gallery-item">
|
67
|
+
<dt class="gallery-icon">
|
68
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
|
69
|
+
</dt>
|
70
|
+
<dd class="gallery-caption">
|
71
|
+
This isn't as much fun as it looks
|
72
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
73
|
+
<dt class="gallery-icon">
|
74
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
|
75
|
+
</dt>
|
76
|
+
<dd class="gallery-caption">
|
77
|
+
Irrigating the wound - like its a crop or something
|
78
|
+
</dd></dl><dl class="gallery-item">
|
79
|
+
<dt class="gallery-icon">
|
80
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
|
81
|
+
</dt>
|
82
|
+
<dd class="gallery-caption">
|
83
|
+
After they took the bandage off the first time - 5 days later
|
84
|
+
</dd></dl><dl class="gallery-item">
|
85
|
+
<dt class="gallery-icon">
|
86
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
|
87
|
+
</dt>
|
88
|
+
<dd class="gallery-caption">
|
89
|
+
After they took the bandage off the first time - 5 days later
|
90
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
91
|
+
<dt class="gallery-icon">
|
92
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
|
93
|
+
</dt>
|
94
|
+
<dd class="gallery-caption">
|
95
|
+
After 7 days. Still not pretty, but it is amazing how the human body heals
|
96
|
+
</dd></dl><dl class="gallery-item">
|
97
|
+
<dt class="gallery-icon">
|
98
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
|
99
|
+
</dt>
|
100
|
+
<dd class="gallery-caption">
|
101
|
+
This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
|
102
|
+
</dd></dl><dl class="gallery-item">
|
103
|
+
<dt class="gallery-icon">
|
104
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
|
105
|
+
</dt>
|
106
|
+
<dd class="gallery-caption">
|
107
|
+
Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
|
108
|
+
</dd></dl><br style="clear: both">
|
109
|
+
<br style="clear: both;">
|
110
|
+
</div>
|
111
|
+
<br>
|
112
|
+
<!--more--><!-- BlogGlue Cache: No --><p></p>
|
113
|
+
</div>
|
@@ -0,0 +1,95 @@
|
|
1
|
+
<div lang="en" dir="ltr" class="mw-content-ltr"><div class="thumb tright">
|
2
|
+
<div class="thumbinner" style="width:252px;"><a href="/wiki/File:England-Saint-Michaels-Mount-1900-1.jpg" class="image"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/0/00/England-Saint-Michaels-Mount-1900-1.jpg/250px-England-Saint-Michaels-Mount-1900-1.jpg" width="250" height="178" class="thumbimage"></a>
|
3
|
+
<div class="thumbcaption">
|
4
|
+
<div class="magnify"><a href="/wiki/File:England-Saint-Michaels-Mount-1900-1.jpg" class="internal" title="Enlarge"><img src="//bits.wikimedia.org/skins-1.19/common/images/magnify-clip.png" width="15" height="11" alt=""></a></div>
|
5
|
+
<a href="/wiki/St_Michael%27s_Mount" title="St Michael's Mount">St Michael's Mount</a>, one of several candidates to be the island of Ictis</div>
|
6
|
+
</div>
|
7
|
+
</div>
|
8
|
+
<p><b>Ictis</b>, or <b>Iktin</b>, is or was an island described as a <a href="/wiki/Tin_sources_and_trade_in_ancient_times" title="Tin sources and trade in ancient times">tin trading</a> centre in the <i><a href="/wiki/Bibliotheca_historica" title="Bibliotheca historica">Bibliotheca historica</a></i> of the <a href="/wiki/Greeks_in_Italy" title="Greeks in Italy">Sicilian-Greek</a> historian <a href="/wiki/Diodorus_Siculus" title="Diodorus Siculus">Diodorus Siculus</a>, writing in the first century BC.</p>
|
9
|
+
<p>While Ictis is widely accepted to have been an island somewhere off the southern coast of what is now England, scholars continue to debate its precise location. Candidates include <a href="/wiki/St_Michael%27s_Mount" title="St Michael's Mount">St Michael's Mount</a> and <a href="/wiki/Looe_Island" title="Looe Island">Looe Island</a> off the coast of <a href="/wiki/Cornwall" title="Cornwall">Cornwall</a>, the <a href="/wiki/Mount_Batten" title="Mount Batten">Mount Batten</a> peninsula in <a href="/wiki/Devon" title="Devon">Devon</a>, and the <a href="/wiki/Isle_of_Wight" title="Isle of Wight">Isle of Wight</a> further to the east.</p>
|
10
|
+
<table id="toc" class="toc">
|
11
|
+
<tbody><tr>
|
12
|
+
<td>
|
13
|
+
<div id="toctitle">
|
14
|
+
<h2>Contents</h2>
|
15
|
+
<span class="toctoggle"> [<a href="#" class="internal" id="togglelink">hide</a>] </span></div>
|
16
|
+
<ul>
|
17
|
+
<li class="toclevel-1 tocsection-1"><a href="#Primary_sources"><span class="tocnumber">1</span> <span class="toctext">Primary sources</span></a></li>
|
18
|
+
<li class="toclevel-1 tocsection-2"><a href="#Debate"><span class="tocnumber">2</span> <span class="toctext">Debate</span></a></li>
|
19
|
+
<li class="toclevel-1 tocsection-3"><a href="#See_also"><span class="tocnumber">3</span> <span class="toctext">See also</span></a></li>
|
20
|
+
<li class="toclevel-1 tocsection-4"><a href="#Notes"><span class="tocnumber">4</span> <span class="toctext">Notes</span></a></li>
|
21
|
+
<li class="toclevel-1 tocsection-5"><a href="#Further_reading"><span class="tocnumber">5</span> <span class="toctext">Further reading</span></a></li>
|
22
|
+
</ul>
|
23
|
+
</td>
|
24
|
+
</tr>
|
25
|
+
</tbody></table>
|
26
|
+
<h2><span class="editsection">[<a href="/w/index.php?title=Ictis&action=edit&section=1" title="Edit section: Primary sources">edit</a>]</span> <span class="mw-headline" id="Primary_sources">Primary sources</span></h2>
|
27
|
+
<p>Diodorus Siculus, who flourished between about 60 and about 30 BC, is supposed to have relied for his account of the geography of <a href="/wiki/Great_Britain" title="Great Britain">Britain</a> on a lost work of <a href="/wiki/Pytheas" title="Pytheas">Pytheas</a>, a Greek geographer from <a href="/wiki/Marseilles" title="Marseilles" class="mw-redirect">Massalia</a> who made a voyage around the coast of Britain near the end of the fourth century BC, searching for the source of <a href="/wiki/Amber" title="Amber">amber</a>. The record of the voyage of Pytheas was lost in antiquity but was known to some later writers, including <a href="/wiki/Timaeus_(historian)" title="Timaeus (historian)">Timaeus</a>, <a href="/wiki/Posidonius" title="Posidonius">Posidonius</a> and <a href="/wiki/Pliny_the_Elder" title="Pliny the Elder">Pliny the Elder</a>. Their work is contradictory, but from it deductions can be made about what was reported by Pytheas. No other sources concerning the tin trade in the ancient world are known.<sup id="cite_ref-roman-britain_0-0" class="reference"><a href="#cite_note-roman-britain-0"><span>[</span>1<span>]</span></a></sup></p>
|
28
|
+
<p>Diodorus gives an account that is generally supposed to be a description of the working of Cornish tin at about the time of the voyage of Pytheas. He says:</p>
|
29
|
+
<blockquote class="templatequote">
|
30
|
+
<div class="Bug6200">The inhabitants of that part of Britain which is called Belerion are very fond of strangers and from their intercourse with foreign merchants are civilised in their manner of life. They prepare the tin, working very carefully the earth in which it is produced. The ground is rocky but it contains earthy veins, the produce of which is ground down, smelted and purified. They beat the metal into masses shaped like knuckle-bones and carry it off to a certain island off Britain called Iktis. During the ebb of the tide the intervening space is left dry and they carry over to the island the tin in abundance in their wagons ... Here then the merchants buy the tin from the natives and carry it over to Gaul, and after travelling overland for about thirty days, they finally bring their loads on horses to the mouth of the Rhone.<sup id="cite_ref-roman-britain_0-1" class="reference"><a href="#cite_note-roman-britain-0"><span>[</span>1<span>]</span></a></sup></div>
|
31
|
+
</blockquote>
|
32
|
+
<div class="thumb tright">
|
33
|
+
<div class="thumbinner" style="width:252px;"><a href="/wiki/File:Looe_island.jpg" class="image"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/a/ad/Looe_island.jpg/250px-Looe_island.jpg" width="250" height="184" class="thumbimage"></a>
|
34
|
+
<div class="thumbcaption">
|
35
|
+
<div class="magnify"><a href="/wiki/File:Looe_island.jpg" class="internal" title="Enlarge"><img src="//bits.wikimedia.org/skins-1.19/common/images/magnify-clip.png" width="15" height="11" alt=""></a></div>
|
36
|
+
<a href="/wiki/Looe_Island" title="Looe Island">Looe</a>, Cornwall, another island suggested as Ictis</div>
|
37
|
+
</div>
|
38
|
+
</div>
|
39
|
+
<p>In the Greek text of Diodorus the name appears, in the <a href="/wiki/Accusative_case" title="Accusative case">accusative case</a>, as "Iktin", so that translators have inferred that the <a href="/wiki/Nominative_case" title="Nominative case">nominative</a> form of the name was "Iktis", rendering this into the medieval <i><a href="/wiki/Lingua_franca" title="Lingua franca">lingua franca</a></i> of <a href="/wiki/Latin" title="Latin">Latin</a> (which only rarely used the letter 'k') as "Ictis". However, some commentators doubt that "Ictis" is correct and prefer "Iktin".<sup id="cite_ref-beer_1-0" class="reference"><a href="#cite_note-beer-1"><span>[</span>2<span>]</span></a></sup></p>
|
40
|
+
<p>In Book IV of his <i><a href="/wiki/Natural_History_(Pliny)" title="Natural History (Pliny)">Natural History</a></i>, Pliny quotes Timaeus and refers to "<i>insulam Mictim</i>" (the island of Mictis, or perhaps of Mictim):</p>
|
41
|
+
<blockquote class="templatequote">
|
42
|
+
<div class="Bug6200">There is an island named Mictis lying inwards six days' sail from Britain, where tin is found, and to which the Britons cross in boats of wickerwork covered with stitched hides.<sup id="cite_ref-cunliffe_2-0" class="reference"><a href="#cite_note-cunliffe-2"><span>[</span>3<span>]</span></a></sup></div>
|
43
|
+
</blockquote>
|
44
|
+
<p>It has been suggested that "<i>insulam Mictim</i>" was a copying error for <i>insulam Ictim</i>, and Diodorus and Pliny probably both relied on the same primary source. However, while it is possible that "Mictim" and "Iktin" are one and the same, it is also possible that they are different places. The word "inwards" can be interpreted as meaning "towards our home", and six days' sail from Britain could take a boat to somewhere on the Atlantic coast of what is now France.<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span>[</span>4<span>]</span></a></sup></p>
|
45
|
+
<p><a href="/wiki/Strabo" title="Strabo">Strabo</a>, a contemporary of Diodorus, stated in his <i>Geography</i> that British tin was shipped from <a href="/wiki/Marseille" title="Marseille">Massalia</a> on the Mediterranean coast of <a href="/wiki/Gaul" title="Gaul">Gaul</a>.<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span>[</span>5<span>]</span></a></sup></p>
|
46
|
+
<p><a href="/wiki/Julius_Caesar" title="Julius Caesar">Julius Caesar</a>, in his <i>De Bello Gallico</i>, says of the <a href="/wiki/Veneti_(Gaul)" title="Veneti (Gaul)">Veneti</a>: "This last-named people were by far the most powerful on the coast of <a href="/wiki/Armorica" title="Armorica">Armorica</a>: they had a large fleet plying between their own ports and Britain; they knew more about the handling of ships and the science of navigation than anyone else thereabouts."<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span>[</span>6<span>]</span></a></sup></p>
|
47
|
+
<h2><span class="editsection">[<a href="/w/index.php?title=Ictis&action=edit&section=2" title="Edit section: Debate">edit</a>]</span> <span class="mw-headline" id="Debate">Debate</span></h2>
|
48
|
+
<p><a href="/wiki/William_Camden" title="William Camden">William Camden</a>, the <a href="/wiki/Elizabethan_era" title="Elizabethan era">Elizabethan</a> historian, took the view that the name "Ictis" was so similar to "Vectis", the <a href="/wiki/Latin" title="Latin">Latin</a> name for the Isle of Wight, that the two were probably the same island. The <a href="/wiki/Cornish_people" title="Cornish people">Cornish</a> antiquary <a href="/wiki/William_Borlase" title="William Borlase">William Borlase</a> (1696–1772) suggested that Ictis must have been near the coast of Cornwall and could have been a general name for a peninsula there.<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span>[</span>7<span>]</span></a></sup></p>
|
49
|
+
<p>In 1960, <a href="/wiki/Gavin_de_Beer" title="Gavin de Beer">Gavin de Beer</a> concluded that the most likely location of Iktin (the form of the name he preferred) was <a href="/wiki/St_Michael%27s_Mount" title="St Michael's Mount">St Michael's Mount</a>, a <a href="/wiki/Tidal_island" title="Tidal island">tidal island</a> near the town of <a href="/wiki/Marazion" title="Marazion">Marazion</a> in Cornwall. Apart from the effect of the tide being consistent with what is said by Diodorus, de Beer considered the other benefits of St Michael's Mount for the Britons.<sup id="cite_ref-beer_1-1" class="reference"><a href="#cite_note-beer-1"><span>[</span>2<span>]</span></a></sup> This identification is supported by the Roman Britain Organization and its website roman-britain.org.<sup id="cite_ref-roman-britain_0-2" class="reference"><a href="#cite_note-roman-britain-0"><span>[</span>1<span>]</span></a></sup></p>
|
50
|
+
<p>In 1972, I. S. Maxwell weighed up the competing claims of no fewer than twelve possible sites.<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span>[</span>8<span>]</span></a></sup> In 1983, after excavations, the archaeologist <a href="/wiki/Barry_Cunliffe" title="Barry Cunliffe">Barry W. Cunliffe</a> proposed the <a href="/wiki/Mount_Batten" title="Mount Batten">Mount Batten</a> <a href="/wiki/Peninsula" title="Peninsula">peninsula</a> near <a href="/wiki/Plymouth" title="Plymouth">Plymouth</a> as the site of Ictis.<sup id="cite_ref-cunliffe_2-1" class="reference"><a href="#cite_note-cunliffe-2"><span>[</span>3<span>]</span></a></sup> Near the mouth of the <a href="/wiki/River_Erme" title="River Erme">River Erme</a>, not far away, a shipwreck site has produced ingots of ancient tin, which indicates a trade along the coast, although dating the site is difficult and it may not belong to the <a href="/wiki/Bronze_Age" title="Bronze Age">Bronze Age</a>.<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span>[</span>9<span>]</span></a></sup></p>
|
51
|
+
<p>The assessment of Miranda Aldhouse-Green in <i>The Celtic World</i> (1996) was that</p>
|
52
|
+
<blockquote class="templatequote">
|
53
|
+
<div class="Bug6200">The two places considered most likely to be Ictis are the island of St Michael's Mount, Cornwall, and the peninsula of Mount Batten in Plymouth Sound (Cunliffe 1983; Hawkes 1984) ... Mount Batten seems archaeologically more likely as there are a number of finds from there which indicate it was prominent in international trade from the fourth century BC until the first century AD (Cunliffe 1988).<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span>[</span>10<span>]</span></a></sup></div>
|
54
|
+
</blockquote>
|
55
|
+
<h2><span class="editsection">[<a href="/w/index.php?title=Ictis&action=edit&section=3" title="Edit section: See also">edit</a>]</span> <span class="mw-headline" id="See_also">See also</span></h2>
|
56
|
+
<ul>
|
57
|
+
<li><a href="/wiki/Mining_in_Cornwall_and_Devon" title="Mining in Cornwall and Devon">Mining in Cornwall and Devon</a></li>
|
58
|
+
<li><a href="/wiki/Tin_sources_and_trade_in_ancient_times" title="Tin sources and trade in ancient times">Tin sources and trade in ancient times</a></li>
|
59
|
+
</ul>
|
60
|
+
<h2><span class="editsection">[<a href="/w/index.php?title=Ictis&action=edit&section=4" title="Edit section: Notes">edit</a>]</span> <span class="mw-headline" id="Notes">Notes</span></h2>
|
61
|
+
<div class="reflist references-column-width" style="-moz-column-width: 33em; -webkit-column-width: 33em; column-width: 33em; list-style-type: decimal;">
|
62
|
+
<ol class="references">
|
63
|
+
<li id="cite_note-roman-britain-0">^ <a href="#cite_ref-roman-britain_0-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-roman-britain_0-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-roman-britain_0-2"><sup><i><b>c</b></i></sup></a> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.roman-britain.org/places/ictis.htm">ICTIS INSVLA</a> at roman-britain.org, accessed 7 February 2012</span></li>
|
64
|
+
<li id="cite_note-beer-1">^ <a href="#cite_ref-beer_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-beer_1-1"><sup><i><b>b</b></i></sup></a> <span class="reference-text"><a href="/wiki/Gavin_de_Beer" title="Gavin de Beer">Gavin de Beer</a>, "Iktin", in <i>The Geographical Journal</i> vol. 126 (June 1960) pp. 160–167, at p. 162</span></li>
|
65
|
+
<li id="cite_note-cunliffe-2">^ <a href="#cite_ref-cunliffe_2-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-cunliffe_2-1"><sup><i><b>b</b></i></sup></a> <span class="reference-text">Barry W. Cunliffe, "Ictis: Is it here?" in <i>Oxford Journal of Archaeology</i>, vol. 2, issue 1 (March 1983), pp. 123–126 (see <a rel="nofollow" class="external text" href="http://onlinelibrary.wiley.com/doi/10.1111/j.1468-0092.1983.tb00101.x/abstract">abstract</a>)</span></li>
|
66
|
+
<li id="cite_note-3"><b><a href="#cite_ref-3">^</a></b> <span class="reference-text">Barry Cunliffe, "Exchanges with the wider world" in <i>Iron age communities in Britain: an account of England, Scotland, and Wales from the seventh century BC until the Roman conquest</i> (Routledge, 1978) <a rel="nofollow" class="external text" href="http://books.google.co.uk/books?id=gthTGHWl4LUC&pg=PA471#v=onepage&q&f=false">p. 471</a></span></li>
|
67
|
+
<li id="cite_note-4"><b><a href="#cite_ref-4">^</a></b> <span class="reference-text">Strabo's <i>Geographi</i> ca<i>, Book III, 2.9</i></span></li>
|
68
|
+
<li id="cite_note-5"><b><a href="#cite_ref-5">^</a></b> <span class="reference-text">Gaius Julius Caesar, <i>Caesar's War Commentaries</i> (Kessinger, 2004), <a rel="nofollow" class="external text" href="http://books.google.co.uk/books?id=RFBkyHEooQ0C&pg=PA45#v=onepage&q&f=false">pp. 45–46</a></span></li>
|
69
|
+
<li id="cite_note-6"><b><a href="#cite_ref-6">^</a></b> <span class="reference-text">Sir Christopher Hawkins, <i>Observations on the tin trade of the ancients in Cornwall</i> (1811), p. 63: "Mr. Camden supposes, that, from the similarity of the words Ictis and Vectis, it was one and the same island. Dr. Borlase says, that the Ictis must have been situated somewhere near the Coast of Cornwall, and have been a general name for a peninsula, or some particular peninsula, and common emporium, on the same coast."</span></li>
|
70
|
+
<li id="cite_note-7"><b><a href="#cite_ref-7">^</a></b> <span class="reference-text">I. S. Maxwell, "The location of Ictis" in <i>Journal of the Royal Institution of Cornwall</i> 6 (4), pp. 293–319</span></li>
|
71
|
+
<li id="cite_note-8"><b><a href="#cite_ref-8">^</a></b> <span class="reference-text"><i>Erme Ingot Wreck Site Summary</i> (<a href="/wiki/English_Heritage" title="English Heritage">English Heritage</a>, 2000)</span></li>
|
72
|
+
<li id="cite_note-9"><b><a href="#cite_ref-9">^</a></b> <span class="reference-text">Miranda Jane Aldhouse-Green, <i>The Celtic World</i> (1996), p. 276</span></li>
|
73
|
+
</ol>
|
74
|
+
</div>
|
75
|
+
<h2><span class="editsection">[<a href="/w/index.php?title=Ictis&action=edit&section=5" title="Edit section: Further reading">edit</a>]</span> <span class="mw-headline" id="Further_reading">Further reading</span></h2>
|
76
|
+
<ul>
|
77
|
+
<li><a href="/wiki/Gavin_de_Beer" title="Gavin de Beer">Gavin de Beer</a>, 'Iktin', in The Geographical Journal vol. 126 (June 1960)</li>
|
78
|
+
<li>I. S. Maxwell, 'The location of Ictis' in <i>Journal of the Royal Institution of Cornwall</i> 6 (4) (1972)</li>
|
79
|
+
<li><a href="/wiki/Barry_Cunliffe" title="Barry Cunliffe">Barry W. Cunliffe</a>, 'Ictis: Is it here?' in <i>Oxford Journal of Archaeology</i>, vol. 2, issue 1 (March 1983)</li>
|
80
|
+
<li>S. Mitchell, <i>Cornish tin, Julius Caesar, and the invasion of Britain</i> (1983)</li>
|
81
|
+
<li>Christopher F. C. Hawkes, 'Ictis disentangled and the British tin trade' in <i>Oxford Journal of Archaeology</i>, 3 (1984), pp. 211–233</li>
|
82
|
+
<li>R. D. Penhallurick, <i>Tin in Antiquity</i> (London, 1986)</li>
|
83
|
+
</ul>
|
84
|
+
|
85
|
+
|
86
|
+
<!--
|
87
|
+
NewPP limit report
|
88
|
+
Preprocessor node count: 339/1000000
|
89
|
+
Post-expand include size: 2276/2048000 bytes
|
90
|
+
Template argument size: 1556/2048000 bytes
|
91
|
+
Expensive parser function count: 0/500
|
92
|
+
-->
|
93
|
+
|
94
|
+
<!-- Saved in parser cache with key enwiki:pcache:idhash:34635676-0!*!0!!en!4!* and timestamp 20120305150144 -->
|
95
|
+
</div>
|
data/spec/spec_helper.rb
CHANGED
@@ -12,4 +12,4 @@ RSpec.configure do |config|
|
|
12
12
|
config.treat_symbols_as_metadata_keys_with_true_values = true
|
13
13
|
config.filter_run :focus => true # Add a :focus tag to a given spec only that spec will be run instead of all of them
|
14
14
|
config.run_all_when_everything_filtered = true
|
15
|
-
end
|
15
|
+
end
|
data/spec/sumitup/parser_spec.rb
CHANGED
@@ -1,179 +1,104 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Sumitup::Parser do
|
4
|
-
|
5
|
-
@image_width_limit = 200
|
6
|
-
@parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => @image_width_limit)
|
7
|
-
end
|
4
|
+
|
8
5
|
describe "summarize" do
|
9
6
|
before do
|
10
|
-
@html =
|
11
|
-
<div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
|
12
|
-
<p style="display:none;">Can't see this!</p>
|
13
|
-
<p></p>
|
14
|
-
<p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
|
15
|
-
bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
|
16
|
-
ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
|
17
|
-
The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
|
18
|
-
<p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
|
19
|
-
uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
|
20
|
-
While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
|
21
|
-
healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
|
22
|
-
me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
|
23
|
-
<p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
|
24
|
-
youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
|
25
|
-
something so you can't see it." She's 4 so suck it up.</p>
|
26
|
-
<img src="http://www.example.com/test.jpg" width="600" height="600" />
|
27
|
-
<img src="http://www.example.com/nowidth.jpg" />
|
28
|
-
<p>
|
29
|
-
<style type="text/css">
|
30
|
-
.gallery {
|
31
|
-
margin: auto;
|
32
|
-
}
|
33
|
-
.gallery-item {
|
34
|
-
float: left;
|
35
|
-
margin-top: 10px;
|
36
|
-
text-align: center;
|
37
|
-
width: 33%; }
|
38
|
-
.gallery img {
|
39
|
-
border: 2px solid #cfcfcf;
|
40
|
-
}
|
41
|
-
.gallery-caption {
|
42
|
-
margin-left: 0;
|
43
|
-
}
|
44
|
-
</style>
|
45
|
-
<!-- see gallery_shortcode() in wp-includes/media.php -->
|
46
|
-
</p><div class="gallery"><dl class="gallery-item">
|
47
|
-
<dt class="gallery-icon">
|
48
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
|
49
|
-
</dt>
|
50
|
-
<dd class="gallery-caption">
|
51
|
-
Flesh always loses against asphalt
|
52
|
-
</dd></dl><dl class="gallery-item">
|
53
|
-
<dt class="gallery-icon">
|
54
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
|
55
|
-
</dt>
|
56
|
-
<dd class="gallery-caption">
|
57
|
-
My leg is straight so it is harder to see, but if I bend it you can see the tendons
|
58
|
-
</dd></dl><dl class="gallery-item">
|
59
|
-
<dt class="gallery-icon">
|
60
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
|
61
|
-
</dt>
|
62
|
-
<dd class="gallery-caption">
|
63
|
-
Drugs make you happy
|
64
|
-
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
65
|
-
<dt class="gallery-icon">
|
66
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
|
67
|
-
</dt>
|
68
|
-
<dd class="gallery-caption">
|
69
|
-
Joel stuck around to offer moral support
|
70
|
-
</dd></dl><dl class="gallery-item">
|
71
|
-
<dt class="gallery-icon">
|
72
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
|
73
|
-
</dt>
|
74
|
-
<dd class="gallery-caption">
|
75
|
-
After they cleaned it up
|
76
|
-
</dd></dl><dl class="gallery-item">
|
77
|
-
<dt class="gallery-icon">
|
78
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
|
79
|
-
</dt>
|
80
|
-
<dd class="gallery-caption">
|
81
|
-
This isn't as much fun as it looks
|
82
|
-
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
83
|
-
<dt class="gallery-icon">
|
84
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
|
85
|
-
</dt>
|
86
|
-
<dd class="gallery-caption">
|
87
|
-
Irrigating the wound - like its a crop or something
|
88
|
-
</dd></dl><dl class="gallery-item">
|
89
|
-
<dt class="gallery-icon">
|
90
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
|
91
|
-
</dt>
|
92
|
-
<dd class="gallery-caption">
|
93
|
-
After they took the bandage off the first time - 5 days later
|
94
|
-
</dd></dl><dl class="gallery-item">
|
95
|
-
<dt class="gallery-icon">
|
96
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
|
97
|
-
</dt>
|
98
|
-
<dd class="gallery-caption">
|
99
|
-
After they took the bandage off the first time - 5 days later
|
100
|
-
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
101
|
-
<dt class="gallery-icon">
|
102
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
|
103
|
-
</dt>
|
104
|
-
<dd class="gallery-caption">
|
105
|
-
After 7 days. Still not pretty, but it is amazing how the human body heals
|
106
|
-
</dd></dl><dl class="gallery-item">
|
107
|
-
<dt class="gallery-icon">
|
108
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
|
109
|
-
</dt>
|
110
|
-
<dd class="gallery-caption">
|
111
|
-
This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
|
112
|
-
</dd></dl><dl class="gallery-item">
|
113
|
-
<dt class="gallery-icon">
|
114
|
-
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
|
115
|
-
</dt>
|
116
|
-
<dd class="gallery-caption">
|
117
|
-
Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
|
118
|
-
</dd></dl><br style="clear: both">
|
119
|
-
<br style="clear: both;">
|
120
|
-
</div>
|
121
|
-
<br>
|
122
|
-
<!--more--><!-- BlogGlue Cache: No --><p></p>
|
123
|
-
</div>}
|
124
|
-
@short_result = @parser.summarize(@html, 5)
|
125
|
-
@long_result = @parser.summarize(@html, 100000)
|
126
|
-
end
|
127
|
-
it "should summarize the content by number of words" do
|
128
|
-
@short_result.should_not include('than')
|
129
|
-
@short_result.should include('more')
|
130
|
-
end
|
131
|
-
it "should remove html comments" do
|
132
|
-
@short_result.should_not include('<!--more--><!-- BlogGlue Cache: No -->')
|
133
|
-
end
|
134
|
-
it "should keep the image tag" do
|
135
|
-
@long_result.should include(%Q{<img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="#{@image_width_limit}" title="" alt="">})
|
136
|
-
end
|
137
|
-
it "should remove the style tag" do
|
138
|
-
@long_result.should_not include('<style type="text/css">')
|
139
|
-
end
|
140
|
-
it "should remove empty tags" do
|
141
|
-
@long_result.should_not include('<p></p>')
|
7
|
+
@html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
|
142
8
|
end
|
143
|
-
|
144
|
-
|
9
|
+
|
10
|
+
describe "Sanitize options" do
|
11
|
+
it "should remove html comments" do
|
12
|
+
result = Sumitup::Parser.new.summarize(@html, 100000)
|
13
|
+
result.should_not include('<!-- An html comment -->')
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should remove the style tag" do
|
17
|
+
result = Sumitup::Parser.new.summarize(@html, 100000)
|
18
|
+
result.should_not include('<style type="text/css">')
|
19
|
+
end
|
145
20
|
end
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
21
|
+
|
22
|
+
describe "word_transformer" do
|
23
|
+
it "should summarize the content by number of words" do
|
24
|
+
parser = Sumitup::Parser.new(:max_words => 1000)
|
25
|
+
result = parser.summarize(@html, 5)
|
26
|
+
result.should_not include('consectetur')
|
27
|
+
result.should include('amet')
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should keep permitted html in summary" do
|
31
|
+
parser = Sumitup::Parser.new(:max_words => 1000)
|
32
|
+
result = parser.summarize(@html, 5)
|
33
|
+
result.should include('strong')
|
34
|
+
result.should include('blockquote')
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should remove empty tags" do
|
38
|
+
result = Sumitup::Parser.new.summarize(@html, 100000)
|
39
|
+
result.should_not include('<p></p>')
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should remove tags with display:none" do
|
43
|
+
result = Sumitup::Parser.new.summarize(@html, 100000)
|
44
|
+
result.should_not include('display:none')
|
45
|
+
end
|
150
46
|
end
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
47
|
+
|
48
|
+
describe "image_transformer" do
|
49
|
+
it "should set the width to 240 if width is greater than 240" do
|
50
|
+
parser = Sumitup::Parser.new(:image_width_limit => 240)
|
51
|
+
result = parser.summarize(@html, 10000)
|
52
|
+
result.should include(%Q{img src="http://www.example.com/big.jpg" width="240">})
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should only allow 2 images" do
|
56
|
+
parser = Sumitup::Parser.new(:max_images => 2)
|
57
|
+
result = parser.summarize(@html, 10000)
|
58
|
+
doc = Nokogiri::HTML(result)
|
59
|
+
doc.css('img').length.should == 2
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should not keep small images" do
|
63
|
+
result = Sumitup::Parser.new.summarize(@html, 100000)
|
64
|
+
result.should_not include('http://www.example.com/small.jpg')
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should keep images as is that are not over the width limit" do
|
68
|
+
parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => 200)
|
69
|
+
result = parser.summarize(@html, 100000)
|
70
|
+
result.should include('<img src="http://www.example.com/photo.jpg" width="150" height="150" title="" alt="">')
|
71
|
+
end
|
159
72
|
end
|
73
|
+
|
160
74
|
end
|
161
75
|
|
162
76
|
describe "snippet" do
|
163
|
-
|
77
|
+
before do
|
78
|
+
@parser = Sumitup::Parser.new
|
79
|
+
end
|
80
|
+
it "should build a string 5 words long" do
|
164
81
|
text = "Kimball was born to Solomon Farnham Kimball and Anna Spaulding in Sheldon, Franklin County, Vermont. Kimball's forefathers arrived in America from England and started"
|
165
|
-
@parser.snippet(text, 5
|
82
|
+
result, count = @parser.snippet(text, 5)
|
83
|
+
result.should == "Kimball was born to Solomon"
|
84
|
+
count.should == 5
|
166
85
|
end
|
86
|
+
|
167
87
|
it "should not crash if string is nil" do
|
168
|
-
|
169
|
-
|
88
|
+
result, count = @parser.snippet(nil, 5)
|
89
|
+
result.should == ''
|
90
|
+
count.should == 0
|
170
91
|
end
|
171
92
|
end
|
172
93
|
|
173
94
|
describe "is_blank?" do
|
95
|
+
before do
|
96
|
+
@parser = Sumitup::Parser.new
|
97
|
+
end
|
174
98
|
it "should be true if text is nil" do
|
175
99
|
@parser.is_blank?(nil).should be_true
|
176
100
|
end
|
101
|
+
|
177
102
|
it "should be true if text is ''" do
|
178
103
|
@parser.is_blank?('').should be_true
|
179
104
|
end
|
data/sumitup.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "sumitup"
|
8
|
-
s.version = "0.1.1"
|
8
|
+
s.version = "0.1.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Justin Ball"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-06"
|
13
13
|
s.description = "Given an html document or fragment this gem will build a summary of the content."
|
14
14
|
s.email = "justinball@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -28,6 +28,9 @@ Gem::Specification.new do |s|
|
|
28
28
|
"VERSION",
|
29
29
|
"lib/sumitup.rb",
|
30
30
|
"lib/sumitup/parser.rb",
|
31
|
+
"spec/fixtures/basic.html",
|
32
|
+
"spec/fixtures/justin.html",
|
33
|
+
"spec/fixtures/wikipedia.html",
|
31
34
|
"spec/spec_helper.rb",
|
32
35
|
"spec/sumitup/parser_spec.rb",
|
33
36
|
"sumitup.gemspec"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sumitup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 1
|
10
|
-
version: 0.1.1
|
9
|
+
- 2
|
10
|
+
version: 0.1.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Justin Ball
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-06 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|
@@ -205,6 +205,9 @@ files:
|
|
205
205
|
- VERSION
|
206
206
|
- lib/sumitup.rb
|
207
207
|
- lib/sumitup/parser.rb
|
208
|
+
- spec/fixtures/basic.html
|
209
|
+
- spec/fixtures/justin.html
|
210
|
+
- spec/fixtures/wikipedia.html
|
208
211
|
- spec/spec_helper.rb
|
209
212
|
- spec/sumitup/parser_spec.rb
|
210
213
|
- sumitup.gemspec
|