sumitup 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.1
1
+ 0.1.2
@@ -1,10 +1,8 @@
1
1
  module Sumitup
2
2
  class Parser
3
3
 
4
- IMAGE_WIDTH_LIMIT = 230
5
-
6
4
  attr_accessor :word_count, :max_words
7
- attr_accessor :image_count, :image_width_limit, :max_images
5
+ attr_accessor :image_count, :image_width_limit, :max_images, :min_image_size
8
6
  attr_accessor :elements, :attributes, :protocols, :remove_contents
9
7
  attr_accessor :omission
10
8
 
@@ -12,16 +10,18 @@ module Sumitup
12
10
 
13
11
  self.omission = options[:omission] || ''
14
12
 
15
- self.word_count = options[:word_count] || 0
13
+ self.word_count = 0
16
14
  self.max_words = options[:max_words] || 100
17
15
 
18
- self.image_count = options[:image_count] || 0
16
+ self.image_count = 0
17
+ self.min_image_size = options[:min_image_size] || 40
19
18
  self.image_width_limit = options[:image_width_limit] || 230
20
- self.max_images = options[:max_images] || 2
21
-
19
+ self.max_images = options[:max_images] || 1
20
+
21
+ # White listed elements
22
22
  self.elements = options[:elements] || %w(
23
- a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre
24
- q s samp small strike strong sub sup time u ul var img span
23
+ a abbr b blockquote cite code dfn em i kbd mark q samp small s strike strong sub sup time u var
24
+ br dd dl dt li ol p pre ul img span
25
25
  )
26
26
 
27
27
  self.attributes = options[:attributes] || {
@@ -43,90 +43,118 @@ module Sumitup
43
43
  # Removes html and generate a summary
44
44
  def summarize(html, max = nil)
45
45
  return '' if is_blank?(html)
46
-
47
- self.max_words = max unless max.nil?
48
-
49
- Sanitize.clean(html,
46
+ unclean = Nokogiri::HTML::DocumentFragment.parse(html.dup)
47
+ summarize_fragment(unclean, max).to_html
48
+ end
49
+
50
# Sanitizes +node+ in place with the parser's whitelist settings and the
# word/image transformers, then hands the cleaned node to summarize_node.
#
# node - The parsed node or fragment to clean and summarize.
# max  - Optional word limit, forwarded unchanged to summarize_node.
#
# Returns whatever summarize_node returns for the cleaned node.
def summarize_fragment(node, max = nil)
  sanitize_options = {
    :elements        => elements,
    :attributes      => attributes,
    :protocols       => protocols,
    :remove_contents => remove_contents,
    :transformers    => [word_transformer, image_transformer]
  }
  cleaned = Sanitize.clean_node!(node, sanitize_options)
  summarize_node(cleaned, max)
end
59
+
60
# Walks +node+ depth-first and trims its text so that the running
# word_count never exceeds the word limit.
#
# node - The node to summarize; its children are processed first.
# max  - Word limit; defaults to max_words when nil.
#
# Text nodes reached after the budget is spent are removed; a text node that
# straddles the limit is truncated with snippet. Returns +node+.
def summarize_node(node, max = nil)
  max ||= self.max_words

  # summarize all children of the node
  node.children.each { |child| summarize_node(child, max) }

  return node unless node.text?

  # snippet never lets word_count pass max, so "budget spent" shows up as a
  # remaining count of zero (or less when max itself is not positive).
  remaining = max - self.word_count
  if remaining <= 0
    node.remove
  else
    # if the text of the current node makes us go over then truncate it
    result, count = snippet(node.inner_text, remaining)
    self.word_count += count
    node.content = result
  end

  node
end
81
+
82
# Truncates text at a word boundary.
# Parameters:
#   text - The text to truncate (nil or empty text yields an empty result)
#   max  - The maximum number of words to keep
# Returns a two-element array: [truncated text, number of words kept].
# The text element is always a String, never nil.
def snippet(text, max)
  return ['', 0] if is_blank?(text)

  # Clamp the budget so a zero or negative max keeps nothing instead of
  # raising inside Array#first.
  kept = text.split.first([max, 0].max)

  # join always returns a String; the previous strip! returned nil whenever
  # there was nothing to strip (whitespace-only text or an exhausted budget).
  [kept.join(' '), kept.size]
end
97
+
98
# Reports whether +text+ is nil or empty. Whitespace-only text is not
# considered blank.
def is_blank?(text)
  return true if text.nil?

  text.empty?
end
57
101
 
58
# Builds the Sanitize transformer that prunes elements which should not
# contribute to a summary.
#
# The returned lambda ignores non-element nodes and removes:
# * elements whose inline style contains "display: none" (any letter case);
# * elements with no text and no children, except img and br.
#
# The lambda always returns nil.
def word_transformer
  lambda do |env|
    node = env[:node]
    name = env[:node_name]
    return unless node.element?

    # Remove nodes with display none. CSS property names and keywords are
    # case-insensitive, so "DISPLAY: NONE" must match as well.
    if node['style'] && node['style'] =~ /display\s*:\s*none/i
      node.remove
      return
    end

    # Remove empty nodes
    if node.text.empty? && node.children.empty? && !['img', 'br'].include?(name)
      node.remove
    end

    nil
  end
end
81
-
124
+
82
125
  def image_transformer
83
126
  me = self
84
127
  lambda do |env|
85
128
  node = env[:node]
86
- if ['img'].include?(env[:node_name])
87
- me.image_count += 1
88
- if me.image_count > me.max_images
89
- node.remove
129
+ return unless ['img'].include?(env[:node_name])
130
+
131
+ if (me.image_count+1) > me.max_images # We add a new image below so we have to make sure we won't go over the limit
132
+ node.remove
133
+ else
134
+ keep_it = false
135
+
136
+ if node.attributes['width']
137
+ width = node.attributes['width'].value.to_i rescue 0
138
+ keep_it = true if width > me.min_image_size
90
139
  else
91
- # Force width of images
92
- node['width'] = me.image_width_limit.to_s
93
- node.attributes['height'].remove if node.attributes['height']
140
+ width = nil
141
+ keep_it = true
94
142
  end
143
+
144
+ if keep_it
145
+ me.image_count += 1
146
+ if width == nil || width > me.image_width_limit
147
+ node['width'] = me.image_width_limit.to_s
148
+ node.attributes['height'].remove if node.attributes['height']
149
+ end
150
+ else
151
+ node.remove
152
+ end
153
+
95
154
  end
155
+
96
156
  end
97
157
  end
98
158
 
99
- def empty_transformer
100
- lambda do |env|
101
- node = env[:node]
102
- if node.text.empty? && node.children.empty? && !['img', 'br'].include?(env[:node_name])
103
- node.remove
104
- end
105
- end
106
- end
107
-
108
- def no_display_transformer
109
- lambda do |env|
110
- node = env[:node]
111
- if node['style'] && node['style'] =~ /display\s*:\s*none/
112
- node.remove
113
- end
114
- end
115
- end
116
-
117
- # Truncates text at a word boundry
118
- # Parameters:
119
- # text - The text to truncate
120
- # wordcount - The number of words
121
- # omission - Text to add when the text is truncated ie 'read more' or '...
122
- def snippet(text, wordcount, omission)
123
- return '' if is_blank?(text)
124
- text.split[0..(wordcount-1)].join(" ") + (text.split.size > wordcount ? " " + omission : "")
125
- end
126
-
127
- def is_blank?(text)
128
- text.nil? || text.empty?
129
- end
130
-
131
159
  end
132
160
  end
@@ -0,0 +1,36 @@
1
+ <div class="entry">
2
+ <style type="text/css">
3
+ .item {
4
+ float: left;
5
+ margin-top: 10px;
6
+ text-align: center;
7
+ width: 33%; }
8
+ .img {
9
+ border: 2px solid #cfcfcf;
10
+ }
11
+ .caption {
12
+ margin-left: 0;
13
+ }
14
+ </style>
15
+ <p style="display:none;">Can't see this!</p>
16
+ <p></p>
17
+ <p>
18
+ Lorem <strong>ipsum</strong> dolor sit <blockquote>amet</blockquote>, consectetur adipiscing elit. Cras eleifend ornare laoreet. Nulla rutrum tristique nibh accumsan ornare. Phasellus varius aliquet tortor quis feugiat. Morbi ultrices mauris eu metus hendrerit id laoreet risus auctor. In et nisi mi. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nulla facilisi. Nunc malesuada faucibus cursus. Aenean metus erat, ullamcorper in pellentesque sed, fermentum nec erat.
19
+ Vestibulum id augue dolor. Quisque non neque sit amet orci dictum pharetra nec in odio. Nunc et tortor elit. Proin porttitor pharetra sollicitudin. Duis in gravida nisi. Etiam convallis gravida scelerisque. Pellentesque porta rhoncus nunc ac vestibulum. Etiam varius sodales nunc id dictum. Proin faucibus gravida sagittis. Nunc ut velit lacus. Phasellus vehicula porta eleifend. Cras rutrum nunc in sem egestas hendrerit. Vivamus ante libero, accumsan sed eleifend vel, egestas eget erat. Aenean sodales, nibh at facilisis cursus, turpis quam mollis magna, tempus eleifend orci nunc eu lorem. Pellentesque velit felis, suscipit sed commodo tempor, gravida vel urna. Quisque adipiscing euismod consectetur.
20
+ </p>
21
+ <img src="http://www.example.com/small.jpg" width="15" />
22
+ <img src="http://www.example.com/big.jpg" width="600" height="600" />
23
+ <img src="http://www.example.com/nowidth.jpg" />
24
+ <!-- An html comment -->
25
+ <dl class="item">
26
+ <dt class="icon">
27
+ <a href="http://www.example.com/a_post" title="post">
28
+ <img src="http://www.example.com/photo.jpg" width="150" height="150" title="" alt="">
29
+ </a>
30
+ </dt>
31
+ <dd class="caption">
32
+ A Picture
33
+ </dd>
34
+ </dl>
35
+ <br />
36
+ </div>
@@ -0,0 +1,113 @@
1
+ <div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
2
+ <p style="display:none;">Can't see this!</p>
3
+ <p></p>
4
+ <p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
5
+ bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
6
+ ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
7
+ The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
8
+ <p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
9
+ uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
10
+ While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
11
+ healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
12
+ me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
13
+ <p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
14
+ youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
15
+ something so you can't see it." She's 4 so suck it up.</p>
16
+ <img src="http://www.example.com/test.jpg" width="600" height="600" />
17
+ <img src="http://www.example.com/nowidth.jpg" />
18
+ <p>
19
+ <style type="text/css">
20
+ .gallery {
21
+ margin: auto;
22
+ }
23
+ .gallery-item {
24
+ float: left;
25
+ margin-top: 10px;
26
+ text-align: center;
27
+ width: 33%; }
28
+ .gallery img {
29
+ border: 2px solid #cfcfcf;
30
+ }
31
+ .gallery-caption {
32
+ margin-left: 0;
33
+ }
34
+ </style>
35
+ <!-- see gallery_shortcode() in wp-includes/media.php -->
36
+ </p><div class="gallery"><dl class="gallery-item">
37
+ <dt class="gallery-icon">
38
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
39
+ </dt>
40
+ <dd class="gallery-caption">
41
+ Flesh always loses against asphalt
42
+ </dd></dl><dl class="gallery-item">
43
+ <dt class="gallery-icon">
44
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
45
+ </dt>
46
+ <dd class="gallery-caption">
47
+ My leg is straight so it is harder to see, but if I bend it you can see the tendons
48
+ </dd></dl><dl class="gallery-item">
49
+ <dt class="gallery-icon">
50
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
51
+ </dt>
52
+ <dd class="gallery-caption">
53
+ Drugs make you happy
54
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
55
+ <dt class="gallery-icon">
56
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
57
+ </dt>
58
+ <dd class="gallery-caption">
59
+ Joel stuck around to offer moral support
60
+ </dd></dl><dl class="gallery-item">
61
+ <dt class="gallery-icon">
62
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
63
+ </dt>
64
+ <dd class="gallery-caption">
65
+ After they cleaned it up
66
+ </dd></dl><dl class="gallery-item">
67
+ <dt class="gallery-icon">
68
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
69
+ </dt>
70
+ <dd class="gallery-caption">
71
+ This isn't as much fun as it looks
72
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
73
+ <dt class="gallery-icon">
74
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
75
+ </dt>
76
+ <dd class="gallery-caption">
77
+ Irrigating the wound - like its a crop or something
78
+ </dd></dl><dl class="gallery-item">
79
+ <dt class="gallery-icon">
80
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
81
+ </dt>
82
+ <dd class="gallery-caption">
83
+ After they took the bandage off the first time - 5 days later
84
+ </dd></dl><dl class="gallery-item">
85
+ <dt class="gallery-icon">
86
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
87
+ </dt>
88
+ <dd class="gallery-caption">
89
+ After they took the bandage off the first time - 5 days later
90
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
91
+ <dt class="gallery-icon">
92
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
93
+ </dt>
94
+ <dd class="gallery-caption">
95
+ After 7 days. Still not pretty, but it is amazing how the human body heals
96
+ </dd></dl><dl class="gallery-item">
97
+ <dt class="gallery-icon">
98
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
99
+ </dt>
100
+ <dd class="gallery-caption">
101
+ This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
102
+ </dd></dl><dl class="gallery-item">
103
+ <dt class="gallery-icon">
104
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
105
+ </dt>
106
+ <dd class="gallery-caption">
107
+ Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
108
+ </dd></dl><br style="clear: both">
109
+ <br style="clear: both;">
110
+ </div>
111
+ <br>
112
+ <!--more--><!-- BlogGlue Cache: No --><p></p>
113
+ </div>
@@ -0,0 +1,95 @@
1
+ <div lang="en" dir="ltr" class="mw-content-ltr"><div class="thumb tright">
2
+ <div class="thumbinner" style="width:252px;"><a href="/wiki/File:England-Saint-Michaels-Mount-1900-1.jpg" class="image"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/0/00/England-Saint-Michaels-Mount-1900-1.jpg/250px-England-Saint-Michaels-Mount-1900-1.jpg" width="250" height="178" class="thumbimage"></a>
3
+ <div class="thumbcaption">
4
+ <div class="magnify"><a href="/wiki/File:England-Saint-Michaels-Mount-1900-1.jpg" class="internal" title="Enlarge"><img src="//bits.wikimedia.org/skins-1.19/common/images/magnify-clip.png" width="15" height="11" alt=""></a></div>
5
+ <a href="/wiki/St_Michael%27s_Mount" title="St Michael's Mount">St Michael's Mount</a>, one of several candidates to be the island of Ictis</div>
6
+ </div>
7
+ </div>
8
+ <p><b>Ictis</b>, or <b>Iktin</b>, is or was an island described as a <a href="/wiki/Tin_sources_and_trade_in_ancient_times" title="Tin sources and trade in ancient times">tin trading</a> centre in the <i><a href="/wiki/Bibliotheca_historica" title="Bibliotheca historica">Bibliotheca historica</a></i> of the <a href="/wiki/Greeks_in_Italy" title="Greeks in Italy">Sicilian-Greek</a> historian <a href="/wiki/Diodorus_Siculus" title="Diodorus Siculus">Diodorus Siculus</a>, writing in the first century BC.</p>
9
+ <p>While Ictis is widely accepted to have been an island somewhere off the southern coast of what is now England, scholars continue to debate its precise location. Candidates include <a href="/wiki/St_Michael%27s_Mount" title="St Michael's Mount">St Michael's Mount</a> and <a href="/wiki/Looe_Island" title="Looe Island">Looe Island</a> off the coast of <a href="/wiki/Cornwall" title="Cornwall">Cornwall</a>, the <a href="/wiki/Mount_Batten" title="Mount Batten">Mount Batten</a> peninsula in <a href="/wiki/Devon" title="Devon">Devon</a>, and the <a href="/wiki/Isle_of_Wight" title="Isle of Wight">Isle of Wight</a> further to the east.</p>
10
+ <table id="toc" class="toc">
11
+ <tbody><tr>
12
+ <td>
13
+ <div id="toctitle">
14
+ <h2>Contents</h2>
15
+ <span class="toctoggle">&nbsp;[<a href="#" class="internal" id="togglelink">hide</a>]&nbsp;</span></div>
16
+ <ul>
17
+ <li class="toclevel-1 tocsection-1"><a href="#Primary_sources"><span class="tocnumber">1</span> <span class="toctext">Primary sources</span></a></li>
18
+ <li class="toclevel-1 tocsection-2"><a href="#Debate"><span class="tocnumber">2</span> <span class="toctext">Debate</span></a></li>
19
+ <li class="toclevel-1 tocsection-3"><a href="#See_also"><span class="tocnumber">3</span> <span class="toctext">See also</span></a></li>
20
+ <li class="toclevel-1 tocsection-4"><a href="#Notes"><span class="tocnumber">4</span> <span class="toctext">Notes</span></a></li>
21
+ <li class="toclevel-1 tocsection-5"><a href="#Further_reading"><span class="tocnumber">5</span> <span class="toctext">Further reading</span></a></li>
22
+ </ul>
23
+ </td>
24
+ </tr>
25
+ </tbody></table>
26
+ <h2><span class="editsection">[<a href="/w/index.php?title=Ictis&amp;action=edit&amp;section=1" title="Edit section: Primary sources">edit</a>]</span> <span class="mw-headline" id="Primary_sources">Primary sources</span></h2>
27
+ <p>Diodorus Siculus, who flourished between about 60 and about 30 BC, is supposed to have relied for his account of the geography of <a href="/wiki/Great_Britain" title="Great Britain">Britain</a> on a lost work of <a href="/wiki/Pytheas" title="Pytheas">Pytheas</a>, a Greek geographer from <a href="/wiki/Marseilles" title="Marseilles" class="mw-redirect">Massalia</a> who made a voyage around the coast of Britain near the end of the fourth century BC, searching for the source of <a href="/wiki/Amber" title="Amber">amber</a>. The record of the voyage of Pytheas was lost in antiquity but was known to some later writers, including <a href="/wiki/Timaeus_(historian)" title="Timaeus (historian)">Timaeus</a>, <a href="/wiki/Posidonius" title="Posidonius">Posidonius</a> and <a href="/wiki/Pliny_the_Elder" title="Pliny the Elder">Pliny the Elder</a>. Their work is contradictory, but from it deductions can be made about what was reported by Pytheas. No other sources concerning the tin trade in the ancient world are known.<sup id="cite_ref-roman-britain_0-0" class="reference"><a href="#cite_note-roman-britain-0"><span>[</span>1<span>]</span></a></sup></p>
28
+ <p>Diodorus gives an account that is generally supposed to be a description of the working of Cornish tin at about the time of the voyage of Pytheas. He says:</p>
29
+ <blockquote class="templatequote">
30
+ <div class="Bug6200">The inhabitants of that part of Britain which is called Belerion are very fond of strangers and from their intercourse with foreign merchants are civilised in their manner of life. They prepare the tin, working very carefully the earth in which it is produced. The ground is rocky but it contains earthy veins, the produce of which is ground down, smelted and purified. They beat the metal into masses shaped like knuckle-bones and carry it off to a certain island off Britain called Iktis. During the ebb of the tide the intervening space is left dry and they carry over to the island the tin in abundance in their wagons&nbsp;... Here then the merchants buy the tin from the natives and carry it over to Gaul, and after travelling overland for about thirty days, they finally bring their loads on horses to the mouth of the Rhone.<sup id="cite_ref-roman-britain_0-1" class="reference"><a href="#cite_note-roman-britain-0"><span>[</span>1<span>]</span></a></sup></div>
31
+ </blockquote>
32
+ <div class="thumb tright">
33
+ <div class="thumbinner" style="width:252px;"><a href="/wiki/File:Looe_island.jpg" class="image"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/a/ad/Looe_island.jpg/250px-Looe_island.jpg" width="250" height="184" class="thumbimage"></a>
34
+ <div class="thumbcaption">
35
+ <div class="magnify"><a href="/wiki/File:Looe_island.jpg" class="internal" title="Enlarge"><img src="//bits.wikimedia.org/skins-1.19/common/images/magnify-clip.png" width="15" height="11" alt=""></a></div>
36
+ <a href="/wiki/Looe_Island" title="Looe Island">Looe</a>, Cornwall, another island suggested as Ictis</div>
37
+ </div>
38
+ </div>
39
+ <p>In the Greek text of Diodorus the name appears, in the <a href="/wiki/Accusative_case" title="Accusative case">accusative case</a>, as "Iktin", so that translators have inferred that the <a href="/wiki/Nominative_case" title="Nominative case">nominative</a> form of the name was "Iktis", rendering this into the medieval <i><a href="/wiki/Lingua_franca" title="Lingua franca">lingua franca</a></i> of <a href="/wiki/Latin" title="Latin">Latin</a> (which only rarely used the letter 'k') as "Ictis". However, some commentators doubt that "Ictis" is correct and prefer "Iktin".<sup id="cite_ref-beer_1-0" class="reference"><a href="#cite_note-beer-1"><span>[</span>2<span>]</span></a></sup></p>
40
+ <p>In Book IV of his <i><a href="/wiki/Natural_History_(Pliny)" title="Natural History (Pliny)">Natural History</a></i>, Pliny quotes Timaeus and refers to "<i>insulam Mictim</i>" (the island of Mictis, or perhaps of Mictim):</p>
41
+ <blockquote class="templatequote">
42
+ <div class="Bug6200">There is an island named Mictis lying inwards six days' sail from Britain, where tin is found, and to which the Britons cross in boats of wickerwork covered with stitched hides.<sup id="cite_ref-cunliffe_2-0" class="reference"><a href="#cite_note-cunliffe-2"><span>[</span>3<span>]</span></a></sup></div>
43
+ </blockquote>
44
+ <p>It has been suggested that "<i>insulam Mictim</i>" was a copying error for <i>insulam Ictim</i>, and Diodorus and Pliny probably both relied on the same primary source. However, while it is possible that "Mictim" and "Iktin" are one and the same, it is also possible that they are different places. The word "inwards" can be interpreted as meaning "towards our home", and six days' sail from Britain could take a boat to somewhere on the Atlantic coast of what is now France.<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span>[</span>4<span>]</span></a></sup></p>
45
+ <p><a href="/wiki/Strabo" title="Strabo">Strabo</a>, a contemporary of Diodorus, stated in his <i>Geography</i> that British tin was shipped from <a href="/wiki/Marseille" title="Marseille">Massalia</a> on the Mediterranean coast of <a href="/wiki/Gaul" title="Gaul">Gaul</a>.<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span>[</span>5<span>]</span></a></sup></p>
46
+ <p><a href="/wiki/Julius_Caesar" title="Julius Caesar">Julius Caesar</a>, in his <i>De Bello Gallico</i>, says of the <a href="/wiki/Veneti_(Gaul)" title="Veneti (Gaul)">Veneti</a>: "This last-named people were by far the most powerful on the coast of <a href="/wiki/Armorica" title="Armorica">Armorica</a>: they had a large fleet plying between their own ports and Britain; they knew more about the handling of ships and the science of navigation than anyone else thereabouts."<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span>[</span>6<span>]</span></a></sup></p>
47
+ <h2><span class="editsection">[<a href="/w/index.php?title=Ictis&amp;action=edit&amp;section=2" title="Edit section: Debate">edit</a>]</span> <span class="mw-headline" id="Debate">Debate</span></h2>
48
+ <p><a href="/wiki/William_Camden" title="William Camden">William Camden</a>, the <a href="/wiki/Elizabethan_era" title="Elizabethan era">Elizabethan</a> historian, took the view that the name "Ictis" was so similar to "Vectis", the <a href="/wiki/Latin" title="Latin">Latin</a> name for the Isle of Wight, that the two were probably the same island. The <a href="/wiki/Cornish_people" title="Cornish people">Cornish</a> antiquary <a href="/wiki/William_Borlase" title="William Borlase">William Borlase</a> (1696–1772) suggested that Ictis must have been near the coast of Cornwall and could have been a general name for a peninsula there.<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span>[</span>7<span>]</span></a></sup></p>
49
+ <p>In 1960, <a href="/wiki/Gavin_de_Beer" title="Gavin de Beer">Gavin de Beer</a> concluded that the most likely location of Iktin (the form of the name he preferred) was <a href="/wiki/St_Michael%27s_Mount" title="St Michael's Mount">St Michael's Mount</a>, a <a href="/wiki/Tidal_island" title="Tidal island">tidal island</a> near the town of <a href="/wiki/Marazion" title="Marazion">Marazion</a> in Cornwall. Apart from the effect of the tide being consistent with what is said by Diodorus, de Beer considered the other benefits of St Michael's Mount for the Britons.<sup id="cite_ref-beer_1-1" class="reference"><a href="#cite_note-beer-1"><span>[</span>2<span>]</span></a></sup> This identification is supported by the Roman Britain Organization and its website roman-britain.org.<sup id="cite_ref-roman-britain_0-2" class="reference"><a href="#cite_note-roman-britain-0"><span>[</span>1<span>]</span></a></sup></p>
50
+ <p>In 1972, I. S. Maxwell weighed up the competing claims of no fewer than twelve possible sites.<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span>[</span>8<span>]</span></a></sup> In 1983, after excavations, the archaeologist <a href="/wiki/Barry_Cunliffe" title="Barry Cunliffe">Barry W. Cunliffe</a> proposed the <a href="/wiki/Mount_Batten" title="Mount Batten">Mount Batten</a> <a href="/wiki/Peninsula" title="Peninsula">peninsula</a> near <a href="/wiki/Plymouth" title="Plymouth">Plymouth</a> as the site of Ictis.<sup id="cite_ref-cunliffe_2-1" class="reference"><a href="#cite_note-cunliffe-2"><span>[</span>3<span>]</span></a></sup> Near the mouth of the <a href="/wiki/River_Erme" title="River Erme">River Erme</a>, not far away, a shipwreck site has produced ingots of ancient tin, which indicates a trade along the coast, although dating the site is difficult and it may not belong to the <a href="/wiki/Bronze_Age" title="Bronze Age">Bronze Age</a>.<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span>[</span>9<span>]</span></a></sup></p>
51
+ <p>The assessment of Miranda Aldhouse-Green in <i>The Celtic World</i> (1996) was that</p>
52
+ <blockquote class="templatequote">
53
+ <div class="Bug6200">The two places considered most likely to be Ictis are the island of St Michael's Mount, Cornwall, and the peninsula of Mount Batten in Plymouth Sound (Cunliffe 1983; Hawkes 1984)&nbsp;... Mount Batten seems archaeologically more likely as there are a number of finds from there which indicate it was prominent in international trade from the fourth century BC until the first century AD (Cunliffe 1988).<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span>[</span>10<span>]</span></a></sup></div>
54
+ </blockquote>
55
+ <h2><span class="editsection">[<a href="/w/index.php?title=Ictis&amp;action=edit&amp;section=3" title="Edit section: See also">edit</a>]</span> <span class="mw-headline" id="See_also">See also</span></h2>
56
+ <ul>
57
+ <li><a href="/wiki/Mining_in_Cornwall_and_Devon" title="Mining in Cornwall and Devon">Mining in Cornwall and Devon</a></li>
58
+ <li><a href="/wiki/Tin_sources_and_trade_in_ancient_times" title="Tin sources and trade in ancient times">Tin sources and trade in ancient times</a></li>
59
+ </ul>
60
+ <h2><span class="editsection">[<a href="/w/index.php?title=Ictis&amp;action=edit&amp;section=4" title="Edit section: Notes">edit</a>]</span> <span class="mw-headline" id="Notes">Notes</span></h2>
61
+ <div class="reflist references-column-width" style="-moz-column-width: 33em; -webkit-column-width: 33em; column-width: 33em; list-style-type: decimal;">
62
+ <ol class="references">
63
+ <li id="cite_note-roman-britain-0">^ <a href="#cite_ref-roman-britain_0-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-roman-britain_0-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-roman-britain_0-2"><sup><i><b>c</b></i></sup></a> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.roman-britain.org/places/ictis.htm">ICTIS INSVLA</a> at roman-britain.org, accessed 7 February 2012</span></li>
64
+ <li id="cite_note-beer-1">^ <a href="#cite_ref-beer_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-beer_1-1"><sup><i><b>b</b></i></sup></a> <span class="reference-text"><a href="/wiki/Gavin_de_Beer" title="Gavin de Beer">Gavin de Beer</a>, "Iktin", in <i>The Geographical Journal</i> vol. 126 (June 1960) pp. 160–167, at p. 162</span></li>
65
+ <li id="cite_note-cunliffe-2">^ <a href="#cite_ref-cunliffe_2-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-cunliffe_2-1"><sup><i><b>b</b></i></sup></a> <span class="reference-text">Barry W. Cunliffe, "Ictis: Is it here?" in <i>Oxford Journal of Archaeology</i>, vol. 2, issue 1 (March 1983), pp. 123–126 (see <a rel="nofollow" class="external text" href="http://onlinelibrary.wiley.com/doi/10.1111/j.1468-0092.1983.tb00101.x/abstract">abstract</a>)</span></li>
66
+ <li id="cite_note-3"><b><a href="#cite_ref-3">^</a></b> <span class="reference-text">Barry Cunliffe, "Exchanges with the wider world" in <i>Iron age communities in Britain: an account of England, Scotland, and Wales from the seventh century BC until the Roman conquest</i> (Routledge, 1978) <a rel="nofollow" class="external text" href="http://books.google.co.uk/books?id=gthTGHWl4LUC&amp;pg=PA471#v=onepage&amp;q&amp;f=false">p. 471</a></span></li>
67
+ <li id="cite_note-4"><b><a href="#cite_ref-4">^</a></b> <span class="reference-text">Strabo's <i>Geographi</i> ca<i>, Book III, 2.9</i></span></li>
68
+ <li id="cite_note-5"><b><a href="#cite_ref-5">^</a></b> <span class="reference-text">Gaius Julius Caesar, <i>Caesar's War Commentaries</i> (Kessinger, 2004), <a rel="nofollow" class="external text" href="http://books.google.co.uk/books?id=RFBkyHEooQ0C&amp;pg=PA45#v=onepage&amp;q&amp;f=false">pp. 45–46</a></span></li>
69
+ <li id="cite_note-6"><b><a href="#cite_ref-6">^</a></b> <span class="reference-text">Sir Christopher Hawkins, <i>Observations on the tin trade of the ancients in Cornwall</i> (1811), p. 63: "Mr. Camden supposes, that, from the similarity of the words Ictis and Vectis, it was one and the same island. Dr. Borlase says, that the Ictis must have been situated somewhere near the Coast of Cornwall, and have been a general name for a peninsula, or some particular peninsula, and common emporium, on the same coast."</span></li>
70
+ <li id="cite_note-7"><b><a href="#cite_ref-7">^</a></b> <span class="reference-text">I. S. Maxwell, "The location of Ictis" in <i>Journal of the Royal Institution of Cornwall</i> 6 (4), pp. 293–319</span></li>
71
+ <li id="cite_note-8"><b><a href="#cite_ref-8">^</a></b> <span class="reference-text"><i>Erme Ingot Wreck Site Summary</i> (<a href="/wiki/English_Heritage" title="English Heritage">English Heritage</a>, 2000)</span></li>
72
+ <li id="cite_note-9"><b><a href="#cite_ref-9">^</a></b> <span class="reference-text">Miranda Jane Aldhouse-Green, <i>The Celtic World</i> (1996), p. 276</span></li>
73
+ </ol>
74
+ </div>
75
+ <h2><span class="editsection">[<a href="/w/index.php?title=Ictis&amp;action=edit&amp;section=5" title="Edit section: Further reading">edit</a>]</span> <span class="mw-headline" id="Further_reading">Further reading</span></h2>
76
+ <ul>
77
+ <li><a href="/wiki/Gavin_de_Beer" title="Gavin de Beer">Gavin de Beer</a>, 'Iktin', in The Geographical Journal vol. 126 (June 1960)</li>
78
+ <li>I. S. Maxwell, 'The location of Ictis' in <i>Journal of the Royal Institution of Cornwall</i> 6 (4) (1972)</li>
79
+ <li><a href="/wiki/Barry_Cunliffe" title="Barry Cunliffe">Barry W. Cunliffe</a>, 'Ictis: Is it here?' in <i>Oxford Journal of Archaeology</i>, vol. 2, issue 1 (March 1983)</li>
80
+ <li>S. Mitchell, <i>Cornish tin, Julius Caesar, and the invasion of Britain</i> (1983)</li>
81
+ <li>Christopher F. C. Hawkes, 'Ictis disentangled and the British tin trade' in <i>Oxford Journal of Archaeology</i>, 3 (1984), pp. 211–233</li>
82
+ <li>R. D. Penhallurick, <i>Tin in Antiquity</i> (London, 1986)</li>
83
+ </ul>
84
+
85
+
86
+ <!--
87
+ NewPP limit report
88
+ Preprocessor node count: 339/1000000
89
+ Post-expand include size: 2276/2048000 bytes
90
+ Template argument size: 1556/2048000 bytes
91
+ Expensive parser function count: 0/500
92
+ -->
93
+
94
+ <!-- Saved in parser cache with key enwiki:pcache:idhash:34635676-0!*!0!!en!4!* and timestamp 20120305150144 -->
95
+ </div>
@@ -12,4 +12,4 @@ RSpec.configure do |config|
12
12
  config.treat_symbols_as_metadata_keys_with_true_values = true
13
13
  config.filter_run :focus => true # Add a :focus tag to a given spec only that spec will be run instead of all of them
14
14
  config.run_all_when_everything_filtered = true
15
- end
15
+ end
@@ -1,179 +1,104 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Sumitup::Parser do
4
- before do
5
- @image_width_limit = 200
6
- @parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => @image_width_limit)
7
- end
4
+
8
5
  describe "summarize" do
9
6
  before do
10
- @html = %Q{
11
- <div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
12
- <p style="display:none;">Can't see this!</p>
13
- <p></p>
14
- <p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
15
- bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
16
- ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
17
- The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
18
- <p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
19
- uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
20
- While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
21
- healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
22
- me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
23
- <p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
24
- youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
25
- something so you can't see it." She's 4 so suck it up.</p>
26
- <img src="http://www.example.com/test.jpg" width="600" height="600" />
27
- <img src="http://www.example.com/nowidth.jpg" />
28
- <p>
29
- <style type="text/css">
30
- .gallery {
31
- margin: auto;
32
- }
33
- .gallery-item {
34
- float: left;
35
- margin-top: 10px;
36
- text-align: center;
37
- width: 33%; }
38
- .gallery img {
39
- border: 2px solid #cfcfcf;
40
- }
41
- .gallery-caption {
42
- margin-left: 0;
43
- }
44
- </style>
45
- <!-- see gallery_shortcode() in wp-includes/media.php -->
46
- </p><div class="gallery"><dl class="gallery-item">
47
- <dt class="gallery-icon">
48
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
49
- </dt>
50
- <dd class="gallery-caption">
51
- Flesh always loses against asphalt
52
- </dd></dl><dl class="gallery-item">
53
- <dt class="gallery-icon">
54
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
55
- </dt>
56
- <dd class="gallery-caption">
57
- My leg is straight so it is harder to see, but if I bend it you can see the tendons
58
- </dd></dl><dl class="gallery-item">
59
- <dt class="gallery-icon">
60
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
61
- </dt>
62
- <dd class="gallery-caption">
63
- Drugs make you happy
64
- </dd></dl><br style="clear: both"><dl class="gallery-item">
65
- <dt class="gallery-icon">
66
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
67
- </dt>
68
- <dd class="gallery-caption">
69
- Joel stuck around to offer moral support
70
- </dd></dl><dl class="gallery-item">
71
- <dt class="gallery-icon">
72
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
73
- </dt>
74
- <dd class="gallery-caption">
75
- After they cleaned it up
76
- </dd></dl><dl class="gallery-item">
77
- <dt class="gallery-icon">
78
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
79
- </dt>
80
- <dd class="gallery-caption">
81
- This isn't as much fun as it looks
82
- </dd></dl><br style="clear: both"><dl class="gallery-item">
83
- <dt class="gallery-icon">
84
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
85
- </dt>
86
- <dd class="gallery-caption">
87
- Irrigating the wound - like its a crop or something
88
- </dd></dl><dl class="gallery-item">
89
- <dt class="gallery-icon">
90
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
91
- </dt>
92
- <dd class="gallery-caption">
93
- After they took the bandage off the first time - 5 days later
94
- </dd></dl><dl class="gallery-item">
95
- <dt class="gallery-icon">
96
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
97
- </dt>
98
- <dd class="gallery-caption">
99
- After they took the bandage off the first time - 5 days later
100
- </dd></dl><br style="clear: both"><dl class="gallery-item">
101
- <dt class="gallery-icon">
102
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
103
- </dt>
104
- <dd class="gallery-caption">
105
- After 7 days. Still not pretty, but it is amazing how the human body heals
106
- </dd></dl><dl class="gallery-item">
107
- <dt class="gallery-icon">
108
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
109
- </dt>
110
- <dd class="gallery-caption">
111
- This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
112
- </dd></dl><dl class="gallery-item">
113
- <dt class="gallery-icon">
114
- <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
115
- </dt>
116
- <dd class="gallery-caption">
117
- Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
118
- </dd></dl><br style="clear: both">
119
- <br style="clear: both;">
120
- </div>
121
- <br>
122
- <!--more--><!-- BlogGlue Cache: No --><p></p>
123
- </div>}
124
- @short_result = @parser.summarize(@html, 5)
125
- @long_result = @parser.summarize(@html, 100000)
126
- end
127
- it "should summarize the content by number of words" do
128
- @short_result.should_not include('than')
129
- @short_result.should include('more')
130
- end
131
- it "should remove html comments" do
132
- @short_result.should_not include('<!--more--><!-- BlogGlue Cache: No -->')
133
- end
134
- it "should keep the image tag" do
135
- @long_result.should include(%Q{<img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="#{@image_width_limit}" title="" alt="">})
136
- end
137
- it "should remove the style tag" do
138
- @long_result.should_not include('<style type="text/css">')
139
- end
140
- it "should remove empty tags" do
141
- @long_result.should_not include('<p></p>')
7
+ @html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
142
8
  end
143
- it "should remove tags with display:none" do
144
- @long_result.should_not include('display:none')
9
+
10
+ describe "Sanitize options" do
11
+ it "should remove html comments" do
12
+ result = Sumitup::Parser.new.summarize(@html, 100000)
13
+ result.should_not include('<!-- An html comment -->')
14
+ end
15
+
16
+ it "should remove the style tag" do
17
+ result = Sumitup::Parser.new.summarize(@html, 100000)
18
+ result.should_not include('<style type="text/css">')
19
+ end
145
20
  end
146
- it "should set the width to 240 if width is greater than 240" do
147
- parser = Sumitup::Parser.new(:image_width_limit => 240)
148
- result = parser.summarize(@html, 10000)
149
- result.should include('<img src="http://www.example.com/test.jpg" width="240">')
21
+
22
+ describe "word_transformer" do
23
+ it "should summarize the content by number of words" do
24
+ parser = Sumitup::Parser.new(:max_words => 1000)
25
+ result = parser.summarize(@html, 5)
26
+ result.should_not include('consectetur')
27
+ result.should include('amet')
28
+ end
29
+
30
+ it "should keep permitted html in summary" do
31
+ parser = Sumitup::Parser.new(:max_words => 1000)
32
+ result = parser.summarize(@html, 5)
33
+ result.should include('strong')
34
+ result.should include('blockquote')
35
+ end
36
+
37
+ it "should remove empty tags" do
38
+ result = Sumitup::Parser.new.summarize(@html, 100000)
39
+ result.should_not include('<p></p>')
40
+ end
41
+
42
+ it "should remove tags with display:none" do
43
+ result = Sumitup::Parser.new.summarize(@html, 100000)
44
+ result.should_not include('display:none')
45
+ end
150
46
  end
151
- it "should only allow 2 images" do
152
- parser = Sumitup::Parser.new(:max_images => 2)
153
- result = parser.summarize(@html, 10000)
154
- doc = Nokogiri::HTML(result)
155
- doc.css('img').length.should == 2
156
- end
157
- it "should add a width to images that don't have one" do
158
- @long_result.should include(%Q{<img src="http://www.example.com/nowidth.jpg" width="#{@image_width_limit}">})
47
+
48
+ describe "image_transformer" do
49
+ it "should set the width to 240 if width is greater than 240" do
50
+ parser = Sumitup::Parser.new(:image_width_limit => 240)
51
+ result = parser.summarize(@html, 10000)
52
+ result.should include(%Q{img src="http://www.example.com/big.jpg" width="240">})
53
+ end
54
+
55
+ it "should only allow 2 images" do
56
+ parser = Sumitup::Parser.new(:max_images => 2)
57
+ result = parser.summarize(@html, 10000)
58
+ doc = Nokogiri::HTML(result)
59
+ doc.css('img').length.should == 2
60
+ end
61
+
62
+ it "should not keep small images" do
63
+ result = Sumitup::Parser.new.summarize(@html, 100000)
64
+ result.should_not include('http://www.example.com/small.jpg')
65
+ end
66
+
67
+ it "should keep images as is that are not over the width limit" do
68
+ parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => 200)
69
+ result = parser.summarize(@html, 100000)
70
+ result.should include('<img src="http://www.example.com/photo.jpg" width="150" height="150" title="" alt="">')
71
+ end
159
72
  end
73
+
160
74
  end
161
75
 
162
76
  describe "snippet" do
163
- it "should build a string 157 chars long" do
77
+ before do
78
+ @parser = Sumitup::Parser.new
79
+ end
80
+ it "should build a string 5 words long" do
164
81
  text = "Kimball was born to Solomon Farnham Kimball and Anna Spaulding in Sheldon, Franklin County, Vermont. Kimball's forefathers arrived in America from England and started"
165
- @parser.snippet(text, 5, '...').should == "Kimball was born to Solomon ..."
82
+ result, count = @parser.snippet(text, 5)
83
+ result.should == "Kimball was born to Solomon"
84
+ count.should == 5
166
85
  end
86
+
167
87
  it "should not crash if string is nil" do
168
- text = nil
169
- @parser.snippet(text, 5, '...').length.should == 0
88
+ result, count = @parser.snippet(nil, 5)
89
+ result.should == ''
90
+ count.should == 0
170
91
  end
171
92
  end
172
93
 
173
94
  describe "is_blank?" do
95
+ before do
96
+ @parser = Sumitup::Parser.new
97
+ end
174
98
  it "should be true if text is nil" do
175
99
  @parser.is_blank?(nil).should be_true
176
100
  end
101
+
177
102
  it "should be true if text is ''" do
178
103
  @parser.is_blank?('').should be_true
179
104
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "sumitup"
8
- s.version = "0.1.1"
8
+ s.version = "0.1.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Ball"]
12
- s.date = "2012-03-02"
12
+ s.date = "2012-03-06"
13
13
  s.description = "Given an html document or fragment this gem will build a summary of the content."
14
14
  s.email = "justinball@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -28,6 +28,9 @@ Gem::Specification.new do |s|
28
28
  "VERSION",
29
29
  "lib/sumitup.rb",
30
30
  "lib/sumitup/parser.rb",
31
+ "spec/fixtures/basic.html",
32
+ "spec/fixtures/justin.html",
33
+ "spec/fixtures/wikipedia.html",
31
34
  "spec/spec_helper.rb",
32
35
  "spec/sumitup/parser_spec.rb",
33
36
  "sumitup.gemspec"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sumitup
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 1
10
- version: 0.1.1
9
+ - 2
10
+ version: 0.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Justin Ball
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-02 00:00:00 Z
18
+ date: 2012-03-06 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime
@@ -205,6 +205,9 @@ files:
205
205
  - VERSION
206
206
  - lib/sumitup.rb
207
207
  - lib/sumitup/parser.rb
208
+ - spec/fixtures/basic.html
209
+ - spec/fixtures/justin.html
210
+ - spec/fixtures/wikipedia.html
208
211
  - spec/spec_helper.rb
209
212
  - spec/sumitup/parser_spec.rb
210
213
  - sumitup.gemspec