sumitup 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'sanitize'
4
+
5
+ # Add dependencies to develop your gem here.
6
+ # Include everything needed to run rake, tests, features, etc.
7
+ group :development, :test do
8
+ gem 'growl'
9
+ gem "rspec", "~> 2.8.0"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.0.0"
12
+ gem "jeweler", "~> 1.8.3"
13
+ gem "rcov", ">= 0"
14
+ gem "guard", ">=1.0.0"
15
+ gem "guard-rspec", ">= 0.6.0"
16
+ gem "guard-bundler", ">= 0.1.3"
17
+ gem "ruby-debug"
18
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,62 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ columnize (0.3.5)
5
+ diff-lcs (1.1.3)
6
+ ffi (1.0.11)
7
+ git (1.2.5)
8
+ growl (1.0.3)
9
+ guard (1.0.0)
10
+ ffi (>= 0.5.0)
11
+ thor (~> 0.14.6)
12
+ guard-bundler (0.1.3)
13
+ bundler (>= 1.0.0)
14
+ guard (>= 0.2.2)
15
+ guard-rspec (0.6.0)
16
+ guard (>= 0.10.0)
17
+ jeweler (1.8.3)
18
+ bundler (~> 1.0)
19
+ git (>= 1.2.5)
20
+ rake
21
+ rdoc
22
+ json (1.6.5)
23
+ linecache (0.46)
24
+ rbx-require-relative (> 0.0.4)
25
+ nokogiri (1.5.0)
26
+ rake (0.9.2.2)
27
+ rbx-require-relative (0.0.5)
28
+ rcov (1.0.0)
29
+ rdoc (3.12)
30
+ json (~> 1.4)
31
+ rspec (2.8.0)
32
+ rspec-core (~> 2.8.0)
33
+ rspec-expectations (~> 2.8.0)
34
+ rspec-mocks (~> 2.8.0)
35
+ rspec-core (2.8.0)
36
+ rspec-expectations (2.8.0)
37
+ diff-lcs (~> 1.1.2)
38
+ rspec-mocks (2.8.0)
39
+ ruby-debug (0.10.4)
40
+ columnize (>= 0.1)
41
+ ruby-debug-base (~> 0.10.4.0)
42
+ ruby-debug-base (0.10.4)
43
+ linecache (>= 0.3)
44
+ sanitize (2.0.3)
45
+ nokogiri (>= 1.4.4, < 1.6)
46
+ thor (0.14.6)
47
+
48
+ PLATFORMS
49
+ ruby
50
+
51
+ DEPENDENCIES
52
+ bundler (~> 1.0.0)
53
+ growl
54
+ guard (>= 1.0.0)
55
+ guard-bundler (>= 0.1.3)
56
+ guard-rspec (>= 0.6.0)
57
+ jeweler (~> 1.8.3)
58
+ rcov
59
+ rdoc (~> 3.12)
60
+ rspec (~> 2.8.0)
61
+ ruby-debug
62
+ sanitize
data/Guardfile ADDED
@@ -0,0 +1,12 @@
1
+ guard 'bundler' do
2
+ watch('Gemfile')
3
+ watch(/^.+\.gemspec/)
4
+ end
5
+
6
+ guard 'rspec', :version => 2, :all_after_pass => false do
7
+ watch(%r{^spec/.+_spec\.rb$})
8
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
9
+ watch('spec/spec_helper.rb') { "spec" }
10
+ watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
11
+ watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
12
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Tatemae.com
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,7 @@
1
+ = sumitup
2
+
3
+ Given an HTML document or fragment, this gem builds a summary of its contents.
4
+
5
+ == Copyright
6
+
7
+ Copyright (c) 2012 Tatemae. See LICENSE.txt for further details.
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "sumitup"
18
+ gem.homepage = "http://github.com/tatemae/sumitup"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Generate a summary of html content}
21
+ gem.description = %Q{Given an html document or fragment this gem will build a summary of the content.}
22
+ gem.email = "justinball@gmail.com"
23
+ gem.authors = ["Justin Ball"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rdoc/task'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "sumitup #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/lib/sumitup.rb ADDED
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH << File.dirname(__FILE__) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
+
3
+ require 'sanitize'
4
+
5
+ # Get index status:
6
+ # curl -XGET 'http://localhost:9200/_status'
7
+ module Sumitup
8
+
9
+ end
10
+
11
+ require 'sumitup/parser'
@@ -0,0 +1,132 @@
1
module Sumitup
  # Builds a shortened, sanitized version of an HTML document or fragment.
  #
  # The markup is passed through Sanitize.clean with a whitelist of elements,
  # attributes and protocols, plus four transformers defined below that:
  # * drop nodes styled with display:none,
  # * drop empty nodes,
  # * cap the number of words kept (truncating the node that crosses the limit),
  # * cap the number of images kept and force their width.
  #
  # NOTE(review): word_count and image_count are not reset by #summarize, so
  # they carry over when the same Parser instance is used for several calls.
  # Confirm whether that is intended; create a new Parser per document if not.
  class Parser

    # Default value written to the width attribute of every image that is kept.
    IMAGE_WIDTH_LIMIT = 230

    attr_accessor :word_count, :max_words
    attr_accessor :image_count, :image_width_limit, :max_images
    attr_accessor :elements, :attributes, :protocols, :remove_contents
    attr_accessor :omission

    # options - Hash of optional settings (all keys are symbols):
    #   :omission          - text appended to truncated content (default '...')
    #   :word_count        - starting value of the word counter (default 0)
    #   :max_words         - word limit for the summary (default 100)
    #   :image_count       - starting value of the image counter (default 0)
    #   :image_width_limit - width forced on kept images (default IMAGE_WIDTH_LIMIT)
    #   :max_images        - number of images to keep (default 2)
    #   :elements, :attributes, :protocols, :remove_contents
    #                      - passed to Sanitize.clean under the same names
    def initialize(options = {})
      # '...' is the marker truncation has always appended, so the default
      # output stays the same while an explicit :omission is now honoured.
      self.omission = options[:omission] || '...'

      self.word_count = options[:word_count] || 0
      self.max_words = options[:max_words] || 100

      self.image_count = options[:image_count] || 0
      self.image_width_limit = options[:image_width_limit] || IMAGE_WIDTH_LIMIT
      self.max_images = options[:max_images] || 2

      self.elements = options[:elements] || %w(
        a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre
        q s samp small strike strong sub sup time u ul var img span
      )

      self.attributes = options[:attributes] || {
        'a' => ['href', 'title'],
        'blockquote' => ['cite'],
        'img' => ['alt', 'src', 'title', 'width', 'height']
      }

      self.protocols = options[:protocols] || {
        'a' => {'href' => ['http', 'https', 'mailto']}
      }

      self.remove_contents = options[:remove_contents] || %w(
        style script
      )
    end

    # Sanitizes +html+ and shortens it to a summary.
    #
    # html - String of HTML; nil or '' yields ''.
    # max  - optional word limit; when given it replaces max_words on this
    #        parser (the new value stays in effect for later calls).
    #
    # Returns the value of Sanitize.clean for the configured whitelist and
    # transformers.
    def summarize(html, max = nil)
      return '' if is_blank?(html)

      self.max_words = max unless max.nil?

      Sanitize.clean(html,
        :elements => elements,
        :attributes => attributes,
        :protocols => protocols,
        :remove_contents => remove_contents,
        :transformers => [no_display_transformer, empty_transformer],
        :transformers_breadth => [summarizer, image_transformer])
    end

    # Returns a transformer lambda that enforces the word limit.
    #
    # Only element nodes whose first child is a text node are considered.
    # Once the limit has been reached such nodes are removed; the node that
    # crosses the limit has its content replaced by a snippet holding only the
    # words still available, followed by the omission text.
    def summarizer
      me = self
      lambda do |env|
        node = env[:node]
        next unless node.element?

        first_child = node.children && node.children.first
        next unless first_child && first_child.text?

        if me.word_count >= me.max_words
          # The budget is already used up, so drop the node entirely.
          node.remove
        else
          remaining = me.max_words - me.word_count
          me.word_count += node.text.scan(/\b\S+\b/).size
          if me.word_count > me.max_words
            # This node crosses the limit: keep only the words that still fit.
            node.content = me.snippet(node.text, remaining, me.omission)
          end
        end

        nil
      end
    end

    # Returns a transformer lambda that limits and resizes images.
    #
    # Every img node increments image_count. Images beyond max_images are
    # removed; the others get width set to image_width_limit and lose their
    # height attribute. Images lacking either attribute are handled too.
    def image_transformer
      me = self
      lambda do |env|
        next unless env[:node_name] == 'img'

        node = env[:node]
        me.image_count += 1
        if me.image_count > me.max_images
          node.remove
        else
          # Force width of images
          node['width'] = me.image_width_limit.to_s
          node.remove_attribute('height')
        end

        nil
      end
    end

    # Returns a transformer lambda that removes nodes with no text and no
    # children, except img and br.
    def empty_transformer
      lambda do |env|
        node = env[:node]
        keep_when_empty = ['img', 'br'].include?(env[:node_name])
        if node.text.empty? && node.children.empty? && !keep_when_empty
          node.remove
        end

        nil
      end
    end

    # Returns a transformer lambda that removes nodes whose style attribute
    # contains display:none.
    def no_display_transformer
      lambda do |env|
        node = env[:node]
        style = node['style']
        node.remove if style && style =~ /display\s*:\s*none/

        nil
      end
    end

    # Truncates text at a word boundary.
    # Parameters:
    #   text - The text to truncate
    #   wordcount - The number of words to keep (values below 0 count as 0)
    #   omission - Text to add when the text is truncated, e.g. 'read more' or '...'
    # Returns '' for nil or empty text. Kept words are joined with single
    # spaces; the omission is appended only when words were dropped.
    def snippet(text, wordcount, omission)
      return '' if is_blank?(text)

      words = text.split
      limit = [wordcount, 0].max
      return words.join(' ') if words.size <= limit

      # first(limit) also handles limit == 0, where a 0..-1 range would
      # select every word instead of none.
      (words.first(limit) << omission.to_s).join(' ')
    end

    # True when text is nil or an empty string.
    def is_blank?(text)
      text.nil? || text.empty?
    end

  end
end
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'sumitup'
5
+ require 'ruby-debug'
6
+ # Requires supporting ruby files with custom matchers and macros, etc,
7
+ # in spec/support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+ config.mock_with :rspec
12
+ config.treat_symbols_as_metadata_keys_with_true_values = true
13
+ config.filter_run :focus => true # Add a :focus tag to a given spec only that spec will be run instead of all of them
14
+ config.run_all_when_everything_filtered = true
15
+ end
@@ -0,0 +1,181 @@
1
+ require 'spec_helper'
2
+
3
+ describe Sumitup::Parser do
4
+ before do
5
+ @image_width_limit = 200
6
+ @parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => @image_width_limit)
7
+ end
8
+ describe "summarize" do
9
+ before do
10
+ @html = %Q{
11
+ <div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
12
+ <p style="display:none;">Can't see this!</p>
13
+ <p></p>
14
+ <p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
15
+ bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
16
+ ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
17
+ The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
18
+ <p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
19
+ uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
20
+ While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
21
+ healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
22
+ me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
23
+ <p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
24
+ youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
25
+ something so you can't see it." She's 4 so suck it up.</p>
26
+ <img src="http://www.example.com/test.jpg" width="600" height="600" />
27
+ <p>
28
+ <style type="text/css">
29
+ .gallery {
30
+ margin: auto;
31
+ }
32
+ .gallery-item {
33
+ float: left;
34
+ margin-top: 10px;
35
+ text-align: center;
36
+ width: 33%; }
37
+ .gallery img {
38
+ border: 2px solid #cfcfcf;
39
+ }
40
+ .gallery-caption {
41
+ margin-left: 0;
42
+ }
43
+ </style>
44
+ <!-- see gallery_shortcode() in wp-includes/media.php -->
45
+ </p><div class="gallery"><dl class="gallery-item">
46
+ <dt class="gallery-icon">
47
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
48
+ </dt>
49
+ <dd class="gallery-caption">
50
+ Flesh always loses against asphalt
51
+ </dd></dl><dl class="gallery-item">
52
+ <dt class="gallery-icon">
53
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
54
+ </dt>
55
+ <dd class="gallery-caption">
56
+ My leg is straight so it is harder to see, but if I bend it you can see the tendons
57
+ </dd></dl><dl class="gallery-item">
58
+ <dt class="gallery-icon">
59
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
60
+ </dt>
61
+ <dd class="gallery-caption">
62
+ Drugs make you happy
63
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
64
+ <dt class="gallery-icon">
65
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
66
+ </dt>
67
+ <dd class="gallery-caption">
68
+ Joel stuck around to offer moral support
69
+ </dd></dl><dl class="gallery-item">
70
+ <dt class="gallery-icon">
71
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
72
+ </dt>
73
+ <dd class="gallery-caption">
74
+ After they cleaned it up
75
+ </dd></dl><dl class="gallery-item">
76
+ <dt class="gallery-icon">
77
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
78
+ </dt>
79
+ <dd class="gallery-caption">
80
+ This isn't as much fun as it looks
81
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
82
+ <dt class="gallery-icon">
83
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
84
+ </dt>
85
+ <dd class="gallery-caption">
86
+ Irrigating the wound - like its a crop or something
87
+ </dd></dl><dl class="gallery-item">
88
+ <dt class="gallery-icon">
89
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
90
+ </dt>
91
+ <dd class="gallery-caption">
92
+ After they took the bandage off the first time - 5 days later
93
+ </dd></dl><dl class="gallery-item">
94
+ <dt class="gallery-icon">
95
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
96
+ </dt>
97
+ <dd class="gallery-caption">
98
+ After they took the bandage off the first time - 5 days later
99
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
100
+ <dt class="gallery-icon">
101
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
102
+ </dt>
103
+ <dd class="gallery-caption">
104
+ After 7 days. Still not pretty, but it is amazing how the human body heals
105
+ </dd></dl><dl class="gallery-item">
106
+ <dt class="gallery-icon">
107
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
108
+ </dt>
109
+ <dd class="gallery-caption">
110
+ This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
111
+ </dd></dl><dl class="gallery-item">
112
+ <dt class="gallery-icon">
113
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
114
+ </dt>
115
+ <dd class="gallery-caption">
116
+ Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
117
+ </dd></dl><br style="clear: both">
118
+ <br style="clear: both;">
119
+ </div>
120
+ <br>
121
+ <!--more--><!-- BlogGlue Cache: No --><p></p>
122
+ </div>}
123
+ @short_result = @parser.summarize(@html, 5)
124
+ @long_result = @parser.summarize(@html, 100000)
125
+ end
126
+ it "should summarize the content by number of words" do
127
+ @short_result.should_not include('than')
128
+ @short_result.should include('more')
129
+ end
130
+ it "should remove html comments" do
131
+ @short_result.should_not include('<!--more--><!-- BlogGlue Cache: No -->')
132
+ end
133
+ it "should keep the image tag" do
134
+ @long_result.should include(%Q{<img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="#{@image_width_limit}" title="" alt="">})
135
+ end
136
+ it "should remove the style tag" do
137
+ @long_result.should_not include('<style type="text/css">')
138
+ end
139
+ it "should remove empty tags" do
140
+ @long_result.should_not include('<p></p>')
141
+ end
142
+ it "should remove tags with display:none" do
143
+ @long_result.should_not include('display:none')
144
+ end
145
+ it "should set the width to 240 if width is greater than 240" do
146
+ parser = Sumitup::Parser.new(:image_width_limit => 240)
147
+ result = parser.summarize(@html, 10000)
148
+ result.should include('<img src="http://www.example.com/test.jpg" width="240">')
149
+ end
150
+ it "should only allow 2 images" do
151
+ parser = Sumitup::Parser.new(:max_images => 2)
152
+ result = parser.summarize(@html, 10000)
153
+ doc = Nokogiri::HTML(result)
154
+ doc.css('img').length.should == 2
155
+ end
156
+ end
157
+
158
+ describe "snippet" do
159
+ it "should build a string 157 chars long" do
160
+ text = "Kimball was born to Solomon Farnham Kimball and Anna Spaulding in Sheldon, Franklin County, Vermont. Kimball's forefathers arrived in America from England and started"
161
+ @parser.snippet(text, 5, '...').should == "Kimball was born to Solomon ..."
162
+ end
163
+ it "should not crash if string is nil" do
164
+ text = nil
165
+ @parser.snippet(text, 5, '...').length.should == 0
166
+ end
167
+ end
168
+
169
+ describe "is_blank?" do
170
+ it "should be true if text is nil" do
171
+ @parser.is_blank?(nil).should be_true
172
+ end
173
+ it "should be true if text is ''" do
174
+ @parser.is_blank?('').should be_true
175
+ end
176
+ it "should be false if text is 'valid'" do
177
+ @parser.is_blank?('valid').should be_false
178
+ end
179
+ end
180
+
181
+ end
metadata ADDED
@@ -0,0 +1,244 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sumitup
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Justin Ball
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-03-02 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ type: :runtime
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ version_requirements: *id001
32
+ name: sanitize
33
+ prerelease: false
34
+ - !ruby/object:Gem::Dependency
35
+ type: :development
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ hash: 3
42
+ segments:
43
+ - 0
44
+ version: "0"
45
+ version_requirements: *id002
46
+ name: growl
47
+ prerelease: false
48
+ - !ruby/object:Gem::Dependency
49
+ type: :development
50
+ requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ hash: 47
56
+ segments:
57
+ - 2
58
+ - 8
59
+ - 0
60
+ version: 2.8.0
61
+ version_requirements: *id003
62
+ name: rspec
63
+ prerelease: false
64
+ - !ruby/object:Gem::Dependency
65
+ type: :development
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ hash: 31
72
+ segments:
73
+ - 3
74
+ - 12
75
+ version: "3.12"
76
+ version_requirements: *id004
77
+ name: rdoc
78
+ prerelease: false
79
+ - !ruby/object:Gem::Dependency
80
+ type: :development
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ~>
85
+ - !ruby/object:Gem::Version
86
+ hash: 23
87
+ segments:
88
+ - 1
89
+ - 0
90
+ - 0
91
+ version: 1.0.0
92
+ version_requirements: *id005
93
+ name: bundler
94
+ prerelease: false
95
+ - !ruby/object:Gem::Dependency
96
+ type: :development
97
+ requirement: &id006 !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ~>
101
+ - !ruby/object:Gem::Version
102
+ hash: 49
103
+ segments:
104
+ - 1
105
+ - 8
106
+ - 3
107
+ version: 1.8.3
108
+ version_requirements: *id006
109
+ name: jeweler
110
+ prerelease: false
111
+ - !ruby/object:Gem::Dependency
112
+ type: :development
113
+ requirement: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ version_requirements: *id007
123
+ name: rcov
124
+ prerelease: false
125
+ - !ruby/object:Gem::Dependency
126
+ type: :development
127
+ requirement: &id008 !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ hash: 23
133
+ segments:
134
+ - 1
135
+ - 0
136
+ - 0
137
+ version: 1.0.0
138
+ version_requirements: *id008
139
+ name: guard
140
+ prerelease: false
141
+ - !ruby/object:Gem::Dependency
142
+ type: :development
143
+ requirement: &id009 !ruby/object:Gem::Requirement
144
+ none: false
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ hash: 7
149
+ segments:
150
+ - 0
151
+ - 6
152
+ - 0
153
+ version: 0.6.0
154
+ version_requirements: *id009
155
+ name: guard-rspec
156
+ prerelease: false
157
+ - !ruby/object:Gem::Dependency
158
+ type: :development
159
+ requirement: &id010 !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ">="
163
+ - !ruby/object:Gem::Version
164
+ hash: 29
165
+ segments:
166
+ - 0
167
+ - 1
168
+ - 3
169
+ version: 0.1.3
170
+ version_requirements: *id010
171
+ name: guard-bundler
172
+ prerelease: false
173
+ - !ruby/object:Gem::Dependency
174
+ type: :development
175
+ requirement: &id011 !ruby/object:Gem::Requirement
176
+ none: false
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ hash: 3
181
+ segments:
182
+ - 0
183
+ version: "0"
184
+ version_requirements: *id011
185
+ name: ruby-debug
186
+ prerelease: false
187
+ description: Given an html document or fragment this gem will build a summary of the content.
188
+ email: justinball@gmail.com
189
+ executables: []
190
+
191
+ extensions: []
192
+
193
+ extra_rdoc_files:
194
+ - LICENSE.txt
195
+ - README.rdoc
196
+ files:
197
+ - .document
198
+ - .rspec
199
+ - Gemfile
200
+ - Gemfile.lock
201
+ - Guardfile
202
+ - LICENSE.txt
203
+ - README.rdoc
204
+ - Rakefile
205
+ - VERSION
206
+ - lib/sumitup.rb
207
+ - lib/sumitup/parser.rb
208
+ - spec/spec_helper.rb
209
+ - spec/sumitup/parser_spec.rb
210
+ homepage: http://github.com/tatemae/sumitup
211
+ licenses:
212
+ - MIT
213
+ post_install_message:
214
+ rdoc_options: []
215
+
216
+ require_paths:
217
+ - lib
218
+ required_ruby_version: !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ">="
222
+ - !ruby/object:Gem::Version
223
+ hash: 3
224
+ segments:
225
+ - 0
226
+ version: "0"
227
+ required_rubygems_version: !ruby/object:Gem::Requirement
228
+ none: false
229
+ requirements:
230
+ - - ">="
231
+ - !ruby/object:Gem::Version
232
+ hash: 3
233
+ segments:
234
+ - 0
235
+ version: "0"
236
+ requirements: []
237
+
238
+ rubyforge_project:
239
+ rubygems_version: 1.8.12
240
+ signing_key:
241
+ specification_version: 3
242
+ summary: Generate a summary of html content
243
+ test_files: []
244
+