sumitup 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'sanitize'
4
+
5
+ # Add dependencies to develop your gem here.
6
+ # Include everything needed to run rake, tests, features, etc.
7
+ group :development, :test do
8
+ gem 'growl'
9
+ gem "rspec", "~> 2.8.0"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.0.0"
12
+ gem "jeweler", "~> 1.8.3"
13
+ gem "rcov", ">= 0"
14
+ gem "guard", ">=1.0.0"
15
+ gem "guard-rspec", ">= 0.6.0"
16
+ gem "guard-bundler", ">= 0.1.3"
17
+ gem "ruby-debug"
18
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,62 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ columnize (0.3.5)
5
+ diff-lcs (1.1.3)
6
+ ffi (1.0.11)
7
+ git (1.2.5)
8
+ growl (1.0.3)
9
+ guard (1.0.0)
10
+ ffi (>= 0.5.0)
11
+ thor (~> 0.14.6)
12
+ guard-bundler (0.1.3)
13
+ bundler (>= 1.0.0)
14
+ guard (>= 0.2.2)
15
+ guard-rspec (0.6.0)
16
+ guard (>= 0.10.0)
17
+ jeweler (1.8.3)
18
+ bundler (~> 1.0)
19
+ git (>= 1.2.5)
20
+ rake
21
+ rdoc
22
+ json (1.6.5)
23
+ linecache (0.46)
24
+ rbx-require-relative (> 0.0.4)
25
+ nokogiri (1.5.0)
26
+ rake (0.9.2.2)
27
+ rbx-require-relative (0.0.5)
28
+ rcov (1.0.0)
29
+ rdoc (3.12)
30
+ json (~> 1.4)
31
+ rspec (2.8.0)
32
+ rspec-core (~> 2.8.0)
33
+ rspec-expectations (~> 2.8.0)
34
+ rspec-mocks (~> 2.8.0)
35
+ rspec-core (2.8.0)
36
+ rspec-expectations (2.8.0)
37
+ diff-lcs (~> 1.1.2)
38
+ rspec-mocks (2.8.0)
39
+ ruby-debug (0.10.4)
40
+ columnize (>= 0.1)
41
+ ruby-debug-base (~> 0.10.4.0)
42
+ ruby-debug-base (0.10.4)
43
+ linecache (>= 0.3)
44
+ sanitize (2.0.3)
45
+ nokogiri (>= 1.4.4, < 1.6)
46
+ thor (0.14.6)
47
+
48
+ PLATFORMS
49
+ ruby
50
+
51
+ DEPENDENCIES
52
+ bundler (~> 1.0.0)
53
+ growl
54
+ guard (>= 1.0.0)
55
+ guard-bundler (>= 0.1.3)
56
+ guard-rspec (>= 0.6.0)
57
+ jeweler (~> 1.8.3)
58
+ rcov
59
+ rdoc (~> 3.12)
60
+ rspec (~> 2.8.0)
61
+ ruby-debug
62
+ sanitize
data/Guardfile ADDED
@@ -0,0 +1,12 @@
1
+ guard 'bundler' do
2
+ watch('Gemfile')
3
+ watch(/^.+\.gemspec/)
4
+ end
5
+
6
+ guard 'rspec', :version => 2, :all_after_pass => false do
7
+ watch(%r{^spec/.+_spec\.rb$})
8
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
9
+ watch('spec/spec_helper.rb') { "spec" }
10
+ watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
11
+ watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
12
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Tatemae.com
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,7 @@
1
+ = sumitup
2
+
3
+ Given an HTML document or fragment, this gem will build a summary of its contents.
4
+
5
+ == Copyright
6
+
7
+ Copyright (c) 2012 Tatemae. See LICENSE.txt for further details.
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "sumitup"
18
+ gem.homepage = "http://github.com/tatemae/sumitup"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Generate a summary of html content}
21
+ gem.description = %Q{Given an html document or fragment this gem will build a summary of the content.}
22
+ gem.email = "justinball@gmail.com"
23
+ gem.authors = ["Justin Ball"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rdoc/task'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "sumitup #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/lib/sumitup.rb ADDED
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH << File.dirname(__FILE__) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
+
3
+ require 'sanitize'
4
+
5
+ # Get index status:
6
+ # curl -XGET 'http://localhost:9200/_status'
7
+ module Sumitup
8
+
9
+ end
10
+
11
+ require 'sumitup/parser'
@@ -0,0 +1,132 @@
1
+ module Sumitup
2
+ class Parser
3
+
4
+ IMAGE_WIDTH_LIMIT = 230
5
+
6
+ attr_accessor :word_count, :max_words
7
+ attr_accessor :image_count, :image_width_limit, :max_images
8
+ attr_accessor :elements, :attributes, :protocols, :remove_contents
9
+ attr_accessor :omission
10
+
11
+ def initialize(options = {})
12
+
13
+ self.omission = options[:omission] || ''
14
+
15
+ self.word_count = options[:word_count] || 0
16
+ self.max_words = options[:max_words] || 100
17
+
18
+ self.image_count = options[:image_count] || 0
19
+ self.image_width_limit = options[:image_width_limit] || 230
20
+ self.max_images = options[:max_images] || 2
21
+
22
+ self.elements = options[:elements] || %w(
23
+ a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre
24
+ q s samp small strike strong sub sup time u ul var img span
25
+ )
26
+
27
+ self.attributes = options[:attributes] || {
28
+ 'a' => ['href', 'title'],
29
+ 'blockquote' => ['cite'],
30
+ 'img' => ['alt', 'src', 'title', 'width', 'height']
31
+ }
32
+
33
+ self.protocols = options[:protocols] || {
34
+ 'a' => {'href' => ['http', 'https', 'mailto']}
35
+ }
36
+
37
+ self.remove_contents = options[:remove_contents] || %w(
38
+ style script
39
+ )
40
+
41
+ end
42
+
43
+ # Removes html and generates a summary
44
+ def summarize(html, max = nil)
45
+ return '' if is_blank?(html)
46
+
47
+ self.max_words = max unless max.nil?
48
+
49
+ Sanitize.clean(html,
50
+ :elements => elements,
51
+ :attributes => attributes,
52
+ :protocols => protocols,
53
+ :remove_contents => remove_contents,
54
+ :transformers => [no_display_transformer, empty_transformer],
55
+ :transformers_breadth => [summarizer, image_transformer])
56
+ end
57
+
58
+ def summarizer
59
+ me = self
60
+ lambda do |env|
61
+
62
+ node = env[:node]
63
+
64
+ return if !node.element?
65
+
66
+ if node.text? || (node.children && node.children.first && node.children.first.text?)
67
+ if me.word_count > me.max_words
68
+ # if we are already over then just remove the item
69
+ node.remove
70
+ else
71
+ # if the text of the current node makes us go over then truncate it
72
+ node.text.scan(/\b\S+\b/) { me.word_count += 1 }
73
+ if me.word_count > me.max_words
74
+ node.content = snippet(node.text, me.max_words, '...')
75
+ end
76
+ end
77
+ end
78
+
79
+ end
80
+ end
81
+
82
+ def image_transformer
83
+ me = self
84
+ lambda do |env|
85
+ node = env[:node]
86
+ if ['img'].include?(env[:node_name])
87
+ me.image_count += 1
88
+ if me.image_count > me.max_images
89
+ node.remove
90
+ else
91
+ # Force width of images
92
+ node.attributes['width'].value = me.image_width_limit.to_s
93
+ node.attributes['height'].remove
94
+ end
95
+ end
96
+ end
97
+ end
98
+
99
+ def empty_transformer
100
+ lambda do |env|
101
+ node = env[:node]
102
+ if node.text.empty? && node.children.empty? && !['img', 'br'].include?(env[:node_name])
103
+ node.remove
104
+ end
105
+ end
106
+ end
107
+
108
+ def no_display_transformer
109
+ lambda do |env|
110
+ node = env[:node]
111
+ if node['style'] && node['style'] =~ /display\s*:\s*none/
112
+ node.remove
113
+ end
114
+ end
115
+ end
116
+
117
+ # Truncates text at a word boundary
118
+ # Parameters:
119
+ # text - The text to truncate
120
+ # wordcount - The number of words
121
+ # omission - Text to add when the text is truncated, e.g. 'read more' or '...'
122
+ def snippet(text, wordcount, omission)
123
+ return '' if is_blank?(text)
124
+ text.split[0..(wordcount-1)].join(" ") + (text.split.size > wordcount ? " " + omission : "")
125
+ end
126
+
127
+ def is_blank?(text)
128
+ text.nil? || text.empty?
129
+ end
130
+
131
+ end
132
+ end
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'sumitup'
5
+ require 'ruby-debug'
6
+ # Requires supporting ruby files with custom matchers and macros, etc,
7
+ # in spec/support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+ config.mock_with :rspec
12
+ config.treat_symbols_as_metadata_keys_with_true_values = true
13
+ config.filter_run :focus => true # Add a :focus tag to a given spec and only that spec will be run instead of all of them
14
+ config.run_all_when_everything_filtered = true
15
+ end
@@ -0,0 +1,181 @@
1
+ require 'spec_helper'
2
+
3
+ describe Sumitup::Parser do
4
+ before do
5
+ @image_width_limit = 200
6
+ @parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => @image_width_limit)
7
+ end
8
+ describe "summarize" do
9
+ before do
10
+ @html = %Q{
11
+ <div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
12
+ <p style="display:none;">Can't see this!</p>
13
+ <p></p>
14
+ <p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
15
+ bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
16
+ ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
17
+ The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
18
+ <p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
19
+ uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
20
+ While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
21
+ healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
22
+ me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
23
+ <p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
24
+ youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
25
+ something so you can't see it." She's 4 so suck it up.</p>
26
+ <img src="http://www.example.com/test.jpg" width="600" height="600" />
27
+ <p>
28
+ <style type="text/css">
29
+ .gallery {
30
+ margin: auto;
31
+ }
32
+ .gallery-item {
33
+ float: left;
34
+ margin-top: 10px;
35
+ text-align: center;
36
+ width: 33%; }
37
+ .gallery img {
38
+ border: 2px solid #cfcfcf;
39
+ }
40
+ .gallery-caption {
41
+ margin-left: 0;
42
+ }
43
+ </style>
44
+ <!-- see gallery_shortcode() in wp-includes/media.php -->
45
+ </p><div class="gallery"><dl class="gallery-item">
46
+ <dt class="gallery-icon">
47
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
48
+ </dt>
49
+ <dd class="gallery-caption">
50
+ Flesh always loses against asphalt
51
+ </dd></dl><dl class="gallery-item">
52
+ <dt class="gallery-icon">
53
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
54
+ </dt>
55
+ <dd class="gallery-caption">
56
+ My leg is straight so it is harder to see, but if I bend it you can see the tendons
57
+ </dd></dl><dl class="gallery-item">
58
+ <dt class="gallery-icon">
59
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
60
+ </dt>
61
+ <dd class="gallery-caption">
62
+ Drugs make you happy
63
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
64
+ <dt class="gallery-icon">
65
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
66
+ </dt>
67
+ <dd class="gallery-caption">
68
+ Joel stuck around to offer moral support
69
+ </dd></dl><dl class="gallery-item">
70
+ <dt class="gallery-icon">
71
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
72
+ </dt>
73
+ <dd class="gallery-caption">
74
+ After they cleaned it up
75
+ </dd></dl><dl class="gallery-item">
76
+ <dt class="gallery-icon">
77
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
78
+ </dt>
79
+ <dd class="gallery-caption">
80
+ This isn't as much fun as it looks
81
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
82
+ <dt class="gallery-icon">
83
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
84
+ </dt>
85
+ <dd class="gallery-caption">
86
+ Irrigating the wound - like its a crop or something
87
+ </dd></dl><dl class="gallery-item">
88
+ <dt class="gallery-icon">
89
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
90
+ </dt>
91
+ <dd class="gallery-caption">
92
+ After they took the bandage off the first time - 5 days later
93
+ </dd></dl><dl class="gallery-item">
94
+ <dt class="gallery-icon">
95
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
96
+ </dt>
97
+ <dd class="gallery-caption">
98
+ After they took the bandage off the first time - 5 days later
99
+ </dd></dl><br style="clear: both"><dl class="gallery-item">
100
+ <dt class="gallery-icon">
101
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
102
+ </dt>
103
+ <dd class="gallery-caption">
104
+ After 7 days. Still not pretty, but it is amazing how the human body heals
105
+ </dd></dl><dl class="gallery-item">
106
+ <dt class="gallery-icon">
107
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
108
+ </dt>
109
+ <dd class="gallery-caption">
110
+ This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
111
+ </dd></dl><dl class="gallery-item">
112
+ <dt class="gallery-icon">
113
+ <a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
114
+ </dt>
115
+ <dd class="gallery-caption">
116
+ Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
117
+ </dd></dl><br style="clear: both">
118
+ <br style="clear: both;">
119
+ </div>
120
+ <br>
121
+ <!--more--><!-- BlogGlue Cache: No --><p></p>
122
+ </div>}
123
+ @short_result = @parser.summarize(@html, 5)
124
+ @long_result = @parser.summarize(@html, 100000)
125
+ end
126
+ it "should summarize the content by number of words" do
127
+ @short_result.should_not include('than')
128
+ @short_result.should include('more')
129
+ end
130
+ it "should remove html comments" do
131
+ @short_result.should_not include('<!--more--><!-- BlogGlue Cache: No -->')
132
+ end
133
+ it "should keep the image tag" do
134
+ @long_result.should include(%Q{<img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="#{@image_width_limit}" title="" alt="">})
135
+ end
136
+ it "should remove the style tag" do
137
+ @long_result.should_not include('<style type="text/css">')
138
+ end
139
+ it "should remove empty tags" do
140
+ @long_result.should_not include('<p></p>')
141
+ end
142
+ it "should remove tags with display:none" do
143
+ @long_result.should_not include('display:none')
144
+ end
145
+ it "should set the width to 240 if width is greater than 240" do
146
+ parser = Sumitup::Parser.new(:image_width_limit => 240)
147
+ result = parser.summarize(@html, 10000)
148
+ result.should include('<img src="http://www.example.com/test.jpg" width="240">')
149
+ end
150
+ it "should only allow 2 images" do
151
+ parser = Sumitup::Parser.new(:max_images => 2)
152
+ result = parser.summarize(@html, 10000)
153
+ doc = Nokogiri::HTML(result)
154
+ doc.css('img').length.should == 2
155
+ end
156
+ end
157
+
158
+ describe "snippet" do
159
+ it "should build a string 157 chars long" do
160
+ text = "Kimball was born to Solomon Farnham Kimball and Anna Spaulding in Sheldon, Franklin County, Vermont. Kimball's forefathers arrived in America from England and started"
161
+ @parser.snippet(text, 5, '...').should == "Kimball was born to Solomon ..."
162
+ end
163
+ it "should not crash if string is nil" do
164
+ text = nil
165
+ @parser.snippet(text, 5, '...').length.should == 0
166
+ end
167
+ end
168
+
169
+ describe "is_blank?" do
170
+ it "should be true if text is nil" do
171
+ @parser.is_blank?(nil).should be_true
172
+ end
173
+ it "should be true if text is ''" do
174
+ @parser.is_blank?('').should be_true
175
+ end
176
+ it "should be false if text is 'valid'" do
177
+ @parser.is_blank?('valid').should be_false
178
+ end
179
+ end
180
+
181
+ end
metadata ADDED
@@ -0,0 +1,244 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sumitup
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Justin Ball
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-03-02 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ type: :runtime
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ version_requirements: *id001
32
+ name: sanitize
33
+ prerelease: false
34
+ - !ruby/object:Gem::Dependency
35
+ type: :development
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ hash: 3
42
+ segments:
43
+ - 0
44
+ version: "0"
45
+ version_requirements: *id002
46
+ name: growl
47
+ prerelease: false
48
+ - !ruby/object:Gem::Dependency
49
+ type: :development
50
+ requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ hash: 47
56
+ segments:
57
+ - 2
58
+ - 8
59
+ - 0
60
+ version: 2.8.0
61
+ version_requirements: *id003
62
+ name: rspec
63
+ prerelease: false
64
+ - !ruby/object:Gem::Dependency
65
+ type: :development
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ hash: 31
72
+ segments:
73
+ - 3
74
+ - 12
75
+ version: "3.12"
76
+ version_requirements: *id004
77
+ name: rdoc
78
+ prerelease: false
79
+ - !ruby/object:Gem::Dependency
80
+ type: :development
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ~>
85
+ - !ruby/object:Gem::Version
86
+ hash: 23
87
+ segments:
88
+ - 1
89
+ - 0
90
+ - 0
91
+ version: 1.0.0
92
+ version_requirements: *id005
93
+ name: bundler
94
+ prerelease: false
95
+ - !ruby/object:Gem::Dependency
96
+ type: :development
97
+ requirement: &id006 !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ~>
101
+ - !ruby/object:Gem::Version
102
+ hash: 49
103
+ segments:
104
+ - 1
105
+ - 8
106
+ - 3
107
+ version: 1.8.3
108
+ version_requirements: *id006
109
+ name: jeweler
110
+ prerelease: false
111
+ - !ruby/object:Gem::Dependency
112
+ type: :development
113
+ requirement: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ version_requirements: *id007
123
+ name: rcov
124
+ prerelease: false
125
+ - !ruby/object:Gem::Dependency
126
+ type: :development
127
+ requirement: &id008 !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ hash: 23
133
+ segments:
134
+ - 1
135
+ - 0
136
+ - 0
137
+ version: 1.0.0
138
+ version_requirements: *id008
139
+ name: guard
140
+ prerelease: false
141
+ - !ruby/object:Gem::Dependency
142
+ type: :development
143
+ requirement: &id009 !ruby/object:Gem::Requirement
144
+ none: false
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ hash: 7
149
+ segments:
150
+ - 0
151
+ - 6
152
+ - 0
153
+ version: 0.6.0
154
+ version_requirements: *id009
155
+ name: guard-rspec
156
+ prerelease: false
157
+ - !ruby/object:Gem::Dependency
158
+ type: :development
159
+ requirement: &id010 !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ">="
163
+ - !ruby/object:Gem::Version
164
+ hash: 29
165
+ segments:
166
+ - 0
167
+ - 1
168
+ - 3
169
+ version: 0.1.3
170
+ version_requirements: *id010
171
+ name: guard-bundler
172
+ prerelease: false
173
+ - !ruby/object:Gem::Dependency
174
+ type: :development
175
+ requirement: &id011 !ruby/object:Gem::Requirement
176
+ none: false
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ hash: 3
181
+ segments:
182
+ - 0
183
+ version: "0"
184
+ version_requirements: *id011
185
+ name: ruby-debug
186
+ prerelease: false
187
+ description: Given an html document or fragment this gem will build a summary of the content.
188
+ email: justinball@gmail.com
189
+ executables: []
190
+
191
+ extensions: []
192
+
193
+ extra_rdoc_files:
194
+ - LICENSE.txt
195
+ - README.rdoc
196
+ files:
197
+ - .document
198
+ - .rspec
199
+ - Gemfile
200
+ - Gemfile.lock
201
+ - Guardfile
202
+ - LICENSE.txt
203
+ - README.rdoc
204
+ - Rakefile
205
+ - VERSION
206
+ - lib/sumitup.rb
207
+ - lib/sumitup/parser.rb
208
+ - spec/spec_helper.rb
209
+ - spec/sumitup/parser_spec.rb
210
+ homepage: http://github.com/tatemae/sumitup
211
+ licenses:
212
+ - MIT
213
+ post_install_message:
214
+ rdoc_options: []
215
+
216
+ require_paths:
217
+ - lib
218
+ required_ruby_version: !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ">="
222
+ - !ruby/object:Gem::Version
223
+ hash: 3
224
+ segments:
225
+ - 0
226
+ version: "0"
227
+ required_rubygems_version: !ruby/object:Gem::Requirement
228
+ none: false
229
+ requirements:
230
+ - - ">="
231
+ - !ruby/object:Gem::Version
232
+ hash: 3
233
+ segments:
234
+ - 0
235
+ version: "0"
236
+ requirements: []
237
+
238
+ rubyforge_project:
239
+ rubygems_version: 1.8.12
240
+ signing_key:
241
+ specification_version: 3
242
+ summary: Generate a summary of html content
243
+ test_files: []
244
+