sumitup 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +62 -0
- data/Guardfile +12 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +7 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/sumitup.rb +11 -0
- data/lib/sumitup/parser.rb +132 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/sumitup/parser_spec.rb +181 -0
- metadata +244 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
gem 'sanitize'
|
4
|
+
|
5
|
+
# Add dependencies to develop your gem here.
|
6
|
+
# Include everything needed to run rake, tests, features, etc.
|
7
|
+
group :development, :test do
|
8
|
+
gem 'growl'
|
9
|
+
gem "rspec", "~> 2.8.0"
|
10
|
+
gem "rdoc", "~> 3.12"
|
11
|
+
gem "bundler", "~> 1.0.0"
|
12
|
+
gem "jeweler", "~> 1.8.3"
|
13
|
+
gem "rcov", ">= 0"
|
14
|
+
gem "guard", ">=1.0.0"
|
15
|
+
gem "guard-rspec", ">= 0.6.0"
|
16
|
+
gem "guard-bundler", ">= 0.1.3"
|
17
|
+
gem "ruby-debug"
|
18
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
columnize (0.3.5)
|
5
|
+
diff-lcs (1.1.3)
|
6
|
+
ffi (1.0.11)
|
7
|
+
git (1.2.5)
|
8
|
+
growl (1.0.3)
|
9
|
+
guard (1.0.0)
|
10
|
+
ffi (>= 0.5.0)
|
11
|
+
thor (~> 0.14.6)
|
12
|
+
guard-bundler (0.1.3)
|
13
|
+
bundler (>= 1.0.0)
|
14
|
+
guard (>= 0.2.2)
|
15
|
+
guard-rspec (0.6.0)
|
16
|
+
guard (>= 0.10.0)
|
17
|
+
jeweler (1.8.3)
|
18
|
+
bundler (~> 1.0)
|
19
|
+
git (>= 1.2.5)
|
20
|
+
rake
|
21
|
+
rdoc
|
22
|
+
json (1.6.5)
|
23
|
+
linecache (0.46)
|
24
|
+
rbx-require-relative (> 0.0.4)
|
25
|
+
nokogiri (1.5.0)
|
26
|
+
rake (0.9.2.2)
|
27
|
+
rbx-require-relative (0.0.5)
|
28
|
+
rcov (1.0.0)
|
29
|
+
rdoc (3.12)
|
30
|
+
json (~> 1.4)
|
31
|
+
rspec (2.8.0)
|
32
|
+
rspec-core (~> 2.8.0)
|
33
|
+
rspec-expectations (~> 2.8.0)
|
34
|
+
rspec-mocks (~> 2.8.0)
|
35
|
+
rspec-core (2.8.0)
|
36
|
+
rspec-expectations (2.8.0)
|
37
|
+
diff-lcs (~> 1.1.2)
|
38
|
+
rspec-mocks (2.8.0)
|
39
|
+
ruby-debug (0.10.4)
|
40
|
+
columnize (>= 0.1)
|
41
|
+
ruby-debug-base (~> 0.10.4.0)
|
42
|
+
ruby-debug-base (0.10.4)
|
43
|
+
linecache (>= 0.3)
|
44
|
+
sanitize (2.0.3)
|
45
|
+
nokogiri (>= 1.4.4, < 1.6)
|
46
|
+
thor (0.14.6)
|
47
|
+
|
48
|
+
PLATFORMS
|
49
|
+
ruby
|
50
|
+
|
51
|
+
DEPENDENCIES
|
52
|
+
bundler (~> 1.0.0)
|
53
|
+
growl
|
54
|
+
guard (>= 1.0.0)
|
55
|
+
guard-bundler (>= 0.1.3)
|
56
|
+
guard-rspec (>= 0.6.0)
|
57
|
+
jeweler (~> 1.8.3)
|
58
|
+
rcov
|
59
|
+
rdoc (~> 3.12)
|
60
|
+
rspec (~> 2.8.0)
|
61
|
+
ruby-debug
|
62
|
+
sanitize
|
data/Guardfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
guard 'bundler' do
|
2
|
+
watch('Gemfile')
|
3
|
+
watch(/^.+\.gemspec/)
|
4
|
+
end
|
5
|
+
|
6
|
+
guard 'rspec', :version => 2, :all_after_pass => false do
|
7
|
+
watch(%r{^spec/.+_spec\.rb$})
|
8
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
|
9
|
+
watch('spec/spec_helper.rb') { "spec" }
|
10
|
+
watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
11
|
+
watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
|
12
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Tatemae.com
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "sumitup"
|
18
|
+
gem.homepage = "http://github.com/tatemae/sumitup"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Generate a summary of html content}
|
21
|
+
gem.description = %Q{Given an html document or fragment this gem will build a summary of the content.}
|
22
|
+
gem.email = "justinball@gmail.com"
|
23
|
+
gem.authors = ["Justin Ball"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
spec.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
task :default => :spec
|
40
|
+
|
41
|
+
require 'rdoc/task'
|
42
|
+
Rake::RDocTask.new do |rdoc|
|
43
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
44
|
+
|
45
|
+
rdoc.rdoc_dir = 'rdoc'
|
46
|
+
rdoc.title = "sumitup #{version}"
|
47
|
+
rdoc.rdoc_files.include('README*')
|
48
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
49
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/sumitup.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
module Sumitup
  # Builds a shortened, sanitized version of an HTML document or fragment.
  #
  # The heavy lifting is delegated to Sanitize.clean; this class supplies the
  # whitelists plus four transformers that drop hidden and empty nodes, cap
  # the number of words and cap/resize images.
  #
  # NOTE: word_count and image_count live on the instance and are not reset
  # by #summarize, so counts accumulate when one parser is used for several
  # calls. Use a fresh parser (or reset the counters) per document.
  class Parser

    # Default width forced onto every image that is kept.
    IMAGE_WIDTH_LIMIT = 230

    attr_accessor :word_count, :max_words
    attr_accessor :image_count, :image_width_limit, :max_images
    attr_accessor :elements, :attributes, :protocols, :remove_contents
    attr_accessor :omission

    # Parameters (all optional keys of the options hash):
    #   omission          - Text appended when a node is truncated (default '...')
    #   word_count        - Starting word count (default 0)
    #   max_words         - Word limit for the summary (default 100)
    #   image_count       - Starting image count (default 0)
    #   image_width_limit - Width written onto kept images (default IMAGE_WIDTH_LIMIT)
    #   max_images        - Number of images to keep (default 2)
    #   elements, attributes, protocols, remove_contents
    #                     - Whitelists handed straight to Sanitize.clean
    def initialize(options = {})

      # '...' is what the summarizer always appended, so it stays the default.
      self.omission = options[:omission] || '...'

      self.word_count = options[:word_count] || 0
      self.max_words = options[:max_words] || 100

      self.image_count = options[:image_count] || 0
      self.image_width_limit = options[:image_width_limit] || IMAGE_WIDTH_LIMIT
      self.max_images = options[:max_images] || 2

      self.elements = options[:elements] || %w(
        a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre
        q s samp small strike strong sub sup time u ul var img span
      )

      self.attributes = options[:attributes] || {
        'a'          => ['href', 'title'],
        'blockquote' => ['cite'],
        'img'        => ['alt', 'src', 'title', 'width', 'height']
      }

      self.protocols = options[:protocols] || {
        'a' => {'href' => ['http', 'https', 'mailto']}
      }

      self.remove_contents = options[:remove_contents] || %w(
        style script
      )

    end

    # Sanitizes the html and generates a summary of it.
    # Parameters:
    #   html - The html document or fragment to summarize
    #   max  - Optional word limit; when given it replaces max_words
    # Returns '' for nil or empty html, otherwise the result of Sanitize.clean.
    def summarize(html, max = nil)
      return '' if is_blank?(html)

      self.max_words = max unless max.nil?

      Sanitize.clean(html,
        :elements => elements,
        :attributes => attributes,
        :protocols => protocols,
        :remove_contents => remove_contents,
        :transformers => [no_display_transformer, empty_transformer],
        :transformers_breadth => [summarizer, image_transformer])
    end

    # Transformer that enforces the word limit. It only looks at element
    # nodes whose first child is a text node.
    def summarizer
      me = self
      lambda do |env|

        node = env[:node]
        next unless node.element?

        first_child = node.children.first
        next unless first_child && first_child.text?

        if me.word_count >= me.max_words
          # the limit has already been reached so just remove the item
          node.remove
          next
        end

        # if the text of the current node makes us go over then truncate it
        # to the words that were still available before this node
        words_before = me.word_count
        me.word_count += node.text.scan(/\b\S+\b/).size
        if me.word_count > me.max_words
          node.content = me.snippet(node.text, me.max_words - words_before, me.omission)
        end

      end
    end

    # Transformer that keeps at most max_images images and forces the width
    # of every image that is kept.
    def image_transformer
      me = self
      lambda do |env|
        next unless env[:node_name] == 'img'

        node = env[:node]
        me.image_count += 1
        if me.image_count > me.max_images
          node.remove
        else
          # Force width of images. Written through []= / remove_attribute so
          # images that have no width or height attribute do not blow up.
          node['width'] = me.image_width_limit.to_s
          node.remove_attribute('height')
        end
      end
    end

    # Transformer that removes nodes with no text and no children. img and br
    # are left alone.
    def empty_transformer
      lambda do |env|
        node = env[:node]
        next if ['img', 'br'].include?(env[:node_name])

        node.remove if node.text.empty? && node.children.empty?
      end
    end

    # Transformer that removes nodes whose inline style contains display:none.
    def no_display_transformer
      lambda do |env|
        node = env[:node]
        style = node['style']
        node.remove if style && style =~ /display\s*:\s*none/i
      end
    end

    # Truncates text at a word boundary
    # Parameters:
    #   text      - The text to truncate
    #   wordcount - The number of words to keep (values below 0 are treated as 0)
    #   omission  - Text to add when the text is truncated ie 'read more' or '...'
    # Returns '' for nil or empty text. Words are separated by whitespace and
    # joined back with single spaces.
    def snippet(text, wordcount, omission)
      return '' if is_blank?(text)

      words = text.split
      limit = [wordcount, 0].max
      kept = words.first(limit)
      return kept.join(' ') if words.size <= limit

      (kept + [omission]).join(' ')
    end

    # True when text is nil or empty.
    def is_blank?(text)
      text.nil? || text.empty?
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'sumitup'
|
5
|
+
require 'ruby-debug'
|
6
|
+
# Requires supporting ruby files with custom matchers and macros, etc,
|
7
|
+
# in spec/support/ and its subdirectories.
|
8
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
config.mock_with :rspec
|
12
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
13
|
+
config.filter_run :focus => true # Add a :focus tag to a given spec only that spec will be run instead of all of them
|
14
|
+
config.run_all_when_everything_filtered = true
|
15
|
+
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Sumitup::Parser do
|
4
|
+
before do
|
5
|
+
@image_width_limit = 200
|
6
|
+
@parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => @image_width_limit)
|
7
|
+
end
|
8
|
+
describe "summarize" do
|
9
|
+
before do
|
10
|
+
@html = %Q{
|
11
|
+
<div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
|
12
|
+
<p style="display:none;">Can't see this!</p>
|
13
|
+
<p></p>
|
14
|
+
<p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
|
15
|
+
bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
|
16
|
+
ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
|
17
|
+
The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
|
18
|
+
<p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
|
19
|
+
uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
|
20
|
+
While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
|
21
|
+
healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
|
22
|
+
me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
|
23
|
+
<p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
|
24
|
+
youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
|
25
|
+
something so you can't see it." She's 4 so suck it up.</p>
|
26
|
+
<img src="http://www.example.com/test.jpg" width="600" height="600" />
|
27
|
+
<p>
|
28
|
+
<style type="text/css">
|
29
|
+
.gallery {
|
30
|
+
margin: auto;
|
31
|
+
}
|
32
|
+
.gallery-item {
|
33
|
+
float: left;
|
34
|
+
margin-top: 10px;
|
35
|
+
text-align: center;
|
36
|
+
width: 33%; }
|
37
|
+
.gallery img {
|
38
|
+
border: 2px solid #cfcfcf;
|
39
|
+
}
|
40
|
+
.gallery-caption {
|
41
|
+
margin-left: 0;
|
42
|
+
}
|
43
|
+
</style>
|
44
|
+
<!-- see gallery_shortcode() in wp-includes/media.php -->
|
45
|
+
</p><div class="gallery"><dl class="gallery-item">
|
46
|
+
<dt class="gallery-icon">
|
47
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
|
48
|
+
</dt>
|
49
|
+
<dd class="gallery-caption">
|
50
|
+
Flesh always loses against asphalt
|
51
|
+
</dd></dl><dl class="gallery-item">
|
52
|
+
<dt class="gallery-icon">
|
53
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
|
54
|
+
</dt>
|
55
|
+
<dd class="gallery-caption">
|
56
|
+
My leg is straight so it is harder to see, but if I bend it you can see the tendons
|
57
|
+
</dd></dl><dl class="gallery-item">
|
58
|
+
<dt class="gallery-icon">
|
59
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
|
60
|
+
</dt>
|
61
|
+
<dd class="gallery-caption">
|
62
|
+
Drugs make you happy
|
63
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
64
|
+
<dt class="gallery-icon">
|
65
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
|
66
|
+
</dt>
|
67
|
+
<dd class="gallery-caption">
|
68
|
+
Joel stuck around to offer moral support
|
69
|
+
</dd></dl><dl class="gallery-item">
|
70
|
+
<dt class="gallery-icon">
|
71
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
|
72
|
+
</dt>
|
73
|
+
<dd class="gallery-caption">
|
74
|
+
After they cleaned it up
|
75
|
+
</dd></dl><dl class="gallery-item">
|
76
|
+
<dt class="gallery-icon">
|
77
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
|
78
|
+
</dt>
|
79
|
+
<dd class="gallery-caption">
|
80
|
+
This isn't as much fun as it looks
|
81
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
82
|
+
<dt class="gallery-icon">
|
83
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
|
84
|
+
</dt>
|
85
|
+
<dd class="gallery-caption">
|
86
|
+
Irrigating the wound - like its a crop or something
|
87
|
+
</dd></dl><dl class="gallery-item">
|
88
|
+
<dt class="gallery-icon">
|
89
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
|
90
|
+
</dt>
|
91
|
+
<dd class="gallery-caption">
|
92
|
+
After they took the bandage off the first time - 5 days later
|
93
|
+
</dd></dl><dl class="gallery-item">
|
94
|
+
<dt class="gallery-icon">
|
95
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
|
96
|
+
</dt>
|
97
|
+
<dd class="gallery-caption">
|
98
|
+
After they took the bandage off the first time - 5 days later
|
99
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
100
|
+
<dt class="gallery-icon">
|
101
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
|
102
|
+
</dt>
|
103
|
+
<dd class="gallery-caption">
|
104
|
+
After 7 days. Still not pretty, but it is amazing how the human body heals
|
105
|
+
</dd></dl><dl class="gallery-item">
|
106
|
+
<dt class="gallery-icon">
|
107
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
|
108
|
+
</dt>
|
109
|
+
<dd class="gallery-caption">
|
110
|
+
This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
|
111
|
+
</dd></dl><dl class="gallery-item">
|
112
|
+
<dt class="gallery-icon">
|
113
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
|
114
|
+
</dt>
|
115
|
+
<dd class="gallery-caption">
|
116
|
+
Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
|
117
|
+
</dd></dl><br style="clear: both">
|
118
|
+
<br style="clear: both;">
|
119
|
+
</div>
|
120
|
+
<br>
|
121
|
+
<!--more--><!-- BlogGlue Cache: No --><p></p>
|
122
|
+
</div>}
|
123
|
+
@short_result = @parser.summarize(@html, 5)
|
124
|
+
@long_result = @parser.summarize(@html, 100000)
|
125
|
+
end
|
126
|
+
it "should summarize the content by number of words" do
|
127
|
+
@short_result.should_not include('than')
|
128
|
+
@short_result.should include('more')
|
129
|
+
end
|
130
|
+
it "should remove html comments" do
|
131
|
+
@short_result.should_not include('<!--more--><!-- BlogGlue Cache: No -->')
|
132
|
+
end
|
133
|
+
it "should keep the image tag" do
|
134
|
+
@long_result.should include(%Q{<img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="#{@image_width_limit}" title="" alt="">})
|
135
|
+
end
|
136
|
+
it "should remove the style tag" do
|
137
|
+
@long_result.should_not include('<style type="text/css">')
|
138
|
+
end
|
139
|
+
it "should remove empty tags" do
|
140
|
+
@long_result.should_not include('<p></p>')
|
141
|
+
end
|
142
|
+
it "should remove tags with display:none" do
|
143
|
+
@long_result.should_not include('display:none')
|
144
|
+
end
|
145
|
+
it "should set the width to 240 if width is greater than 240" do
|
146
|
+
parser = Sumitup::Parser.new(:image_width_limit => 240)
|
147
|
+
result = parser.summarize(@html, 10000)
|
148
|
+
result.should include('<img src="http://www.example.com/test.jpg" width="240">')
|
149
|
+
end
|
150
|
+
it "should only allow 2 images" do
|
151
|
+
parser = Sumitup::Parser.new(:max_images => 2)
|
152
|
+
result = parser.summarize(@html, 10000)
|
153
|
+
doc = Nokogiri::HTML(result)
|
154
|
+
doc.css('img').length.should == 2
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe "snippet" do
|
159
|
+
it "should build a string 157 chars long" do
|
160
|
+
text = "Kimball was born to Solomon Farnham Kimball and Anna Spaulding in Sheldon, Franklin County, Vermont. Kimball's forefathers arrived in America from England and started"
|
161
|
+
@parser.snippet(text, 5, '...').should == "Kimball was born to Solomon ..."
|
162
|
+
end
|
163
|
+
it "should not crash if string is nil" do
|
164
|
+
text = nil
|
165
|
+
@parser.snippet(text, 5, '...').length.should == 0
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
describe "is_blank?" do
|
170
|
+
it "should be true if text is nil" do
|
171
|
+
@parser.is_blank?(nil).should be_true
|
172
|
+
end
|
173
|
+
it "should be true if text is ''" do
|
174
|
+
@parser.is_blank?('').should be_true
|
175
|
+
end
|
176
|
+
it "should be false if text is 'valid'" do
|
177
|
+
@parser.is_blank?('valid').should be_false
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
end
|
metadata
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sumitup
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Justin Ball
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-03-02 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
type: :runtime
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
version_requirements: *id001
|
32
|
+
name: sanitize
|
33
|
+
prerelease: false
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
type: :development
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 3
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
version: "0"
|
45
|
+
version_requirements: *id002
|
46
|
+
name: growl
|
47
|
+
prerelease: false
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
type: :development
|
50
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 47
|
56
|
+
segments:
|
57
|
+
- 2
|
58
|
+
- 8
|
59
|
+
- 0
|
60
|
+
version: 2.8.0
|
61
|
+
version_requirements: *id003
|
62
|
+
name: rspec
|
63
|
+
prerelease: false
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
type: :development
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 31
|
72
|
+
segments:
|
73
|
+
- 3
|
74
|
+
- 12
|
75
|
+
version: "3.12"
|
76
|
+
version_requirements: *id004
|
77
|
+
name: rdoc
|
78
|
+
prerelease: false
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
type: :development
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ~>
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
hash: 23
|
87
|
+
segments:
|
88
|
+
- 1
|
89
|
+
- 0
|
90
|
+
- 0
|
91
|
+
version: 1.0.0
|
92
|
+
version_requirements: *id005
|
93
|
+
name: bundler
|
94
|
+
prerelease: false
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
type: :development
|
97
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ~>
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
hash: 49
|
103
|
+
segments:
|
104
|
+
- 1
|
105
|
+
- 8
|
106
|
+
- 3
|
107
|
+
version: 1.8.3
|
108
|
+
version_requirements: *id006
|
109
|
+
name: jeweler
|
110
|
+
prerelease: false
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
type: :development
|
113
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
119
|
+
segments:
|
120
|
+
- 0
|
121
|
+
version: "0"
|
122
|
+
version_requirements: *id007
|
123
|
+
name: rcov
|
124
|
+
prerelease: false
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
type: :development
|
127
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
128
|
+
none: false
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
hash: 23
|
133
|
+
segments:
|
134
|
+
- 1
|
135
|
+
- 0
|
136
|
+
- 0
|
137
|
+
version: 1.0.0
|
138
|
+
version_requirements: *id008
|
139
|
+
name: guard
|
140
|
+
prerelease: false
|
141
|
+
- !ruby/object:Gem::Dependency
|
142
|
+
type: :development
|
143
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
144
|
+
none: false
|
145
|
+
requirements:
|
146
|
+
- - ">="
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
hash: 7
|
149
|
+
segments:
|
150
|
+
- 0
|
151
|
+
- 6
|
152
|
+
- 0
|
153
|
+
version: 0.6.0
|
154
|
+
version_requirements: *id009
|
155
|
+
name: guard-rspec
|
156
|
+
prerelease: false
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
type: :development
|
159
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
160
|
+
none: false
|
161
|
+
requirements:
|
162
|
+
- - ">="
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
hash: 29
|
165
|
+
segments:
|
166
|
+
- 0
|
167
|
+
- 1
|
168
|
+
- 3
|
169
|
+
version: 0.1.3
|
170
|
+
version_requirements: *id010
|
171
|
+
name: guard-bundler
|
172
|
+
prerelease: false
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
type: :development
|
175
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
176
|
+
none: false
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
hash: 3
|
181
|
+
segments:
|
182
|
+
- 0
|
183
|
+
version: "0"
|
184
|
+
version_requirements: *id011
|
185
|
+
name: ruby-debug
|
186
|
+
prerelease: false
|
187
|
+
description: Given an html document or fragment this gem will build a summary of the content.
|
188
|
+
email: justinball@gmail.com
|
189
|
+
executables: []
|
190
|
+
|
191
|
+
extensions: []
|
192
|
+
|
193
|
+
extra_rdoc_files:
|
194
|
+
- LICENSE.txt
|
195
|
+
- README.rdoc
|
196
|
+
files:
|
197
|
+
- .document
|
198
|
+
- .rspec
|
199
|
+
- Gemfile
|
200
|
+
- Gemfile.lock
|
201
|
+
- Guardfile
|
202
|
+
- LICENSE.txt
|
203
|
+
- README.rdoc
|
204
|
+
- Rakefile
|
205
|
+
- VERSION
|
206
|
+
- lib/sumitup.rb
|
207
|
+
- lib/sumitup/parser.rb
|
208
|
+
- spec/spec_helper.rb
|
209
|
+
- spec/sumitup/parser_spec.rb
|
210
|
+
homepage: http://github.com/tatemae/sumitup
|
211
|
+
licenses:
|
212
|
+
- MIT
|
213
|
+
post_install_message:
|
214
|
+
rdoc_options: []
|
215
|
+
|
216
|
+
require_paths:
|
217
|
+
- lib
|
218
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ">="
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
hash: 3
|
224
|
+
segments:
|
225
|
+
- 0
|
226
|
+
version: "0"
|
227
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
228
|
+
none: false
|
229
|
+
requirements:
|
230
|
+
- - ">="
|
231
|
+
- !ruby/object:Gem::Version
|
232
|
+
hash: 3
|
233
|
+
segments:
|
234
|
+
- 0
|
235
|
+
version: "0"
|
236
|
+
requirements: []
|
237
|
+
|
238
|
+
rubyforge_project:
|
239
|
+
rubygems_version: 1.8.12
|
240
|
+
signing_key:
|
241
|
+
specification_version: 3
|
242
|
+
summary: Generate a summary of html content
|
243
|
+
test_files: []
|
244
|
+
|