sumitup 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +62 -0
- data/Guardfile +12 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +7 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/sumitup.rb +11 -0
- data/lib/sumitup/parser.rb +132 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/sumitup/parser_spec.rb +181 -0
- metadata +244 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Fetch gems over TLS so that downloads cannot be altered in transit.
source "https://rubygems.org"

# Runtime dependency: the parser delegates HTML cleaning to Sanitize.
gem 'sanitize'

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development, :test do
  gem 'growl'
  gem "rspec", "~> 2.8.0"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.8.3"
  gem "rcov", ">= 0"
  gem "guard", ">=1.0.0"
  gem "guard-rspec", ">= 0.6.0"
  gem "guard-bundler", ">= 0.1.3"
  gem "ruby-debug"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
columnize (0.3.5)
|
5
|
+
diff-lcs (1.1.3)
|
6
|
+
ffi (1.0.11)
|
7
|
+
git (1.2.5)
|
8
|
+
growl (1.0.3)
|
9
|
+
guard (1.0.0)
|
10
|
+
ffi (>= 0.5.0)
|
11
|
+
thor (~> 0.14.6)
|
12
|
+
guard-bundler (0.1.3)
|
13
|
+
bundler (>= 1.0.0)
|
14
|
+
guard (>= 0.2.2)
|
15
|
+
guard-rspec (0.6.0)
|
16
|
+
guard (>= 0.10.0)
|
17
|
+
jeweler (1.8.3)
|
18
|
+
bundler (~> 1.0)
|
19
|
+
git (>= 1.2.5)
|
20
|
+
rake
|
21
|
+
rdoc
|
22
|
+
json (1.6.5)
|
23
|
+
linecache (0.46)
|
24
|
+
rbx-require-relative (> 0.0.4)
|
25
|
+
nokogiri (1.5.0)
|
26
|
+
rake (0.9.2.2)
|
27
|
+
rbx-require-relative (0.0.5)
|
28
|
+
rcov (1.0.0)
|
29
|
+
rdoc (3.12)
|
30
|
+
json (~> 1.4)
|
31
|
+
rspec (2.8.0)
|
32
|
+
rspec-core (~> 2.8.0)
|
33
|
+
rspec-expectations (~> 2.8.0)
|
34
|
+
rspec-mocks (~> 2.8.0)
|
35
|
+
rspec-core (2.8.0)
|
36
|
+
rspec-expectations (2.8.0)
|
37
|
+
diff-lcs (~> 1.1.2)
|
38
|
+
rspec-mocks (2.8.0)
|
39
|
+
ruby-debug (0.10.4)
|
40
|
+
columnize (>= 0.1)
|
41
|
+
ruby-debug-base (~> 0.10.4.0)
|
42
|
+
ruby-debug-base (0.10.4)
|
43
|
+
linecache (>= 0.3)
|
44
|
+
sanitize (2.0.3)
|
45
|
+
nokogiri (>= 1.4.4, < 1.6)
|
46
|
+
thor (0.14.6)
|
47
|
+
|
48
|
+
PLATFORMS
|
49
|
+
ruby
|
50
|
+
|
51
|
+
DEPENDENCIES
|
52
|
+
bundler (~> 1.0.0)
|
53
|
+
growl
|
54
|
+
guard (>= 1.0.0)
|
55
|
+
guard-bundler (>= 0.1.3)
|
56
|
+
guard-rspec (>= 0.6.0)
|
57
|
+
jeweler (~> 1.8.3)
|
58
|
+
rcov
|
59
|
+
rdoc (~> 3.12)
|
60
|
+
rspec (~> 2.8.0)
|
61
|
+
ruby-debug
|
62
|
+
sanitize
|
data/Guardfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Re-run bundler whenever the dependency declarations change.
guard 'bundler' do
  watch('Gemfile')
  watch(/^.+\.gemspec/)
end

# Re-run the matching specs whenever a spec, a library file or a helper changes.
guard 'rspec', :version => 2, :all_after_pass => false do
  watch(%r{^spec/.+_spec\.rb$})
  # Specs mirror the lib/ layout directly under spec/
  # (lib/sumitup/parser.rb -> spec/sumitup/parser_spec.rb), not under spec/lib/.
  watch(%r{^lib/(.+)\.rb$})          { |m| "spec/#{m[1]}_spec.rb" }
  watch('spec/spec_helper.rb')       { "spec" }
  watch(%r{^app/(.+)\.rb$})          { |m| "spec/#{m[1]}_spec.rb" }
  watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Tatemae.com
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Make sure the default and development gem groups are loadable before any
# task is defined; otherwise tell the user how to fix it and exit with
# Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "sumitup"
  gem.homepage = "http://github.com/tatemae/sumitup"
  gem.license = "MIT"
  gem.summary = %Q{Generate a summary of html content}
  gem.description = %Q{Given an html document or fragment this gem will build a summary of the content.}
  gem.email = "justinball@gmail.com"
  gem.authors = ["Justin Ball"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# Spec tasks: `rake spec` runs the suite, `rake rcov` runs it with coverage.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# API documentation task.
require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip the value so a trailing newline in VERSION does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "sumitup #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/sumitup.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
module Sumitup
  # Builds a summary of an html document or fragment: the markup is cleaned
  # with Sanitize and custom transformers limit the number of words and
  # images that survive.
  #
  # NOTE: word_count and image_count are not reset by #summarize, so they keep
  # accumulating when one parser instance is used for several documents.
  class Parser

    # Default width (as written into the width attribute) forced onto kept images.
    IMAGE_WIDTH_LIMIT = 230

    attr_accessor :word_count, :max_words
    attr_accessor :image_count, :image_width_limit, :max_images
    attr_accessor :elements, :attributes, :protocols, :remove_contents
    attr_accessor :omission

    # Parameters:
    #   options - Hash of optional settings, keyed by symbol:
    #     :omission          - Text appended to truncated text ('...' is used when blank)
    #     :word_count        - Starting word count (default 0)
    #     :max_words         - Word budget for the summary (default 100)
    #     :image_count       - Starting image count (default 0)
    #     :image_width_limit - Width forced onto kept images (default IMAGE_WIDTH_LIMIT)
    #     :max_images        - Number of images to keep (default 2)
    #     :elements, :attributes, :protocols, :remove_contents
    #                        - Passed through to Sanitize as the whitelist configuration
    def initialize(options = {})
      self.omission = options[:omission] || ''

      self.word_count = options[:word_count] || 0
      self.max_words = options[:max_words] || 100

      self.image_count = options[:image_count] || 0
      self.image_width_limit = options[:image_width_limit] || IMAGE_WIDTH_LIMIT
      self.max_images = options[:max_images] || 2

      self.elements = options[:elements] || %w(
        a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre
        q s samp small strike strong sub sup time u ul var img span
      )

      self.attributes = options[:attributes] || {
        'a' => ['href', 'title'],
        'blockquote' => ['cite'],
        'img' => ['alt', 'src', 'title', 'width', 'height']
      }

      self.protocols = options[:protocols] || {
        'a' => {'href' => ['http', 'https', 'mailto']}
      }

      self.remove_contents = options[:remove_contents] || %w(
        style script
      )
    end

    # Removes html and generates a summary.
    # Parameters:
    #   html - The html document or fragment to summarize
    #   max  - Optional word budget; when given it replaces max_words
    # Returns the cleaned html, or '' when html is nil or empty.
    def summarize(html, max = nil)
      return '' if is_blank?(html)

      self.max_words = max unless max.nil?

      Sanitize.clean(html,
        :elements => elements,
        :attributes => attributes,
        :protocols => protocols,
        :remove_contents => remove_contents,
        :transformers => [no_display_transformer, empty_transformer],
        :transformers_breadth => [summarizer, image_transformer])
    end

    # Transformer that enforces the word budget. It only acts on elements
    # whose first child is a text node.
    def summarizer
      me = self
      lambda do |env|
        node = env[:node]
        next unless node.element?

        first_child = node.children.first
        next unless first_child && first_child.text?

        # Words still available before this node is counted.
        remaining = me.max_words - me.word_count
        if remaining <= 0
          # The budget is already used up, so drop the node entirely.
          node.remove
        else
          node.text.scan(/\b\S+\b/) { me.word_count += 1 }
          if me.word_count > me.max_words
            # This node crosses the limit: keep only the words that still fit.
            marker = me.is_blank?(me.omission) ? '...' : me.omission
            node.content = me.snippet(node.text, remaining, marker)
          end
        end
        nil
      end
    end

    # Transformer that keeps at most max_images images, forces their width to
    # image_width_limit and drops their height attribute.
    def image_transformer
      me = self
      lambda do |env|
        next unless env[:node_name] == 'img'

        node = env[:node]
        me.image_count += 1
        if me.image_count > me.max_images
          node.remove
        else
          # Set/remove through the node itself so that images without a
          # width or height attribute do not raise.
          node['width'] = me.image_width_limit.to_s
          node.remove_attribute('height')
        end
        nil
      end
    end

    # Transformer that removes nodes with no text and no children, except
    # img and br which are legitimately empty.
    def empty_transformer
      lambda do |env|
        node = env[:node]
        keep_when_empty = ['img', 'br'].include?(env[:node_name])
        if node.text.empty? && node.children.empty? && !keep_when_empty
          node.remove
        end
        nil
      end
    end

    # Transformer that removes nodes hidden with an inline "display: none" style.
    def no_display_transformer
      lambda do |env|
        node = env[:node]
        style = node['style']
        # CSS is case-insensitive, so match DISPLAY:NONE as well.
        node.remove if style && style =~ /display\s*:\s*none/i
        nil
      end
    end

    # Truncates text at a word boundary
    # Parameters:
    #   text      - The text to truncate
    #   wordcount - The number of words
    #   omission  - Text to add when the text is truncated ie 'read more' or '...'
    # Returns '' when text is nil or empty.
    def snippet(text, wordcount, omission)
      return '' if is_blank?(text)

      words = text.split
      # A zero or negative limit keeps no words (a 0..-1 range would keep them all).
      limit = wordcount < 0 ? 0 : wordcount
      kept = words.first(limit).join(" ")
      words.size > wordcount ? kept + " " + omission.to_s : kept
    end

    # True when text is nil or an empty string.
    def is_blank?(text)
      text.nil? || text.empty?
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'rspec'
require 'sumitup'

# The debugger is only a development convenience, so a missing or unloadable
# ruby-debug gem must not prevent the whole suite from loading.
begin
  require 'ruby-debug'
rescue LoadError
  # ruby-debug is unavailable; run the specs without it.
end

# Requires supporting ruby files with custom matchers and macros, etc,
# in spec/support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}

RSpec.configure do |config|
  config.mock_with :rspec
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.filter_run :focus => true # Add a :focus tag to a given spec only that spec will be run instead of all of them
  config.run_all_when_everything_filtered = true
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Sumitup::Parser do
|
4
|
+
before do
|
5
|
+
@image_width_limit = 200
|
6
|
+
@parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => @image_width_limit)
|
7
|
+
end
|
8
|
+
describe "summarize" do
|
9
|
+
before do
|
10
|
+
@html = %Q{
|
11
|
+
<div class="entry clear"><!--more--><!-- BlogGlue Cache: No -->
|
12
|
+
<p style="display:none;">Can't see this!</p>
|
13
|
+
<p></p>
|
14
|
+
<p>It's now a bit more than two weeks since I had an unfortunate incident with a serpent. While the leg is actually healing quite nicely I the joy of
|
15
|
+
bending my knee has become a distant memory and a luxury I look forward to each day. The antibiotics I am forced to continue leave my body in a semi d
|
16
|
+
ebilitated state. Each visit to the restroom is a vile reminder of my body's current inability to properly digest food. At least I'm not allergic to the drug this time.
|
17
|
+
The last regiment of antibiotics set my skin on fire and made me appreciate the leper's state.</p>
|
18
|
+
<p>My leg is healing and I think that the only permanent damage will be a pretty nasty scar. I can live with that. One of the truly odd
|
19
|
+
uirks of cyclists besides constant attempts to trim down to super model anorexic status and the tight shorts is the customary shaving of legs.
|
20
|
+
While some might contend the traditionally feminine activity helps reduce aerodynamic drag I have read that the true purpose is to aid in repairs and
|
21
|
+
healing in the event of an accident. This is a true fact. I don't shave my legs (my wife would never let me live that down). The surgeon told
|
22
|
+
me that he spent most of his time picking hair out of the wound. I'll let you judge. </p>
|
23
|
+
<p>Be warned these pictures are gross, disturbing and bloody. I think one of the nurses even got a bit squeamish. As bad as the pictures are my
|
24
|
+
youngest daughter had to sit in the room with us the entire time. She said, "Daddy's owie was really gross. I like it when they cover it with
|
25
|
+
something so you can't see it." She's 4 so suck it up.</p>
|
26
|
+
<img src="http://www.example.com/test.jpg" width="600" height="600" />
|
27
|
+
<p>
|
28
|
+
<style type="text/css">
|
29
|
+
.gallery {
|
30
|
+
margin: auto;
|
31
|
+
}
|
32
|
+
.gallery-item {
|
33
|
+
float: left;
|
34
|
+
margin-top: 10px;
|
35
|
+
text-align: center;
|
36
|
+
width: 33%; }
|
37
|
+
.gallery img {
|
38
|
+
border: 2px solid #cfcfcf;
|
39
|
+
}
|
40
|
+
.gallery-caption {
|
41
|
+
margin-left: 0;
|
42
|
+
}
|
43
|
+
</style>
|
44
|
+
<!-- see gallery_shortcode() in wp-includes/media.php -->
|
45
|
+
</p><div class="gallery"><dl class="gallery-item">
|
46
|
+
<dt class="gallery-icon">
|
47
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-18/" title="2008-08-22-09-57-18"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="150" height="150" title="" alt=""></a>
|
48
|
+
</dt>
|
49
|
+
<dd class="gallery-caption">
|
50
|
+
Flesh always loses against asphalt
|
51
|
+
</dd></dl><dl class="gallery-item">
|
52
|
+
<dt class="gallery-icon">
|
53
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-19/" title="2008-08-22-09-57-19"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-19-150x150-1-img739.jpg" width="150" height="150" title="" alt=""></a>
|
54
|
+
</dt>
|
55
|
+
<dd class="gallery-caption">
|
56
|
+
My leg is straight so it is harder to see, but if I bend it you can see the tendons
|
57
|
+
</dd></dl><dl class="gallery-item">
|
58
|
+
<dt class="gallery-icon">
|
59
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-42/" title="2008-08-22-09-57-42"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-42-150x150-1-img741.jpg" width="150" height="150" title="" alt=""></a>
|
60
|
+
</dt>
|
61
|
+
<dd class="gallery-caption">
|
62
|
+
Drugs make you happy
|
63
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
64
|
+
<dt class="gallery-icon">
|
65
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-09-57-52/" title="2008-08-22-09-57-52"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-52-150x150-1-img742.jpg" width="150" height="150" title="" alt=""></a>
|
66
|
+
</dt>
|
67
|
+
<dd class="gallery-caption">
|
68
|
+
Joel stuck around to offer moral support
|
69
|
+
</dd></dl><dl class="gallery-item">
|
70
|
+
<dt class="gallery-icon">
|
71
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-06-34/" title="2008-08-22-11-06-34"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-06-34-150x150-1-img743.jpg" width="150" height="150" title="" alt=""></a>
|
72
|
+
</dt>
|
73
|
+
<dd class="gallery-caption">
|
74
|
+
After they cleaned it up
|
75
|
+
</dd></dl><dl class="gallery-item">
|
76
|
+
<dt class="gallery-icon">
|
77
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-21/" title="2008-08-22-11-07-21"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-21-150x150-1-img745.jpg" width="150" height="150" title="" alt=""></a>
|
78
|
+
</dt>
|
79
|
+
<dd class="gallery-caption">
|
80
|
+
This isn't as much fun as it looks
|
81
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
82
|
+
<dt class="gallery-icon">
|
83
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-22-11-07-53/" title="2008-08-22-11-07-53"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-11-07-53-150x150-1-img746.jpg" width="150" height="150" title="" alt=""></a>
|
84
|
+
</dt>
|
85
|
+
<dd class="gallery-caption">
|
86
|
+
Irrigating the wound - like its a crop or something
|
87
|
+
</dd></dl><dl class="gallery-item">
|
88
|
+
<dt class="gallery-icon">
|
89
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-17/" title="2008-08-27-09-47-17"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-17-150x150-1-img747.jpg" width="150" height="150" title="" alt=""></a>
|
90
|
+
</dt>
|
91
|
+
<dd class="gallery-caption">
|
92
|
+
After they took the bandage off the first time - 5 days later
|
93
|
+
</dd></dl><dl class="gallery-item">
|
94
|
+
<dt class="gallery-icon">
|
95
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-27-09-47-22/" title="2008-08-27-09-47-22"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-27-09-47-22-150x150-1-img748.jpg" width="150" height="150" title="" alt=""></a>
|
96
|
+
</dt>
|
97
|
+
<dd class="gallery-caption">
|
98
|
+
After they took the bandage off the first time - 5 days later
|
99
|
+
</dd></dl><br style="clear: both"><dl class="gallery-item">
|
100
|
+
<dt class="gallery-icon">
|
101
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/2008-08-29-10-43-49/" title="2008-08-29-10-43-49"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-29-10-43-49-150x150-1-img749.jpg" width="150" height="150" title="" alt=""></a>
|
102
|
+
</dt>
|
103
|
+
<dd class="gallery-caption">
|
104
|
+
After 7 days. Still not pretty, but it is amazing how the human body heals
|
105
|
+
</dd></dl><dl class="gallery-item">
|
106
|
+
<dt class="gallery-icon">
|
107
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo/" title="wound"><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo-150x150-1-img750.jpg" width="150" height="150" title="" alt=""></a>
|
108
|
+
</dt>
|
109
|
+
<dd class="gallery-caption">
|
110
|
+
This is from my iPhone. It was taken 5 days after the accident at the doctor's office. I have a few more shots below.
|
111
|
+
</dd></dl><dl class="gallery-item">
|
112
|
+
<dt class="gallery-icon">
|
113
|
+
<a href="http://www.justinball.com/2008/09/08/why-cyclists-shave-their-legs-the-most-disgusting-post-i-will-ever-make/photo1/" title="The wound after "><img src="http://www.justinball.com/wp-content/uploads/photojar/cache/photo1-150x150-1-img753.jpg" width="150" height="150" title="" alt=""></a>
|
114
|
+
</dt>
|
115
|
+
<dd class="gallery-caption">
|
116
|
+
Here's what it looks like today 9/8. They took out one stitch, but it will still e quite a while before they can take out the main ones.
|
117
|
+
</dd></dl><br style="clear: both">
|
118
|
+
<br style="clear: both;">
|
119
|
+
</div>
|
120
|
+
<br>
|
121
|
+
<!--more--><!-- BlogGlue Cache: No --><p></p>
|
122
|
+
</div>}
|
123
|
+
@short_result = @parser.summarize(@html, 5)
|
124
|
+
@long_result = @parser.summarize(@html, 100000)
|
125
|
+
end
|
126
|
+
it "should summarize the content by number of words" do
|
127
|
+
@short_result.should_not include('than')
|
128
|
+
@short_result.should include('more')
|
129
|
+
end
|
130
|
+
it "should remove html comments" do
|
131
|
+
@short_result.should_not include('<!--more--><!-- BlogGlue Cache: No -->')
|
132
|
+
end
|
133
|
+
it "should keep the image tag" do
|
134
|
+
@long_result.should include(%Q{<img src="http://www.justinball.com/wp-content/uploads/photojar/cache/2008-08-22-09-57-18-150x150-1-img738.jpg" width="#{@image_width_limit}" title="" alt="">})
|
135
|
+
end
|
136
|
+
it "should remove the style tag" do
|
137
|
+
@long_result.should_not include('<style type="text/css">')
|
138
|
+
end
|
139
|
+
it "should remove empty tags" do
|
140
|
+
@long_result.should_not include('<p></p>')
|
141
|
+
end
|
142
|
+
it "should remove tags with display:none" do
|
143
|
+
@long_result.should_not include('display:none')
|
144
|
+
end
|
145
|
+
# The parser overwrites the width of every kept image with the configured
# limit (the fixture image declares width="600") and drops its height.
it "should force the image width to the configured limit" do
  parser = Sumitup::Parser.new(:image_width_limit => 240)
  result = parser.summarize(@html, 10000)
  result.should include('<img src="http://www.example.com/test.jpg" width="240">')
end
|
150
|
+
it "should only allow 2 images" do
|
151
|
+
parser = Sumitup::Parser.new(:max_images => 2)
|
152
|
+
result = parser.summarize(@html, 10000)
|
153
|
+
doc = Nokogiri::HTML(result)
|
154
|
+
doc.css('img').length.should == 2
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe "snippet" do
  # snippet limits by word count, not by character count.
  it "should keep the requested number of words and append the omission" do
    text = "Kimball was born to Solomon Farnham Kimball and Anna Spaulding in Sheldon, Franklin County, Vermont. Kimball's forefathers arrived in America from England and started"
    @parser.snippet(text, 5, '...').should == "Kimball was born to Solomon ..."
  end
  it "should not crash if string is nil" do
    text = nil
    @parser.snippet(text, 5, '...').length.should == 0
  end
end
|
168
|
+
|
169
|
+
describe "is_blank?" do
|
170
|
+
it "should be true if text is nil" do
|
171
|
+
@parser.is_blank?(nil).should be_true
|
172
|
+
end
|
173
|
+
it "should be true if text is ''" do
|
174
|
+
@parser.is_blank?('').should be_true
|
175
|
+
end
|
176
|
+
it "should be false if text is 'valid'" do
|
177
|
+
@parser.is_blank?('valid').should be_false
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
end
|
metadata
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sumitup
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Justin Ball
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-03-02 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
type: :runtime
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
version_requirements: *id001
|
32
|
+
name: sanitize
|
33
|
+
prerelease: false
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
type: :development
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 3
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
version: "0"
|
45
|
+
version_requirements: *id002
|
46
|
+
name: growl
|
47
|
+
prerelease: false
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
type: :development
|
50
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 47
|
56
|
+
segments:
|
57
|
+
- 2
|
58
|
+
- 8
|
59
|
+
- 0
|
60
|
+
version: 2.8.0
|
61
|
+
version_requirements: *id003
|
62
|
+
name: rspec
|
63
|
+
prerelease: false
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
type: :development
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 31
|
72
|
+
segments:
|
73
|
+
- 3
|
74
|
+
- 12
|
75
|
+
version: "3.12"
|
76
|
+
version_requirements: *id004
|
77
|
+
name: rdoc
|
78
|
+
prerelease: false
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
type: :development
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ~>
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
hash: 23
|
87
|
+
segments:
|
88
|
+
- 1
|
89
|
+
- 0
|
90
|
+
- 0
|
91
|
+
version: 1.0.0
|
92
|
+
version_requirements: *id005
|
93
|
+
name: bundler
|
94
|
+
prerelease: false
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
type: :development
|
97
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ~>
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
hash: 49
|
103
|
+
segments:
|
104
|
+
- 1
|
105
|
+
- 8
|
106
|
+
- 3
|
107
|
+
version: 1.8.3
|
108
|
+
version_requirements: *id006
|
109
|
+
name: jeweler
|
110
|
+
prerelease: false
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
type: :development
|
113
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
119
|
+
segments:
|
120
|
+
- 0
|
121
|
+
version: "0"
|
122
|
+
version_requirements: *id007
|
123
|
+
name: rcov
|
124
|
+
prerelease: false
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
type: :development
|
127
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
128
|
+
none: false
|
129
|
+
requirements:
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
hash: 23
|
133
|
+
segments:
|
134
|
+
- 1
|
135
|
+
- 0
|
136
|
+
- 0
|
137
|
+
version: 1.0.0
|
138
|
+
version_requirements: *id008
|
139
|
+
name: guard
|
140
|
+
prerelease: false
|
141
|
+
- !ruby/object:Gem::Dependency
|
142
|
+
type: :development
|
143
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
144
|
+
none: false
|
145
|
+
requirements:
|
146
|
+
- - ">="
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
hash: 7
|
149
|
+
segments:
|
150
|
+
- 0
|
151
|
+
- 6
|
152
|
+
- 0
|
153
|
+
version: 0.6.0
|
154
|
+
version_requirements: *id009
|
155
|
+
name: guard-rspec
|
156
|
+
prerelease: false
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
type: :development
|
159
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
160
|
+
none: false
|
161
|
+
requirements:
|
162
|
+
- - ">="
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
hash: 29
|
165
|
+
segments:
|
166
|
+
- 0
|
167
|
+
- 1
|
168
|
+
- 3
|
169
|
+
version: 0.1.3
|
170
|
+
version_requirements: *id010
|
171
|
+
name: guard-bundler
|
172
|
+
prerelease: false
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
type: :development
|
175
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
176
|
+
none: false
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
hash: 3
|
181
|
+
segments:
|
182
|
+
- 0
|
183
|
+
version: "0"
|
184
|
+
version_requirements: *id011
|
185
|
+
name: ruby-debug
|
186
|
+
prerelease: false
|
187
|
+
description: Given an html document or fragment this gem will build a summary of the content.
|
188
|
+
email: justinball@gmail.com
|
189
|
+
executables: []
|
190
|
+
|
191
|
+
extensions: []
|
192
|
+
|
193
|
+
extra_rdoc_files:
|
194
|
+
- LICENSE.txt
|
195
|
+
- README.rdoc
|
196
|
+
files:
|
197
|
+
- .document
|
198
|
+
- .rspec
|
199
|
+
- Gemfile
|
200
|
+
- Gemfile.lock
|
201
|
+
- Guardfile
|
202
|
+
- LICENSE.txt
|
203
|
+
- README.rdoc
|
204
|
+
- Rakefile
|
205
|
+
- VERSION
|
206
|
+
- lib/sumitup.rb
|
207
|
+
- lib/sumitup/parser.rb
|
208
|
+
- spec/spec_helper.rb
|
209
|
+
- spec/sumitup/parser_spec.rb
|
210
|
+
homepage: http://github.com/tatemae/sumitup
|
211
|
+
licenses:
|
212
|
+
- MIT
|
213
|
+
post_install_message:
|
214
|
+
rdoc_options: []
|
215
|
+
|
216
|
+
require_paths:
|
217
|
+
- lib
|
218
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ">="
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
hash: 3
|
224
|
+
segments:
|
225
|
+
- 0
|
226
|
+
version: "0"
|
227
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
228
|
+
none: false
|
229
|
+
requirements:
|
230
|
+
- - ">="
|
231
|
+
- !ruby/object:Gem::Version
|
232
|
+
hash: 3
|
233
|
+
segments:
|
234
|
+
- 0
|
235
|
+
version: "0"
|
236
|
+
requirements: []
|
237
|
+
|
238
|
+
rubyforge_project:
|
239
|
+
rubygems_version: 1.8.12
|
240
|
+
signing_key:
|
241
|
+
specification_version: 3
|
242
|
+
summary: Generate a summary of html content
|
243
|
+
test_files: []
|
244
|
+
|