sumitup 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/VERSION +1 -1
- data/lib/sumitup.rb +2 -0
- data/lib/sumitup/parser.rb +36 -13
- data/spec/sumitup/parser_spec.rb +58 -3
- data/sumitup.gemspec +5 -2
- metadata +37 -23
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -3,6 +3,7 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
columnize (0.3.5)
|
5
5
|
diff-lcs (1.1.3)
|
6
|
+
dimensions (1.0.0)
|
6
7
|
ffi (1.0.11)
|
7
8
|
git (1.2.5)
|
8
9
|
growl (1.0.3)
|
@@ -50,6 +51,7 @@ PLATFORMS
|
|
50
51
|
|
51
52
|
DEPENDENCIES
|
52
53
|
bundler (~> 1.0.0)
|
54
|
+
dimensions
|
53
55
|
growl
|
54
56
|
guard (>= 1.0.0)
|
55
57
|
guard-bundler (>= 0.1.3)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/lib/sumitup.rb
CHANGED
data/lib/sumitup/parser.rb
CHANGED
@@ -48,6 +48,9 @@ module Sumitup
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def summarize_fragment(node, max = nil)
|
51
|
+
# Always reset counts
|
52
|
+
self.word_count = 0
|
53
|
+
self.image_count = 0
|
51
54
|
clean = Sanitize.clean_node!(node,
|
52
55
|
:elements => elements,
|
53
56
|
:attributes => attributes,
|
@@ -95,6 +98,7 @@ module Sumitup
|
|
95
98
|
def snippet(text, max)
|
96
99
|
result = ''
|
97
100
|
count = 0
|
101
|
+
# TODO figure out support for pre that contains code blocks..
|
98
102
|
return [result, count] if is_blank?(text)
|
99
103
|
text.split.each do |word|
|
100
104
|
return [result.strip!, count] if count >= max
|
@@ -107,7 +111,24 @@ module Sumitup
|
|
107
111
|
def is_blank?(text)
|
108
112
|
text.nil? || text.empty?
|
109
113
|
end
|
110
|
-
|
114
|
+
|
115
|
+
def request_image_size(image_url)
|
116
|
+
width = nil
|
117
|
+
height = nil
|
118
|
+
open(image_url, 'rb') do |f|
|
119
|
+
img = Dimensions(f)
|
120
|
+
img.read
|
121
|
+
width = img.width
|
122
|
+
height = img.height
|
123
|
+
end
|
124
|
+
[width, height]
|
125
|
+
end
|
126
|
+
|
127
|
+
def image_height(existing_height, existing_width, image_width_limit)
|
128
|
+
ratio = image_width_limit.to_f/existing_width.to_f
|
129
|
+
(existing_height.to_f * ratio).to_i
|
130
|
+
end
|
131
|
+
|
111
132
|
def word_transformer
|
112
133
|
me = self
|
113
134
|
lambda do |env|
|
@@ -141,21 +162,23 @@ module Sumitup
|
|
141
162
|
node.remove
|
142
163
|
else
|
143
164
|
keep_it = false
|
144
|
-
|
145
|
-
if node.attributes['width']
|
146
|
-
width = node.attributes['width'].value.to_i rescue 0
|
147
|
-
keep_it = true if width > me.min_image_size
|
148
|
-
else
|
149
|
-
width = nil
|
150
|
-
keep_it = true
|
151
|
-
end
|
152
165
|
|
166
|
+
existing_width = node.attributes['width'].value.to_i rescue nil if node.attributes['width']
|
167
|
+
existing_height = node.attributes['height'].value.to_i rescue nil if node.attributes['height']
|
168
|
+
|
169
|
+
if !existing_width || !existing_height
|
170
|
+
image_url = node.attributes['src'] rescue nil
|
171
|
+
existing_width, existing_height = me.request_image_size(image_url) rescue [nil, nil] if image_url
|
172
|
+
end
|
173
|
+
|
174
|
+
existing_width ||= 0
|
175
|
+
|
176
|
+
keep_it = true if existing_width > me.min_image_size
|
177
|
+
|
153
178
|
if keep_it
|
154
179
|
me.image_count += 1
|
155
|
-
|
156
|
-
|
157
|
-
node.attributes['height'].remove if node.attributes['height']
|
158
|
-
end
|
180
|
+
node['height'] = me.image_height(existing_height, existing_width, me.image_width_limit).to_s
|
181
|
+
node['width'] = me.image_width_limit.to_s
|
159
182
|
else
|
160
183
|
node.remove
|
161
184
|
end
|
data/spec/sumitup/parser_spec.rb
CHANGED
@@ -7,6 +7,15 @@ describe Sumitup::Parser do
|
|
7
7
|
@html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
|
8
8
|
end
|
9
9
|
|
10
|
+
it "should used the default max_words" do
|
11
|
+
content = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est enim, accumsan sollicitudin convallis sed, tempor vel libero. Quisque nulla tortor,
|
12
|
+
rhoncus sit amet fermentum ut, imperdiet iaculis risus. Nunc vulputate arcu non turpis consequat molestie. Vestibulum ante ipsum primis in faucibus orci luctus
|
13
|
+
et ultrices posuere cubilia Curae; Nam blandit malesuada leo et posuere. Suspendisse potenti. '
|
14
|
+
parser = Sumitup::Parser.new(:max_words => 10)
|
15
|
+
result = parser.summarize(content)
|
16
|
+
result.should == 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est'
|
17
|
+
end
|
18
|
+
|
10
19
|
it "should summarize the content by number of words" do
|
11
20
|
parser = Sumitup::Parser.new(:max_words => 1000)
|
12
21
|
result = parser.summarize(@html, 5)
|
@@ -29,6 +38,13 @@ describe Sumitup::Parser do
|
|
29
38
|
result.should_not include('<span></span>')
|
30
39
|
end
|
31
40
|
|
41
|
+
it "should be reusable" do
|
42
|
+
parser = Sumitup::Parser.new(:max_words => 5)
|
43
|
+
result = parser.summarize(@html)
|
44
|
+
result2 = parser.summarize(@html)
|
45
|
+
result.should == result2
|
46
|
+
end
|
47
|
+
|
32
48
|
describe "Sanitize options" do
|
33
49
|
it "should remove html comments" do
|
34
50
|
result = Sumitup::Parser.new.summarize(@html, 100000)
|
@@ -57,7 +73,7 @@ describe Sumitup::Parser do
|
|
57
73
|
it "should set the width to 240 if width is greater than 240" do
|
58
74
|
parser = Sumitup::Parser.new(:image_width_limit => 240)
|
59
75
|
result = parser.summarize(@html, 10000)
|
60
|
-
result.should include(%Q{img src="http://www.example.com/big.jpg" width="240">})
|
76
|
+
result.should include(%Q{img src="http://www.example.com/big.jpg" width="240" height="240">})
|
61
77
|
end
|
62
78
|
|
63
79
|
it "should only allow 2 images" do
|
@@ -72,10 +88,10 @@ describe Sumitup::Parser do
|
|
72
88
|
result.should_not include('http://www.example.com/small.jpg')
|
73
89
|
end
|
74
90
|
|
75
|
-
it "should
|
91
|
+
it "should enlarge images that are not over the width limit" do
|
76
92
|
parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => 200)
|
77
93
|
result = parser.summarize(@html, 100000)
|
78
|
-
result.should include('<img src="http://www.example.com/photo.jpg" width="
|
94
|
+
result.should include('<img src="http://www.example.com/photo.jpg" width="200" height="200" title="" alt="">')
|
79
95
|
end
|
80
96
|
end
|
81
97
|
|
@@ -115,4 +131,43 @@ describe Sumitup::Parser do
|
|
115
131
|
end
|
116
132
|
end
|
117
133
|
|
134
|
+
describe "request_image_size" do
|
135
|
+
before do
|
136
|
+
@parser = Sumitup::Parser.new
|
137
|
+
end
|
138
|
+
it "should get width and height from the remote image" do
|
139
|
+
url = "http://upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png"
|
140
|
+
width, height = @parser.request_image_size(url)
|
141
|
+
width.should == 135
|
142
|
+
height.should == 155
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
describe "image_height" do
|
147
|
+
before do
|
148
|
+
@parser = Sumitup::Parser.new
|
149
|
+
end
|
150
|
+
it "should calculate a smaller height based on the width change" do
|
151
|
+
image_width_limit = 100
|
152
|
+
existing_height = 1000
|
153
|
+
existing_width = 1000
|
154
|
+
height = @parser.image_height(existing_height, existing_width, image_width_limit)
|
155
|
+
height.should == 100
|
156
|
+
end
|
157
|
+
it "should calculate a larger height based on the width change" do
|
158
|
+
image_width_limit = 100
|
159
|
+
existing_height = 10
|
160
|
+
existing_width = 50
|
161
|
+
height = @parser.image_height(existing_height, existing_width, image_width_limit)
|
162
|
+
height.should == 20
|
163
|
+
end
|
164
|
+
it "should calculate new height based on width" do
|
165
|
+
image_width_limit = 100
|
166
|
+
existing_height = 143
|
167
|
+
existing_width = 136
|
168
|
+
height = @parser.image_height(existing_height, existing_width, image_width_limit)
|
169
|
+
height.should == 105
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
118
173
|
end
|
data/sumitup.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "sumitup"
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.1.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Justin Ball"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-13"
|
13
13
|
s.description = "Given an html document or fragment this gem will build a summary of the content."
|
14
14
|
s.email = "justinball@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -46,6 +46,7 @@ Gem::Specification.new do |s|
|
|
46
46
|
|
47
47
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
48
48
|
s.add_runtime_dependency(%q<sanitize>, [">= 0"])
|
49
|
+
s.add_runtime_dependency(%q<dimensions>, [">= 0"])
|
49
50
|
s.add_development_dependency(%q<growl>, [">= 0"])
|
50
51
|
s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
51
52
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
@@ -58,6 +59,7 @@ Gem::Specification.new do |s|
|
|
58
59
|
s.add_development_dependency(%q<ruby-debug>, [">= 0"])
|
59
60
|
else
|
60
61
|
s.add_dependency(%q<sanitize>, [">= 0"])
|
62
|
+
s.add_dependency(%q<dimensions>, [">= 0"])
|
61
63
|
s.add_dependency(%q<growl>, [">= 0"])
|
62
64
|
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
63
65
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
@@ -71,6 +73,7 @@ Gem::Specification.new do |s|
|
|
71
73
|
end
|
72
74
|
else
|
73
75
|
s.add_dependency(%q<sanitize>, [">= 0"])
|
76
|
+
s.add_dependency(%q<dimensions>, [">= 0"])
|
74
77
|
s.add_dependency(%q<growl>, [">= 0"])
|
75
78
|
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
76
79
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sumitup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 3
|
10
|
-
version: 0.1.3
|
9
|
+
- 4
|
10
|
+
version: 0.1.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Justin Ball
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-13 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
name: sanitize
|
33
33
|
prerelease: false
|
34
34
|
- !ruby/object:Gem::Dependency
|
35
|
-
type: :development
|
35
|
+
type: :runtime
|
36
36
|
requirement: &id002 !ruby/object:Gem::Requirement
|
37
37
|
none: false
|
38
38
|
requirements:
|
@@ -43,11 +43,25 @@ dependencies:
|
|
43
43
|
- 0
|
44
44
|
version: "0"
|
45
45
|
version_requirements: *id002
|
46
|
-
name: growl
|
46
|
+
name: dimensions
|
47
47
|
prerelease: false
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
type: :development
|
50
50
|
requirement: &id003 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 3
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
version_requirements: *id003
|
60
|
+
name: growl
|
61
|
+
prerelease: false
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
type: :development
|
64
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
51
65
|
none: false
|
52
66
|
requirements:
|
53
67
|
- - ~>
|
@@ -58,12 +72,12 @@ dependencies:
|
|
58
72
|
- 8
|
59
73
|
- 0
|
60
74
|
version: 2.8.0
|
61
|
-
version_requirements: *
|
75
|
+
version_requirements: *id004
|
62
76
|
name: rspec
|
63
77
|
prerelease: false
|
64
78
|
- !ruby/object:Gem::Dependency
|
65
79
|
type: :development
|
66
|
-
requirement: &
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
81
|
none: false
|
68
82
|
requirements:
|
69
83
|
- - ~>
|
@@ -73,12 +87,12 @@ dependencies:
|
|
73
87
|
- 3
|
74
88
|
- 12
|
75
89
|
version: "3.12"
|
76
|
-
version_requirements: *
|
90
|
+
version_requirements: *id005
|
77
91
|
name: rdoc
|
78
92
|
prerelease: false
|
79
93
|
- !ruby/object:Gem::Dependency
|
80
94
|
type: :development
|
81
|
-
requirement: &
|
95
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
82
96
|
none: false
|
83
97
|
requirements:
|
84
98
|
- - ~>
|
@@ -89,12 +103,12 @@ dependencies:
|
|
89
103
|
- 0
|
90
104
|
- 0
|
91
105
|
version: 1.0.0
|
92
|
-
version_requirements: *
|
106
|
+
version_requirements: *id006
|
93
107
|
name: bundler
|
94
108
|
prerelease: false
|
95
109
|
- !ruby/object:Gem::Dependency
|
96
110
|
type: :development
|
97
|
-
requirement: &
|
111
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
98
112
|
none: false
|
99
113
|
requirements:
|
100
114
|
- - ~>
|
@@ -105,12 +119,12 @@ dependencies:
|
|
105
119
|
- 8
|
106
120
|
- 3
|
107
121
|
version: 1.8.3
|
108
|
-
version_requirements: *
|
122
|
+
version_requirements: *id007
|
109
123
|
name: jeweler
|
110
124
|
prerelease: false
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
type: :development
|
113
|
-
requirement: &
|
127
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
114
128
|
none: false
|
115
129
|
requirements:
|
116
130
|
- - ">="
|
@@ -119,12 +133,12 @@ dependencies:
|
|
119
133
|
segments:
|
120
134
|
- 0
|
121
135
|
version: "0"
|
122
|
-
version_requirements: *
|
136
|
+
version_requirements: *id008
|
123
137
|
name: rcov
|
124
138
|
prerelease: false
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
type: :development
|
127
|
-
requirement: &
|
141
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
128
142
|
none: false
|
129
143
|
requirements:
|
130
144
|
- - ">="
|
@@ -135,12 +149,12 @@ dependencies:
|
|
135
149
|
- 0
|
136
150
|
- 0
|
137
151
|
version: 1.0.0
|
138
|
-
version_requirements: *
|
152
|
+
version_requirements: *id009
|
139
153
|
name: guard
|
140
154
|
prerelease: false
|
141
155
|
- !ruby/object:Gem::Dependency
|
142
156
|
type: :development
|
143
|
-
requirement: &
|
157
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
144
158
|
none: false
|
145
159
|
requirements:
|
146
160
|
- - ">="
|
@@ -151,12 +165,12 @@ dependencies:
|
|
151
165
|
- 6
|
152
166
|
- 0
|
153
167
|
version: 0.6.0
|
154
|
-
version_requirements: *
|
168
|
+
version_requirements: *id010
|
155
169
|
name: guard-rspec
|
156
170
|
prerelease: false
|
157
171
|
- !ruby/object:Gem::Dependency
|
158
172
|
type: :development
|
159
|
-
requirement: &
|
173
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
160
174
|
none: false
|
161
175
|
requirements:
|
162
176
|
- - ">="
|
@@ -167,12 +181,12 @@ dependencies:
|
|
167
181
|
- 1
|
168
182
|
- 3
|
169
183
|
version: 0.1.3
|
170
|
-
version_requirements: *
|
184
|
+
version_requirements: *id011
|
171
185
|
name: guard-bundler
|
172
186
|
prerelease: false
|
173
187
|
- !ruby/object:Gem::Dependency
|
174
188
|
type: :development
|
175
|
-
requirement: &
|
189
|
+
requirement: &id012 !ruby/object:Gem::Requirement
|
176
190
|
none: false
|
177
191
|
requirements:
|
178
192
|
- - ">="
|
@@ -181,7 +195,7 @@ dependencies:
|
|
181
195
|
segments:
|
182
196
|
- 0
|
183
197
|
version: "0"
|
184
|
-
version_requirements: *
|
198
|
+
version_requirements: *id012
|
185
199
|
name: ruby-debug
|
186
200
|
prerelease: false
|
187
201
|
description: Given an html document or fragment this gem will build a summary of the content.
|