sumitup 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/VERSION +1 -1
- data/lib/sumitup.rb +2 -0
- data/lib/sumitup/parser.rb +36 -13
- data/spec/sumitup/parser_spec.rb +58 -3
- data/sumitup.gemspec +5 -2
- metadata +37 -23
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -3,6 +3,7 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
columnize (0.3.5)
|
5
5
|
diff-lcs (1.1.3)
|
6
|
+
dimensions (1.0.0)
|
6
7
|
ffi (1.0.11)
|
7
8
|
git (1.2.5)
|
8
9
|
growl (1.0.3)
|
@@ -50,6 +51,7 @@ PLATFORMS
|
|
50
51
|
|
51
52
|
DEPENDENCIES
|
52
53
|
bundler (~> 1.0.0)
|
54
|
+
dimensions
|
53
55
|
growl
|
54
56
|
guard (>= 1.0.0)
|
55
57
|
guard-bundler (>= 0.1.3)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/lib/sumitup.rb
CHANGED
data/lib/sumitup/parser.rb
CHANGED
@@ -48,6 +48,9 @@ module Sumitup
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def summarize_fragment(node, max = nil)
|
51
|
+
# Always reset counts
|
52
|
+
self.word_count = 0
|
53
|
+
self.image_count = 0
|
51
54
|
clean = Sanitize.clean_node!(node,
|
52
55
|
:elements => elements,
|
53
56
|
:attributes => attributes,
|
@@ -95,6 +98,7 @@ module Sumitup
|
|
95
98
|
def snippet(text, max)
|
96
99
|
result = ''
|
97
100
|
count = 0
|
101
|
+
# TODO figure out support for pre that contains code blocks..
|
98
102
|
return [result, count] if is_blank?(text)
|
99
103
|
text.split.each do |word|
|
100
104
|
return [result.strip!, count] if count >= max
|
@@ -107,7 +111,24 @@ module Sumitup
|
|
107
111
|
def is_blank?(text)
|
108
112
|
text.nil? || text.empty?
|
109
113
|
end
|
110
|
-
|
114
|
+
|
115
|
+
def request_image_size(image_url)
|
116
|
+
width = nil
|
117
|
+
height = nil
|
118
|
+
open(image_url, 'rb') do |f|
|
119
|
+
img = Dimensions(f)
|
120
|
+
img.read
|
121
|
+
width = img.width
|
122
|
+
height = img.height
|
123
|
+
end
|
124
|
+
[width, height]
|
125
|
+
end
|
126
|
+
|
127
|
+
def image_height(existing_height, existing_width, image_width_limit)
|
128
|
+
ratio = image_width_limit.to_f/existing_width.to_f
|
129
|
+
(existing_height.to_f * ratio).to_i
|
130
|
+
end
|
131
|
+
|
111
132
|
def word_transformer
|
112
133
|
me = self
|
113
134
|
lambda do |env|
|
@@ -141,21 +162,23 @@ module Sumitup
|
|
141
162
|
node.remove
|
142
163
|
else
|
143
164
|
keep_it = false
|
144
|
-
|
145
|
-
if node.attributes['width']
|
146
|
-
width = node.attributes['width'].value.to_i rescue 0
|
147
|
-
keep_it = true if width > me.min_image_size
|
148
|
-
else
|
149
|
-
width = nil
|
150
|
-
keep_it = true
|
151
|
-
end
|
152
165
|
|
166
|
+
existing_width = node.attributes['width'].value.to_i rescue nil if node.attributes['width']
|
167
|
+
existing_height = node.attributes['height'].value.to_i rescue nil if node.attributes['height']
|
168
|
+
|
169
|
+
if !existing_width || !existing_height
|
170
|
+
image_url = node.attributes['src'] rescue nil
|
171
|
+
existing_width, existing_height = me.request_image_size(image_url) rescue [nil, nil] if image_url
|
172
|
+
end
|
173
|
+
|
174
|
+
existing_width ||= 0
|
175
|
+
|
176
|
+
keep_it = true if existing_width > me.min_image_size
|
177
|
+
|
153
178
|
if keep_it
|
154
179
|
me.image_count += 1
|
155
|
-
|
156
|
-
|
157
|
-
node.attributes['height'].remove if node.attributes['height']
|
158
|
-
end
|
180
|
+
node['height'] = me.image_height(existing_height, existing_width, me.image_width_limit).to_s
|
181
|
+
node['width'] = me.image_width_limit.to_s
|
159
182
|
else
|
160
183
|
node.remove
|
161
184
|
end
|
data/spec/sumitup/parser_spec.rb
CHANGED
@@ -7,6 +7,15 @@ describe Sumitup::Parser do
|
|
7
7
|
@html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
|
8
8
|
end
|
9
9
|
|
10
|
+
it "should used the default max_words" do
|
11
|
+
content = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est enim, accumsan sollicitudin convallis sed, tempor vel libero. Quisque nulla tortor,
|
12
|
+
rhoncus sit amet fermentum ut, imperdiet iaculis risus. Nunc vulputate arcu non turpis consequat molestie. Vestibulum ante ipsum primis in faucibus orci luctus
|
13
|
+
et ultrices posuere cubilia Curae; Nam blandit malesuada leo et posuere. Suspendisse potenti. '
|
14
|
+
parser = Sumitup::Parser.new(:max_words => 10)
|
15
|
+
result = parser.summarize(content)
|
16
|
+
result.should == 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est'
|
17
|
+
end
|
18
|
+
|
10
19
|
it "should summarize the content by number of words" do
|
11
20
|
parser = Sumitup::Parser.new(:max_words => 1000)
|
12
21
|
result = parser.summarize(@html, 5)
|
@@ -29,6 +38,13 @@ describe Sumitup::Parser do
|
|
29
38
|
result.should_not include('<span></span>')
|
30
39
|
end
|
31
40
|
|
41
|
+
it "should be reusable" do
|
42
|
+
parser = Sumitup::Parser.new(:max_words => 5)
|
43
|
+
result = parser.summarize(@html)
|
44
|
+
result2 = parser.summarize(@html)
|
45
|
+
result.should == result2
|
46
|
+
end
|
47
|
+
|
32
48
|
describe "Sanitize options" do
|
33
49
|
it "should remove html comments" do
|
34
50
|
result = Sumitup::Parser.new.summarize(@html, 100000)
|
@@ -57,7 +73,7 @@ describe Sumitup::Parser do
|
|
57
73
|
it "should set the width to 240 if width is greater than 240" do
|
58
74
|
parser = Sumitup::Parser.new(:image_width_limit => 240)
|
59
75
|
result = parser.summarize(@html, 10000)
|
60
|
-
result.should include(%Q{img src="http://www.example.com/big.jpg" width="240">})
|
76
|
+
result.should include(%Q{img src="http://www.example.com/big.jpg" width="240" height="240">})
|
61
77
|
end
|
62
78
|
|
63
79
|
it "should only allow 2 images" do
|
@@ -72,10 +88,10 @@ describe Sumitup::Parser do
|
|
72
88
|
result.should_not include('http://www.example.com/small.jpg')
|
73
89
|
end
|
74
90
|
|
75
|
-
it "should
|
91
|
+
it "should enlarge images that are not over the width limit" do
|
76
92
|
parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => 200)
|
77
93
|
result = parser.summarize(@html, 100000)
|
78
|
-
result.should include('<img src="http://www.example.com/photo.jpg" width="
|
94
|
+
result.should include('<img src="http://www.example.com/photo.jpg" width="200" height="200" title="" alt="">')
|
79
95
|
end
|
80
96
|
end
|
81
97
|
|
@@ -115,4 +131,43 @@ describe Sumitup::Parser do
|
|
115
131
|
end
|
116
132
|
end
|
117
133
|
|
134
|
+
describe "request_image_size" do
|
135
|
+
before do
|
136
|
+
@parser = Sumitup::Parser.new
|
137
|
+
end
|
138
|
+
it "should get width and height from the remote image" do
|
139
|
+
url = "http://upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png"
|
140
|
+
width, height = @parser.request_image_size(url)
|
141
|
+
width.should == 135
|
142
|
+
height.should == 155
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
describe "image_height" do
|
147
|
+
before do
|
148
|
+
@parser = Sumitup::Parser.new
|
149
|
+
end
|
150
|
+
it "should calculate a smaller height based on the width change" do
|
151
|
+
image_width_limit = 100
|
152
|
+
existing_height = 1000
|
153
|
+
existing_width = 1000
|
154
|
+
height = @parser.image_height(existing_height, existing_width, image_width_limit)
|
155
|
+
height.should == 100
|
156
|
+
end
|
157
|
+
it "should calculate a larger height based on the width change" do
|
158
|
+
image_width_limit = 100
|
159
|
+
existing_height = 10
|
160
|
+
existing_width = 50
|
161
|
+
height = @parser.image_height(existing_height, existing_width, image_width_limit)
|
162
|
+
height.should == 20
|
163
|
+
end
|
164
|
+
it "should calculate new height based on width" do
|
165
|
+
image_width_limit = 100
|
166
|
+
existing_height = 143
|
167
|
+
existing_width = 136
|
168
|
+
height = @parser.image_height(existing_height, existing_width, image_width_limit)
|
169
|
+
height.should == 105
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
118
173
|
end
|
data/sumitup.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "sumitup"
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.1.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Justin Ball"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-13"
|
13
13
|
s.description = "Given an html document or fragment this gem will build a summary of the content."
|
14
14
|
s.email = "justinball@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -46,6 +46,7 @@ Gem::Specification.new do |s|
|
|
46
46
|
|
47
47
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
48
48
|
s.add_runtime_dependency(%q<sanitize>, [">= 0"])
|
49
|
+
s.add_runtime_dependency(%q<dimensions>, [">= 0"])
|
49
50
|
s.add_development_dependency(%q<growl>, [">= 0"])
|
50
51
|
s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
51
52
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
@@ -58,6 +59,7 @@ Gem::Specification.new do |s|
|
|
58
59
|
s.add_development_dependency(%q<ruby-debug>, [">= 0"])
|
59
60
|
else
|
60
61
|
s.add_dependency(%q<sanitize>, [">= 0"])
|
62
|
+
s.add_dependency(%q<dimensions>, [">= 0"])
|
61
63
|
s.add_dependency(%q<growl>, [">= 0"])
|
62
64
|
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
63
65
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
@@ -71,6 +73,7 @@ Gem::Specification.new do |s|
|
|
71
73
|
end
|
72
74
|
else
|
73
75
|
s.add_dependency(%q<sanitize>, [">= 0"])
|
76
|
+
s.add_dependency(%q<dimensions>, [">= 0"])
|
74
77
|
s.add_dependency(%q<growl>, [">= 0"])
|
75
78
|
s.add_dependency(%q<rspec>, ["~> 2.8.0"])
|
76
79
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sumitup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 3
|
10
|
-
version: 0.1.3
|
9
|
+
- 4
|
10
|
+
version: 0.1.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Justin Ball
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-13 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
name: sanitize
|
33
33
|
prerelease: false
|
34
34
|
- !ruby/object:Gem::Dependency
|
35
|
-
type: :development
|
35
|
+
type: :runtime
|
36
36
|
requirement: &id002 !ruby/object:Gem::Requirement
|
37
37
|
none: false
|
38
38
|
requirements:
|
@@ -43,11 +43,25 @@ dependencies:
|
|
43
43
|
- 0
|
44
44
|
version: "0"
|
45
45
|
version_requirements: *id002
|
46
|
-
name: growl
|
46
|
+
name: dimensions
|
47
47
|
prerelease: false
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
type: :development
|
50
50
|
requirement: &id003 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 3
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
version_requirements: *id003
|
60
|
+
name: growl
|
61
|
+
prerelease: false
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
type: :development
|
64
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
51
65
|
none: false
|
52
66
|
requirements:
|
53
67
|
- - ~>
|
@@ -58,12 +72,12 @@ dependencies:
|
|
58
72
|
- 8
|
59
73
|
- 0
|
60
74
|
version: 2.8.0
|
61
|
-
version_requirements: *
|
75
|
+
version_requirements: *id004
|
62
76
|
name: rspec
|
63
77
|
prerelease: false
|
64
78
|
- !ruby/object:Gem::Dependency
|
65
79
|
type: :development
|
66
|
-
requirement: &
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
81
|
none: false
|
68
82
|
requirements:
|
69
83
|
- - ~>
|
@@ -73,12 +87,12 @@ dependencies:
|
|
73
87
|
- 3
|
74
88
|
- 12
|
75
89
|
version: "3.12"
|
76
|
-
version_requirements: *
|
90
|
+
version_requirements: *id005
|
77
91
|
name: rdoc
|
78
92
|
prerelease: false
|
79
93
|
- !ruby/object:Gem::Dependency
|
80
94
|
type: :development
|
81
|
-
requirement: &
|
95
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
82
96
|
none: false
|
83
97
|
requirements:
|
84
98
|
- - ~>
|
@@ -89,12 +103,12 @@ dependencies:
|
|
89
103
|
- 0
|
90
104
|
- 0
|
91
105
|
version: 1.0.0
|
92
|
-
version_requirements: *
|
106
|
+
version_requirements: *id006
|
93
107
|
name: bundler
|
94
108
|
prerelease: false
|
95
109
|
- !ruby/object:Gem::Dependency
|
96
110
|
type: :development
|
97
|
-
requirement: &
|
111
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
98
112
|
none: false
|
99
113
|
requirements:
|
100
114
|
- - ~>
|
@@ -105,12 +119,12 @@ dependencies:
|
|
105
119
|
- 8
|
106
120
|
- 3
|
107
121
|
version: 1.8.3
|
108
|
-
version_requirements: *
|
122
|
+
version_requirements: *id007
|
109
123
|
name: jeweler
|
110
124
|
prerelease: false
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
type: :development
|
113
|
-
requirement: &
|
127
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
114
128
|
none: false
|
115
129
|
requirements:
|
116
130
|
- - ">="
|
@@ -119,12 +133,12 @@ dependencies:
|
|
119
133
|
segments:
|
120
134
|
- 0
|
121
135
|
version: "0"
|
122
|
-
version_requirements: *
|
136
|
+
version_requirements: *id008
|
123
137
|
name: rcov
|
124
138
|
prerelease: false
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
type: :development
|
127
|
-
requirement: &
|
141
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
128
142
|
none: false
|
129
143
|
requirements:
|
130
144
|
- - ">="
|
@@ -135,12 +149,12 @@ dependencies:
|
|
135
149
|
- 0
|
136
150
|
- 0
|
137
151
|
version: 1.0.0
|
138
|
-
version_requirements: *
|
152
|
+
version_requirements: *id009
|
139
153
|
name: guard
|
140
154
|
prerelease: false
|
141
155
|
- !ruby/object:Gem::Dependency
|
142
156
|
type: :development
|
143
|
-
requirement: &
|
157
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
144
158
|
none: false
|
145
159
|
requirements:
|
146
160
|
- - ">="
|
@@ -151,12 +165,12 @@ dependencies:
|
|
151
165
|
- 6
|
152
166
|
- 0
|
153
167
|
version: 0.6.0
|
154
|
-
version_requirements: *
|
168
|
+
version_requirements: *id010
|
155
169
|
name: guard-rspec
|
156
170
|
prerelease: false
|
157
171
|
- !ruby/object:Gem::Dependency
|
158
172
|
type: :development
|
159
|
-
requirement: &
|
173
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
160
174
|
none: false
|
161
175
|
requirements:
|
162
176
|
- - ">="
|
@@ -167,12 +181,12 @@ dependencies:
|
|
167
181
|
- 1
|
168
182
|
- 3
|
169
183
|
version: 0.1.3
|
170
|
-
version_requirements: *
|
184
|
+
version_requirements: *id011
|
171
185
|
name: guard-bundler
|
172
186
|
prerelease: false
|
173
187
|
- !ruby/object:Gem::Dependency
|
174
188
|
type: :development
|
175
|
-
requirement: &
|
189
|
+
requirement: &id012 !ruby/object:Gem::Requirement
|
176
190
|
none: false
|
177
191
|
requirements:
|
178
192
|
- - ">="
|
@@ -181,7 +195,7 @@ dependencies:
|
|
181
195
|
segments:
|
182
196
|
- 0
|
183
197
|
version: "0"
|
184
|
-
version_requirements: *
|
198
|
+
version_requirements: *id012
|
185
199
|
name: ruby-debug
|
186
200
|
prerelease: false
|
187
201
|
description: Given an html document or fragment this gem will build a summary of the content.
|