sumitup 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,6 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
 
3
3
  gem 'sanitize'
4
+ gem 'dimensions'
4
5
 
5
6
  # Add dependencies to develop your gem here.
6
7
  # Include everything needed to run rake, tests, features, etc.
@@ -3,6 +3,7 @@ GEM
3
3
  specs:
4
4
  columnize (0.3.5)
5
5
  diff-lcs (1.1.3)
6
+ dimensions (1.0.0)
6
7
  ffi (1.0.11)
7
8
  git (1.2.5)
8
9
  growl (1.0.3)
@@ -50,6 +51,7 @@ PLATFORMS
50
51
 
51
52
  DEPENDENCIES
52
53
  bundler (~> 1.0.0)
54
+ dimensions
53
55
  growl
54
56
  guard (>= 1.0.0)
55
57
  guard-bundler (>= 0.1.3)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -1,6 +1,8 @@
1
1
  $LOAD_PATH << File.dirname(__FILE__) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
2
 
3
3
  require 'sanitize'
4
+ require 'open-uri'
5
+ require 'dimensions'
4
6
 
5
7
  # Get index status:
6
8
  # curl -XGET 'http://localhost:9200/_status'
@@ -48,6 +48,9 @@ module Sumitup
48
48
  end
49
49
 
50
50
  def summarize_fragment(node, max = nil)
51
+ # Always reset counts
52
+ self.word_count = 0
53
+ self.image_count = 0
51
54
  clean = Sanitize.clean_node!(node,
52
55
  :elements => elements,
53
56
  :attributes => attributes,
@@ -95,6 +98,7 @@ module Sumitup
95
98
  def snippet(text, max)
96
99
  result = ''
97
100
  count = 0
101
+ # TODO: figure out support for <pre> elements that contain code blocks.
98
102
  return [result, count] if is_blank?(text)
99
103
  text.split.each do |word|
100
104
  return [result.strip!, count] if count >= max
@@ -107,7 +111,24 @@ module Sumitup
107
111
  def is_blank?(text)
108
112
  text.nil? || text.empty?
109
113
  end
110
-
114
+
115
+ def request_image_size(image_url)
116
+ width = nil
117
+ height = nil
118
+ open(image_url, 'rb') do |f|
119
+ img = Dimensions(f)
120
+ img.read
121
+ width = img.width
122
+ height = img.height
123
+ end
124
+ [width, height]
125
+ end
126
+
127
+ def image_height(existing_height, existing_width, image_width_limit)
128
+ ratio = image_width_limit.to_f/existing_width.to_f
129
+ (existing_height.to_f * ratio).to_i
130
+ end
131
+
111
132
  def word_transformer
112
133
  me = self
113
134
  lambda do |env|
@@ -141,21 +162,23 @@ module Sumitup
141
162
  node.remove
142
163
  else
143
164
  keep_it = false
144
-
145
- if node.attributes['width']
146
- width = node.attributes['width'].value.to_i rescue 0
147
- keep_it = true if width > me.min_image_size
148
- else
149
- width = nil
150
- keep_it = true
151
- end
152
165
 
166
+ existing_width = node.attributes['width'].value.to_i rescue nil if node.attributes['width']
167
+ existing_height = node.attributes['height'].value.to_i rescue nil if node.attributes['height']
168
+
169
+ if !existing_width || !existing_height
170
+ image_url = node.attributes['src'] rescue nil
171
+ existing_width, existing_height = me.request_image_size(image_url) rescue [nil, nil] if image_url
172
+ end
173
+
174
+ existing_width ||= 0
175
+
176
+ keep_it = true if existing_width > me.min_image_size
177
+
153
178
  if keep_it
154
179
  me.image_count += 1
155
- if width == nil || width > me.image_width_limit
156
- node['width'] = me.image_width_limit.to_s
157
- node.attributes['height'].remove if node.attributes['height']
158
- end
180
+ node['height'] = me.image_height(existing_height, existing_width, me.image_width_limit).to_s
181
+ node['width'] = me.image_width_limit.to_s
159
182
  else
160
183
  node.remove
161
184
  end
@@ -7,6 +7,15 @@ describe Sumitup::Parser do
7
7
  @html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
8
8
  end
9
9
 
10
+ it "should used the default max_words" do
11
+ content = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est enim, accumsan sollicitudin convallis sed, tempor vel libero. Quisque nulla tortor,
12
+ rhoncus sit amet fermentum ut, imperdiet iaculis risus. Nunc vulputate arcu non turpis consequat molestie. Vestibulum ante ipsum primis in faucibus orci luctus
13
+ et ultrices posuere cubilia Curae; Nam blandit malesuada leo et posuere. Suspendisse potenti. '
14
+ parser = Sumitup::Parser.new(:max_words => 10)
15
+ result = parser.summarize(content)
16
+ result.should == 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est'
17
+ end
18
+
10
19
  it "should summarize the content by number of words" do
11
20
  parser = Sumitup::Parser.new(:max_words => 1000)
12
21
  result = parser.summarize(@html, 5)
@@ -29,6 +38,13 @@ describe Sumitup::Parser do
29
38
  result.should_not include('<span></span>')
30
39
  end
31
40
 
41
+ it "should be reusable" do
42
+ parser = Sumitup::Parser.new(:max_words => 5)
43
+ result = parser.summarize(@html)
44
+ result2 = parser.summarize(@html)
45
+ result.should == result2
46
+ end
47
+
32
48
  describe "Sanitize options" do
33
49
  it "should remove html comments" do
34
50
  result = Sumitup::Parser.new.summarize(@html, 100000)
@@ -57,7 +73,7 @@ describe Sumitup::Parser do
57
73
  it "should set the width to 240 if width is greater than 240" do
58
74
  parser = Sumitup::Parser.new(:image_width_limit => 240)
59
75
  result = parser.summarize(@html, 10000)
60
- result.should include(%Q{img src="http://www.example.com/big.jpg" width="240">})
76
+ result.should include(%Q{img src="http://www.example.com/big.jpg" width="240" height="240">})
61
77
  end
62
78
 
63
79
  it "should only allow 2 images" do
@@ -72,10 +88,10 @@ describe Sumitup::Parser do
72
88
  result.should_not include('http://www.example.com/small.jpg')
73
89
  end
74
90
 
75
- it "should keep images as is that are not over the width limit" do
91
+ it "should enlarge images that are not over the width limit" do
76
92
  parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => 200)
77
93
  result = parser.summarize(@html, 100000)
78
- result.should include('<img src="http://www.example.com/photo.jpg" width="150" height="150" title="" alt="">')
94
+ result.should include('<img src="http://www.example.com/photo.jpg" width="200" height="200" title="" alt="">')
79
95
  end
80
96
  end
81
97
 
@@ -115,4 +131,43 @@ describe Sumitup::Parser do
115
131
  end
116
132
  end
117
133
 
134
+ describe "request_image_size" do
135
+ before do
136
+ @parser = Sumitup::Parser.new
137
+ end
138
+ it "should get width and height from the remote image" do
139
+ url = "http://upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png"
140
+ width, height = @parser.request_image_size(url)
141
+ width.should == 135
142
+ height.should == 155
143
+ end
144
+ end
145
+
146
+ describe "image_height" do
147
+ before do
148
+ @parser = Sumitup::Parser.new
149
+ end
150
+ it "should calculate a smaller height based on the width change" do
151
+ image_width_limit = 100
152
+ existing_height = 1000
153
+ existing_width = 1000
154
+ height = @parser.image_height(existing_height, existing_width, image_width_limit)
155
+ height.should == 100
156
+ end
157
+ it "should calculate a larger height based on the width change" do
158
+ image_width_limit = 100
159
+ existing_height = 10
160
+ existing_width = 50
161
+ height = @parser.image_height(existing_height, existing_width, image_width_limit)
162
+ height.should == 20
163
+ end
164
+ it "should calculate new height based on width" do
165
+ image_width_limit = 100
166
+ existing_height = 143
167
+ existing_width = 136
168
+ height = @parser.image_height(existing_height, existing_width, image_width_limit)
169
+ height.should == 105
170
+ end
171
+ end
172
+
118
173
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "sumitup"
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Ball"]
12
- s.date = "2012-03-07"
12
+ s.date = "2012-03-13"
13
13
  s.description = "Given an html document or fragment this gem will build a summary of the content."
14
14
  s.email = "justinball@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -46,6 +46,7 @@ Gem::Specification.new do |s|
46
46
 
47
47
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
48
  s.add_runtime_dependency(%q<sanitize>, [">= 0"])
49
+ s.add_runtime_dependency(%q<dimensions>, [">= 0"])
49
50
  s.add_development_dependency(%q<growl>, [">= 0"])
50
51
  s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
51
52
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
@@ -58,6 +59,7 @@ Gem::Specification.new do |s|
58
59
  s.add_development_dependency(%q<ruby-debug>, [">= 0"])
59
60
  else
60
61
  s.add_dependency(%q<sanitize>, [">= 0"])
62
+ s.add_dependency(%q<dimensions>, [">= 0"])
61
63
  s.add_dependency(%q<growl>, [">= 0"])
62
64
  s.add_dependency(%q<rspec>, ["~> 2.8.0"])
63
65
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
@@ -71,6 +73,7 @@ Gem::Specification.new do |s|
71
73
  end
72
74
  else
73
75
  s.add_dependency(%q<sanitize>, [">= 0"])
76
+ s.add_dependency(%q<dimensions>, [">= 0"])
74
77
  s.add_dependency(%q<growl>, [">= 0"])
75
78
  s.add_dependency(%q<rspec>, ["~> 2.8.0"])
76
79
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sumitup
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Justin Ball
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-07 00:00:00 Z
18
+ date: 2012-03-13 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime
@@ -32,7 +32,7 @@ dependencies:
32
32
  name: sanitize
33
33
  prerelease: false
34
34
  - !ruby/object:Gem::Dependency
35
- type: :development
35
+ type: :runtime
36
36
  requirement: &id002 !ruby/object:Gem::Requirement
37
37
  none: false
38
38
  requirements:
@@ -43,11 +43,25 @@ dependencies:
43
43
  - 0
44
44
  version: "0"
45
45
  version_requirements: *id002
46
- name: growl
46
+ name: dimensions
47
47
  prerelease: false
48
48
  - !ruby/object:Gem::Dependency
49
49
  type: :development
50
50
  requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ version_requirements: *id003
60
+ name: growl
61
+ prerelease: false
62
+ - !ruby/object:Gem::Dependency
63
+ type: :development
64
+ requirement: &id004 !ruby/object:Gem::Requirement
51
65
  none: false
52
66
  requirements:
53
67
  - - ~>
@@ -58,12 +72,12 @@ dependencies:
58
72
  - 8
59
73
  - 0
60
74
  version: 2.8.0
61
- version_requirements: *id003
75
+ version_requirements: *id004
62
76
  name: rspec
63
77
  prerelease: false
64
78
  - !ruby/object:Gem::Dependency
65
79
  type: :development
66
- requirement: &id004 !ruby/object:Gem::Requirement
80
+ requirement: &id005 !ruby/object:Gem::Requirement
67
81
  none: false
68
82
  requirements:
69
83
  - - ~>
@@ -73,12 +87,12 @@ dependencies:
73
87
  - 3
74
88
  - 12
75
89
  version: "3.12"
76
- version_requirements: *id004
90
+ version_requirements: *id005
77
91
  name: rdoc
78
92
  prerelease: false
79
93
  - !ruby/object:Gem::Dependency
80
94
  type: :development
81
- requirement: &id005 !ruby/object:Gem::Requirement
95
+ requirement: &id006 !ruby/object:Gem::Requirement
82
96
  none: false
83
97
  requirements:
84
98
  - - ~>
@@ -89,12 +103,12 @@ dependencies:
89
103
  - 0
90
104
  - 0
91
105
  version: 1.0.0
92
- version_requirements: *id005
106
+ version_requirements: *id006
93
107
  name: bundler
94
108
  prerelease: false
95
109
  - !ruby/object:Gem::Dependency
96
110
  type: :development
97
- requirement: &id006 !ruby/object:Gem::Requirement
111
+ requirement: &id007 !ruby/object:Gem::Requirement
98
112
  none: false
99
113
  requirements:
100
114
  - - ~>
@@ -105,12 +119,12 @@ dependencies:
105
119
  - 8
106
120
  - 3
107
121
  version: 1.8.3
108
- version_requirements: *id006
122
+ version_requirements: *id007
109
123
  name: jeweler
110
124
  prerelease: false
111
125
  - !ruby/object:Gem::Dependency
112
126
  type: :development
113
- requirement: &id007 !ruby/object:Gem::Requirement
127
+ requirement: &id008 !ruby/object:Gem::Requirement
114
128
  none: false
115
129
  requirements:
116
130
  - - ">="
@@ -119,12 +133,12 @@ dependencies:
119
133
  segments:
120
134
  - 0
121
135
  version: "0"
122
- version_requirements: *id007
136
+ version_requirements: *id008
123
137
  name: rcov
124
138
  prerelease: false
125
139
  - !ruby/object:Gem::Dependency
126
140
  type: :development
127
- requirement: &id008 !ruby/object:Gem::Requirement
141
+ requirement: &id009 !ruby/object:Gem::Requirement
128
142
  none: false
129
143
  requirements:
130
144
  - - ">="
@@ -135,12 +149,12 @@ dependencies:
135
149
  - 0
136
150
  - 0
137
151
  version: 1.0.0
138
- version_requirements: *id008
152
+ version_requirements: *id009
139
153
  name: guard
140
154
  prerelease: false
141
155
  - !ruby/object:Gem::Dependency
142
156
  type: :development
143
- requirement: &id009 !ruby/object:Gem::Requirement
157
+ requirement: &id010 !ruby/object:Gem::Requirement
144
158
  none: false
145
159
  requirements:
146
160
  - - ">="
@@ -151,12 +165,12 @@ dependencies:
151
165
  - 6
152
166
  - 0
153
167
  version: 0.6.0
154
- version_requirements: *id009
168
+ version_requirements: *id010
155
169
  name: guard-rspec
156
170
  prerelease: false
157
171
  - !ruby/object:Gem::Dependency
158
172
  type: :development
159
- requirement: &id010 !ruby/object:Gem::Requirement
173
+ requirement: &id011 !ruby/object:Gem::Requirement
160
174
  none: false
161
175
  requirements:
162
176
  - - ">="
@@ -167,12 +181,12 @@ dependencies:
167
181
  - 1
168
182
  - 3
169
183
  version: 0.1.3
170
- version_requirements: *id010
184
+ version_requirements: *id011
171
185
  name: guard-bundler
172
186
  prerelease: false
173
187
  - !ruby/object:Gem::Dependency
174
188
  type: :development
175
- requirement: &id011 !ruby/object:Gem::Requirement
189
+ requirement: &id012 !ruby/object:Gem::Requirement
176
190
  none: false
177
191
  requirements:
178
192
  - - ">="
@@ -181,7 +195,7 @@ dependencies:
181
195
  segments:
182
196
  - 0
183
197
  version: "0"
184
- version_requirements: *id011
198
+ version_requirements: *id012
185
199
  name: ruby-debug
186
200
  prerelease: false
187
201
  description: Given an html document or fragment this gem will build a summary of the content.