sumitup 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,6 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
 
3
3
  gem 'sanitize'
4
+ gem 'dimensions'
4
5
 
5
6
  # Add dependencies to develop your gem here.
6
7
  # Include everything needed to run rake, tests, features, etc.
@@ -3,6 +3,7 @@ GEM
3
3
  specs:
4
4
  columnize (0.3.5)
5
5
  diff-lcs (1.1.3)
6
+ dimensions (1.0.0)
6
7
  ffi (1.0.11)
7
8
  git (1.2.5)
8
9
  growl (1.0.3)
@@ -50,6 +51,7 @@ PLATFORMS
50
51
 
51
52
  DEPENDENCIES
52
53
  bundler (~> 1.0.0)
54
+ dimensions
53
55
  growl
54
56
  guard (>= 1.0.0)
55
57
  guard-bundler (>= 0.1.3)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -1,6 +1,8 @@
1
1
  $LOAD_PATH << File.dirname(__FILE__) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
2
 
3
3
  require 'sanitize'
4
+ require 'open-uri'
5
+ require 'dimensions'
4
6
 
5
7
  # Get index status:
6
8
  # curl -XGET 'http://localhost:9200/_status'
@@ -48,6 +48,9 @@ module Sumitup
48
48
  end
49
49
 
50
50
  def summarize_fragment(node, max = nil)
51
+ # Always reset counts
52
+ self.word_count = 0
53
+ self.image_count = 0
51
54
  clean = Sanitize.clean_node!(node,
52
55
  :elements => elements,
53
56
  :attributes => attributes,
@@ -95,6 +98,7 @@ module Sumitup
95
98
  def snippet(text, max)
96
99
  result = ''
97
100
  count = 0
101
+ # TODO figure out support for pre that contains code blocks..
98
102
  return [result, count] if is_blank?(text)
99
103
  text.split.each do |word|
100
104
  return [result.strip!, count] if count >= max
@@ -107,7 +111,24 @@ module Sumitup
107
111
  def is_blank?(text)
108
112
  text.nil? || text.empty?
109
113
  end
110
-
114
+
115
+ def request_image_size(image_url)
116
+ width = nil
117
+ height = nil
118
+ open(image_url, 'rb') do |f|
119
+ img = Dimensions(f)
120
+ img.read
121
+ width = img.width
122
+ height = img.height
123
+ end
124
+ [width, height]
125
+ end
126
+
127
+ def image_height(existing_height, existing_width, image_width_limit)
128
+ ratio = image_width_limit.to_f/existing_width.to_f
129
+ (existing_height.to_f * ratio).to_i
130
+ end
131
+
111
132
  def word_transformer
112
133
  me = self
113
134
  lambda do |env|
@@ -141,21 +162,23 @@ module Sumitup
141
162
  node.remove
142
163
  else
143
164
  keep_it = false
144
-
145
- if node.attributes['width']
146
- width = node.attributes['width'].value.to_i rescue 0
147
- keep_it = true if width > me.min_image_size
148
- else
149
- width = nil
150
- keep_it = true
151
- end
152
165
 
166
+ existing_width = node.attributes['width'].value.to_i rescue nil if node.attributes['width']
167
+ existing_height = node.attributes['height'].value.to_i rescue nil if node.attributes['height']
168
+
169
+ if !existing_width || !existing_height
170
+ image_url = node.attributes['src'] rescue nil
171
+ existing_width, existing_height = me.request_image_size(image_url) rescue [nil, nil] if image_url
172
+ end
173
+
174
+ existing_width ||= 0
175
+
176
+ keep_it = true if existing_width > me.min_image_size
177
+
153
178
  if keep_it
154
179
  me.image_count += 1
155
- if width == nil || width > me.image_width_limit
156
- node['width'] = me.image_width_limit.to_s
157
- node.attributes['height'].remove if node.attributes['height']
158
- end
180
+ node['height'] = me.image_height(existing_height, existing_width, me.image_width_limit).to_s
181
+ node['width'] = me.image_width_limit.to_s
159
182
  else
160
183
  node.remove
161
184
  end
@@ -7,6 +7,15 @@ describe Sumitup::Parser do
7
7
  @html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
8
8
  end
9
9
 
10
+ it "should used the default max_words" do
11
+ content = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est enim, accumsan sollicitudin convallis sed, tempor vel libero. Quisque nulla tortor,
12
+ rhoncus sit amet fermentum ut, imperdiet iaculis risus. Nunc vulputate arcu non turpis consequat molestie. Vestibulum ante ipsum primis in faucibus orci luctus
13
+ et ultrices posuere cubilia Curae; Nam blandit malesuada leo et posuere. Suspendisse potenti. '
14
+ parser = Sumitup::Parser.new(:max_words => 10)
15
+ result = parser.summarize(content)
16
+ result.should == 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec est'
17
+ end
18
+
10
19
  it "should summarize the content by number of words" do
11
20
  parser = Sumitup::Parser.new(:max_words => 1000)
12
21
  result = parser.summarize(@html, 5)
@@ -29,6 +38,13 @@ describe Sumitup::Parser do
29
38
  result.should_not include('<span></span>')
30
39
  end
31
40
 
41
+ it "should be reusable" do
42
+ parser = Sumitup::Parser.new(:max_words => 5)
43
+ result = parser.summarize(@html)
44
+ result2 = parser.summarize(@html)
45
+ result.should == result2
46
+ end
47
+
32
48
  describe "Sanitize options" do
33
49
  it "should remove html comments" do
34
50
  result = Sumitup::Parser.new.summarize(@html, 100000)
@@ -57,7 +73,7 @@ describe Sumitup::Parser do
57
73
  it "should set the width to 240 if width is greater than 240" do
58
74
  parser = Sumitup::Parser.new(:image_width_limit => 240)
59
75
  result = parser.summarize(@html, 10000)
60
- result.should include(%Q{img src="http://www.example.com/big.jpg" width="240">})
76
+ result.should include(%Q{img src="http://www.example.com/big.jpg" width="240" height="240">})
61
77
  end
62
78
 
63
79
  it "should only allow 2 images" do
@@ -72,10 +88,10 @@ describe Sumitup::Parser do
72
88
  result.should_not include('http://www.example.com/small.jpg')
73
89
  end
74
90
 
75
- it "should keep images as is that are not over the width limit" do
91
+ it "should enlarge images that are not over the width limit" do
76
92
  parser = Sumitup::Parser.new(:max_images => 1000, :image_width_limit => 200)
77
93
  result = parser.summarize(@html, 100000)
78
- result.should include('<img src="http://www.example.com/photo.jpg" width="150" height="150" title="" alt="">')
94
+ result.should include('<img src="http://www.example.com/photo.jpg" width="200" height="200" title="" alt="">')
79
95
  end
80
96
  end
81
97
 
@@ -115,4 +131,43 @@ describe Sumitup::Parser do
115
131
  end
116
132
  end
117
133
 
134
+ describe "request_image_size" do
135
+ before do
136
+ @parser = Sumitup::Parser.new
137
+ end
138
+ it "should get width and height from the remote image" do
139
+ url = "http://upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png"
140
+ width, height = @parser.request_image_size(url)
141
+ width.should == 135
142
+ height.should == 155
143
+ end
144
+ end
145
+
146
+ describe "image_height" do
147
+ before do
148
+ @parser = Sumitup::Parser.new
149
+ end
150
+ it "should calculate a smaller height based on the width change" do
151
+ image_width_limit = 100
152
+ existing_height = 1000
153
+ existing_width = 1000
154
+ height = @parser.image_height(existing_height, existing_width, image_width_limit)
155
+ height.should == 100
156
+ end
157
+ it "should calculate a larger height based on the width change" do
158
+ image_width_limit = 100
159
+ existing_height = 10
160
+ existing_width = 50
161
+ height = @parser.image_height(existing_height, existing_width, image_width_limit)
162
+ height.should == 20
163
+ end
164
+ it "should calculate new height based on width" do
165
+ image_width_limit = 100
166
+ existing_height = 143
167
+ existing_width = 136
168
+ height = @parser.image_height(existing_height, existing_width, image_width_limit)
169
+ height.should == 105
170
+ end
171
+ end
172
+
118
173
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "sumitup"
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Ball"]
12
- s.date = "2012-03-07"
12
+ s.date = "2012-03-13"
13
13
  s.description = "Given an html document or fragment this gem will build a summary of the content."
14
14
  s.email = "justinball@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -46,6 +46,7 @@ Gem::Specification.new do |s|
46
46
 
47
47
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
48
  s.add_runtime_dependency(%q<sanitize>, [">= 0"])
49
+ s.add_runtime_dependency(%q<dimensions>, [">= 0"])
49
50
  s.add_development_dependency(%q<growl>, [">= 0"])
50
51
  s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
51
52
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
@@ -58,6 +59,7 @@ Gem::Specification.new do |s|
58
59
  s.add_development_dependency(%q<ruby-debug>, [">= 0"])
59
60
  else
60
61
  s.add_dependency(%q<sanitize>, [">= 0"])
62
+ s.add_dependency(%q<dimensions>, [">= 0"])
61
63
  s.add_dependency(%q<growl>, [">= 0"])
62
64
  s.add_dependency(%q<rspec>, ["~> 2.8.0"])
63
65
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
@@ -71,6 +73,7 @@ Gem::Specification.new do |s|
71
73
  end
72
74
  else
73
75
  s.add_dependency(%q<sanitize>, [">= 0"])
76
+ s.add_dependency(%q<dimensions>, [">= 0"])
74
77
  s.add_dependency(%q<growl>, [">= 0"])
75
78
  s.add_dependency(%q<rspec>, ["~> 2.8.0"])
76
79
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sumitup
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Justin Ball
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-07 00:00:00 Z
18
+ date: 2012-03-13 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime
@@ -32,7 +32,7 @@ dependencies:
32
32
  name: sanitize
33
33
  prerelease: false
34
34
  - !ruby/object:Gem::Dependency
35
- type: :development
35
+ type: :runtime
36
36
  requirement: &id002 !ruby/object:Gem::Requirement
37
37
  none: false
38
38
  requirements:
@@ -43,11 +43,25 @@ dependencies:
43
43
  - 0
44
44
  version: "0"
45
45
  version_requirements: *id002
46
- name: growl
46
+ name: dimensions
47
47
  prerelease: false
48
48
  - !ruby/object:Gem::Dependency
49
49
  type: :development
50
50
  requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ version_requirements: *id003
60
+ name: growl
61
+ prerelease: false
62
+ - !ruby/object:Gem::Dependency
63
+ type: :development
64
+ requirement: &id004 !ruby/object:Gem::Requirement
51
65
  none: false
52
66
  requirements:
53
67
  - - ~>
@@ -58,12 +72,12 @@ dependencies:
58
72
  - 8
59
73
  - 0
60
74
  version: 2.8.0
61
- version_requirements: *id003
75
+ version_requirements: *id004
62
76
  name: rspec
63
77
  prerelease: false
64
78
  - !ruby/object:Gem::Dependency
65
79
  type: :development
66
- requirement: &id004 !ruby/object:Gem::Requirement
80
+ requirement: &id005 !ruby/object:Gem::Requirement
67
81
  none: false
68
82
  requirements:
69
83
  - - ~>
@@ -73,12 +87,12 @@ dependencies:
73
87
  - 3
74
88
  - 12
75
89
  version: "3.12"
76
- version_requirements: *id004
90
+ version_requirements: *id005
77
91
  name: rdoc
78
92
  prerelease: false
79
93
  - !ruby/object:Gem::Dependency
80
94
  type: :development
81
- requirement: &id005 !ruby/object:Gem::Requirement
95
+ requirement: &id006 !ruby/object:Gem::Requirement
82
96
  none: false
83
97
  requirements:
84
98
  - - ~>
@@ -89,12 +103,12 @@ dependencies:
89
103
  - 0
90
104
  - 0
91
105
  version: 1.0.0
92
- version_requirements: *id005
106
+ version_requirements: *id006
93
107
  name: bundler
94
108
  prerelease: false
95
109
  - !ruby/object:Gem::Dependency
96
110
  type: :development
97
- requirement: &id006 !ruby/object:Gem::Requirement
111
+ requirement: &id007 !ruby/object:Gem::Requirement
98
112
  none: false
99
113
  requirements:
100
114
  - - ~>
@@ -105,12 +119,12 @@ dependencies:
105
119
  - 8
106
120
  - 3
107
121
  version: 1.8.3
108
- version_requirements: *id006
122
+ version_requirements: *id007
109
123
  name: jeweler
110
124
  prerelease: false
111
125
  - !ruby/object:Gem::Dependency
112
126
  type: :development
113
- requirement: &id007 !ruby/object:Gem::Requirement
127
+ requirement: &id008 !ruby/object:Gem::Requirement
114
128
  none: false
115
129
  requirements:
116
130
  - - ">="
@@ -119,12 +133,12 @@ dependencies:
119
133
  segments:
120
134
  - 0
121
135
  version: "0"
122
- version_requirements: *id007
136
+ version_requirements: *id008
123
137
  name: rcov
124
138
  prerelease: false
125
139
  - !ruby/object:Gem::Dependency
126
140
  type: :development
127
- requirement: &id008 !ruby/object:Gem::Requirement
141
+ requirement: &id009 !ruby/object:Gem::Requirement
128
142
  none: false
129
143
  requirements:
130
144
  - - ">="
@@ -135,12 +149,12 @@ dependencies:
135
149
  - 0
136
150
  - 0
137
151
  version: 1.0.0
138
- version_requirements: *id008
152
+ version_requirements: *id009
139
153
  name: guard
140
154
  prerelease: false
141
155
  - !ruby/object:Gem::Dependency
142
156
  type: :development
143
- requirement: &id009 !ruby/object:Gem::Requirement
157
+ requirement: &id010 !ruby/object:Gem::Requirement
144
158
  none: false
145
159
  requirements:
146
160
  - - ">="
@@ -151,12 +165,12 @@ dependencies:
151
165
  - 6
152
166
  - 0
153
167
  version: 0.6.0
154
- version_requirements: *id009
168
+ version_requirements: *id010
155
169
  name: guard-rspec
156
170
  prerelease: false
157
171
  - !ruby/object:Gem::Dependency
158
172
  type: :development
159
- requirement: &id010 !ruby/object:Gem::Requirement
173
+ requirement: &id011 !ruby/object:Gem::Requirement
160
174
  none: false
161
175
  requirements:
162
176
  - - ">="
@@ -167,12 +181,12 @@ dependencies:
167
181
  - 1
168
182
  - 3
169
183
  version: 0.1.3
170
- version_requirements: *id010
184
+ version_requirements: *id011
171
185
  name: guard-bundler
172
186
  prerelease: false
173
187
  - !ruby/object:Gem::Dependency
174
188
  type: :development
175
- requirement: &id011 !ruby/object:Gem::Requirement
189
+ requirement: &id012 !ruby/object:Gem::Requirement
176
190
  none: false
177
191
  requirements:
178
192
  - - ">="
@@ -181,7 +195,7 @@ dependencies:
181
195
  segments:
182
196
  - 0
183
197
  version: "0"
184
- version_requirements: *id011
198
+ version_requirements: *id012
185
199
  name: ruby-debug
186
200
  prerelease: false
187
201
  description: Given an html document or fragment this gem will build a summary of the content.