sumitup 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -71,8 +71,17 @@ module Sumitup
71
71
  else
72
72
  # if the text of the current node makes us go over then truncate it
73
73
  result, count = snippet(node.inner_text, max - self.word_count)
74
- self.word_count += count
75
- node.content = result
74
+ if count == 0 || is_blank?(result)
75
+ node.remove
76
+ else
77
+ self.word_count += count
78
+ node.content = result
79
+ end
80
+ end
81
+ else
82
+ # Remove empty nodes
83
+ if node.text.empty? && node.children.empty? && !['img', 'br'].include?(node.name)
84
+ node.remove
76
85
  end
77
86
  end
78
87
 
@@ -33,4 +33,11 @@
33
33
  </dd>
34
34
  </dl>
35
35
  <br />
36
+ <ul>
37
+ <li><span>one</span></li>
38
+ <li><span>two</span></li>
39
+ <li><span>three</span></li>
40
+ <li><span>four</span></li>
41
+ <li><span>five</span></li>
42
+ </ul>
36
43
  </div>
@@ -7,6 +7,28 @@ describe Sumitup::Parser do
7
7
  @html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
8
8
  end
9
9
 
10
+ it "should summarize the content by number of words" do
11
+ parser = Sumitup::Parser.new(:max_words => 1000)
12
+ result = parser.summarize(@html, 5)
13
+ result.should_not include('consectetur')
14
+ result.should include('amet')
15
+ end
16
+
17
+ it "should keep permitted html in summary" do
18
+ parser = Sumitup::Parser.new(:max_words => 1000)
19
+ result = parser.summarize(@html, 5)
20
+ result.should include('strong')
21
+ result.should include('blockquote')
22
+ end
23
+
24
+ it "should remove empty tags after truncating text" do
25
+ parser = Sumitup::Parser.new(:max_words => 5)
26
+ result = parser.summarize(@html)
27
+ result.should_not include('ul')
28
+ result.should_not include('li')
29
+ result.should_not include('<span></span>')
30
+ end
31
+
10
32
  describe "Sanitize options" do
11
33
  it "should remove html comments" do
12
34
  result = Sumitup::Parser.new.summarize(@html, 100000)
@@ -18,22 +40,8 @@ describe Sumitup::Parser do
18
40
  result.should_not include('<style type="text/css">')
19
41
  end
20
42
  end
21
-
43
+
22
44
  describe "word_transformer" do
23
- it "should summarize the content by number of words" do
24
- parser = Sumitup::Parser.new(:max_words => 1000)
25
- result = parser.summarize(@html, 5)
26
- result.should_not include('consectetur')
27
- result.should include('amet')
28
- end
29
-
30
- it "should keep permitted html in summary" do
31
- parser = Sumitup::Parser.new(:max_words => 1000)
32
- result = parser.summarize(@html, 5)
33
- result.should include('strong')
34
- result.should include('blockquote')
35
- end
36
-
37
45
  it "should remove empty tags" do
38
46
  result = Sumitup::Parser.new.summarize(@html, 100000)
39
47
  result.should_not include('<p></p>')
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "sumitup"
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Ball"]
12
- s.date = "2012-03-06"
12
+ s.date = "2012-03-07"
13
13
  s.description = "Given an html document or fragment this gem will build a summary of the content."
14
14
  s.email = "justinball@gmail.com"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sumitup
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 2
10
- version: 0.1.2
9
+ - 3
10
+ version: 0.1.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Justin Ball
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-06 00:00:00 Z
18
+ date: 2012-03-07 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime