sumitup 0.1.2 → 0.1.3

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -71,8 +71,17 @@ module Sumitup
71
71
  else
72
72
  # if the text of the current node makes us go over then truncate it
73
73
  result, count = snippet(node.inner_text, max - self.word_count)
74
- self.word_count += count
75
- node.content = result
74
+ if count == 0 || is_blank?(result)
75
+ node.remove
76
+ else
77
+ self.word_count += count
78
+ node.content = result
79
+ end
80
+ end
81
+ else
82
+ # Remove empty nodes
83
+ if node.text.empty? && node.children.empty? && !['img', 'br'].include?(node.name)
84
+ node.remove
76
85
  end
77
86
  end
78
87
 
@@ -33,4 +33,11 @@
33
33
  </dd>
34
34
  </dl>
35
35
  <br />
36
+ <ul>
37
+ <li><span>one</span></li>
38
+ <li><span>two</span></li>
39
+ <li><span>three</span></li>
40
+ <li><span>four</span></li>
41
+ <li><span>five</span></li>
42
+ </ul>
36
43
  </div>
@@ -7,6 +7,28 @@ describe Sumitup::Parser do
7
7
  @html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
8
8
  end
9
9
 
10
+ it "should summarize the content by number of words" do
11
+ parser = Sumitup::Parser.new(:max_words => 1000)
12
+ result = parser.summarize(@html, 5)
13
+ result.should_not include('consectetur')
14
+ result.should include('amet')
15
+ end
16
+
17
+ it "should keep permitted html in summary" do
18
+ parser = Sumitup::Parser.new(:max_words => 1000)
19
+ result = parser.summarize(@html, 5)
20
+ result.should include('strong')
21
+ result.should include('blockquote')
22
+ end
23
+
24
+ it "should remove empty tags after truncating text" do
25
+ parser = Sumitup::Parser.new(:max_words => 5)
26
+ result = parser.summarize(@html)
27
+ result.should_not include('ul')
28
+ result.should_not include('li')
29
+ result.should_not include('<span></span>')
30
+ end
31
+
10
32
  describe "Sanitize options" do
11
33
  it "should remove html comments" do
12
34
  result = Sumitup::Parser.new.summarize(@html, 100000)
@@ -18,22 +40,8 @@ describe Sumitup::Parser do
18
40
  result.should_not include('<style type="text/css">')
19
41
  end
20
42
  end
21
-
43
+
22
44
  describe "word_transformer" do
23
- it "should summarize the content by number of words" do
24
- parser = Sumitup::Parser.new(:max_words => 1000)
25
- result = parser.summarize(@html, 5)
26
- result.should_not include('consectetur')
27
- result.should include('amet')
28
- end
29
-
30
- it "should keep permitted html in summary" do
31
- parser = Sumitup::Parser.new(:max_words => 1000)
32
- result = parser.summarize(@html, 5)
33
- result.should include('strong')
34
- result.should include('blockquote')
35
- end
36
-
37
45
  it "should remove empty tags" do
38
46
  result = Sumitup::Parser.new.summarize(@html, 100000)
39
47
  result.should_not include('<p></p>')
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "sumitup"
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Justin Ball"]
12
- s.date = "2012-03-06"
12
+ s.date = "2012-03-07"
13
13
  s.description = "Given an html document or fragment this gem will build a summary of the content."
14
14
  s.email = "justinball@gmail.com"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sumitup
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 2
10
- version: 0.1.2
9
+ - 3
10
+ version: 0.1.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Justin Ball
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-06 00:00:00 Z
18
+ date: 2012-03-07 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime