sumitup 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/sumitup/parser.rb +11 -2
- data/spec/fixtures/basic.html +7 -0
- data/spec/sumitup/parser_spec.rb +23 -15
- data/sumitup.gemspec +2 -2
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.1.3
|
data/lib/sumitup/parser.rb
CHANGED
@@ -71,8 +71,17 @@ module Sumitup
|
|
71
71
|
else
|
72
72
|
# if the text of the current node makes us go over then truncate it
|
73
73
|
result, count = snippet(node.inner_text, max - self.word_count)
|
74
|
-
|
75
|
-
|
74
|
+
if count == 0 || is_blank?(result)
|
75
|
+
node.remove
|
76
|
+
else
|
77
|
+
self.word_count += count
|
78
|
+
node.content = result
|
79
|
+
end
|
80
|
+
end
|
81
|
+
else
|
82
|
+
# Remove empty nodes
|
83
|
+
if node.text.empty? && node.children.empty? && !['img', 'br'].include?(node.name)
|
84
|
+
node.remove
|
76
85
|
end
|
77
86
|
end
|
78
87
|
|
data/spec/fixtures/basic.html
CHANGED
data/spec/sumitup/parser_spec.rb
CHANGED
@@ -7,6 +7,28 @@ describe Sumitup::Parser do
|
|
7
7
|
@html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
|
8
8
|
end
|
9
9
|
|
10
|
+
it "should summarize the content by number of words" do
|
11
|
+
parser = Sumitup::Parser.new(:max_words => 1000)
|
12
|
+
result = parser.summarize(@html, 5)
|
13
|
+
result.should_not include('consectetur')
|
14
|
+
result.should include('amet')
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should keep permitted html in summary" do
|
18
|
+
parser = Sumitup::Parser.new(:max_words => 1000)
|
19
|
+
result = parser.summarize(@html, 5)
|
20
|
+
result.should include('strong')
|
21
|
+
result.should include('blockquote')
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should remove empty tags after truncating text" do
|
25
|
+
parser = Sumitup::Parser.new(:max_words => 5)
|
26
|
+
result = parser.summarize(@html)
|
27
|
+
result.should_not include('ul')
|
28
|
+
result.should_not include('li')
|
29
|
+
result.should_not include('<span></span>')
|
30
|
+
end
|
31
|
+
|
10
32
|
describe "Sanitize options" do
|
11
33
|
it "should remove html comments" do
|
12
34
|
result = Sumitup::Parser.new.summarize(@html, 100000)
|
@@ -18,22 +40,8 @@ describe Sumitup::Parser do
|
|
18
40
|
result.should_not include('<style type="text/css">')
|
19
41
|
end
|
20
42
|
end
|
21
|
-
|
43
|
+
|
22
44
|
describe "word_transformer" do
|
23
|
-
it "should summarize the content by number of words" do
|
24
|
-
parser = Sumitup::Parser.new(:max_words => 1000)
|
25
|
-
result = parser.summarize(@html, 5)
|
26
|
-
result.should_not include('consectetur')
|
27
|
-
result.should include('amet')
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should keep permitted html in summary" do
|
31
|
-
parser = Sumitup::Parser.new(:max_words => 1000)
|
32
|
-
result = parser.summarize(@html, 5)
|
33
|
-
result.should include('strong')
|
34
|
-
result.should include('blockquote')
|
35
|
-
end
|
36
|
-
|
37
45
|
it "should remove empty tags" do
|
38
46
|
result = Sumitup::Parser.new.summarize(@html, 100000)
|
39
47
|
result.should_not include('<p></p>')
|
data/sumitup.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "sumitup"
|
8
|
-
s.version = "0.1.2"
|
8
|
+
s.version = "0.1.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Justin Ball"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-07"
|
13
13
|
s.description = "Given an html document or fragment this gem will build a summary of the content."
|
14
14
|
s.email = "justinball@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sumitup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
9
|
+
- 3
|
10
|
+
version: 0.1.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Justin Ball
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-07 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|