sumitup 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/sumitup/parser.rb +11 -2
- data/spec/fixtures/basic.html +7 -0
- data/spec/sumitup/parser_spec.rb +23 -15
- data/sumitup.gemspec +2 -2
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.1.3
|
data/lib/sumitup/parser.rb
CHANGED
@@ -71,8 +71,17 @@ module Sumitup
|
|
71
71
|
else
|
72
72
|
# if the text of the current node makes us go over then truncate it
|
73
73
|
result, count = snippet(node.inner_text, max - self.word_count)
|
74
|
-
|
75
|
-
|
74
|
+
if count == 0 || is_blank?(result)
|
75
|
+
node.remove
|
76
|
+
else
|
77
|
+
self.word_count += count
|
78
|
+
node.content = result
|
79
|
+
end
|
80
|
+
end
|
81
|
+
else
|
82
|
+
# Remove empty nodes
|
83
|
+
if node.text.empty? && node.children.empty? && !['img', 'br'].include?(node.name)
|
84
|
+
node.remove
|
76
85
|
end
|
77
86
|
end
|
78
87
|
|
data/spec/fixtures/basic.html
CHANGED
data/spec/sumitup/parser_spec.rb
CHANGED
@@ -7,6 +7,28 @@ describe Sumitup::Parser do
|
|
7
7
|
@html = IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.html'))
|
8
8
|
end
|
9
9
|
|
10
|
+
it "should summarize the content by number of words" do
|
11
|
+
parser = Sumitup::Parser.new(:max_words => 1000)
|
12
|
+
result = parser.summarize(@html, 5)
|
13
|
+
result.should_not include('consectetur')
|
14
|
+
result.should include('amet')
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should keep permitted html in summary" do
|
18
|
+
parser = Sumitup::Parser.new(:max_words => 1000)
|
19
|
+
result = parser.summarize(@html, 5)
|
20
|
+
result.should include('strong')
|
21
|
+
result.should include('blockquote')
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should remove empty tags after truncating text" do
|
25
|
+
parser = Sumitup::Parser.new(:max_words => 5)
|
26
|
+
result = parser.summarize(@html)
|
27
|
+
result.should_not include('ul')
|
28
|
+
result.should_not include('li')
|
29
|
+
result.should_not include('<span></span>')
|
30
|
+
end
|
31
|
+
|
10
32
|
describe "Sanitize options" do
|
11
33
|
it "should remove html comments" do
|
12
34
|
result = Sumitup::Parser.new.summarize(@html, 100000)
|
@@ -18,22 +40,8 @@ describe Sumitup::Parser do
|
|
18
40
|
result.should_not include('<style type="text/css">')
|
19
41
|
end
|
20
42
|
end
|
21
|
-
|
43
|
+
|
22
44
|
describe "word_transformer" do
|
23
|
-
it "should summarize the content by number of words" do
|
24
|
-
parser = Sumitup::Parser.new(:max_words => 1000)
|
25
|
-
result = parser.summarize(@html, 5)
|
26
|
-
result.should_not include('consectetur')
|
27
|
-
result.should include('amet')
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should keep permitted html in summary" do
|
31
|
-
parser = Sumitup::Parser.new(:max_words => 1000)
|
32
|
-
result = parser.summarize(@html, 5)
|
33
|
-
result.should include('strong')
|
34
|
-
result.should include('blockquote')
|
35
|
-
end
|
36
|
-
|
37
45
|
it "should remove empty tags" do
|
38
46
|
result = Sumitup::Parser.new.summarize(@html, 100000)
|
39
47
|
result.should_not include('<p></p>')
|
data/sumitup.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "sumitup"
|
8
|
-
s.version = "0.1.2"
|
8
|
+
s.version = "0.1.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Justin Ball"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-07"
|
13
13
|
s.description = "Given an html document or fragment this gem will build a summary of the content."
|
14
14
|
s.email = "justinball@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sumitup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
9
|
+
- 3
|
10
|
+
version: 0.1.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Justin Ball
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-03-
|
18
|
+
date: 2012-03-07 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :runtime
|