truncate_html 0.5.3 → 0.5.4
Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
module TruncateHtml
|
2
3
|
class HtmlString < String
|
3
4
|
|
4
|
-
UNPAIRED_TAGS = %w(br hr img)
|
5
|
+
UNPAIRED_TAGS = %w(br hr img).freeze
|
5
6
|
|
6
7
|
def initialize(original_html)
|
7
8
|
super(original_html)
|
@@ -33,7 +34,7 @@ module TruncateHtml
|
|
33
34
|
|
34
35
|
private
|
35
36
|
def regex
|
36
|
-
/(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[#{"[[:alpha:]]" if RUBY_VERSION >= '1.9'}\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s
|
37
|
+
/(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[#{"[[:alpha:]]" if RUBY_VERSION >= '1.9'}\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s+|\p{P}/
|
37
38
|
end
|
38
39
|
|
39
40
|
end
|
@@ -7,70 +7,61 @@ describe TruncateHtml::HtmlString do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
describe '#html_tokens' do
|
10
|
-
before(:each) do
|
11
|
-
@html = '<h1>Hi there</h1> <p>This is sweet!</p>'
|
12
|
-
end
|
13
|
-
|
14
10
|
it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do
|
15
|
-
|
11
|
+
html = '<h1>Hi there</h1> <p>This is sweet!</p>'
|
12
|
+
html_string(html).html_tokens.should == ['<h1>', 'Hi', ' ', 'there', '</h1>', ' ', '<p>', 'This', ' ', 'is', ' ', 'sweet!', '</p>']
|
16
13
|
end
|
17
|
-
|
18
14
|
end
|
19
15
|
|
20
16
|
describe '#html_tag?' do
|
21
|
-
|
22
17
|
it 'returns false when the string parameter is not an html tag' do
|
23
|
-
html_string('no tags').
|
18
|
+
html_string('no tags').should_not be_html_tag
|
24
19
|
end
|
25
20
|
|
26
21
|
it 'returns true when the string parameter is an html tag' do
|
27
|
-
html_string('<img src="foo">').
|
28
|
-
html_string('</img>').
|
22
|
+
html_string('<img src="foo">').should be_html_tag
|
23
|
+
html_string('</img>').should be_html_tag
|
29
24
|
end
|
30
|
-
|
31
25
|
end
|
32
26
|
|
33
27
|
describe '#open_tag?' do
|
34
|
-
|
35
28
|
it 'returns true if the tag is an open tag' do
|
36
|
-
html_string('<a>').
|
29
|
+
html_string('<a>').should be_open_tag
|
37
30
|
end
|
38
31
|
|
39
32
|
context 'the tag is an open tag, and has whitespace and html properties' do
|
40
33
|
it 'returns true if it has single quotes' do
|
41
|
-
html_string(" <a href='http://awesomeful.net' >").
|
34
|
+
html_string(" <a href='http://awesomeful.net' >").should be_open_tag
|
42
35
|
end
|
43
36
|
|
44
37
|
it 'returns true if it has double quotes' do
|
45
|
-
html_string(' <a href="http://awesomeful.net">').
|
38
|
+
html_string(' <a href="http://awesomeful.net">').should be_open_tag
|
46
39
|
end
|
47
40
|
end
|
48
41
|
|
49
42
|
it 'returns false if the tag is a close tag' do
|
50
|
-
html_string('</a>').
|
43
|
+
html_string('</a>').should_not be_open_tag
|
51
44
|
end
|
52
45
|
|
53
46
|
it 'returns false if the string is not an html tag' do
|
54
|
-
html_string('foo bar').
|
47
|
+
html_string('foo bar').should_not be_open_tag
|
55
48
|
end
|
56
49
|
|
57
50
|
it 'returns false if it is a <script> tag' do
|
58
|
-
html_string('<script>').
|
51
|
+
html_string('<script>').should_not be_open_tag
|
59
52
|
end
|
60
53
|
end
|
61
54
|
|
62
55
|
describe '#matching_close_tag' do
|
63
|
-
tag_pairs = { '<a>'
|
64
|
-
' <div>'
|
65
|
-
'<h1>'
|
66
|
-
'<a href="foo">'
|
56
|
+
tag_pairs = { '<a>' => '</a>',
|
57
|
+
' <div>' => '</div>',
|
58
|
+
'<h1>' => '</h1>',
|
59
|
+
'<a href="foo">' => '</a>' }
|
67
60
|
|
68
61
|
tag_pairs.each do |open_tag, close_tag|
|
69
62
|
it "closes a #{open_tag} and returns #{close_tag}" do
|
70
63
|
html_string(open_tag).matching_close_tag.should == close_tag
|
71
64
|
end
|
72
65
|
end
|
73
|
-
|
74
66
|
end
|
75
|
-
|
76
67
|
end
|
@@ -8,128 +8,115 @@ describe TruncateHtml::HtmlTruncator do
|
|
8
8
|
TruncateHtml::HtmlTruncator.new(html_string).truncate(opts)
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
it 'truncates to the exact length specified' do
|
15
|
-
truncate('<div>123456789</div>', :length => 5, :omission => '', :word_boundary => false).should == '<div>12345</div>'
|
16
|
-
end
|
17
|
-
|
18
|
-
it 'retains the tags within the text' do
|
19
|
-
html = 'some text <span class="caps">CAPS</span> some text'
|
20
|
-
truncate(html, :length => 25, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> some te'
|
21
|
-
end
|
11
|
+
context 'when the word_boundary option is set to false' do
|
12
|
+
it 'truncates to the exact length specified' do
|
13
|
+
truncate('<div>123456789</div>', :length => 5, :omission => '', :word_boundary => false).should == '<div>12345</div>'
|
22
14
|
end
|
23
15
|
|
24
|
-
it
|
25
|
-
|
16
|
+
it 'retains the tags within the text' do
|
17
|
+
html = 'some text <span class="caps">CAPS</span> some text'
|
18
|
+
truncate(html, :length => 25, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> some te'
|
26
19
|
end
|
20
|
+
end
|
27
21
|
|
28
|
-
|
29
|
-
|
30
|
-
|
22
|
+
it "includes the omission text's length in the returned truncated html" do
|
23
|
+
truncate('a b c', :length => 4, :omission => '...').should == 'a...'
|
24
|
+
end
|
31
25
|
|
32
|
-
|
33
|
-
|
34
|
-
|
26
|
+
it "never returns a string longer than :length" do
|
27
|
+
truncate("test this shit", :length => 10).should == 'test...'
|
28
|
+
end
|
35
29
|
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
it 'supports omissions longer than the maximum length' do
|
31
|
+
lambda { truncate('', :length => 1, :omission => '...') }.should_not raise_error
|
32
|
+
end
|
39
33
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
@expected_out = "<p>I have a script <script type=text/javascript>document.write('lum dee dum');</script> and...</p>"
|
44
|
-
end
|
45
|
-
it 'treats the script tag as lengthless string' do
|
46
|
-
truncate(@input_html, :length => 23).should == @expected_out
|
47
|
-
end
|
48
|
-
end
|
34
|
+
it 'returns the omission when the specified length is smaller than the omission' do
|
35
|
+
truncate('a b c', :length => 2, :omission => '...').should == '...'
|
36
|
+
end
|
49
37
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
38
|
+
it 'treats script tags as strings with no length' do
|
39
|
+
input_html = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and more text</p>"
|
40
|
+
expected_out = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and...</p>"
|
41
|
+
truncate(input_html, :length => 23).should == expected_out
|
42
|
+
end
|
54
43
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
44
|
+
it 'in the middle of a link, truncates and closes the <a>, and closes any remaining open tags' do
|
45
|
+
html = '<div><ul><li>Look at <a href = "foo">this</a> link </li></ul></div>'
|
46
|
+
expected = '<div><ul><li>Look at <a href = "foo">this...</a></li></ul></div>'
|
47
|
+
truncate(html, :length => 15).should == expected
|
48
|
+
end
|
59
49
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
truncate(@html, :length => 19).should == "<p>Look at <strong>this</strong>#{char}...</p>"
|
67
|
-
end
|
50
|
+
%w(! @ # $ % ^ & * \( \) - _ + = [ ] { } \ | , . / ?).each do |char|
|
51
|
+
context "when the html has a #{char} character after a closing tag" do
|
52
|
+
it 'places the punctuation after the tag without any whitespace' do
|
53
|
+
html = "<p>Look at <strong>this</strong>#{char} More words here</p>"
|
54
|
+
expected = "<p>Look at <strong>this</strong>#{char}...</p>"
|
55
|
+
truncate(html, :length => 19).should == expected
|
68
56
|
end
|
69
57
|
end
|
58
|
+
end
|
70
59
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
truncate(@html, :length => 21).should == '<p>Look at <a href="awesomeful.net">this</a> link...</p>'
|
77
|
-
end
|
60
|
+
context 'when the html has a non punctuation character after a closing tag' do
|
61
|
+
it 'leaves a whitespace between the closing tag and the following word character' do
|
62
|
+
html = '<p>Look at <a href = "awesomeful.net">this</a> link for randomness</p>'
|
63
|
+
expected = '<p>Look at <a href = "awesomeful.net">this</a> link...</p>'
|
64
|
+
truncate(html, :length => 21).should == expected
|
78
65
|
end
|
66
|
+
end
|
79
67
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
it 'leaves the multibyte characters after truncation' do
|
86
|
-
truncate(@html, :length => @html.length).should == '<p>Look at our multibyte characters ā ž <a href="awesomeful.net">this</a> link for randomness ā ž</p>'
|
87
|
-
end
|
88
|
-
end
|
68
|
+
it 'handles multibyte characters and leaves them in the result' do
|
69
|
+
html = '<p>Look at our multibyte characters ā ž <a href = "awesomeful.net">this</a> link for randomness ā ž</p>'
|
70
|
+
truncate(html, :length => html.length).should == html
|
71
|
+
end
|
89
72
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
73
|
+
#unusual, but just covering my ass
|
74
|
+
it 'recognizes the multiline html properly' do
|
75
|
+
html = <<-END_HTML
|
76
|
+
<div id="foo"
|
77
|
+
class="bar">
|
78
|
+
This is ugly html.
|
79
|
+
</div>
|
80
|
+
END_HTML
|
81
|
+
truncate(html, :length => 12).should == ' <div id="foo" class="bar"> This is...</div>'
|
82
|
+
end
|
100
83
|
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
end
|
84
|
+
%w(br hr img).each do |unpaired_tag|
|
85
|
+
context "when the html contains a #{unpaired_tag} tag" do
|
105
86
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@html_caps = "<div>Some before. <#{unpaired_tag.capitalize}>and some after</div>"
|
113
|
-
end
|
114
|
-
it "does not close the #{unpaired_tag} tag" do
|
115
|
-
truncate(@html, :length => 19).should == "<div>Some before. <#{unpaired_tag}>and...</div>"
|
116
|
-
truncate(@html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize}>and...</div>"
|
117
|
-
end
|
87
|
+
context "and the #{unpaired_tag} does not have the closing slash" do
|
88
|
+
it "does not close the #{unpaired_tag} tag" do
|
89
|
+
html = "<div>Some before. <#{unpaired_tag}>and some after</div>"
|
90
|
+
html_caps = "<div>Some before. <#{unpaired_tag.capitalize}>and some after</div>"
|
91
|
+
truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag}>and...</div>"
|
92
|
+
truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize}>and...</div>"
|
118
93
|
end
|
94
|
+
end
|
119
95
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
truncate(@html, :length => 19).should == "<div>Some before. <#{unpaired_tag} />and...</div>"
|
127
|
-
truncate(@html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
|
128
|
-
end
|
96
|
+
context "and the #{unpaired_tag} does have the closing slash" do
|
97
|
+
it "does not close the #{unpaired_tag} tag" do
|
98
|
+
html = "<div>Some before. <#{unpaired_tag} />and some after</div>"
|
99
|
+
html_caps = "<div>Some before. <#{unpaired_tag.capitalize} />and some after</div>"
|
100
|
+
truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag} />and...</div>"
|
101
|
+
truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
|
129
102
|
end
|
130
|
-
|
131
103
|
end
|
104
|
+
|
132
105
|
end
|
133
106
|
end
|
134
107
|
|
108
|
+
it 'does not truncate quotes off when input contains chinese characters' do
|
109
|
+
html = "<p>“我现在使用的是中文的拼音。”<br>
|
110
|
+
测试一下具体的truncate<em>html功能。<br>
|
111
|
+
“我现在使用的是中文的拼音。”<br>
|
112
|
+
测试一下具体的truncate</em>html功能。<br>
|
113
|
+
“我现在使用的是中文的拼音。”<br>
|
114
|
+
测试一下具体的truncate<em>html功能。<br>
|
115
|
+
“我现在使用的是中文的拼音。”<br>
|
116
|
+
测试一下具体的truncate</em>html功能。</p>"
|
117
|
+
|
118
|
+
result = truncate(html, omission: "", :length => 50)
|
119
|
+
result.should == "<p>“我现在使用的是中文的拼音。”<br> 测试一下具体的truncate<em>html功能。<br> “</em></p>"
|
120
|
+
end
|
121
|
+
|
135
122
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: truncate_html
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec-rails
|
16
|
-
requirement: &
|
16
|
+
requirement: &70113472188980 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '2.3'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70113472188980
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rails
|
27
|
-
requirement: &
|
27
|
+
requirement: &70113472185140 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 3.0.3
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70113472185140
|
36
36
|
description: Truncates html so you don't have to
|
37
37
|
email:
|
38
38
|
- harold.gimenez@gmail.com
|
@@ -99,7 +99,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
99
99
|
version: '0'
|
100
100
|
segments:
|
101
101
|
- 0
|
102
|
-
hash:
|
102
|
+
hash: 2378599619304365096
|
103
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
104
|
none: false
|
105
105
|
requirements:
|
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
segments:
|
110
110
|
- 0
|
111
|
-
hash:
|
111
|
+
hash: 2378599619304365096
|
112
112
|
requirements: []
|
113
113
|
rubyforge_project:
|
114
114
|
rubygems_version: 1.8.10
|