truncate_html 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
module TruncateHtml
|
2
3
|
class HtmlString < String
|
3
4
|
|
4
|
-
UNPAIRED_TAGS = %w(br hr img)
|
5
|
+
UNPAIRED_TAGS = %w(br hr img).freeze
|
5
6
|
|
6
7
|
def initialize(original_html)
|
7
8
|
super(original_html)
|
@@ -33,7 +34,7 @@ module TruncateHtml
|
|
33
34
|
|
34
35
|
private
|
35
36
|
def regex
|
36
|
-
/(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[#{"[[:alpha:]]" if RUBY_VERSION >= '1.9'}\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s
|
37
|
+
/(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[#{"[[:alpha:]]" if RUBY_VERSION >= '1.9'}\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s+|\p{P}/
|
37
38
|
end
|
38
39
|
|
39
40
|
end
|
@@ -7,70 +7,61 @@ describe TruncateHtml::HtmlString do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
describe '#html_tokens' do
|
10
|
-
before(:each) do
|
11
|
-
@html = '<h1>Hi there</h1> <p>This is sweet!</p>'
|
12
|
-
end
|
13
|
-
|
14
10
|
it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do
|
15
|
-
|
11
|
+
html = '<h1>Hi there</h1> <p>This is sweet!</p>'
|
12
|
+
html_string(html).html_tokens.should == ['<h1>', 'Hi', ' ', 'there', '</h1>', ' ', '<p>', 'This', ' ', 'is', ' ', 'sweet!', '</p>']
|
16
13
|
end
|
17
|
-
|
18
14
|
end
|
19
15
|
|
20
16
|
describe '#html_tag?' do
|
21
|
-
|
22
17
|
it 'returns false when the string parameter is not an html tag' do
|
23
|
-
html_string('no tags').
|
18
|
+
html_string('no tags').should_not be_html_tag
|
24
19
|
end
|
25
20
|
|
26
21
|
it 'returns true when the string parameter is an html tag' do
|
27
|
-
html_string('<img src="foo">').
|
28
|
-
html_string('</img>').
|
22
|
+
html_string('<img src="foo">').should be_html_tag
|
23
|
+
html_string('</img>').should be_html_tag
|
29
24
|
end
|
30
|
-
|
31
25
|
end
|
32
26
|
|
33
27
|
describe '#open_tag?' do
|
34
|
-
|
35
28
|
it 'returns true if the tag is an open tag' do
|
36
|
-
html_string('<a>').
|
29
|
+
html_string('<a>').should be_open_tag
|
37
30
|
end
|
38
31
|
|
39
32
|
context 'the tag is an open tag, and has whitespace and html properties' do
|
40
33
|
it 'returns true if it has single quotes' do
|
41
|
-
html_string(" <a href='http://awesomeful.net' >").
|
34
|
+
html_string(" <a href='http://awesomeful.net' >").should be_open_tag
|
42
35
|
end
|
43
36
|
|
44
37
|
it 'returns true if it has double quotes' do
|
45
|
-
html_string(' <a href="http://awesomeful.net">').
|
38
|
+
html_string(' <a href="http://awesomeful.net">').should be_open_tag
|
46
39
|
end
|
47
40
|
end
|
48
41
|
|
49
42
|
it 'returns false if the tag is a close tag' do
|
50
|
-
html_string('</a>').
|
43
|
+
html_string('</a>').should_not be_open_tag
|
51
44
|
end
|
52
45
|
|
53
46
|
it 'returns false if the string is not an html tag' do
|
54
|
-
html_string('foo bar').
|
47
|
+
html_string('foo bar').should_not be_open_tag
|
55
48
|
end
|
56
49
|
|
57
50
|
it 'returns false if it is a <script> tag' do
|
58
|
-
html_string('<script>').
|
51
|
+
html_string('<script>').should_not be_open_tag
|
59
52
|
end
|
60
53
|
end
|
61
54
|
|
62
55
|
describe '#matching_close_tag' do
|
63
|
-
tag_pairs = { '<a>'
|
64
|
-
' <div>'
|
65
|
-
'<h1>'
|
66
|
-
'<a href="foo">'
|
56
|
+
tag_pairs = { '<a>' => '</a>',
|
57
|
+
' <div>' => '</div>',
|
58
|
+
'<h1>' => '</h1>',
|
59
|
+
'<a href="foo">' => '</a>' }
|
67
60
|
|
68
61
|
tag_pairs.each do |open_tag, close_tag|
|
69
62
|
it "closes a #{open_tag} and returns #{close_tag}" do
|
70
63
|
html_string(open_tag).matching_close_tag.should == close_tag
|
71
64
|
end
|
72
65
|
end
|
73
|
-
|
74
66
|
end
|
75
|
-
|
76
67
|
end
|
@@ -8,128 +8,115 @@ describe TruncateHtml::HtmlTruncator do
|
|
8
8
|
TruncateHtml::HtmlTruncator.new(html_string).truncate(opts)
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
it 'truncates to the exact length specified' do
|
15
|
-
truncate('<div>123456789</div>', :length => 5, :omission => '', :word_boundary => false).should == '<div>12345</div>'
|
16
|
-
end
|
17
|
-
|
18
|
-
it 'retains the tags within the text' do
|
19
|
-
html = 'some text <span class="caps">CAPS</span> some text'
|
20
|
-
truncate(html, :length => 25, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> some te'
|
21
|
-
end
|
11
|
+
context 'when the word_boundary option is set to false' do
|
12
|
+
it 'truncates to the exact length specified' do
|
13
|
+
truncate('<div>123456789</div>', :length => 5, :omission => '', :word_boundary => false).should == '<div>12345</div>'
|
22
14
|
end
|
23
15
|
|
24
|
-
it
|
25
|
-
|
16
|
+
it 'retains the tags within the text' do
|
17
|
+
html = 'some text <span class="caps">CAPS</span> some text'
|
18
|
+
truncate(html, :length => 25, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> some te'
|
26
19
|
end
|
20
|
+
end
|
27
21
|
|
28
|
-
|
29
|
-
|
30
|
-
|
22
|
+
it "includes the omission text's length in the returned truncated html" do
|
23
|
+
truncate('a b c', :length => 4, :omission => '...').should == 'a...'
|
24
|
+
end
|
31
25
|
|
32
|
-
|
33
|
-
|
34
|
-
|
26
|
+
it "never returns a string longer than :length" do
|
27
|
+
truncate("test this shit", :length => 10).should == 'test...'
|
28
|
+
end
|
35
29
|
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
it 'supports omissions longer than the maximum length' do
|
31
|
+
lambda { truncate('', :length => 1, :omission => '...') }.should_not raise_error
|
32
|
+
end
|
39
33
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
@expected_out = "<p>I have a script <script type=text/javascript>document.write('lum dee dum');</script> and...</p>"
|
44
|
-
end
|
45
|
-
it 'treats the script tag as lengthless string' do
|
46
|
-
truncate(@input_html, :length => 23).should == @expected_out
|
47
|
-
end
|
48
|
-
end
|
34
|
+
it 'returns the omission when the specified length is smaller than the omission' do
|
35
|
+
truncate('a b c', :length => 2, :omission => '...').should == '...'
|
36
|
+
end
|
49
37
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
38
|
+
it 'treats script tags as strings with no length' do
|
39
|
+
input_html = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and more text</p>"
|
40
|
+
expected_out = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and...</p>"
|
41
|
+
truncate(input_html, :length => 23).should == expected_out
|
42
|
+
end
|
54
43
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
44
|
+
it 'in the middle of a link, truncates and closes the <a>, and closes any remaining open tags' do
|
45
|
+
html = '<div><ul><li>Look at <a href = "foo">this</a> link </li></ul></div>'
|
46
|
+
expected = '<div><ul><li>Look at <a href = "foo">this...</a></li></ul></div>'
|
47
|
+
truncate(html, :length => 15).should == expected
|
48
|
+
end
|
59
49
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
truncate(@html, :length => 19).should == "<p>Look at <strong>this</strong>#{char}...</p>"
|
67
|
-
end
|
50
|
+
%w(! @ # $ % ^ & * \( \) - _ + = [ ] { } \ | , . / ?).each do |char|
|
51
|
+
context "when the html has a #{char} character after a closing tag" do
|
52
|
+
it 'places the punctuation after the tag without any whitespace' do
|
53
|
+
html = "<p>Look at <strong>this</strong>#{char} More words here</p>"
|
54
|
+
expected = "<p>Look at <strong>this</strong>#{char}...</p>"
|
55
|
+
truncate(html, :length => 19).should == expected
|
68
56
|
end
|
69
57
|
end
|
58
|
+
end
|
70
59
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
truncate(@html, :length => 21).should == '<p>Look at <a href="awesomeful.net">this</a> link...</p>'
|
77
|
-
end
|
60
|
+
context 'when the html has a non punctuation character after a closing tag' do
|
61
|
+
it 'leaves a whitespace between the closing tag and the following word character' do
|
62
|
+
html = '<p>Look at <a href = "awesomeful.net">this</a> link for randomness</p>'
|
63
|
+
expected = '<p>Look at <a href = "awesomeful.net">this</a> link...</p>'
|
64
|
+
truncate(html, :length => 21).should == expected
|
78
65
|
end
|
66
|
+
end
|
79
67
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
it 'leaves the multibyte characters after truncation' do
|
86
|
-
truncate(@html, :length => @html.length).should == '<p>Look at our multibyte characters ā ž <a href="awesomeful.net">this</a> link for randomness ā ž</p>'
|
87
|
-
end
|
88
|
-
end
|
68
|
+
it 'handles multibyte characters and leaves them in the result' do
|
69
|
+
html = '<p>Look at our multibyte characters ā ž <a href = "awesomeful.net">this</a> link for randomness ā ž</p>'
|
70
|
+
truncate(html, :length => html.length).should == html
|
71
|
+
end
|
89
72
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
73
|
+
#unusual, but just covering my ass
|
74
|
+
it 'recognizes the multiline html properly' do
|
75
|
+
html = <<-END_HTML
|
76
|
+
<div id="foo"
|
77
|
+
class="bar">
|
78
|
+
This is ugly html.
|
79
|
+
</div>
|
80
|
+
END_HTML
|
81
|
+
truncate(html, :length => 12).should == ' <div id="foo" class="bar"> This is...</div>'
|
82
|
+
end
|
100
83
|
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
end
|
84
|
+
%w(br hr img).each do |unpaired_tag|
|
85
|
+
context "when the html contains a #{unpaired_tag} tag" do
|
105
86
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@html_caps = "<div>Some before. <#{unpaired_tag.capitalize}>and some after</div>"
|
113
|
-
end
|
114
|
-
it "does not close the #{unpaired_tag} tag" do
|
115
|
-
truncate(@html, :length => 19).should == "<div>Some before. <#{unpaired_tag}>and...</div>"
|
116
|
-
truncate(@html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize}>and...</div>"
|
117
|
-
end
|
87
|
+
context "and the #{unpaired_tag} does not have the closing slash" do
|
88
|
+
it "does not close the #{unpaired_tag} tag" do
|
89
|
+
html = "<div>Some before. <#{unpaired_tag}>and some after</div>"
|
90
|
+
html_caps = "<div>Some before. <#{unpaired_tag.capitalize}>and some after</div>"
|
91
|
+
truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag}>and...</div>"
|
92
|
+
truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize}>and...</div>"
|
118
93
|
end
|
94
|
+
end
|
119
95
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
truncate(@html, :length => 19).should == "<div>Some before. <#{unpaired_tag} />and...</div>"
|
127
|
-
truncate(@html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
|
128
|
-
end
|
96
|
+
context "and the #{unpaired_tag} does have the closing slash" do
|
97
|
+
it "does not close the #{unpaired_tag} tag" do
|
98
|
+
html = "<div>Some before. <#{unpaired_tag} />and some after</div>"
|
99
|
+
html_caps = "<div>Some before. <#{unpaired_tag.capitalize} />and some after</div>"
|
100
|
+
truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag} />and...</div>"
|
101
|
+
truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
|
129
102
|
end
|
130
|
-
|
131
103
|
end
|
104
|
+
|
132
105
|
end
|
133
106
|
end
|
134
107
|
|
108
|
+
it 'does not truncate quotes off when input contains chinese characters' do
|
109
|
+
html = "<p>“我现在使用的是中文的拼音。”<br>
|
110
|
+
测试一下具体的truncate<em>html功能。<br>
|
111
|
+
“我现在使用的是中文的拼音。”<br>
|
112
|
+
测试一下具体的truncate</em>html功能。<br>
|
113
|
+
“我现在使用的是中文的拼音。”<br>
|
114
|
+
测试一下具体的truncate<em>html功能。<br>
|
115
|
+
“我现在使用的是中文的拼音。”<br>
|
116
|
+
测试一下具体的truncate</em>html功能。</p>"
|
117
|
+
|
118
|
+
result = truncate(html, omission: "", :length => 50)
|
119
|
+
result.should == "<p>“我现在使用的是中文的拼音。”<br> 测试一下具体的truncate<em>html功能。<br> “</em></p>"
|
120
|
+
end
|
121
|
+
|
135
122
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: truncate_html
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec-rails
|
16
|
-
requirement: &
|
16
|
+
requirement: &70113472188980 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '2.3'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70113472188980
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rails
|
27
|
-
requirement: &
|
27
|
+
requirement: &70113472185140 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 3.0.3
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70113472185140
|
36
36
|
description: Truncates html so you don't have to
|
37
37
|
email:
|
38
38
|
- harold.gimenez@gmail.com
|
@@ -99,7 +99,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
99
99
|
version: '0'
|
100
100
|
segments:
|
101
101
|
- 0
|
102
|
-
hash:
|
102
|
+
hash: 2378599619304365096
|
103
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
104
|
none: false
|
105
105
|
requirements:
|
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
segments:
|
110
110
|
- 0
|
111
|
-
hash:
|
111
|
+
hash: 2378599619304365096
|
112
112
|
requirements: []
|
113
113
|
rubyforge_project:
|
114
114
|
rubygems_version: 1.8.10
|