truncate_html_chinese 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +92 -0
  6. data/History.txt +40 -0
  7. data/LICENSE +21 -0
  8. data/README.md +93 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/init.rb +1 -0
  12. data/lib/app/helpers/truncate_html_helper.rb +9 -0
  13. data/lib/truncate_html.rb +13 -0
  14. data/lib/truncate_html/configuration.rb +14 -0
  15. data/lib/truncate_html/html_string.rb +45 -0
  16. data/lib/truncate_html/html_truncator.rb +91 -0
  17. data/lib/truncate_html/version.rb +3 -0
  18. data/spec/helpers/truncate_html_helper_spec.rb +44 -0
  19. data/spec/rails_root/Gemfile +6 -0
  20. data/spec/rails_root/Gemfile.lock +86 -0
  21. data/spec/rails_root/app/controllers/application_controller.rb +10 -0
  22. data/spec/rails_root/app/helpers/application_helper.rb +3 -0
  23. data/spec/rails_root/config/application.rb +14 -0
  24. data/spec/rails_root/config/boot.rb +13 -0
  25. data/spec/rails_root/config/database.yml +22 -0
  26. data/spec/rails_root/config/environment.rb +5 -0
  27. data/spec/rails_root/config/environments/development.rb +17 -0
  28. data/spec/rails_root/config/environments/production.rb +28 -0
  29. data/spec/rails_root/config/environments/test.rb +29 -0
  30. data/spec/rails_root/config/initializers/backtrace_silencers.rb +7 -0
  31. data/spec/rails_root/config/initializers/inflections.rb +10 -0
  32. data/spec/rails_root/config/initializers/mime_types.rb +5 -0
  33. data/spec/rails_root/config/initializers/new_rails_defaults.rb +19 -0
  34. data/spec/rails_root/config/initializers/session_store.rb +15 -0
  35. data/spec/rails_root/config/locales/en.yml +5 -0
  36. data/spec/rails_root/config/routes.rb +43 -0
  37. data/spec/rails_root/init.rb +1 -0
  38. data/spec/rails_root/lib/app/helpers/truncate_html_helper.rb +7 -0
  39. data/spec/rails_root/lib/tasks/rspec.rake +144 -0
  40. data/spec/spec.opts +2 -0
  41. data/spec/spec_helper.rb +11 -0
  42. data/spec/truncate_html/configuration_spec.rb +17 -0
  43. data/spec/truncate_html/html_string_spec.rb +80 -0
  44. data/spec/truncate_html/html_truncator_spec.rb +203 -0
  45. data/truncate_html-0.9.2.gem +0 -0
  46. data/truncate_html.gemspec +23 -0
  47. metadata +117 -0
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,11 @@
1
+ ENV["RAILS_ENV"] ||= 'test'
2
+ rails_root = File.expand_path('../rails_root', __FILE__)
3
+ require rails_root + '/config/environment.rb'
4
+
5
+ require 'rspec/rails'
6
+
7
+ require File.expand_path('../../lib/truncate_html', __FILE__)
8
+
9
+ RSpec.configure do |config|
10
+ config.mock_with :rspec
11
+ end
data/spec/truncate_html/configuration_spec.rb ADDED
@@ -0,0 +1,17 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'spec_helper')
2
+
3
+ describe TruncateHtml::Configuration do
4
+
5
+ describe 'self.configure' do
6
+
7
+ it 'yields the configuration object' do
8
+ lambda do
9
+ TruncateHtml.configure do |config|
10
+ config.should be_kind_of(TruncateHtml::Configuration)
11
+ throw :yay_it_yielded
12
+ end
13
+ end.should throw_symbol(:yay_it_yielded)
14
+ end
15
+
16
+ end
17
+ end
data/spec/truncate_html/html_string_spec.rb ADDED
@@ -0,0 +1,80 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'spec_helper')
2
+
3
+ describe TruncateHtml::HtmlString do
4
+
5
+ def html_string(original_string)
6
+ TruncateHtml::HtmlString.new(original_string)
7
+ end
8
+
9
+ describe '#html_tokens' do
10
+ it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do
11
+ html = '<h1>Hi there</h1> <p>This is sweet!</p>'
12
+ html_string(html).html_tokens.should == ['<h1>', 'Hi', ' ', 'there', '</h1>', ' ', '<p>', 'This', ' ', 'is', ' ', 'sweet!', '</p>']
13
+ end
14
+ end
15
+
16
+ describe '#html_tag?' do
17
+ it 'returns false when the string parameter is not an html tag' do
18
+ html_string('no tags').should_not be_html_tag
19
+ end
20
+
21
+ it 'returns true when the string parameter is an html tag' do
22
+ html_string('<img src="foo">').should be_html_tag
23
+ html_string('</img>').should be_html_tag
24
+ end
25
+
26
+ it 'is false for html comments' do
27
+ html_string('<!-- hi -->').should_not be_html_tag
28
+ end
29
+ end
30
+
31
+ describe '#open_tag?' do
32
+ it 'returns true if the tag is an open tag' do
33
+ html_string('<a>').should be_open_tag
34
+ end
35
+
36
+ context 'the tag is an open tag, and has whitespace and html properties' do
37
+ it 'returns true if it has single quotes' do
38
+ html_string(" <a href='http://awesomeful.net' >").should be_open_tag
39
+ end
40
+
41
+ it 'returns true if it has double quotes' do
42
+ html_string(' <a href="http://awesomeful.net">').should be_open_tag
43
+ end
44
+ end
45
+
46
+ it 'returns false if the tag is a close tag' do
47
+ html_string('</a>').should_not be_open_tag
48
+ end
49
+
50
+ it 'returns false if the string is not an html tag' do
51
+ html_string('foo bar').should_not be_open_tag
52
+ end
53
+
54
+ it 'returns false if it is a <script> tag' do
55
+ html_string('<script>').should_not be_open_tag
56
+ end
57
+ end
58
+
59
+ describe '#matching_close_tag' do
60
+ tag_pairs = { '<a>' => '</a>',
61
+ ' <div>' => '</div>',
62
+ '<h1>' => '</h1>',
63
+ '<a href="foo">' => '</a>' }
64
+
65
+ tag_pairs.each do |open_tag, close_tag|
66
+ it "closes a #{open_tag} and returns #{close_tag}" do
67
+ html_string(open_tag).matching_close_tag.should == close_tag
68
+ end
69
+ end
70
+ end
71
+
72
+ describe '#html_comment?' do
73
+ it 'is true for HTML comments' do
74
+ html_string('<!-- hi -->').should be_html_comment
75
+ html_string('<a>').should_not be_html_comment
76
+ html_string('</a>').should_not be_html_comment
77
+ html_string('foo').should_not be_html_comment
78
+ end
79
+ end
80
+ end
data/spec/truncate_html/html_truncator_spec.rb ADDED
@@ -0,0 +1,203 @@
1
+ # Encoding: UTF-8
2
+ require File.join(File.dirname(__FILE__), '..', 'spec_helper')
3
+
4
+ describe TruncateHtml::HtmlTruncator do
5
+
6
+ def truncate(html, opts = {})
7
+ html_string = TruncateHtml::HtmlString.new(html)
8
+ TruncateHtml::HtmlTruncator.new(html_string, opts).truncate
9
+ end
10
+
11
+ context 'when the word_boundary option is set to false' do
12
+ it 'truncates to the exact length specified' do
13
+ truncate('<div>123456789</div>', :length => 5, :omission => '', :word_boundary => false).should == '<div>12345</div>'
14
+ end
15
+
16
+ it 'retains the tags within the text' do
17
+ html = 'some text <span class="caps">CAPS</span> some text'
18
+ truncate(html, :length => 25, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> some te...'
19
+ end
20
+
21
+ context 'and a custom omission value is passed' do
22
+ it 'retains the omission text' do
23
+ truncate("testtest", :length => 10, :omission => '..', :word_boundary => false).should == 'testtest..'
24
+ end
25
+
26
+ it 'handles multibyte characters' do
27
+ truncate("prüfenprüfen", :length => 8, :omission => '..', :word_boundary => false). should == 'prüfen..'
28
+ end
29
+ end
30
+ end
31
+
32
+ context 'when the word_boundary option is set to true' do
33
+ it 'truncates using the default word_boundary option' do
34
+ truncate('hello there. or maybe not?', :length => 16, :omission => '', :word_boundary => true).should == 'hello there. or'
35
+ end
36
+ end
37
+
38
+ context 'when the word_boundary option is a custom value (for splitting on sentences)' do
39
+ it 'truncates to the end of the nearest sentence' do
40
+ truncate('hello there. or maybe not?', :length => 16, :omission => '', :word_boundary => /\S[\.\?\!]/).should == 'hello there.'
41
+ end
42
+ end
43
+
44
+ it "includes the omission text's length in the returned truncated html" do
45
+ truncate('a b c', :length => 4, :omission => '...').should == 'a...'
46
+ end
47
+
48
+ it "includes omission even on the edge (issue #18)" do
49
+ opts = { :word_boundary => false, :length => 12 }
50
+ truncate('One two three', opts).should == 'One two t...'
51
+ end
52
+
53
+ it "never returns a string longer than :length" do
54
+ truncate("test this shit", :length => 10).should == 'test...'
55
+ end
56
+
57
+ it 'supports omissions longer than the maximum length' do
58
+ lambda { truncate('', :length => 1, :omission => '...') }.should_not raise_error
59
+ end
60
+
61
+ it 'returns the omission when the specified length is smaller than the omission' do
62
+ truncate('a b c', :length => 2, :omission => '...').should == '...'
63
+ end
64
+
65
+ it 'treats script tags as strings with no length' do
66
+ input_html = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and more text</p>"
67
+ expected_out = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and...</p>"
68
+ truncate(input_html, :length => 23).should == expected_out
69
+ end
70
+
71
+ it 'in the middle of a link, truncates and closes the <a>, and closes any remaining open tags' do
72
+ html = '<div><ul><li>Look at <a href = "foo">this</a> link </li></ul></div>'
73
+ expected = '<div><ul><li>Look at <a href = "foo">this...</a></li></ul></div>'
74
+ truncate(html, :length => 15).should == expected
75
+ end
76
+
77
+ %w(! @ # $ % ^ & * \( \) - _ + = [ ] { } \ | , . / ?).each do |char|
78
+ context "when the html has a #{char} character after a closing tag" do
79
+ it 'places the punctuation after the tag without any whitespace' do
80
+ html = "<p>Look at <strong>this</strong>#{char} More words here</p>"
81
+ expected = "<p>Look at <strong>this</strong>#{char}...</p>"
82
+ truncate(html, :length => 19).should == expected
83
+ end
84
+ end
85
+ end
86
+
87
+ context 'when the html has a non punctuation character after a closing tag' do
88
+ it 'leaves a whitespace between the closing tag and the following word character' do
89
+ html = '<p>Look at <a href = "awesomeful.net">this</a> link for randomness</p>'
90
+ expected = '<p>Look at <a href = "awesomeful.net">this</a> link...</p>'
91
+ truncate(html, :length => 21).should == expected
92
+ end
93
+ end
94
+
95
+ it 'handles multibyte characters and leaves them in the result' do
96
+ html = '<p>Look at our multibyte characters ā ž <a href = "awesomeful.net">this</a> link for randomness ā ž</p>'
97
+ truncate(html, :length => html.length).should == html
98
+ end
99
+
100
+ #unusual, but just covering my ass
101
+ it 'recognizes the multiline html properly' do
102
+ html = <<-END_HTML
103
+ <div id="foo"
104
+ class="bar">
105
+ This is ugly html.
106
+ </div>
107
+ END_HTML
108
+ truncate(html, :length => 12).should == ' <div id="foo" class="bar"> This is...</div>'
109
+ end
110
+
111
+ %w(br hr img).each do |unpaired_tag|
112
+ context "when the html contains a #{unpaired_tag} tag" do
113
+
114
+ context "and the #{unpaired_tag} does not have the closing slash" do
115
+ it "does not close the #{unpaired_tag} tag" do
116
+ html = "<div>Some before. <#{unpaired_tag}>and some after</div>"
117
+ html_caps = "<div>Some before. <#{unpaired_tag.capitalize}>and some after</div>"
118
+ truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag}>and...</div>"
119
+ truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize}>and...</div>"
120
+ end
121
+ end
122
+
123
+ context "and the #{unpaired_tag} does have the closing slash" do
124
+ it "does not close the #{unpaired_tag} tag" do
125
+ html = "<div>Some before. <#{unpaired_tag} />and some after</div>"
126
+ html_caps = "<div>Some before. <#{unpaired_tag.capitalize} />and some after</div>"
127
+ truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag} />and...</div>"
128
+ truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
129
+ end
130
+ end
131
+
132
+ end
133
+ end
134
+
135
+ it 'does not truncate quotes off when input contains chinese characters' do
136
+ html = "<p>“我现在使用的是中文的拼音。”<br>
137
+ 测试一下具体的truncate<em>html功能。<br>
138
+ “我现在使用的是中文的拼音。”<br>
139
+ 测试一下具体的truncate</em>html功能。<br>
140
+ “我现在使用的是中文的拼音。”<br>
141
+ 测试一下具体的truncate<em>html功能。<br>
142
+ “我现在使用的是中文的拼音。”<br>
143
+ 测试一下具体的truncate</em>html功能。</p>"
144
+
145
+ result = truncate(html, :omission => "", :length => 50)
146
+ result.should include "<p>“我现在使用的是中文的拼音。”<br>"
147
+ end
148
+
149
+ context 'when the break_token option is set as <!-- truncate -->' do
150
+ it 'does not truncate abnormally if the break_token is not present' do
151
+ truncate('This is line one. This is line two.', :length => 30, :break_token => '<!-- truncate -->').should == 'This is line one. This is...'
152
+ end
153
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
154
+ truncate('This is line one. This is line <!-- truncate --> two.', :length => 30, :break_token => '<!-- truncate -->').should == 'This is line one. This is...'
155
+ end
156
+ it 'truncates before the length param if the break_token is before the token at "length"' do
157
+ truncate('This is line one. <!-- truncate --> This is line two.', :length => 30, :break_token => '<!-- truncate -->').should == 'This is line one.'
158
+ end
159
+ end
160
+
161
+ context 'when the break_token option is customized as a comment' do
162
+ it 'does not truncate abnormally if the break_token is not present' do
163
+ truncate('This is line one. This is line two.', :length => 30, :break_token => '<!-- break -->').should == 'This is line one. This is...'
164
+ end
165
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
166
+ truncate('This is line one. This is line <!-- break --> two.', :length => 30, :break_token => '<!-- break -->').should == 'This is line one. This is...'
167
+ end
168
+ it 'truncates before the length param if the break_token is before the token at "length"' do
169
+ truncate('This is line one. <!-- break --> This is line two.', :length => 30, :break_token => '<!-- break -->').should == 'This is line one.'
170
+ end
171
+ end
172
+
173
+ context 'when the break_token option is customized as an html tag' do
174
+ it 'does not truncate abnormally if the break_token is not present' do
175
+ truncate('This is line one. This is line two.', :length => 30, :break_token => '<break />').should == 'This is line one. This is...'
176
+ end
177
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
178
+ truncate('This is line one. This is line <break /> two.', :length => 30, :break_token => '<break />').should == 'This is line one. This is...'
179
+ end
180
+ it 'truncates before the length param if the break_token is before the token at "length"' do
181
+ truncate('This is line one. <break /> This is line two.', :length => 30, :break_token => '<break />').should == 'This is line one.'
182
+ end
183
+ end
184
+
185
+ context 'when the break_token option is customized as a word' do
186
+ it 'does not truncate abnormally if the break_token is not present' do
187
+ truncate('This is line one. This is line two.', :length => 30, :break_token => 'foobar').should == 'This is line one. This is...'
188
+ end
189
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
190
+ truncate('This is line one. This is line foobar two.', :length => 30, :break_token => 'foobar').should == 'This is line one. This is...'
191
+ end
192
+ it 'truncates before the length param if the break_token is before the token at "length"' do
193
+ truncate('This is line one. foobar This is line two.', :length => 30, :break_token => 'foobar').should == 'This is line one.'
194
+ end
195
+ end
196
+
197
+ context 'a string with comments' do
198
+ it 'does not duplicate comments (issue #32)' do
199
+ truncate('<h1>hello <!-- stuff --> and <!-- la --> goodbye</h1>', length: 15).should ==
200
+ '<h1>hello <!-- stuff --> and <!-- la -->...</h1>'
201
+ end
202
+ end
203
+ end
data/truncate_html-0.9.2.gem ADDED
Binary file
data/truncate_html.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "truncate_html/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "truncate_html_chinese"
7
+ s.version = '0.1.0'
8
+ s.authors = ["pobing"]
9
+ s.email = ["cn.jdong@gmail.com"]
10
+ s.homepage = "https://github.com/pobing/truncate_html_chinese"
11
+ s.summary = %q{Uses an API similar to Rails' truncate helper to truncate HTML and close any lingering open tags.}
12
+ s.description = %q{Truncates html so you don't have to,support chinese}
13
+
14
+ s.files = `git ls-files`.split("\n")
15
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
+ s.require_paths = ["lib"]
18
+
19
+ s.required_ruby_version = '>= 1.9'
20
+
21
+ s.add_development_dependency "rspec-rails", "~> 2.9"
22
+ s.add_development_dependency "rails", "~> 3.0.3"
23
+ end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: truncate_html_chinese
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - pobing
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec-rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '2.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '2.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rails
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 3.0.3
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 3.0.3
41
+ description: Truncates html so you don't have to,support chinese
42
+ email:
43
+ - cn.jdong@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - .travis.yml
50
+ - Gemfile
51
+ - Gemfile.lock
52
+ - History.txt
53
+ - LICENSE
54
+ - README.md
55
+ - Rakefile
56
+ - VERSION
57
+ - init.rb
58
+ - lib/app/helpers/truncate_html_helper.rb
59
+ - lib/truncate_html.rb
60
+ - lib/truncate_html/configuration.rb
61
+ - lib/truncate_html/html_string.rb
62
+ - lib/truncate_html/html_truncator.rb
63
+ - lib/truncate_html/version.rb
64
+ - spec/helpers/truncate_html_helper_spec.rb
65
+ - spec/rails_root/Gemfile
66
+ - spec/rails_root/Gemfile.lock
67
+ - spec/rails_root/app/controllers/application_controller.rb
68
+ - spec/rails_root/app/helpers/application_helper.rb
69
+ - spec/rails_root/config/application.rb
70
+ - spec/rails_root/config/boot.rb
71
+ - spec/rails_root/config/database.yml
72
+ - spec/rails_root/config/environment.rb
73
+ - spec/rails_root/config/environments/development.rb
74
+ - spec/rails_root/config/environments/production.rb
75
+ - spec/rails_root/config/environments/test.rb
76
+ - spec/rails_root/config/initializers/backtrace_silencers.rb
77
+ - spec/rails_root/config/initializers/inflections.rb
78
+ - spec/rails_root/config/initializers/mime_types.rb
79
+ - spec/rails_root/config/initializers/new_rails_defaults.rb
80
+ - spec/rails_root/config/initializers/session_store.rb
81
+ - spec/rails_root/config/locales/en.yml
82
+ - spec/rails_root/config/routes.rb
83
+ - spec/rails_root/init.rb
84
+ - spec/rails_root/lib/app/helpers/truncate_html_helper.rb
85
+ - spec/rails_root/lib/tasks/rspec.rake
86
+ - spec/spec.opts
87
+ - spec/spec_helper.rb
88
+ - spec/truncate_html/configuration_spec.rb
89
+ - spec/truncate_html/html_string_spec.rb
90
+ - spec/truncate_html/html_truncator_spec.rb
91
+ - truncate_html-0.9.2.gem
92
+ - truncate_html.gemspec
93
+ homepage: https://github.com/pobing/truncate_html_chinese
94
+ licenses: []
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '1.9'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 2.0.3
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: Uses an API similar to Rails' truncate helper to truncate HTML and close
116
+ any lingering open tags.
117
+ test_files: []