truncate_html_chinese 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47)
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +92 -0
  6. data/History.txt +40 -0
  7. data/LICENSE +21 -0
  8. data/README.md +93 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/init.rb +1 -0
  12. data/lib/app/helpers/truncate_html_helper.rb +9 -0
  13. data/lib/truncate_html.rb +13 -0
  14. data/lib/truncate_html/configuration.rb +14 -0
  15. data/lib/truncate_html/html_string.rb +45 -0
  16. data/lib/truncate_html/html_truncator.rb +91 -0
  17. data/lib/truncate_html/version.rb +3 -0
  18. data/spec/helpers/truncate_html_helper_spec.rb +44 -0
  19. data/spec/rails_root/Gemfile +6 -0
  20. data/spec/rails_root/Gemfile.lock +86 -0
  21. data/spec/rails_root/app/controllers/application_controller.rb +10 -0
  22. data/spec/rails_root/app/helpers/application_helper.rb +3 -0
  23. data/spec/rails_root/config/application.rb +14 -0
  24. data/spec/rails_root/config/boot.rb +13 -0
  25. data/spec/rails_root/config/database.yml +22 -0
  26. data/spec/rails_root/config/environment.rb +5 -0
  27. data/spec/rails_root/config/environments/development.rb +17 -0
  28. data/spec/rails_root/config/environments/production.rb +28 -0
  29. data/spec/rails_root/config/environments/test.rb +29 -0
  30. data/spec/rails_root/config/initializers/backtrace_silencers.rb +7 -0
  31. data/spec/rails_root/config/initializers/inflections.rb +10 -0
  32. data/spec/rails_root/config/initializers/mime_types.rb +5 -0
  33. data/spec/rails_root/config/initializers/new_rails_defaults.rb +19 -0
  34. data/spec/rails_root/config/initializers/session_store.rb +15 -0
  35. data/spec/rails_root/config/locales/en.yml +5 -0
  36. data/spec/rails_root/config/routes.rb +43 -0
  37. data/spec/rails_root/init.rb +1 -0
  38. data/spec/rails_root/lib/app/helpers/truncate_html_helper.rb +7 -0
  39. data/spec/rails_root/lib/tasks/rspec.rake +144 -0
  40. data/spec/spec.opts +2 -0
  41. data/spec/spec_helper.rb +11 -0
  42. data/spec/truncate_html/configuration_spec.rb +17 -0
  43. data/spec/truncate_html/html_string_spec.rb +80 -0
  44. data/spec/truncate_html/html_truncator_spec.rb +203 -0
  45. data/truncate_html-0.9.2.gem +0 -0
  46. data/truncate_html.gemspec +23 -0
  47. metadata +117 -0
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,11 @@
1
+ ENV["RAILS_ENV"] ||= 'test'
2
+ rails_root = File.expand_path('../rails_root', __FILE__)
3
+ require rails_root + '/config/environment.rb'
4
+
5
+ require 'rspec/rails'
6
+
7
+ require File.expand_path('../../lib/truncate_html', __FILE__)
8
+
9
+ RSpec.configure do |config|
10
+ config.mock_with :rspec
11
+ end
data/spec/truncate_html/configuration_spec.rb ADDED
@@ -0,0 +1,17 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'spec_helper')
2
+
3
+ describe TruncateHtml::Configuration do
4
+
5
+ describe 'self.configure' do
6
+
7
+ it 'yields the configuration object' do
8
+ lambda do
9
+ TruncateHtml.configure do |config|
10
+ config.should be_kind_of(TruncateHtml::Configuration)
11
+ throw :yay_it_yielded
12
+ end
13
+ end.should throw_symbol(:yay_it_yielded)
14
+ end
15
+
16
+ end
17
+ end
data/spec/truncate_html/html_string_spec.rb ADDED
@@ -0,0 +1,80 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'spec_helper')
2
+
3
+ describe TruncateHtml::HtmlString do
4
+
5
+ def html_string(original_string)
6
+ TruncateHtml::HtmlString.new(original_string)
7
+ end
8
+
9
+ describe '#html_tokens' do
10
+ it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do
11
+ html = '<h1>Hi there</h1> <p>This is sweet!</p>'
12
+ html_string(html).html_tokens.should == ['<h1>', 'Hi', ' ', 'there', '</h1>', ' ', '<p>', 'This', ' ', 'is', ' ', 'sweet!', '</p>']
13
+ end
14
+ end
15
+
16
+ describe '#html_tag?' do
17
+ it 'returns false when the string parameter is not an html tag' do
18
+ html_string('no tags').should_not be_html_tag
19
+ end
20
+
21
+ it 'returns true when the string parameter is an html tag' do
22
+ html_string('<img src="foo">').should be_html_tag
23
+ html_string('</img>').should be_html_tag
24
+ end
25
+
26
+ it 'is false for html comments' do
27
+ html_string('<!-- hi -->').should_not be_html_tag
28
+ end
29
+ end
30
+
31
+ describe '#open_tag?' do
32
+ it 'returns true if the tag is an open tag' do
33
+ html_string('<a>').should be_open_tag
34
+ end
35
+
36
+ context 'the tag is an open tag, and has whitespace and html properties' do
37
+ it 'returns true if it has single quotes' do
38
+ html_string(" <a href='http://awesomeful.net' >").should be_open_tag
39
+ end
40
+
41
+ it 'returns true if it has double quotes' do
42
+ html_string(' <a href="http://awesomeful.net">').should be_open_tag
43
+ end
44
+ end
45
+
46
+ it 'returns false if the tag is a close tag' do
47
+ html_string('</a>').should_not be_open_tag
48
+ end
49
+
50
+ it 'returns false if the string is not an html tag' do
51
+ html_string('foo bar').should_not be_open_tag
52
+ end
53
+
54
+ it 'returns false if it is a <script> tag' do
55
+ html_string('<script>').should_not be_open_tag
56
+ end
57
+ end
58
+
59
+ describe '#matching_close_tag' do
60
+ tag_pairs = { '<a>' => '</a>',
61
+ ' <div>' => '</div>',
62
+ '<h1>' => '</h1>',
63
+ '<a href="foo">' => '</a>' }
64
+
65
+ tag_pairs.each do |open_tag, close_tag|
66
+ it "closes a #{open_tag} and returns #{close_tag}" do
67
+ html_string(open_tag).matching_close_tag.should == close_tag
68
+ end
69
+ end
70
+ end
71
+
72
+ describe '#html_comment?' do
73
+ it 'is true for HTML comments' do
74
+ html_string('<!-- hi -->').should be_html_comment
75
+ html_string('<a>').should_not be_html_comment
76
+ html_string('</a>').should_not be_html_comment
77
+ html_string('foo').should_not be_html_comment
78
+ end
79
+ end
80
+ end
data/spec/truncate_html/html_truncator_spec.rb ADDED
@@ -0,0 +1,203 @@
1
+ # Encoding: UTF-8
2
+ require File.join(File.dirname(__FILE__), '..', 'spec_helper')
3
+
4
+ describe TruncateHtml::HtmlTruncator do
5
+
6
+ def truncate(html, opts = {})
7
+ html_string = TruncateHtml::HtmlString.new(html)
8
+ TruncateHtml::HtmlTruncator.new(html_string, opts).truncate
9
+ end
10
+
11
+ context 'when the word_boundary option is set to false' do
12
+ it 'truncates to the exact length specified' do
13
+ truncate('<div>123456789</div>', :length => 5, :omission => '', :word_boundary => false).should == '<div>12345</div>'
14
+ end
15
+
16
+ it 'retains the tags within the text' do
17
+ html = 'some text <span class="caps">CAPS</span> some text'
18
+ truncate(html, :length => 25, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> some te...'
19
+ end
20
+
21
+ context 'and a custom omission value is passed' do
22
+ it 'retains the omission text' do
23
+ truncate("testtest", :length => 10, :omission => '..', :word_boundary => false).should == 'testtest..'
24
+ end
25
+
26
+ it 'handles multibyte characters' do
27
+ truncate("prüfenprüfen", :length => 8, :omission => '..', :word_boundary => false). should == 'prüfen..'
28
+ end
29
+ end
30
+ end
31
+
32
+ context 'when the word_boundary option is set to true' do
33
+ it 'truncates using the default word_boundary option' do
34
+ truncate('hello there. or maybe not?', :length => 16, :omission => '', :word_boundary => true).should == 'hello there. or'
35
+ end
36
+ end
37
+
38
+ context 'when the word_boundary option is a custom value (for splitting on sentences)' do
39
+ it 'truncates to the end of the nearest sentence' do
40
+ truncate('hello there. or maybe not?', :length => 16, :omission => '', :word_boundary => /\S[\.\?\!]/).should == 'hello there.'
41
+ end
42
+ end
43
+
44
+ it "includes the omission text's length in the returned truncated html" do
45
+ truncate('a b c', :length => 4, :omission => '...').should == 'a...'
46
+ end
47
+
48
+ it "includes omission even on the edge (issue #18)" do
49
+ opts = { :word_boundary => false, :length => 12 }
50
+ truncate('One two three', opts).should == 'One two t...'
51
+ end
52
+
53
+ it "never returns a string longer than :length" do
54
+ truncate("test this shit", :length => 10).should == 'test...'
55
+ end
56
+
57
+ it 'supports omissions longer than the maximum length' do
58
+ lambda { truncate('', :length => 1, :omission => '...') }.should_not raise_error
59
+ end
60
+
61
+ it 'returns the omission when the specified length is smaller than the omission' do
62
+ truncate('a b c', :length => 2, :omission => '...').should == '...'
63
+ end
64
+
65
+ it 'treats script tags as strings with no length' do
66
+ input_html = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and more text</p>"
67
+ expected_out = "<p>I have a script <script type = text/javascript>document.write('lum dee dum');</script> and...</p>"
68
+ truncate(input_html, :length => 23).should == expected_out
69
+ end
70
+
71
+ it 'in the middle of a link, truncates and closes the <a>, and closes any remaining open tags' do
72
+ html = '<div><ul><li>Look at <a href = "foo">this</a> link </li></ul></div>'
73
+ expected = '<div><ul><li>Look at <a href = "foo">this...</a></li></ul></div>'
74
+ truncate(html, :length => 15).should == expected
75
+ end
76
+
77
+ %w(! @ # $ % ^ & * \( \) - _ + = [ ] { } \ | , . / ?).each do |char|
78
+ context "when the html has a #{char} character after a closing tag" do
79
+ it 'places the punctuation after the tag without any whitespace' do
80
+ html = "<p>Look at <strong>this</strong>#{char} More words here</p>"
81
+ expected = "<p>Look at <strong>this</strong>#{char}...</p>"
82
+ truncate(html, :length => 19).should == expected
83
+ end
84
+ end
85
+ end
86
+
87
+ context 'when the html has a non punctuation character after a closing tag' do
88
+ it 'leaves a whitespace between the closing tag and the following word character' do
89
+ html = '<p>Look at <a href = "awesomeful.net">this</a> link for randomness</p>'
90
+ expected = '<p>Look at <a href = "awesomeful.net">this</a> link...</p>'
91
+ truncate(html, :length => 21).should == expected
92
+ end
93
+ end
94
+
95
+ it 'handles multibyte characters and leaves them in the result' do
96
+ html = '<p>Look at our multibyte characters ā ž <a href = "awesomeful.net">this</a> link for randomness ā ž</p>'
97
+ truncate(html, :length => html.length).should == html
98
+ end
99
+
100
+ #unusual, but just covering my ass
101
+ it 'recognizes the multiline html properly' do
102
+ html = <<-END_HTML
103
+ <div id="foo"
104
+ class="bar">
105
+ This is ugly html.
106
+ </div>
107
+ END_HTML
108
+ truncate(html, :length => 12).should == ' <div id="foo" class="bar"> This is...</div>'
109
+ end
110
+
111
+ %w(br hr img).each do |unpaired_tag|
112
+ context "when the html contains a #{unpaired_tag} tag" do
113
+
114
+ context "and the #{unpaired_tag} does not have the closing slash" do
115
+ it "does not close the #{unpaired_tag} tag" do
116
+ html = "<div>Some before. <#{unpaired_tag}>and some after</div>"
117
+ html_caps = "<div>Some before. <#{unpaired_tag.capitalize}>and some after</div>"
118
+ truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag}>and...</div>"
119
+ truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize}>and...</div>"
120
+ end
121
+ end
122
+
123
+ context "and the #{unpaired_tag} does have the closing slash" do
124
+ it "does not close the #{unpaired_tag} tag" do
125
+ html = "<div>Some before. <#{unpaired_tag} />and some after</div>"
126
+ html_caps = "<div>Some before. <#{unpaired_tag.capitalize} />and some after</div>"
127
+ truncate(html, :length => 19).should == "<div>Some before. <#{unpaired_tag} />and...</div>"
128
+ truncate(html_caps, :length => 19).should == "<div>Some before. <#{unpaired_tag.capitalize} />and...</div>"
129
+ end
130
+ end
131
+
132
+ end
133
+ end
134
+
135
+ it 'does not truncate quotes off when input contains chinese characters' do
136
+ html = "<p>“我现在使用的是中文的拼音。”<br>
137
+ 测试一下具体的truncate<em>html功能。<br>
138
+ “我现在使用的是中文的拼音。”<br>
139
+ 测试一下具体的truncate</em>html功能。<br>
140
+ “我现在使用的是中文的拼音。”<br>
141
+ 测试一下具体的truncate<em>html功能。<br>
142
+ “我现在使用的是中文的拼音。”<br>
143
+ 测试一下具体的truncate</em>html功能。</p>"
144
+
145
+ result = truncate(html, :omission => "", :length => 50)
146
+ result.should include "<p>“我现在使用的是中文的拼音。”<br>"
147
+ end
148
+
149
+ context 'when the break_token option is set as <!-- truncate -->' do
150
+ it 'does not truncate abnormally if the break_token is not present' do
151
+ truncate('This is line one. This is line two.', :length => 30, :break_token => '<!-- truncate -->').should == 'This is line one. This is...'
152
+ end
153
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
154
+ truncate('This is line one. This is line <!-- truncate --> two.', :length => 30, :break_token => '<!-- truncate -->').should == 'This is line one. This is...'
155
+ end
156
+ it 'truncates before the length param if the break_token is before the token at "length"' do
157
+ truncate('This is line one. <!-- truncate --> This is line two.', :length => 30, :break_token => '<!-- truncate -->').should == 'This is line one.'
158
+ end
159
+ end
160
+
161
+ context 'when the break_token option is customized as a comment' do
162
+ it 'does not truncate abnormally if the break_token is not present' do
163
+ truncate('This is line one. This is line two.', :length => 30, :break_token => '<!-- break -->').should == 'This is line one. This is...'
164
+ end
165
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
166
+ truncate('This is line one. This is line <!-- break --> two.', :length => 30, :break_token => '<!-- break -->').should == 'This is line one. This is...'
167
+ end
168
+ it 'truncates before the length param if the break_token is before the token at "length"' do
169
+ truncate('This is line one. <!-- break --> This is line two.', :length => 30, :break_token => '<!-- break -->').should == 'This is line one.'
170
+ end
171
+ end
172
+
173
+ context 'when the break_token option is customized as an html tag' do
174
+ it 'does not truncate abnormally if the break_token is not present' do
175
+ truncate('This is line one. This is line two.', :length => 30, :break_token => '<break />').should == 'This is line one. This is...'
176
+ end
177
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
178
+ truncate('This is line one. This is line <break /> two.', :length => 30, :break_token => '<break />').should == 'This is line one. This is...'
179
+ end
180
+ it 'truncates before the length param if the break_token is before the token at "length"' do
181
+ truncate('This is line one. <break /> This is line two.', :length => 30, :break_token => '<break />').should == 'This is line one.'
182
+ end
183
+ end
184
+
185
+ context 'when the break_token option is customized as a word' do
186
+ it 'does not truncate abnormally if the break_token is not present' do
187
+ truncate('This is line one. This is line two.', :length => 30, :break_token => 'foobar').should == 'This is line one. This is...'
188
+ end
189
+ it 'does not truncate abnormally if the break_token is present, but beyond the length param' do
190
+ truncate('This is line one. This is line foobar two.', :length => 30, :break_token => 'foobar').should == 'This is line one. This is...'
191
+ end
192
+ it 'truncates before the length param if the break_token is before the token at "length"' do
193
+ truncate('This is line one. foobar This is line two.', :length => 30, :break_token => 'foobar').should == 'This is line one.'
194
+ end
195
+ end
196
+
197
+ context 'a string with comments' do
198
+ it 'does not duplicate comments (issue #32)' do
199
+ truncate('<h1>hello <!-- stuff --> and <!-- la --> goodbye</h1>', length: 15).should ==
200
+ '<h1>hello <!-- stuff --> and <!-- la -->...</h1>'
201
+ end
202
+ end
203
+ end
data/truncate_html-0.9.2.gem ADDED
Binary file
data/truncate_html.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "truncate_html/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "truncate_html_chinese"
7
+ s.version = '0.1.0'
8
+ s.authors = ["pobing"]
9
+ s.email = ["cn.jdong@gmail.com"]
10
+ s.homepage = "https://github.com/pobing/truncate_html_chinese"
11
+ s.summary = %q{Uses an API similar to Rails' truncate helper to truncate HTML and close any lingering open tags.}
12
+ s.description = %q{Truncates html so you don't have to,support chinese}
13
+
14
+ s.files = `git ls-files`.split("\n")
15
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
+ s.require_paths = ["lib"]
18
+
19
+ s.required_ruby_version = '>= 1.9'
20
+
21
+ s.add_development_dependency "rspec-rails", "~> 2.9"
22
+ s.add_development_dependency "rails", "~> 3.0.3"
23
+ end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: truncate_html_chinese
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - pobing
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec-rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '2.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '2.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rails
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 3.0.3
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 3.0.3
41
+ description: Truncates html so you don't have to,support chinese
42
+ email:
43
+ - cn.jdong@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - .travis.yml
50
+ - Gemfile
51
+ - Gemfile.lock
52
+ - History.txt
53
+ - LICENSE
54
+ - README.md
55
+ - Rakefile
56
+ - VERSION
57
+ - init.rb
58
+ - lib/app/helpers/truncate_html_helper.rb
59
+ - lib/truncate_html.rb
60
+ - lib/truncate_html/configuration.rb
61
+ - lib/truncate_html/html_string.rb
62
+ - lib/truncate_html/html_truncator.rb
63
+ - lib/truncate_html/version.rb
64
+ - spec/helpers/truncate_html_helper_spec.rb
65
+ - spec/rails_root/Gemfile
66
+ - spec/rails_root/Gemfile.lock
67
+ - spec/rails_root/app/controllers/application_controller.rb
68
+ - spec/rails_root/app/helpers/application_helper.rb
69
+ - spec/rails_root/config/application.rb
70
+ - spec/rails_root/config/boot.rb
71
+ - spec/rails_root/config/database.yml
72
+ - spec/rails_root/config/environment.rb
73
+ - spec/rails_root/config/environments/development.rb
74
+ - spec/rails_root/config/environments/production.rb
75
+ - spec/rails_root/config/environments/test.rb
76
+ - spec/rails_root/config/initializers/backtrace_silencers.rb
77
+ - spec/rails_root/config/initializers/inflections.rb
78
+ - spec/rails_root/config/initializers/mime_types.rb
79
+ - spec/rails_root/config/initializers/new_rails_defaults.rb
80
+ - spec/rails_root/config/initializers/session_store.rb
81
+ - spec/rails_root/config/locales/en.yml
82
+ - spec/rails_root/config/routes.rb
83
+ - spec/rails_root/init.rb
84
+ - spec/rails_root/lib/app/helpers/truncate_html_helper.rb
85
+ - spec/rails_root/lib/tasks/rspec.rake
86
+ - spec/spec.opts
87
+ - spec/spec_helper.rb
88
+ - spec/truncate_html/configuration_spec.rb
89
+ - spec/truncate_html/html_string_spec.rb
90
+ - spec/truncate_html/html_truncator_spec.rb
91
+ - truncate_html-0.9.2.gem
92
+ - truncate_html.gemspec
93
+ homepage: https://github.com/pobing/truncate_html_chinese
94
+ licenses: []
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ! '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '1.9'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 2.0.3
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: Uses an API similar to Rails' truncate helper to truncate HTML and close
116
+ any lingering open tags.
117
+ test_files: []