truncate_html_chinese 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +92 -0
  6. data/History.txt +40 -0
  7. data/LICENSE +21 -0
  8. data/README.md +93 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/init.rb +1 -0
  12. data/lib/app/helpers/truncate_html_helper.rb +9 -0
  13. data/lib/truncate_html.rb +13 -0
  14. data/lib/truncate_html/configuration.rb +14 -0
  15. data/lib/truncate_html/html_string.rb +45 -0
  16. data/lib/truncate_html/html_truncator.rb +91 -0
  17. data/lib/truncate_html/version.rb +3 -0
  18. data/spec/helpers/truncate_html_helper_spec.rb +44 -0
  19. data/spec/rails_root/Gemfile +6 -0
  20. data/spec/rails_root/Gemfile.lock +86 -0
  21. data/spec/rails_root/app/controllers/application_controller.rb +10 -0
  22. data/spec/rails_root/app/helpers/application_helper.rb +3 -0
  23. data/spec/rails_root/config/application.rb +14 -0
  24. data/spec/rails_root/config/boot.rb +13 -0
  25. data/spec/rails_root/config/database.yml +22 -0
  26. data/spec/rails_root/config/environment.rb +5 -0
  27. data/spec/rails_root/config/environments/development.rb +17 -0
  28. data/spec/rails_root/config/environments/production.rb +28 -0
  29. data/spec/rails_root/config/environments/test.rb +29 -0
  30. data/spec/rails_root/config/initializers/backtrace_silencers.rb +7 -0
  31. data/spec/rails_root/config/initializers/inflections.rb +10 -0
  32. data/spec/rails_root/config/initializers/mime_types.rb +5 -0
  33. data/spec/rails_root/config/initializers/new_rails_defaults.rb +19 -0
  34. data/spec/rails_root/config/initializers/session_store.rb +15 -0
  35. data/spec/rails_root/config/locales/en.yml +5 -0
  36. data/spec/rails_root/config/routes.rb +43 -0
  37. data/spec/rails_root/init.rb +1 -0
  38. data/spec/rails_root/lib/app/helpers/truncate_html_helper.rb +7 -0
  39. data/spec/rails_root/lib/tasks/rspec.rake +144 -0
  40. data/spec/spec.opts +2 -0
  41. data/spec/spec_helper.rb +11 -0
  42. data/spec/truncate_html/configuration_spec.rb +17 -0
  43. data/spec/truncate_html/html_string_spec.rb +80 -0
  44. data/spec/truncate_html/html_truncator_spec.rb +203 -0
  45. data/truncate_html-0.9.2.gem +0 -0
  46. data/truncate_html.gemspec +23 -0
  47. metadata +117 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e056fc0c049ba378ca51dba2e9e01d98e5d620c9
4
+ data.tar.gz: cfec7cee37d01bf83c29b9c8806ed62166ae2017
5
+ SHA512:
6
+ metadata.gz: 0bd1b3d35c3fb20644fca74c7e2572a2363fb834d15830c50f807aabdd6c21e1d51c7625744ea3e206561388df7b1a8515e537a927a90eecd71e128605c27101
7
+ data.tar.gz: e00dbf6518fee91e88323fd0a58d1f93e997856499a14765186c31fc399141ef494a164ae7ed4ab324c745db47b6c73d3fd59745a338f46b466a230415dc24a3
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ pkg
2
+ coverage
3
+ profiling
4
+ tmp
5
+ spec/rails_root/log/*
6
+ log/*.log
7
+ .bundle
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - jruby-19mode # JRuby in 1.9 mode
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in truncate_html.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,92 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ truncate_html (0.9.2)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ abstract (1.0.0)
10
+ actionmailer (3.0.3)
11
+ actionpack (= 3.0.3)
12
+ mail (~> 2.2.9)
13
+ actionpack (3.0.3)
14
+ activemodel (= 3.0.3)
15
+ activesupport (= 3.0.3)
16
+ builder (~> 2.1.2)
17
+ erubis (~> 2.6.6)
18
+ i18n (~> 0.4)
19
+ rack (~> 1.2.1)
20
+ rack-mount (~> 0.6.13)
21
+ rack-test (~> 0.5.6)
22
+ tzinfo (~> 0.3.23)
23
+ activemodel (3.0.3)
24
+ activesupport (= 3.0.3)
25
+ builder (~> 2.1.2)
26
+ i18n (~> 0.4)
27
+ activerecord (3.0.3)
28
+ activemodel (= 3.0.3)
29
+ activesupport (= 3.0.3)
30
+ arel (~> 2.0.2)
31
+ tzinfo (~> 0.3.23)
32
+ activeresource (3.0.3)
33
+ activemodel (= 3.0.3)
34
+ activesupport (= 3.0.3)
35
+ activesupport (3.0.3)
36
+ arel (2.0.7)
37
+ builder (2.1.2)
38
+ diff-lcs (1.1.3)
39
+ erubis (2.6.6)
40
+ abstract (>= 1.0.0)
41
+ i18n (0.5.0)
42
+ mail (2.2.15)
43
+ activesupport (>= 2.3.6)
44
+ i18n (>= 0.4.0)
45
+ mime-types (~> 1.16)
46
+ treetop (~> 1.4.8)
47
+ mime-types (1.16)
48
+ polyglot (0.3.1)
49
+ rack (1.2.1)
50
+ rack-mount (0.6.13)
51
+ rack (>= 1.0.0)
52
+ rack-test (0.5.7)
53
+ rack (>= 1.0)
54
+ rails (3.0.3)
55
+ actionmailer (= 3.0.3)
56
+ actionpack (= 3.0.3)
57
+ activerecord (= 3.0.3)
58
+ activeresource (= 3.0.3)
59
+ activesupport (= 3.0.3)
60
+ bundler (~> 1.0)
61
+ railties (= 3.0.3)
62
+ railties (3.0.3)
63
+ actionpack (= 3.0.3)
64
+ activesupport (= 3.0.3)
65
+ rake (>= 0.8.7)
66
+ thor (~> 0.14.4)
67
+ rake (0.8.7)
68
+ rspec (2.9.0)
69
+ rspec-core (~> 2.9.0)
70
+ rspec-expectations (~> 2.9.0)
71
+ rspec-mocks (~> 2.9.0)
72
+ rspec-core (2.9.0)
73
+ rspec-expectations (2.9.1)
74
+ diff-lcs (~> 1.1.3)
75
+ rspec-mocks (2.9.0)
76
+ rspec-rails (2.9.0)
77
+ actionpack (>= 3.0)
78
+ activesupport (>= 3.0)
79
+ railties (>= 3.0)
80
+ rspec (~> 2.9.0)
81
+ thor (0.14.6)
82
+ treetop (1.4.9)
83
+ polyglot (>= 0.3.1)
84
+ tzinfo (0.3.24)
85
+
86
+ PLATFORMS
87
+ ruby
88
+
89
+ DEPENDENCIES
90
+ rails (~> 3.0.3)
91
+ rspec-rails (~> 2.9)
92
+ truncate_html!
data/History.txt ADDED
@@ -0,0 +1,40 @@
1
+ == 0.5.1 2011-04-08
2
+ * Ensure resulting string's length is never greater than supplied length (csquared)
3
+
4
+ == 0.5.0 2011-01-26
5
+ * Multibyte support. (smix, parndt)
6
+
7
+ == 0.4.0 2010-03-30
8
+ * Rails 3 support. This breaks rails 2 support.
9
+
10
+ == 0.3.2 2010-03-23
11
+ * Fix for autoloading of classes in older Rails versions. (kball)
12
+ * Fix issue #5: autoloading of default configuration.
13
+
14
+ == 0.3.1 2010-02-03
15
+ * Fixed minor typo on the word_boundary option name.
16
+
17
+ == 0.3.0 2010-02-02
18
+ * Added the ability to set global configuration parameters
19
+ * Added the word_boundry option
20
+
21
+ == 0.2.2 2009-12-23
22
+ * Fix issue #4: Handle case when supplied length is smaller than omission. (ghazel)
23
+
24
+ == 0.2.1 2009-12-18
25
+ * Fix issue #3: Handle case when input html contains a script tag.
26
+
27
+ == 0.2.0 2009-11-23
28
+ * Fix issue #2: The omission text's length is now included in the returned
29
+ string's calculation. This is more consistent with the rails truncate
30
+ helper's behavior.
31
+
32
+ == 0.1.2 2009-09-25
33
+ * Fix issue #1: Handle case when input html is nil. (bcardarella)
34
+
35
+ == 0.1.1 2009-08-25
36
+ * Fixed issue with regex which would not recognize <a> tags that contain slashes.
37
+ * Other refactoring and improvements to spec coverage.
38
+
39
+ == 0.1.0 2009-08-03
40
+ * Wrote truncate_html. Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 - 2010 Harold A. Giménez
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,93 @@
1
+ TruncateHtml
2
+ ============
3
+
4
+ [![Build Status](https://secure.travis-ci.org/hgmnz/truncate_html.png?branch=master)](http://travis-ci.org/hgmnz/truncate_html)
5
+ [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/hgmnz/truncate_html)
6
+
7
+ truncate_html cuts off a string of HTML and takes care of closing any lingering open tags. There are many ways to solve this. This library does not have any dependencies, and [parses HTML using regular expressions](http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/1732454#1732454).
8
+
9
+ It can be used with or without Rails.
10
+
11
+ Example
12
+ -------
13
+
14
+ ```ruby
15
+ some_html = '<ul><li><a href="http://whatever">This is a link</a></li></ul>'
16
+ truncate_html(some_html, :length => 15, :omission => '...(continued)')
17
+ => <ul><li><a href="http://whatever">This...(continued)</a></li></ul>
18
+ ```
19
+
20
+ A few notes:
21
+
22
+ * By default, it will truncate on word boundary.
23
+ To truncate the HTML string strictly at the specified length, pass in the `:word_boundary => false` option.
24
+ * If the input HTML is nil, it will return an empty string.
25
+ * The omission text's length does count toward the resulting string's length.
26
+ * `<script>` tags will pass right through - they will not count toward the resulting string's length, or be truncated.
27
+
28
+ * The default options are:
29
+ * `:length`: 100
30
+ * `:omission`: '...'
31
+ * `:word_boundary`: /\S/
32
+
33
+ You may also set global configuration options.
34
+ For example, place the following on application boot,
35
+ something like `config/initializers/truncate_html.rb`
36
+
37
+ ```ruby
38
+ TruncateHtml.configure do |config|
39
+ config.length = 50
40
+ config.omission = '...(continued)'
41
+ end
42
+ ```
43
+
44
+ If you really want, you can even set a custom word boundary regexp.
45
+ For example, to truncate at the end of the nearest sentence:
46
+
47
+ ```ruby
48
+ TruncateHtml.configure do |config|
49
+ config.word_boundary = /\S[\.\?\!]/
50
+ end
51
+ ```
52
+
53
+ You can also truncate the html at a specific point not based on length but content.
54
+ To do that, place the `:break_token` in your source. This allows the truncation to be
55
+ data driven, breaking after a leading paragraph or sentence. If the
56
+ `:break_token` is in your content before the specified :length, :length will be
57
+ ignored and the content truncated at :break_token.
58
+ If the `:break_token` is in your content after the specified :length,
59
+ `:break_token` will be ignored and the content truncated at :length.
60
+
61
+ ```ruby
62
+ TruncateHtml.configure do |config|
63
+ config.break_token = '<!-- truncate -->'
64
+ end
65
+ ```
66
+ Installation
67
+ ------------
68
+
69
+ The latest gem version for the Rails 2.x series is 0.3.2.
70
+ To use truncate_html on a Rails 2 app, please install the 0.3.2 version:
71
+
72
+ gem install truncate_html -v 0.3.2
73
+
74
+ For Rails 3, use the latest truncate_html:
75
+
76
+ gem install truncate_html
77
+
78
+ Issues or Suggestions
79
+ ---------------------
80
+
81
+ Found an issue or have a suggestion? Please report it on [GitHub's issue tracker](http://github.com/hgmnz/truncate_html/issues).
82
+
83
+ Testing
84
+ -------
85
+
86
+ bundle
87
+ rake
88
+
89
+ All green? Go hack.
90
+
91
+ Copyright (c) 2009 - 2010 Harold A. Giménez, released under the MIT license
92
+
93
+ Thanks to all the [contributors](https://github.com/hgmnz/truncate_html/contributors)!
data/Rakefile ADDED
@@ -0,0 +1,50 @@
require 'rake'

# Load RSpec's rake integration. When RSpec is not installed, define a
# stand-in RSpec::Core::RakeTask so that this Rakefile still loads and the
# :spec task fails with an explanatory message.
begin
  require 'rspec/core'
  require 'rspec/core/rake_task'
rescue LoadError
  # `require` raises LoadError for a missing library. MissingSourceFile is an
  # alias that only exists when ActiveSupport is loaded; naming it here turned
  # a missing RSpec into a NameError raised by the rescue clause itself.
  module RSpec
    module Core
      class RakeTask
        # Rake versions that ship Rake::DSL only expose `task` on the
        # top-level object, so mix the DSL in to make it callable here.
        include Rake::DSL if defined?(Rake::DSL)

        # Defines a task called +name+ that explains RSpec is missing.
        # Any block passed to .new is ignored.
        def initialize(name)
          task name do
            # if rspec-rails is a configured gem, this will output helpful material and exit ...
            require File.expand_path(File.dirname(__FILE__) + "/../../config/environment")
            # ... otherwise, do this:
            raise <<-MSG
#{"*" * 80}
* You are trying to run an rspec rake task defined in
* #{__FILE__},
* but rspec can not be found in vendor/gems, vendor/plugins or system gems.
#{"*" * 80}
            MSG
          end
        end
      end
    end
  end
end

task :default => :spec
task :stats => "spec:statsetup"

desc "Run RSpec code examples"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = "./spec/**/*_spec.rb"
end

namespace :gem do
  desc "Release to rubygems"
  task :release do
    require File.expand_path('lib/truncate_html/version', File.dirname(__FILE__))
    version = TruncateHtml::VERSION
    tag     = "v#{version}"
    gem     = "truncate_html-#{version}.gem"
    message = "Bump to version #{version}"

    # `sh` aborts the release as soon as one step fails, whereas `system`
    # silently carried on (e.g. pushing a gem whose tag was never created).
    # The multi-argument form bypasses the shell, so the message needs no quoting.
    sh "git", "tag", "-a", "-m", message, tag
    sh "git", "push", "origin", "master"
    # Push exactly the tag created above. `git tag | tail -1` picks the
    # lexicographically last tag, which is not necessarily this one.
    sh "git", "push", "origin", tag
    sh "gem", "build", "truncate_html.gemspec"
    sh "gem", "push", gem
    rm gem
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.1
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'truncate_html'
@@ -0,0 +1,9 @@
# Helper that exposes HTML-aware truncation as a single method.
module TruncateHtmlHelper

  # Truncates +html+ and closes any tags left open by the cut.
  #
  # html    - the HTML to truncate; nil yields an empty string.
  # options - Hash forwarded to TruncateHtml::HtmlTruncator, which reads
  #           :length, :omission, :word_boundary and :break_token.
  #
  # Returns the truncated markup. It is marked html_safe when the result
  # supports that (i.e. ActiveSupport is loaded); otherwise the plain string
  # is returned so the helper also works outside Rails.
  def truncate_html(html, options={})
    return '' if html.nil?

    html_string = TruncateHtml::HtmlString.new(html)
    truncated   = TruncateHtml::HtmlTruncator.new(html_string, options).truncate
    truncated.respond_to?(:html_safe) ? truncated.html_safe : truncated
  end

end
@@ -0,0 +1,13 @@
# Entry point of the library: loads every component, installs the default
# configuration and, when Rails' ActionController is present, registers the
# view helper.
require File.join(File.dirname(__FILE__), 'truncate_html', 'version')
require File.join(File.dirname(__FILE__), 'truncate_html', 'html_truncator')
require File.join(File.dirname(__FILE__), 'truncate_html', 'html_string')
require File.join(File.dirname(__FILE__), 'truncate_html', 'configuration')
require File.join(File.dirname(__FILE__), 'app', 'helpers', 'truncate_html_helper')

# Library-wide defaults; applications may call TruncateHtml.configure again
# to override any of them.
TruncateHtml.configure do |config|
  config.length        = 100
  config.omission      = '...'
  config.word_boundary = /\S/
end

# Only hook into ActionController when it is available. Referencing the
# constant unconditionally raised NameError when the library was required
# outside a Rails process.
if defined?(ActionController::Base)
  ActionController::Base.helper(TruncateHtmlHelper)
end
@@ -0,0 +1,14 @@
module TruncateHtml
  # Holds the library-wide truncation settings.
  class Configuration
    # length        - maximum length of the truncated output.
    # omission      - text appended where content was cut.
    # word_boundary - Regexp used to trim the result, or a falsy value to
    #                 cut strictly at the requested length.
    # break_token   - optional token at which truncation is forced.
    attr_accessor :length, :omission, :word_boundary, :break_token

    # Starts from the documented defaults so that a fresh configuration is
    # usable even when nothing has called TruncateHtml.configure yet.
    def initialize
      @length        = 100
      @omission      = '...'
      @word_boundary = /\S/
      @break_token   = nil
    end
  end

  class << self
    attr_writer :configuration

    # Returns the current Configuration, creating it on first access so
    # callers never receive nil.
    def configuration
      @configuration ||= Configuration.new
    end
  end

  # Yields the current Configuration for modification.
  #
  # Returns the value of the block, or the Configuration itself when no
  # block is given (previously a LocalJumpError).
  def self.configure
    return configuration unless block_given?

    yield configuration
  end
end
@@ -0,0 +1,45 @@
# encoding: utf-8
module TruncateHtml
  # A String that can split itself into HTML tokens and answer questions
  # about a single token (tag? open tag? comment?).
  class HtmlString < String

    # HTML void elements: they never take a closing tag, so they must not be
    # tracked as "open" while truncating.
    UNPAIRED_TAGS = %w(area base br col embed hr img input link meta param source track wbr).freeze

    # Tokenizer alternatives, tried in order: one or more script elements
    # on a single line, any tag, a run of word/symbol characters, a run of
    # whitespace, a run of punctuation, and finally any single
    # non-punctuation character as a catch-all.
    TOKEN_REGEX = /(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]]\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s+|[[:punct:]]+|\P{P}/

    TAG_REGEX     = /<\/?[^>]+>/
    COMMENT_REGEX = /<\s?!--.*-->/

    # Anchored at the start of the token so markup embedded inside a script
    # token is not mistaken for an open tag. The lookahead after the tag
    # names requires the name to end there, so <colgroup> is not confused
    # with the void element <col>.
    OPEN_TAG_REGEX = /\A<(?!(?:#{UNPAIRED_TAGS.join('|')}|script)(?=[\s\/>])|\/)[^>]+>/i

    # Captures the full tag name, including hyphenated (custom elements)
    # and namespaced (svg:rect, o:p) names.
    TAG_NAME_REGEX = /<([\w:-]+)\s?.*>/

    # Splits the string into tokens.
    #
    # Returns an Array of HtmlString; newlines are removed from each token
    # and runs of whitespace are collapsed into a single space.
    def html_tokens
      scan(regex).map do |token|
        HtmlString.new(token.gsub(/\n/, '').gsub(/\s+/, ' '))
      end
    end

    # Returns true when the string contains a tag and is not a comment.
    def html_tag?
      !!(TAG_REGEX =~ self) && !html_comment?
    end

    # Returns true when the string starts with a tag that needs a matching
    # closing tag: not a closing tag, not a void element, not a script.
    def open_tag?
      !!(OPEN_TAG_REGEX =~ self)
    end

    # Returns true when the string contains an HTML comment.
    def html_comment?
      !!(COMMENT_REGEX =~ self)
    end

    # Returns the closing tag for this opening tag, e.g. '</a>' for
    # '<a href="...">'. Strings that do not start a tag name after '<'
    # are returned stripped but otherwise unchanged.
    def matching_close_tag
      gsub(TAG_NAME_REGEX, '</\1>').strip
    end

    private

    # Regexp used by html_tokens to split the string.
    def regex
      TOKEN_REGEX
    end

  end
end
@@ -0,0 +1,91 @@
module TruncateHtml
  # Walks the tokens of an HTML string, copies as many as fit into the
  # requested length, appends the omission text and closes the tags that are
  # still open at the cut.
  class HtmlTruncator

    # original_html - object responding to #html_tokens (see HtmlString).
    # options       - Hash; every key falls back to TruncateHtml.configuration:
    #                 :length        - maximum length, omission included.
    #                 :omission      - text appended where content was cut.
    #                 :word_boundary - Regexp, true (use the configured one)
    #                                  or a falsy value for a strict cut.
    #                 :break_token   - token that forces truncation.
    def initialize(original_html, options = {})
      config = TruncateHtml.configuration
      @original_html = original_html
      length = options[:length] || config.length
      @omission = options[:omission] || config.omission
      # key? rather than ||, so that an explicit false/nil disables it.
      @word_boundary = options.key?(:word_boundary) ? options[:word_boundary] : config.word_boundary
      @break_token = options[:break_token] || config.break_token
      @chars_remaining = length - @omission.length
      @open_tags, @truncated_html = [], ['']
    end

    # Returns the truncated HTML as a String. When the requested length is
    # smaller than the omission text, only the omission is returned.
    def truncate
      return @omission if @chars_remaining < 0

      @original_html.html_tokens.each do |token|
        if @chars_remaining <= 0 || truncate_token?(token)
          close_open_tags
          break
        end
        process_token(token)
      end

      out = @truncated_html.join
      boundary = word_boundary
      return out unless boundary

      # Keep everything up to the last match of the boundary. The pattern is
      # rebuilt from the source, so carry the boundary's own flags over;
      # otherwise a custom /.../i or /.../m boundary silently lost them.
      flags = boundary.options & (Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
      match = out.match(Regexp.new("^.*#{boundary.source}", flags))
      match ? match[0] : out
    end

    private

    # The effective boundary: `true` means "use the configured regexp".
    def word_boundary
      if @word_boundary == true
        TruncateHtml.configuration.word_boundary
      else
        @word_boundary
      end
    end

    # Copies +token+ to the result and updates the open-tag stack or the
    # remaining character budget, depending on the token type.
    def process_token(token)
      append_to_result(token)
      if token.html_tag?
        if token.open_tag?
          @open_tags << token
        else
          remove_latest_open_tag(token)
        end
      elsif !token.html_comment?
        # Only text consumes the budget; tags and comments are free.
        @chars_remaining -= (@word_boundary ? token.length : token[0, @chars_remaining].length)
        if @chars_remaining <= 0
          @truncated_html[-1] = @truncated_html[-1].rstrip + @omission
        end
      end
    end

    # Tags and comments are always copied. Text is copied whole when it fits
    # (word-boundary mode) or cut to the remaining budget (strict mode).
    def append_to_result(token)
      if token.html_tag? || token.html_comment?
        @truncated_html << token
      elsif @word_boundary
        @truncated_html << token if (@chars_remaining - token.length) >= 0
      else
        @truncated_html << token[0, @chars_remaining]
      end
    end

    # Appends the closing tag of every still-open tag, innermost first.
    def close_open_tags
      @open_tags.reverse_each do |open_tag|
        @truncated_html << open_tag.matching_close_tag
      end
    end

    # Drops the most recently opened tag that +close_tag+ closes, if any.
    def remove_latest_open_tag(close_tag)
      index = @open_tags.rindex { |open_tag| open_tag.matching_close_tag == close_tag }
      @open_tags.delete_at(index) if index
    end

    # Whether +token+ is the configured break token.
    def truncate_token?(token)
      @break_token && token == @break_token
    end
  end
end