truncate_html_chinese 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.lock +92 -0
  6. data/History.txt +40 -0
  7. data/LICENSE +21 -0
  8. data/README.md +93 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/init.rb +1 -0
  12. data/lib/app/helpers/truncate_html_helper.rb +9 -0
  13. data/lib/truncate_html.rb +13 -0
  14. data/lib/truncate_html/configuration.rb +14 -0
  15. data/lib/truncate_html/html_string.rb +45 -0
  16. data/lib/truncate_html/html_truncator.rb +91 -0
  17. data/lib/truncate_html/version.rb +3 -0
  18. data/spec/helpers/truncate_html_helper_spec.rb +44 -0
  19. data/spec/rails_root/Gemfile +6 -0
  20. data/spec/rails_root/Gemfile.lock +86 -0
  21. data/spec/rails_root/app/controllers/application_controller.rb +10 -0
  22. data/spec/rails_root/app/helpers/application_helper.rb +3 -0
  23. data/spec/rails_root/config/application.rb +14 -0
  24. data/spec/rails_root/config/boot.rb +13 -0
  25. data/spec/rails_root/config/database.yml +22 -0
  26. data/spec/rails_root/config/environment.rb +5 -0
  27. data/spec/rails_root/config/environments/development.rb +17 -0
  28. data/spec/rails_root/config/environments/production.rb +28 -0
  29. data/spec/rails_root/config/environments/test.rb +29 -0
  30. data/spec/rails_root/config/initializers/backtrace_silencers.rb +7 -0
  31. data/spec/rails_root/config/initializers/inflections.rb +10 -0
  32. data/spec/rails_root/config/initializers/mime_types.rb +5 -0
  33. data/spec/rails_root/config/initializers/new_rails_defaults.rb +19 -0
  34. data/spec/rails_root/config/initializers/session_store.rb +15 -0
  35. data/spec/rails_root/config/locales/en.yml +5 -0
  36. data/spec/rails_root/config/routes.rb +43 -0
  37. data/spec/rails_root/init.rb +1 -0
  38. data/spec/rails_root/lib/app/helpers/truncate_html_helper.rb +7 -0
  39. data/spec/rails_root/lib/tasks/rspec.rake +144 -0
  40. data/spec/spec.opts +2 -0
  41. data/spec/spec_helper.rb +11 -0
  42. data/spec/truncate_html/configuration_spec.rb +17 -0
  43. data/spec/truncate_html/html_string_spec.rb +80 -0
  44. data/spec/truncate_html/html_truncator_spec.rb +203 -0
  45. data/truncate_html-0.9.2.gem +0 -0
  46. data/truncate_html.gemspec +23 -0
  47. metadata +117 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e056fc0c049ba378ca51dba2e9e01d98e5d620c9
4
+ data.tar.gz: cfec7cee37d01bf83c29b9c8806ed62166ae2017
5
+ SHA512:
6
+ metadata.gz: 0bd1b3d35c3fb20644fca74c7e2572a2363fb834d15830c50f807aabdd6c21e1d51c7625744ea3e206561388df7b1a8515e537a927a90eecd71e128605c27101
7
+ data.tar.gz: e00dbf6518fee91e88323fd0a58d1f93e997856499a14765186c31fc399141ef494a164ae7ed4ab324c745db47b6c73d3fd59745a338f46b466a230415dc24a3
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ pkg
2
+ coverage
3
+ profiling
4
+ tmp
5
+ spec/rails_root/log/*
6
+ log/*.log
7
+ .bundle
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - jruby-19mode # JRuby in 1.9 mode
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in truncate_html.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,92 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ truncate_html (0.9.2)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ abstract (1.0.0)
10
+ actionmailer (3.0.3)
11
+ actionpack (= 3.0.3)
12
+ mail (~> 2.2.9)
13
+ actionpack (3.0.3)
14
+ activemodel (= 3.0.3)
15
+ activesupport (= 3.0.3)
16
+ builder (~> 2.1.2)
17
+ erubis (~> 2.6.6)
18
+ i18n (~> 0.4)
19
+ rack (~> 1.2.1)
20
+ rack-mount (~> 0.6.13)
21
+ rack-test (~> 0.5.6)
22
+ tzinfo (~> 0.3.23)
23
+ activemodel (3.0.3)
24
+ activesupport (= 3.0.3)
25
+ builder (~> 2.1.2)
26
+ i18n (~> 0.4)
27
+ activerecord (3.0.3)
28
+ activemodel (= 3.0.3)
29
+ activesupport (= 3.0.3)
30
+ arel (~> 2.0.2)
31
+ tzinfo (~> 0.3.23)
32
+ activeresource (3.0.3)
33
+ activemodel (= 3.0.3)
34
+ activesupport (= 3.0.3)
35
+ activesupport (3.0.3)
36
+ arel (2.0.7)
37
+ builder (2.1.2)
38
+ diff-lcs (1.1.3)
39
+ erubis (2.6.6)
40
+ abstract (>= 1.0.0)
41
+ i18n (0.5.0)
42
+ mail (2.2.15)
43
+ activesupport (>= 2.3.6)
44
+ i18n (>= 0.4.0)
45
+ mime-types (~> 1.16)
46
+ treetop (~> 1.4.8)
47
+ mime-types (1.16)
48
+ polyglot (0.3.1)
49
+ rack (1.2.1)
50
+ rack-mount (0.6.13)
51
+ rack (>= 1.0.0)
52
+ rack-test (0.5.7)
53
+ rack (>= 1.0)
54
+ rails (3.0.3)
55
+ actionmailer (= 3.0.3)
56
+ actionpack (= 3.0.3)
57
+ activerecord (= 3.0.3)
58
+ activeresource (= 3.0.3)
59
+ activesupport (= 3.0.3)
60
+ bundler (~> 1.0)
61
+ railties (= 3.0.3)
62
+ railties (3.0.3)
63
+ actionpack (= 3.0.3)
64
+ activesupport (= 3.0.3)
65
+ rake (>= 0.8.7)
66
+ thor (~> 0.14.4)
67
+ rake (0.8.7)
68
+ rspec (2.9.0)
69
+ rspec-core (~> 2.9.0)
70
+ rspec-expectations (~> 2.9.0)
71
+ rspec-mocks (~> 2.9.0)
72
+ rspec-core (2.9.0)
73
+ rspec-expectations (2.9.1)
74
+ diff-lcs (~> 1.1.3)
75
+ rspec-mocks (2.9.0)
76
+ rspec-rails (2.9.0)
77
+ actionpack (>= 3.0)
78
+ activesupport (>= 3.0)
79
+ railties (>= 3.0)
80
+ rspec (~> 2.9.0)
81
+ thor (0.14.6)
82
+ treetop (1.4.9)
83
+ polyglot (>= 0.3.1)
84
+ tzinfo (0.3.24)
85
+
86
+ PLATFORMS
87
+ ruby
88
+
89
+ DEPENDENCIES
90
+ rails (~> 3.0.3)
91
+ rspec-rails (~> 2.9)
92
+ truncate_html!
data/History.txt ADDED
@@ -0,0 +1,40 @@
1
+ == 0.5.1 2011-04-08
2
+ * Ensure resulting string's length is never greater than supplied length (csquared)
3
+
4
+ == 0.5.0 2011-01-26
5
+ * Multibyte support. (smix, parndt)
6
+
7
+ == 0.4.0 2010-03-30
8
+ * Rails 3 support. This breaks rails 2 support.
9
+
10
+ == 0.3.2 2010-03-23
11
+ * Fix for autoloading of classes in older Rails versions. (kball)
12
+ * Fix issue #5: autoloading of default configuration.
13
+
14
+ == 0.3.1 2010-02-03
15
+ * Fixed minor typo on the word_boundary option name.
16
+
17
+ == 0.3.0 2010-02-02
18
+ * Added the ability to set global configuration parameters
19
+ * Added the word_boundry option
20
+
21
+ == 0.2.2 2009-12-23
22
+ * Fix issue #4: Handle case when supplied length is smaller than omission. (ghazel)
23
+
24
+ == 0.2.1 2009-12-18
25
+ * Fix issue #3: Handle case when input html contains a script tag.
26
+
27
+ == 0.2.0 2009-11-23
28
+ * Fix issue #2: The omission text's length is now included in the returned
29
+ string's calculation. This is more consistent with the rails truncate
30
+ helper's behavior.
31
+
32
+ == 0.1.2 2009-09-25
33
+ * Fix issue #1: Handle case when input html is nil. (bcardarella)
34
+
35
+ == 0.1.1 2009-08-25
36
+ * Fixed issue with regex which would not recognize <a> tags that contain slashes.
37
+ * Other refactoring and improvements to spec coverage.
38
+
39
+ == 0.1.0 2009-08-03
40
+ * Wrote truncate_html. Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 - 2010 Harold A. Giménez
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,93 @@
1
+ TruncateHtml
2
+ ============
3
+
4
+ [![Build Status](https://secure.travis-ci.org/hgmnz/truncate_html.png?branch=master)](http://travis-ci.org/hgmnz/truncate_html)
5
+ [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/hgmnz/truncate_html)
6
+
7
+ truncate_html cuts off a string of HTML and takes care of closing any lingering open tags. There are many ways to solve this. This library does not have any dependencies, and [parses HTML using regular expressions](http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/1732454#1732454).
8
+
9
+ It can be used with or without Rails.
10
+
11
+ Example
12
+ -------
13
+
14
+ ```ruby
15
+ some_html = '<ul><li><a href="http://whatever">This is a link</a></li></ul>'
16
+ truncate_html(some_html, :length => 15, :omission => '...(continued)')
17
+ => <ul><li><a href="http://whatever">This...(continued)</a></li></ul>
18
+ ```
19
+
20
+ A few notes:
21
+
22
+ * By default, it will truncate on word boundary.
23
+ To truncate the HTML string strictly at the specified length, pass in the `:word_boundary => false` option.
24
+ * If the input HTML is nil, it will return an empty string.
25
+ * The omission text's length does count toward the resulting string's length.
26
+ * `<script>` tags will pass right through - they will not count toward the resulting string's length, or be truncated.
27
+
28
+ * The default options are:
29
+ * `:length`: 100
30
+ * `:omission`: '...'
31
+ * `:word_boundary`: /\S/
32
+
33
+ You may also set global configuration options.
34
+ For example, place the following on application boot,
35
+ something like `config/initializers/truncate_html.rb`
36
+
37
+ ```ruby
38
+ TruncateHtml.configure do |config|
39
+ config.length = 50
40
+ config.omission = '...(continued)'
41
+ end
42
+ ```
43
+
44
+ If you really want, you can even set a custom word boundary regexp.
45
+ For example, to truncate at the end of the nearest sentence:
46
+
47
+ ```ruby
48
+ TruncateHtml.configure do |config|
49
+ config.word_boundary = /\S[\.\?\!]/
50
+ end
51
+ ```
52
+
53
+ You can also truncate the html at a specific point not based on length but content.
54
+ To do that, place the `:break_token` in your source. This allows the truncation to be
55
+ data driven, breaking after a leading paragraph or sentence. If the
56
+ `:break_token` is in your content before the specified :length, :length will be
57
+ ignored and the content truncated at `:break_token`.
58
+ If the `:break_token` is in your content after the specified :length,
59
+ `:break_token` will be ignored and the content truncated at `:length`.
60
+
61
+ ```ruby
62
+ TruncateHtml.configure do |config|
63
+ config.break_token = '<!-- truncate -->'
64
+ end
65
+ ```
66
+ Installation
67
+ ------------
68
+
69
+ The latest gem version for the Rails 2.x series is 0.3.2.
70
+ To use truncate_html on a Rails 2 app, please install the 0.3.2 version:
71
+
72
+ gem install truncate_html -v 0.3.2
73
+
74
+ For Rails 3, use the latest truncate_html:
75
+
76
+ gem install truncate_html
77
+
78
+ Issues or Suggestions
79
+ ---------------------
80
+
81
+ Found an issue or have a suggestion? Please report it on [Github's issue tracker](http://github.com/hgmnz/truncate_html/issues).
82
+
83
+ Testing
84
+ -------
85
+
86
+ bundle
87
+ rake
88
+
89
+ All green? Go hack.
90
+
91
+ Copyright (c) 2009 - 2010 Harold A. Giménez, released under the MIT license
92
+
93
+ Thanks to all the [contributors](https://github.com/hgmnz/truncate_html/contributors)!
data/Rakefile ADDED
@@ -0,0 +1,50 @@
# Rake tasks for the truncate_html gem: running the specs and cutting a release.
require 'rake'

begin
  require 'rspec/core'
  require 'rspec/core/rake_task'
rescue LoadError
  # RSpec is not installed. Define a stand-in RakeTask so that the task
  # declarations below still load; running the task explains what is missing.
  # (LoadError is what `require` raises; MissingSourceFile is only an alias
  # that exists when ActiveSupport has been loaded.)
  module RSpec
    module Core
      class RakeTask
        # name - the task name to register (e.g. :spec). Any block given by
        #        the caller is ignored because there is nothing to configure.
        def initialize(name)
          # Rake::Task.define_task works from inside a class on every rake
          # version, unlike the top-level `task` DSL method.
          Rake::Task.define_task(name) do
            # if rspec-rails is a configured gem, this will output helpful material and exit ...
            require File.expand_path(File.dirname(__FILE__) + "/../../config/environment")
            # ... otherwise, do this:
            raise <<-MSG
#{"*" * 80}
* You are trying to run an rspec rake task defined in
* #{__FILE__},
* but rspec can not be found in vendor/gems, vendor/plugins or system gems.
#{"*" * 80}
            MSG
          end
        end
      end
    end
  end
end

task :default => :spec
task :stats => "spec:statsetup"

desc "Run RSpec code examples"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = "./spec/**/*_spec.rb"
end

namespace :gem do
  desc "Release to rubygems"
  task :release do
    require File.expand_path('lib/truncate_html/version', File.dirname(__FILE__))
    version  = TruncateHtml::VERSION
    tag      = "v#{version}"
    gem_file = "truncate_html-#{version}.gem"
    message  = "Bump to version #{version}"

    # `sh` aborts the task when a command fails, so a failed tag or build
    # can no longer be followed by a push of a broken release.
    sh "git tag -a -m '#{message}' #{tag}"
    sh "git push origin master"
    # Push exactly the tag created above rather than whatever sorts last in
    # `git tag` output (lexicographic order puts v0.9.x after v0.10.x).
    sh "git push origin #{tag}"
    sh "gem build truncate_html.gemspec"
    sh "gem push #{gem_file}"
    rm gem_file
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.1
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'truncate_html'
@@ -0,0 +1,9 @@
# View helper exposing HTML-aware truncation.
module TruncateHtmlHelper

  # Truncates +html+ and returns the result marked as html_safe.
  #
  # html    - the HTML source to shorten; nil yields an empty string.
  # options - passed straight through to TruncateHtml::HtmlTruncator.
  def truncate_html(html, options={})
    if html.nil?
      ''
    else
      source    = TruncateHtml::HtmlString.new(html)
      truncator = TruncateHtml::HtmlTruncator.new(source, options)
      truncator.truncate.html_safe
    end
  end

end
@@ -0,0 +1,13 @@
1
+ require File.join(File.dirname(__FILE__), 'truncate_html', 'version')
2
+ require File.join(File.dirname(__FILE__), 'truncate_html', 'html_truncator')
3
+ require File.join(File.dirname(__FILE__), 'truncate_html', 'html_string')
4
+ require File.join(File.dirname(__FILE__), 'truncate_html', 'configuration')
5
+ require File.join(File.dirname(__FILE__), 'app', 'helpers', 'truncate_html_helper')
6
+
# Gem-wide defaults; applications may override them later with their own
# TruncateHtml.configure block.
TruncateHtml.configure do |config|
  config.length        = 100
  config.omission      = '...'
  config.word_boundary = /\S/
end

# Register the view helper only when ActionController is available, so the
# library can also be required outside of Rails without raising NameError.
ActionController::Base.helper(TruncateHtmlHelper) if defined?(ActionController::Base)
@@ -0,0 +1,14 @@
module TruncateHtml
  # Holds the global truncation settings. Every attribute starts out nil
  # until it is assigned, typically inside a TruncateHtml.configure block.
  class Configuration
    attr_accessor :length, :omission, :word_boundary, :break_token
  end

  class << self
    attr_writer :configuration

    # Returns the shared Configuration, creating it on first access so that
    # callers never receive nil, even if configure has not been called yet.
    def configuration
      @configuration ||= Configuration.new
    end
  end

  # Yields the shared Configuration so the caller can set options on it.
  # Returns whatever the block returns.
  def self.configure
    yield configuration
  end
end
@@ -0,0 +1,45 @@
# encoding: utf-8
module TruncateHtml
  # A String that can split itself into HTML tokens (tags, script blocks,
  # words, whitespace, punctuation) and answer questions about a single token.
  class HtmlString < String

    # HTML void elements: they never take a closing tag, so they must not be
    # tracked as "open" while truncating.
    UNPAIRED_TAGS = %w(area base br col embed hr img input link meta param source track wbr).freeze

    # Matches a tag that opens an element needing a matching close tag.
    # The lookahead rejects closing tags, and void/script tags whose name is
    # followed by whitespace, "/" or ">" (so <colgroup> is not mistaken for <col>).
    OPEN_TAG_REGEX = /<(?!\/|(?:#{UNPAIRED_TAGS.join('|')}|script)(?=[\s\/>]))[^>]+>/i

    # Tokenizer alternatives, tried in order:
    #   1. one or more adjacent <script>...</script> blocks (lazy, so text
    #      between two separate script blocks is not swallowed),
    #   2. a single tag,
    #   3. a run of word-like characters,
    #   4. a run of whitespace,
    #   5. a run of punctuation,
    #   6. any single remaining non-punctuation character.
    TOKEN_REGEX = /(?:<script\b[^>]*>.*?<\/script>)+|<\/?[^>]+>|[[[:alpha:]]\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s+|[[:punct:]]+|\P{P}/

    # Splits the string into an Array of HtmlString tokens. Newlines are
    # dropped from each token and runs of whitespace collapse to one space.
    def html_tokens
      scan(regex).map do |token|
        HtmlString.new(token.gsub(/\n/, '').gsub(/\s+/, ' '))
      end
    end

    # True when the string contains a tag and is not an HTML comment.
    def html_tag?
      /<\/?[^>]+>/ === self && !html_comment?
    end

    # True when the string contains a tag that will need a closing tag
    # (not a closing tag itself, not a void element, not a script tag).
    def open_tag?
      OPEN_TAG_REGEX === self
    end

    # True when the string contains an HTML comment.
    def html_comment?
      /<\s?!--.*-->/ === self
    end

    # Returns the closing tag for this opening tag, e.g.
    # '<a href="x">' => '</a>'. The tag name runs up to the first whitespace,
    # "/" or ">", so hyphenated names such as <my-element> are kept whole.
    def matching_close_tag
      gsub(/<(\w[^\s>\/]*)\s?.*>/, '</\1>').strip
    end

    private

    # The tokenizer pattern used by html_tokens.
    def regex
      TOKEN_REGEX
    end

  end
end
@@ -0,0 +1,91 @@
module TruncateHtml
  # Walks the tokens of an HTML string, copies them to the output until the
  # character budget is used up, then appends the omission text and closes
  # any tags that are still open.
  class HtmlTruncator

    # original_html - an object responding to #html_tokens whose tokens
    #                 respond to #html_tag?, #open_tag?, #html_comment? and
    #                 #matching_close_tag (e.g. TruncateHtml::HtmlString).
    # options       - :length, :omission, :word_boundary, :break_token;
    #                 missing values fall back to TruncateHtml.configuration.
    def initialize(original_html, options = {})
      @original_html = original_html
      length         = options[:length] || TruncateHtml.configuration.length
      @omission      = options[:omission] || TruncateHtml.configuration.omission
      @word_boundary = (options.has_key?(:word_boundary) ? options[:word_boundary] : TruncateHtml.configuration.word_boundary)
      @break_token   = options[:break_token] || TruncateHtml.configuration.break_token || nil
      # The omission counts toward the requested length.
      @max_chars     = length - @omission.length
      reset_state
    end

    # Returns the truncated HTML as a String. Safe to call repeatedly: the
    # working state is rebuilt on every call.
    def truncate
      reset_state
      return @omission if @chars_remaining < 0

      @original_html.html_tokens.each do |token|
        if @chars_remaining <= 0 || truncate_token?(token)
          close_open_tags
          break
        end
        process_token(token)
      end

      trim_to_word_boundary(@truncated_html.join)
    end

    private

    # Restores the per-run working state (budget, open-tag stack, output).
    def reset_state
      @chars_remaining = @max_chars
      @open_tags       = []
      # Seeded with '' so there is always a last element to append the
      # omission to.
      @truncated_html  = ['']
    end

    # The effective boundary: `true` means "use the configured boundary".
    def word_boundary
      if @word_boundary == true
        TruncateHtml.configuration.word_boundary
      else
        @word_boundary
      end
    end

    # Cuts +out+ after the last match of the word boundary. \A with /m lets
    # ".*" span newlines, so an omission containing a line break does not
    # discard the rest of the output; interpolating the Regexp keeps the
    # boundary's own flags.
    def trim_to_word_boundary(out)
      boundary = word_boundary
      return out unless boundary

      match = out.match(/\A.*#{boundary}/m)
      match ? match[0] : out
    end

    # Copies the token to the output and updates the open-tag stack or the
    # remaining character budget, appending the omission once it runs out.
    def process_token(token)
      append_to_result(token)
      if token.html_tag?
        if token.open_tag?
          @open_tags << token
        else
          remove_latest_open_tag(token)
        end
      elsif !token.html_comment?
        @chars_remaining -= (@word_boundary ? token.length : token[0, @chars_remaining].length)
        if @chars_remaining <= 0
          @truncated_html[-1] = @truncated_html[-1].rstrip + @omission
        end
      end
    end

    # Tags and comments are always copied. Text is copied whole only if it
    # fits (word-boundary mode) or cut to the remaining budget otherwise.
    def append_to_result(token)
      if token.html_tag? || token.html_comment?
        @truncated_html << token
      elsif @word_boundary
        @truncated_html << token if (@chars_remaining - token.length) >= 0
      else
        @truncated_html << token[0, @chars_remaining]
      end
    end

    # Appends closing tags for everything still open, innermost first.
    def close_open_tags
      @open_tags.reverse_each do |open_tag|
        @truncated_html << open_tag.matching_close_tag
      end
    end

    # Drops the most recently opened tag that +close_tag+ closes, if any.
    def remove_latest_open_tag(close_tag)
      index = @open_tags.rindex { |open_tag| open_tag.matching_close_tag == close_tag }
      @open_tags.delete_at(index) if index
    end

    # True when a break token is configured and +token+ is that token.
    def truncate_token?(token)
      @break_token && token == @break_token
    end
  end
end