truncateHTML 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ nbproject
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# protected in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in truncateHTML.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Load Bundler, then let its gem helper register its Rake tasks for this gem.
require "bundler"

Bundler::GemHelper.install_tasks
@@ -0,0 +1,47 @@
1
# Mixins that give Hpricot nodes a +truncate(max_length)+ method, so that a
# parsed tree can be shortened to a maximum amount of text while keeping the
# surrounding tags intact.
module HpricotTruncator
  # Truncation for nodes that carry children (documents and elements).
  module NodeWithChildren
    # Returns a node whose inner text is limited to +max_length+ characters.
    #
    # * If the node's text already fits, the node itself is returned.
    # * Otherwise an empty copy of the node is filled with truncated children
    #   until the text budget is used up.
    # * If a child <html> element containing a <body> element is met, whatever
    #   was collected so far is discarded and the truncated <body> is returned
    #   instead.
    #
    # @param max_length [Integer] maximum number of text characters to keep
    # @return [Object] +self+, a truncated copy, or a truncated <body> element
    def truncate(max_length)
      return self if inner_text.length <= max_length

      truncated_node =
        if is_a?(Hpricot::Doc)
          dup
        else
          # Build a fresh element instead of dup-ing it (the file header notes
          # that dup on Hpricot::Elem is avoided for Hpricot 0.8).
          self.class.send(:new, name, attributes)
        end
      truncated_node.children = []

      each_child do |node|
        if node.is_a?(Hpricot::Elem) && node.name == "html"
          # Find the body node and use it. Let us reset earlier truncations
          # and start afresh with this body tag.
          # NOTE(review): children is guarded because it appears to be nil for
          # an element without content; an unguarded children.each would raise
          # NoMethodError there - confirm against the Hpricot version in use.
          (node.children || []).each do |c|
            return c.truncate(max_length) if c.is_a?(Hpricot::Elem) && c.name == "body"
          end
        end

        # The budget shrinks by the text collected so far.
        remaining_length = max_length - truncated_node.inner_text.length
        break if remaining_length <= 0
        truncated_node.children << node.truncate(remaining_length)
      end

      truncated_node
    end
  end

  # Truncation for plain text nodes.
  module TextNode
    # Returns a new text node holding at most +max_length+ units of this
    # node's content, where a unit is either a whole entity (such as
    # <tt>&amp;</tt> or <tt>&#169;</tt>) or a single character.
    #
    # Because <tt>.</tt> in the pattern does not match a newline, line breaks
    # are dropped from the result.
    #
    # @param max_length [Integer] maximum number of units to keep
    # @return [Hpricot::Text]
    def truncate(max_length)
      # We're using String#scan because Hpricot doesn't distinguish entities.
      Hpricot::Text.new(content.scan(/&#?[^\W_]+;|./).first(max_length).join)
    end
  end

  # Truncation for nodes that are always kept exactly as they are.
  module IgnoredTag
    # @param max_length [Integer] ignored
    # @return [Object] +self+, unchanged
    def truncate(max_length)
      self
    end
  end
end
41
+
42
# Attach a truncation strategy to each Hpricot node class: containers truncate
# their children, text nodes truncate their content, and the remaining node
# types are returned untouched.
[
  [Hpricot::Doc,       HpricotTruncator::NodeWithChildren],
  [Hpricot::Elem,      HpricotTruncator::NodeWithChildren],
  [Hpricot::Text,      HpricotTruncator::TextNode],
  [Hpricot::BogusETag, HpricotTruncator::IgnoredTag],
  [Hpricot::Comment,   HpricotTruncator::IgnoredTag],
  [Hpricot::DocType,   HpricotTruncator::IgnoredTag]
].each do |node_class, strategy|
  node_class.send(:include, strategy)
end
@@ -0,0 +1,3 @@
1
# Canonical home of the gem version: the same namespace as the library itself,
# so callers can read TruncateHTML::VERSION.
module TruncateHTML
  VERSION = "0.0.1"
end

# Legacy spelling of the namespace, kept so that existing references to
# Truncatehtml::VERSION (for example from the gemspec) keep working.
module Truncatehtml
  VERSION = TruncateHTML::VERSION
end
@@ -0,0 +1,60 @@
1
+ # By Henrik Nyh <http://henrik.nyh.se> 2008-01-30.
2
+ # Free to modify and redistribute with credit.
3
+
4
+ # modified by Dave Nolan <http://textgoeshere.org.uk> 2008-02-06
5
+ # Ellipsis appended to text of last HTML node
6
+ # Ellipsis inserted after final word break
7
+
8
+ # modified by Mark Dickson <mark@sitesteaders.com> 2008-12-18
9
+ # Option to truncate to last full word
10
+ # Option to include a 'more' link
11
+ # Check for nil last child
12
+
13
+ # modified by Ken-ichi Ueda <http://kueda.net> 2009-09-02
14
+ # Rails 2.3 compatibility (chars -> mb_chars), via Henrik
15
+ # Hpricot 0.8 compatibility (avoid dup on Hpricot::Elem)
16
+
17
+ # modified by Satyaram B V <http://bvsatyaram.com> 2011-03-24
18
+ # Rails version independent
19
+ # Making this a gem
20
+ # Removed mb_chars
21
+
22
+ require "hpricot"
23
+ require "truncateHTML/hpricot_truncator"
24
+
25
# Shortens HTML to a maximum amount of visible text.
module TruncateHTML
  # Like the Rails _truncate_ helper but doesn't break HTML tags, entities,
  # and optionally, words.
  #
  # @param text [#to_s, nil] HTML (or plain text) to shorten
  # @param options [Hash]
  # @option options [Integer] :max_length (40) maximum text length of the
  #   result, counting the ellipsis text
  # @option options [String] :ellipsis ("...") appended when text was cut;
  #   may itself be HTML
  # @option options [Boolean] :words (false) cut back to the last space so the
  #   result does not end in the middle of a word
  # @option options [Boolean] :status (false) also report whether text was cut
  # @option options [String] :link markup appended after the ellipsis, e.g.
  #   :link => link_to('more', post_path)
  # @return [nil] when +text+ is nil
  # @return [String] the (possibly truncated) HTML when :status is not set
  # @return [Array(String, Boolean)] the HTML and a "was truncated" flag when
  #   :status is set
  def self.truncate(text, options = {})
    return if text.nil?

    max_length = options[:max_length] || 40
    ellipsis   = options[:ellipsis] || "..."
    words      = options[:words] || false
    status     = options[:status] || false

    doc = Hpricot(text.to_s)
    # Only the text of the ellipsis counts against the budget, not its markup.
    ellipsis_length = Hpricot(ellipsis).inner_text.length
    content_length  = doc.inner_text.length
    actual_length   = max_length - ellipsis_length

    if content_length > max_length
      truncated_doc = doc.truncate(actual_length)

      if words
        # Look for the word break in the text, not in the markup: inner_html
        # also contains closing tags and spaces inside tags (e.g. "<br />"),
        # which would distort the computed length. When there is no space at
        # all, rindex returns nil and the character-level cut is kept.
        last_space = truncated_doc.inner_text.rindex(' ')
        truncated_doc = doc.truncate(last_space) if last_space
      end

      #XXX The check here has to be blank as the inner_html for text node is blank
      return_string = truncated_doc.inner_html + ellipsis
      return_string += options[:link] if options[:link]
      return_status = true
    else
      return_string = text.to_s
      return_status = false
    end

    status ? [return_string, return_status] : return_string
  end
end
@@ -0,0 +1,74 @@
1
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
2
+
3
+ require 'test/unit'
4
+ require 'truncateHTML'
5
+
6
# Exercises TruncateHTML.truncate on full HTML documents, plain text and
# nested inline markup.
#
# NOTE(review): the heredoc bodies are kept flush-left. <<- does not strip
# indentation, and the expected strings below contain no leading spaces -
# confirm against the original file's whitespace.
class TruncateHTMLTest < Test::Unit::TestCase
  # An HTML message with a <head> and a <body>, shared by several tests.
  MESSAGE_WITH_BODY = <<-MSG
<html>
<head>
<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
<title></title>
</head>
<body bgcolor="#ffffff" text="#000000">
Dear Ramana,<br>
Satyaram is currently setting up his account. The fourth estate magazine
is a big time hit of all time<br>
</body>
</html>
  MSG

  def test_truncate_html_ignores_doc_type
    msg = <<-DOCMSG
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body bgcolor="#ffffff" text="#000000">
Take this bottle and go fetch me some water please. Another of those mobiles is going bad.
</body>
</html>
    DOCMSG
    expected = "Take this bottle and go fetch me some..."
    assert_equal(expected, TruncateHTML.truncate(msg, :max_length => 40))
  end

  def test_truncate_html_with_body_node
    expected = "Dear Ramana,<br />Satyaram is currently..."
    actual = TruncateHTML.truncate(MESSAGE_WITH_BODY, :max_length => 40, :words => true)
    assert_equal(expected, actual)
  end

  def test_truncate_html_plain_text
    text = "this is some exmaple test"
    assert_equal("this is...", TruncateHTML.truncate(text, :max_length => 10))
  end

  def test_truncate_html_ordinary_nodes
    text = "<p><b><i>this is some</i> exmaple </b></p> <p>test</p>"
    expected = "<p><b><i>this is</i></b></p>..."
    assert_equal(expected, TruncateHTML.truncate(text, :max_length => 10))
  end

  def test_truncate_html_with_html_with_multiple_bodies
    # Two complete documents separated by one blank line.
    msg = MESSAGE_WITH_BODY + "\n" + MESSAGE_WITH_BODY
    expected = "Dear Ramana,<br />Satyaram is currently set..."
    assert_equal(expected, TruncateHTML.truncate(msg))
  end
end
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "truncateHTML/version"
4
+
5
# Packaging metadata for the truncateHTML gem. File lists are taken from the
# files tracked by git.
Gem::Specification.new do |spec|
  # The summary and the description use the same sentence.
  blurb = "Truncate HTML without breaking HTML tags, entities, and optionally words"

  spec.name        = "truncateHTML"
  spec.version     = Truncatehtml::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Satyaram B V"]
  spec.email       = ["bvsatyaram AT gmail DOT com"]
  spec.homepage    = "http://bvsatyaram.com"
  spec.summary     = blurb
  spec.description = blurb.dup

  spec.add_dependency "hpricot"

  spec.rubyforge_project = "truncateHTML"

  tracked_files    = `git ls-files`.split("\n")
  tracked_tests    = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binaries = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are listed by bare file name, without the bin/ directory.
  spec.executables   = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: truncateHTML
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Satyaram B V
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-03-26 00:00:00 +05:30
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: hpricot
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Truncate HTML without breaking HTML tags, entities, and optionally words
36
+ email:
37
+ - bvsatyaram AT gmail DOT com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - Rakefile
48
+ - lib/truncateHTML.rb
49
+ - lib/truncateHTML/hpricot_truncator.rb
50
+ - lib/truncateHTML/version.rb
51
+ - test/truncateHTML_test.rb
52
+ - truncateHTML.gemspec
53
+ has_rdoc: true
54
+ homepage: http://bvsatyaram.com
55
+ licenses: []
56
+
57
+ post_install_message:
58
+ rdoc_options: []
59
+
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ requirements: []
81
+
82
+ rubyforge_project: truncateHTML
83
+ rubygems_version: 1.3.7
84
+ signing_key:
85
+ specification_version: 3
86
+ summary: Truncate HTML without breaking HTML tags, entities, and optionally words
87
+ test_files: []
88
+