truncateHTML 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ nbproject
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-HTTP endpoint offers no protection against
# tampering in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in truncateHTML.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,47 @@
1
# Mixins that give Hpricot node classes a +truncate(max_length)+ method, so a
# parsed tree can be cut down to a maximum amount of text while every element
# that is kept stays properly nested.
module HpricotTruncator
  # Truncation for nodes that hold children (documents and elements).
  module NodeWithChildren
    # Returns a node whose inner text is limited to +max_length+ characters.
    #
    # max_length - Integer budget of text characters.
    #
    # Returns +self+ (not a copy) when the text already fits. Otherwise returns
    # a new node carrying this node's name and attributes (or a +dup+ for a
    # document) whose children are truncated copies of the originals.
    def truncate(max_length)
      return self if inner_text.length <= max_length

      # Documents are duplicated; elements are rebuilt from name + attributes
      # (the file header notes that +dup+ is avoided on Hpricot::Elem).
      truncated_node = is_a?(Hpricot::Doc) ? dup : self.class.send(:new, name, attributes)
      truncated_node.children = []

      each_child do |node|
        if node.is_a?(Hpricot::Elem) && node.name == "html"
          # Find the body node and use it. Whatever was collected so far is
          # discarded and truncation starts afresh from the first <body>.
          body = node.children.find { |c| c.is_a?(Hpricot::Elem) && c.name == "body" }
          return body.truncate(max_length) if body
        end

        # Budget left after the text already copied into the result.
        remaining_length = max_length - truncated_node.inner_text.length
        break if remaining_length <= 0

        truncated_node.children << node.truncate(remaining_length)
      end

      truncated_node
    end
  end

  # Truncation for text nodes.
  module TextNode
    # Returns a new Hpricot::Text holding at most +max_length+ characters of
    # this node's content. An entity such as <tt>&amp;</tt> or <tt>&#38;</tt>
    # counts as a single character and is never split.
    #
    # A negative +max_length+ is treated as zero (Array#first would otherwise
    # raise ArgumentError).
    def truncate(max_length)
      # We're using String#scan because Hpricot doesn't distinguish entities.
      # The pattern has no multiline flag, so "." never matches a newline and
      # newline characters are left out of the result.
      tokens = content.scan(/&#?[^\W_]+;|./)
      Hpricot::Text.new(tokens.first([max_length, 0].max).join)
    end
  end

  # Nodes that are passed through untouched, whatever the budget is.
  module IgnoredTag
    # Returns +self+; +max_length+ is ignored.
    def truncate(max_length)
      self
    end
  end
end
41
+
42
# Mix the truncation behaviour into the Hpricot node classes: container nodes
# recurse into their children, text nodes are cut, and the remaining node
# types are passed through unchanged.
{
  HpricotTruncator::NodeWithChildren => [Hpricot::Doc, Hpricot::Elem],
  HpricotTruncator::TextNode         => [Hpricot::Text],
  HpricotTruncator::IgnoredTag       => [Hpricot::BogusETag, Hpricot::Comment, Hpricot::DocType]
}.each do |mixin, node_classes|
  node_classes.each { |node_class| node_class.send(:include, mixin) }
end
@@ -0,0 +1,3 @@
1
# Version namespace read by the gemspec.
module Truncatehtml
  # Current gem version; frozen so the shared constant cannot be mutated.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,60 @@
1
+ # By Henrik Nyh <http://henrik.nyh.se> 2008-01-30.
2
+ # Free to modify and redistribute with credit.
3
+
4
+ # modified by Dave Nolan <http://textgoeshere.org.uk> 2008-02-06
5
+ # Ellipsis appended to text of last HTML node
6
+ # Ellipsis inserted after final word break
7
+
8
+ # modified by Mark Dickson <mark@sitesteaders.com> 2008-12-18
9
+ # Option to truncate to last full word
10
+ # Option to include a 'more' link
11
+ # Check for nil last child
12
+
13
+ # modified by Ken-ichi Ueda <http://kueda.net> 2009-09-02
14
+ # Rails 2.3 compatibility (chars -> mb_chars), via Henrik
15
+ # Hpricot 0.8 compatibility (avoid dup on Hpricot::Elem)
16
+
17
+ # modified by Satyaram B V <http://bvsatyaram.com> 2011-03-24
18
+ # Rails version independent
19
+ # Making this a gem
20
+ # Removed mb_chars
21
+
22
+ require "hpricot"
23
+ require "truncateHTML/hpricot_truncator"
24
+
25
# Entry point of the gem: truncates HTML to a maximum amount of text.
module TruncateHTML
  # Like the Rails _truncate_ helper but doesn't break HTML tags, entities,
  # and (optionally) words.
  #
  # text    - The HTML to truncate; converted with +to_s+. +nil+ returns +nil+.
  # options - Hash of options (default: {}):
  #           :max_length - Maximum text length, ellipsis included (default 40).
  #           :ellipsis   - Marker appended when truncated (default "...").
  #           :words      - When truthy, cut back to the last space so no word
  #                         is split (default false).
  #           :status     - When truthy, return [string, truncated?] instead
  #                         of just the string (default false).
  #           :link       - String appended after the ellipsis when truncated,
  #                         e.g. :link => link_to('more', post_path).
  #
  # Returns the original text (as a String) when its inner text fits within
  # :max_length; otherwise the truncated HTML followed by the ellipsis and the
  # optional link. With :status the result is wrapped as described above.
  def self.truncate(text, options={})
    return if text.nil?

    max_length = options[:max_length] || 40
    ellipsis   = options[:ellipsis] || "..."
    words      = options[:words] || false
    status     = options[:status] || false

    doc = Hpricot(text.to_s)
    # The ellipsis may itself be markup, so only its text counts.
    ellipsis_length = Hpricot(ellipsis).inner_text.length
    content_length  = doc.inner_text.length
    actual_length   = max_length - ellipsis_length

    if content_length > max_length
      truncated_doc = doc.truncate(actual_length)

      if words
        # Measure on the text, not the markup: closing tags and spaces inside
        # tags (e.g. "<br />") must not influence where the word break falls.
        word_length = word_boundary_length(truncated_doc.inner_text, actual_length)
        # Without any space there is no word boundary; keep the character cut.
        truncated_doc = doc.truncate(word_length) if word_length
      end

      # NOTE (kept from the original source): "The check here has to be blank
      # as the inner_html for text node is blank".
      return_string = truncated_doc.inner_html + ellipsis
      return_string += options[:link] if options[:link]
      return_status = true
    else
      return_string = text.to_s
      return_status = false
    end

    status ? [return_string, return_status] : return_string
  end

  # Length budget that ends the text right before its last space.
  #
  # visible_text - Inner text of the already truncated document.
  # limit        - Budget that produced +visible_text+.
  #
  # Returns an Integer, or +nil+ when +visible_text+ contains no space.
  def self.word_boundary_length(visible_text, limit)
    last_space = visible_text.rindex(' ')
    return if last_space.nil?

    limit - (visible_text.length - last_space)
  end
  private_class_method :word_boundary_length
end
@@ -0,0 +1,74 @@
1
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
2
+
3
+ require 'test/unit'
4
+ require 'truncateHTML'
5
+
6
# Exercises TruncateHTML.truncate against full documents, HTML fragments and
# plain text.
class TruncateHTMLTest < Test::Unit::TestCase
  # A doctype in front of the document must not end up in the result.
  def test_truncate_html_ignores_doc_type
    html = <<-DOCMSG
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body bgcolor="#ffffff" text="#000000">
Take this bottle and go fetch me some water please. Another of those mobiles is going bad.
</body>
</html>
    DOCMSG
    expected = "Take this bottle and go fetch me some..."
    assert_equal(expected, TruncateHTML.truncate(html, :max_length => 40))
  end

  # Only the body is truncated, and :words backs up to a whole word.
  def test_truncate_html_with_body_node
    expected = "Dear Ramana,<br />Satyaram is currently..."
    actual = TruncateHTML.truncate(letter_document, :max_length => 40, :words => true)
    assert_equal(expected, actual)
  end

  def test_truncate_html_plain_text
    plain = "this is some exmaple test"
    assert_equal("this is...", TruncateHTML.truncate(plain, :max_length => 10))
  end

  def test_truncate_html_ordinary_nodes
    fragment = "<p><b><i>this is some</i> exmaple </b></p> <p>test</p>"
    assert_equal("<p><b><i>this is</i></b></p>...", TruncateHTML.truncate(fragment, :max_length => 10))
  end

  # Two complete documents separated by a blank line: only the first body is used.
  def test_truncate_html_with_html_with_multiple_bodies
    doubled = [letter_document, letter_document].join("\n")
    assert_equal("Dear Ramana,<br />Satyaram is currently set...", TruncateHTML.truncate(doubled))
  end

  private

  # Complete HTML document (head + body) shared by the body-related tests.
  def letter_document
    <<-MSG
<html>
<head>
<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
<title></title>
</head>
<body bgcolor="#ffffff" text="#000000">
Dear Ramana,<br>
Satyaram is currently setting up his account. The fourth estate magazine
is a big time hit of all time<br>
</body>
</html>
    MSG
  end
end
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "truncateHTML/version"
4
+
5
# Gem specification for truncateHTML; the version comes from
# truncateHTML/version and the file lists from what git tracks.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "truncateHTML"
  spec.version  = Truncatehtml::VERSION
  spec.platform = Gem::Platform::RUBY

  # Authorship
  spec.authors  = ["Satyaram B V"]
  spec.email    = ["bvsatyaram AT gmail DOT com"]
  spec.homepage = "http://bvsatyaram.com"

  # Summary and description intentionally share the same wording.
  spec.summary     = "Truncate HTML without breaking HTML tags, entities, and optionally words"
  spec.description = "Truncate HTML without breaking HTML tags, entities, and optionally words"

  spec.add_dependency "hpricot"

  spec.rubyforge_project = "truncateHTML"

  # Packaged files are whatever git tracks at build time.
  spec.files         = %x(git ls-files).split("\n")
  spec.test_files    = %x(git ls-files -- {test,spec,features}/*).split("\n")
  spec.executables   = %x(git ls-files -- bin/*).split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: truncateHTML
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Satyaram B V
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-03-26 00:00:00 +05:30
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: hpricot
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Truncate HTML without breaking HTML tags, entities, and optionally words
36
+ email:
37
+ - bvsatyaram AT gmail DOT com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - Rakefile
48
+ - lib/truncateHTML.rb
49
+ - lib/truncateHTML/hpricot_truncator.rb
50
+ - lib/truncateHTML/version.rb
51
+ - test/truncateHTML_test.rb
52
+ - truncateHTML.gemspec
53
+ has_rdoc: true
54
+ homepage: http://bvsatyaram.com
55
+ licenses: []
56
+
57
+ post_install_message:
58
+ rdoc_options: []
59
+
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ requirements: []
81
+
82
+ rubyforge_project: truncateHTML
83
+ rubygems_version: 1.3.7
84
+ signing_key:
85
+ specification_version: 3
86
+ summary: Truncate HTML without breaking HTML tags, entities, and optionally words
87
+ test_files: []
88
+