truncateHTML 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/lib/truncateHTML/hpricot_truncator.rb +47 -0
- data/lib/truncateHTML/version.rb +3 -0
- data/lib/truncateHTML.rb +60 -0
- data/test/truncateHTML_test.rb +74 -0
- data/truncateHTML.gemspec +23 -0
- metadata +88 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Mixins that give Hpricot nodes a +truncate(max_length)+ method.
# Each mixin implements the strategy for one family of node classes.
module HpricotTruncator
  # Strategy for nodes that hold children.
  module NodeWithChildren
    # Returns a node whose inner text is cut to at most +max_length+
    # characters. The receiver itself is returned when it already fits.
    def truncate(max_length)
      return self unless inner_text.length > max_length

      # A Doc is duplicated; any other node is rebuilt from its name and
      # attributes. Either way the copy starts with no children.
      copy = is_a?(Hpricot::Doc) ? dup : self.class.send(:new, name, attributes)
      copy.children = []

      each_child do |child|
        if child.is_a?(Hpricot::Elem) && child.name == "html"
          child.children.each do |grandchild|
            next unless grandchild.is_a?(Hpricot::Elem) && grandchild.name == "body"

            # A <body> was found: discard whatever has been collected so far
            # and return the truncation of that body instead.
            return grandchild.truncate(max_length)
          end
        end

        # Budget left after the text already copied over.
        budget = max_length - copy.inner_text.length
        break unless budget > 0

        copy.children << child.truncate(budget)
      end

      copy
    end
  end

  # Strategy for text nodes.
  module TextNode
    # Returns a new Hpricot::Text holding the first +max_length+ tokens of the
    # content, where a token is either a whole entity (e.g. "&amp;", "&#38;")
    # or a single character.
    def truncate(max_length)
      # We're using String#scan because Hpricot doesn't distinguish entities.
      # Note that "." does not match a newline here, so newlines are not
      # tokens and do not appear in the result.
      tokens = content.scan(/&#?[^\W_]+;|./)
      Hpricot::Text.new(tokens.first(max_length).join)
    end
  end

  # Strategy for nodes that are passed through untouched.
  module IgnoredTag
    # Returns the receiver unchanged; +max_length+ is not consulted.
    def truncate(max_length)
      self
    end
  end
end
|
41
|
+
|
42
|
+
# Mix each truncation strategy into the Hpricot node classes it applies to.
[
  [HpricotTruncator::NodeWithChildren, [Hpricot::Doc, Hpricot::Elem]],
  [HpricotTruncator::TextNode,         [Hpricot::Text]],
  [HpricotTruncator::IgnoredTag,       [Hpricot::BogusETag, Hpricot::Comment, Hpricot::DocType]]
].each do |mixin, node_classes|
  node_classes.each { |node_class| node_class.send(:include, mixin) }
end
|
data/lib/truncateHTML.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# By Henrik Nyh <http://henrik.nyh.se> 2008-01-30.
|
2
|
+
# Free to modify and redistribute with credit.
|
3
|
+
|
4
|
+
# modified by Dave Nolan <http://textgoeshere.org.uk> 2008-02-06
|
5
|
+
# Ellipsis appended to text of last HTML node
|
6
|
+
# Ellipsis inserted after final word break
|
7
|
+
|
8
|
+
# modified by Mark Dickson <mark@sitesteaders.com> 2008-12-18
|
9
|
+
# Option to truncate to last full word
|
10
|
+
# Option to include a 'more' link
|
11
|
+
# Check for nil last child
|
12
|
+
|
13
|
+
# modified by Ken-ichi Ueda <http://kueda.net> 2009-09-02
|
14
|
+
# Rails 2.3 compatibility (chars -> mb_chars), via Henrik
|
15
|
+
# Hpricot 0.8 compatibility (avoid dup on Hpricot::Elem)
|
16
|
+
|
17
|
+
# modified by Satyaram B V <http://bvsatyaram.com> 2011-03-24
|
18
|
+
# Rails version independent
|
19
|
+
# Making this a gem
|
20
|
+
# Removed mb_chars
|
21
|
+
|
22
|
+
require "hpricot"
|
23
|
+
require "truncateHTML/hpricot_truncator"
|
24
|
+
|
25
|
+
module TruncateHTML
  # Like the Rails _truncate_ helper but doesn't break HTML tags, entities, and optionally words.
  #
  # text    - the HTML to truncate; converted with +to_s+ before parsing.
  #           Returns nil when +text+ is nil.
  # options - Hash of optional settings:
  #   :max_length - text is truncated only when its inner text is longer than
  #                 this; the ellipsis' own text length is subtracted from the
  #                 budget handed to the truncator (default 40).
  #   :ellipsis   - string appended to truncated output (default "...").
  #   :words      - when truthy, shorten the result further so that it ends at
  #                 the last space of the truncated markup (default false).
  #   :status     - when truthy, return [string, truncated?] instead of just
  #                 the string (default false).
  #   :link       - string appended after the ellipsis when truncation
  #                 happened, e.g. link_to('more', post_path).
  #
  # Returns the (possibly truncated) HTML string, or a two-element array of
  # that string and a boolean when :status is set.
  def self.truncate(text, options={})
    return if text.nil?

    max_length = options[:max_length] || 40
    ellipsis = options[:ellipsis] || "..."
    words = options[:words] || false
    status = options[:status] || false
    # use :link => link_to('more', post_path), or something to that effect

    doc = Hpricot(text.to_s)
    ellipsis_length = Hpricot(ellipsis).inner_text.length
    content_length = doc.inner_text.length
    actual_length = max_length - ellipsis_length

    if content_length > max_length
      truncated_doc = doc.truncate(actual_length)

      if words
        truncated_html = truncated_doc.inner_html
        last_space = truncated_html.rindex(' ')
        # String#rindex returns nil when the truncated markup contains no
        # space. There is no word boundary to fall back to in that case, so
        # keep the character-level truncation instead of raising on nil.
        if last_space
          word_length = actual_length - (truncated_html.length - last_space)
          truncated_doc = doc.truncate(word_length)
        end
      end

      #XXX The check here has to be blank as the inner_html for text node is blank
      return_string = truncated_doc.inner_html + ellipsis
      return_string += options[:link] if options[:link]
      return_status = true
    else
      return_string = text.to_s
      return_status = false
    end

    status ? [return_string, return_status] : return_string
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'truncateHTML'
|
5
|
+
|
6
|
+
# Exercises TruncateHTML.truncate against full documents, plain text and
# nested markup.
class TruncateHTMLTest < Test::Unit::TestCase
  # A leading DOCTYPE must not show up in, or count towards, the result.
  def test_truncate_html_ignores_doc_type
    html = <<-DOCMSG
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body bgcolor="#ffffff" text="#000000">
Take this bottle and go fetch me some water please. Another of those mobiles is going bad.
</body>
</html>
    DOCMSG

    expected = "Take this bottle and go fetch me some..."
    actual = TruncateHTML.truncate(html, :max_length => 40)
    assert_equal(expected, actual)
  end

  # Only the <body> content is kept; with :words the cut lands on a word boundary.
  def test_truncate_html_with_body_node
    html = <<-MSG
<html>
<head>
<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
<title></title>
</head>
<body bgcolor="#ffffff" text="#000000">
Dear Ramana,<br>
Satyaram is currently setting up his account. The fourth estate magazine
is a big time hit of all time<br>
</body>
</html>
    MSG

    expected = "Dear Ramana,<br />Satyaram is currently..."
    actual = TruncateHTML.truncate(html, :max_length => 40, :words => true)
    assert_equal(expected, actual)
  end

  # Input without any markup.
  def test_truncate_html_plain_text
    input = "this is some exmaple test"
    actual = TruncateHTML.truncate(input, :max_length => 10)
    assert_equal("this is...", actual)
  end

  # Nested inline and block elements are closed properly after the cut.
  def test_truncate_html_ordinary_nodes
    input = "<p><b><i>this is some</i> exmaple </b></p> <p>test</p>"
    actual = TruncateHTML.truncate(input, :max_length => 10)
    assert_equal("<p><b><i>this is</i></b></p>...", actual)
  end

  # Two <html> documents back to back, truncated with the default options.
  def test_truncate_html_with_html_with_multiple_bodies
    html = <<-MSG
<html>
<head>
<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
<title></title>
</head>
<body bgcolor="#ffffff" text="#000000">
Dear Ramana,<br>
Satyaram is currently setting up his account. The fourth estate magazine
is a big time hit of all time<br>
</body>
</html>

<html>
<head>
<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
<title></title>
</head>
<body bgcolor="#ffffff" text="#000000">
Dear Ramana,<br>
Satyaram is currently setting up his account. The fourth estate magazine
is a big time hit of all time<br>
</body>
</html>
    MSG

    expected = "Dear Ramana,<br />Satyaram is currently set..."
    actual = TruncateHTML.truncate(html)
    assert_equal(expected, actual)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "truncateHTML/version"
|
4
|
+
|
5
|
+
# Gem specification for truncateHTML. The packaged file lists are taken from
# what git currently tracks.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "truncateHTML"
  spec.version  = Truncatehtml::VERSION
  spec.platform = Gem::Platform::RUBY

  # Authorship
  spec.authors  = ["Satyaram B V"]
  spec.email    = ["bvsatyaram AT gmail DOT com"]
  spec.homepage = "http://bvsatyaram.com"

  # Summary and description carry the same text.
  spec.summary     = %q{Truncate HTML without breaking HTML tags, entities, and optionally words}
  spec.description = %q{Truncate HTML without breaking HTML tags, entities, and optionally words}

  spec.add_dependency "hpricot"

  spec.rubyforge_project = "truncateHTML"

  # File lists, one path per line of git output.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are listed by base name only.
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: truncateHTML
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Satyaram B V
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-03-26 00:00:00 +05:30
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: hpricot
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Truncate HTML without breaking HTML tags, entities, and optionally words
|
36
|
+
email:
|
37
|
+
- bvsatyaram AT gmail DOT com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- Rakefile
|
48
|
+
- lib/truncateHTML.rb
|
49
|
+
- lib/truncateHTML/hpricot_truncator.rb
|
50
|
+
- lib/truncateHTML/version.rb
|
51
|
+
- test/truncateHTML_test.rb
|
52
|
+
- truncateHTML.gemspec
|
53
|
+
has_rdoc: true
|
54
|
+
homepage: http://bvsatyaram.com
|
55
|
+
licenses: []
|
56
|
+
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
requirements: []
|
81
|
+
|
82
|
+
rubyforge_project: truncateHTML
|
83
|
+
rubygems_version: 1.3.7
|
84
|
+
signing_key:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Truncate HTML without breaking HTML tags, entities, and optionally words
|
87
|
+
test_files: []
|
88
|
+
|