truncateHTML 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/lib/truncateHTML/hpricot_truncator.rb +47 -0
- data/lib/truncateHTML/version.rb +3 -0
- data/lib/truncateHTML.rb +60 -0
- data/test/truncateHTML_test.rb +74 -0
- data/truncateHTML.gemspec +23 -0
- metadata +88 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Mixins that give Hpricot nodes a +truncate(max_length)+ method, so a parsed
# tree can be cut down to a maximum amount of text while keeping its markup.
module HpricotTruncator
  # Truncation for nodes that carry children.
  module NodeWithChildren
    # Returns +self+ untouched when its text already fits in +max_length+
    # characters. Otherwise builds a copy of this node (a +dup+ for a
    # document, a fresh node with the same name and attributes for anything
    # else) and fills it with truncated children until the budget is used up.
    def truncate(max_length)
      return self if inner_text.length <= max_length

      copy = is_a?(Hpricot::Doc) ? dup : self.class.send(:new, name, attributes)
      copy.children = []

      each_child do |child|
        if child.is_a?(Hpricot::Elem) && child.name == "html"
          # An <html> wrapper with a <body>: discard whatever was collected so
          # far and return the truncation of the first <body> element instead.
          body = child.children.find { |c| c.is_a?(Hpricot::Elem) && c.name == "body" }
          return body.truncate(max_length) if body
        end

        # Whatever text the copy already holds is charged against the limit.
        budget = max_length - copy.inner_text.length
        break if budget <= 0

        copy.children << child.truncate(budget)
      end

      copy
    end
  end

  # Truncation for text nodes.
  module TextNode
    # Returns a new Hpricot::Text holding the first +max_length+ units of the
    # content, where a unit is either a whole entity (e.g. "&amp;", "&#38;")
    # or a single character matched by "." in the pattern below.
    def truncate(max_length)
      # String#scan is used because Hpricot doesn't distinguish entities.
      units = content.scan(/&#?[^\W_]+;|./)
      Hpricot::Text.new(units.first(max_length).join)
    end
  end

  # Truncation for nodes that are passed through unchanged.
  module IgnoredTag
    # Always returns +self+; +max_length+ is accepted only for interface parity.
    def truncate(max_length)
      self
    end
  end
end
|
41
|
+
|
42
|
+
# Mix the truncation behaviour into the Hpricot node classes, in this order.
[
  [Hpricot::Doc,       HpricotTruncator::NodeWithChildren],
  [Hpricot::Elem,      HpricotTruncator::NodeWithChildren],
  [Hpricot::Text,      HpricotTruncator::TextNode],
  [Hpricot::BogusETag, HpricotTruncator::IgnoredTag],
  [Hpricot::Comment,   HpricotTruncator::IgnoredTag],
  [Hpricot::DocType,   HpricotTruncator::IgnoredTag]
].each do |node_class, mixin|
  node_class.send(:include, mixin)
end
|
data/lib/truncateHTML.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# By Henrik Nyh <http://henrik.nyh.se> 2008-01-30.
|
2
|
+
# Free to modify and redistribute with credit.
|
3
|
+
|
4
|
+
# modified by Dave Nolan <http://textgoeshere.org.uk> 2008-02-06
|
5
|
+
# Ellipsis appended to text of last HTML node
|
6
|
+
# Ellipsis inserted after final word break
|
7
|
+
|
8
|
+
# modified by Mark Dickson <mark@sitesteaders.com> 2008-12-18
|
9
|
+
# Option to truncate to last full word
|
10
|
+
# Option to include a 'more' link
|
11
|
+
# Check for nil last child
|
12
|
+
|
13
|
+
# modified by Ken-ichi Ueda <http://kueda.net> 2009-09-02
|
14
|
+
# Rails 2.3 compatibility (chars -> mb_chars), via Henrik
|
15
|
+
# Hpricot 0.8 compatibility (avoid dup on Hpricot::Elem)
|
16
|
+
|
17
|
+
# modified by Satyaram B V <http://bvsatyaram.com> 2011-03-24
|
18
|
+
# Rails version independent
|
19
|
+
# Making this a gem
|
20
|
+
# Removed mb_chars
|
21
|
+
|
22
|
+
require "hpricot"
|
23
|
+
require "truncateHTML/hpricot_truncator"
|
24
|
+
|
25
|
+
module TruncateHTML
  # Like the Rails _truncate_ helper, but doesn't break HTML tags, entities
  # and, optionally, words.
  #
  # text    - the HTML to shorten; converted with +to_s+. +nil+ returns +nil+.
  # options - Hash of settings:
  #   :max_length - maximum text length of the result, ellipsis included
  #                 (default 40)
  #   :ellipsis   - string appended to truncated output (default "...")
  #   :words      - when truthy, cut back to the last space so the output
  #                 does not end in a partial word (default false)
  #   :status     - when truthy, return [string, truncated?] instead of only
  #                 the string (default false)
  #   :link       - optional string appended after the ellipsis, e.g.
  #                 :link => link_to('more', post_path)
  #
  # Returns +text.to_s+ unchanged when its text content fits in :max_length,
  # otherwise the truncated HTML followed by the ellipsis (and :link, if any).
  def self.truncate(text, options={})
    return if text.nil?

    max_length = options[:max_length] || 40
    ellipsis = options[:ellipsis] || "..."
    words = options[:words] || false
    status = options[:status] || false

    doc = Hpricot(text.to_s)
    # Only the text of the ellipsis counts towards the limit, not its markup.
    ellipsis_length = Hpricot(ellipsis).inner_text.length
    actual_length = max_length - ellipsis_length

    if doc.inner_text.length > max_length
      truncated_doc = doc.truncate(actual_length)

      if words
        # Measure the trailing partial word on the text, not on the markup:
        # closing tags and spaces inside tags (e.g. "<br />") are not text and
        # must not be charged against the character budget.
        visible_text = truncated_doc.inner_text
        last_space = visible_text.rindex(' ')
        # Without any space there is no word boundary to fall back to, so the
        # character-level truncation is kept as it is.
        if last_space
          word_length = actual_length - (visible_text.length - last_space)
          truncated_doc = doc.truncate(word_length)
        end
      end

      # XXX (note carried over from the original source, intent unclear):
      # "The check here has to be blank as the inner_html for text node is blank"
      return_string = truncated_doc.inner_html + ellipsis
      return_string += options[:link] if options[:link]
      return_status = true
    else
      return_string = text.to_s
      return_status = false
    end

    status ? [return_string, return_status] : return_string
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'truncateHTML'
|
5
|
+
|
6
|
+
# Exercises TruncateHTML.truncate on full documents, fragments and plain text.
class TruncateHTMLTest < Test::Unit::TestCase
  # The DOCTYPE declaration does not appear in the expected output.
  def test_truncate_html_ignores_doc_type
    html = <<-DOCMSG
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body bgcolor="#ffffff" text="#000000">
Take this bottle and go fetch me some water please. Another of those mobiles is going bad.
</body>
</html>
    DOCMSG
    expected = "Take this bottle and go fetch me some..."
    assert_equal(expected, TruncateHTML.truncate(html, :max_length => 40))
  end

  # Only the content of <body> is expected back; with :words the result ends
  # on a whole word.
  def test_truncate_html_with_body_node
    expected = "Dear Ramana,<br />Satyaram is currently..."
    actual = TruncateHTML.truncate(mail_html, :max_length => 40, :words => true)
    assert_equal(expected, actual)
  end

  # Plain text without any markup.
  def test_truncate_html_plain_text
    plain = "this is some exmaple test"
    assert_equal("this is...", TruncateHTML.truncate(plain, :max_length => 10))
  end

  # Nested inline elements: the expected output has every open tag closed.
  def test_truncate_html_ordinary_nodes
    fragment = "<p><b><i>this is some</i> exmaple </b></p> <p>test</p>"
    expected = "<p><b><i>this is</i></b></p>..."
    assert_equal(expected, TruncateHTML.truncate(fragment, :max_length => 10))
  end

  # Two complete documents back to back, separated by one blank line; the
  # expected output is taken from the first body, using the default options.
  def test_truncate_html_with_html_with_multiple_bodies
    doubled = mail_html + "\n" + mail_html
    expected = "Dear Ramana,<br />Satyaram is currently set..."
    assert_equal(expected, TruncateHTML.truncate(doubled))
  end

  private

  # A small HTML e-mail with a <head> and a <body>, shared by several tests.
  def mail_html
    <<-MSG
<html>
<head>
<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
<title></title>
</head>
<body bgcolor="#ffffff" text="#000000">
Dear Ramana,<br>
Satyaram is currently setting up his account. The fourth estate magazine
is a big time hit of all time<br>
</body>
</html>
    MSG
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "truncateHTML/version"
|
4
|
+
|
5
|
+
# Gem specification for truncateHTML.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = "truncateHTML"
  spec.version     = Truncatehtml::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Satyaram B V"]
  spec.email       = ["bvsatyaram AT gmail DOT com"]
  spec.homepage    = "http://bvsatyaram.com"
  spec.summary     = %q{Truncate HTML without breaking HTML tags, entities, and optionally words}
  spec.description = %q{Truncate HTML without breaking HTML tags, entities, and optionally words}

  # Runtime dependencies
  spec.add_dependency "hpricot"

  spec.rubyforge_project = "truncateHTML"

  # Packaged files are whatever git tracks at build time.
  spec.files       = `git ls-files`.split("\n")
  spec.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }

  spec.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: truncateHTML
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Satyaram B V
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-03-26 00:00:00 +05:30
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: hpricot
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Truncate HTML without breaking HTML tags, entities, and optionally words
|
36
|
+
email:
|
37
|
+
- bvsatyaram AT gmail DOT com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- Rakefile
|
48
|
+
- lib/truncateHTML.rb
|
49
|
+
- lib/truncateHTML/hpricot_truncator.rb
|
50
|
+
- lib/truncateHTML/version.rb
|
51
|
+
- test/truncateHTML_test.rb
|
52
|
+
- truncateHTML.gemspec
|
53
|
+
has_rdoc: true
|
54
|
+
homepage: http://bvsatyaram.com
|
55
|
+
licenses: []
|
56
|
+
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
requirements: []
|
81
|
+
|
82
|
+
rubyforge_project: truncateHTML
|
83
|
+
rubygems_version: 1.3.7
|
84
|
+
signing_key:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Truncate HTML without breaking HTML tags, entities, and optionally words
|
87
|
+
test_files: []
|
88
|
+
|