buzzcore 0.2.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/buzzcore.gemspec +4 -2
- data/lib/buzzcore/extra/html_truncate.rb +161 -0
- data/lib/buzzcore/extra/xml_utils2.rb +29 -0
- data/lib/buzzcore/string_utils.rb +17 -0
- data/lib/buzzcore.rb +1 -1
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/buzzcore.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{buzzcore}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["buzzware"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-15}
|
13
13
|
s.description = %q{buzzcore is the ruby core library developed and used by Buzzware Solutions.}
|
14
14
|
s.email = %q{contact@buzzware.com.au}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -32,6 +32,8 @@ Gem::Specification.new do |s|
|
|
32
32
|
"lib/buzzcore/database_utils.rb",
|
33
33
|
"lib/buzzcore/enum.rb",
|
34
34
|
"lib/buzzcore/extend_base_classes.rb",
|
35
|
+
"lib/buzzcore/extra/html_truncate.rb",
|
36
|
+
"lib/buzzcore/extra/xml_utils2.rb",
|
35
37
|
"lib/buzzcore/html_utils.rb",
|
36
38
|
"lib/buzzcore/logging.rb",
|
37
39
|
"lib/buzzcore/misc_utils.rb",
|
@@ -0,0 +1,161 @@
|
|
1
|
+
gem 'sanitize'; require 'sanitize'
|
2
|
+
gem 'nokogiri'; require 'nokogiri'
|
3
|
+
|
4
|
+
module HtmlUtils
|
5
|
+
|
6
|
+
# Truncates HTML text. Breaks on word boundaries and closes tags.
|
7
|
+
# aSuffix will be encoded with entities
|
8
|
+
# aHtmlText  : HTML source to shorten; nil is passed straight through as nil
# aMaxLength : maximum number of characters of aHtmlText to keep
# aSuffix    : marker appended (after a space) only when text was actually cut
#
# Returns the shortened markup after it has been run through Sanitize with the
# BASIC config. The cut is made on the raw string, so it may fall inside a tag;
# the Sanitize pass is what is relied on to tidy up the resulting markup.
def self.word_safe_truncate(aHtmlText,aMaxLength,aSuffix='...')
  return nil if !aHtmlText
  result = StringUtils.word_safe_truncate(aHtmlText,aMaxLength)
  # StringUtils only shortens input longer than aMaxLength, so only then
  # does a suffix make sense
  result = result+' '+aSuffix if aHtmlText.length > aMaxLength
  Sanitize.clean(result,Sanitize::Config::BASIC)
end
|
12
|
+
|
13
|
+
#
|
14
|
+
#
|
15
|
+
## from http://gist.github.com/101410
|
16
|
+
#def self.html_truncate(input, num_words = 15, truncate_string = "...")
|
17
|
+
# doc = Nokogiri::HTML(input)
|
18
|
+
#
|
19
|
+
# current = doc.children.first
|
20
|
+
# count = 0
|
21
|
+
#
|
22
|
+
# while true
|
23
|
+
# # we found a text node
|
24
|
+
# if current.is_a?(Nokogiri::XML::Text)
|
25
|
+
# count += current.text.split.length
|
26
|
+
# # we reached our limit, let's get outta here!
|
27
|
+
# break if count > num_words
|
28
|
+
# previous = current
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# if current.children.length > 0
|
32
|
+
# # this node has children, can't be a text node,
|
33
|
+
# # lets descend and look for text nodes
|
34
|
+
# current = current.children.first
|
35
|
+
# elsif !current.next.nil?
|
36
|
+
# #this has no children, but has a sibling, let's check it out
|
37
|
+
# current = current.next
|
38
|
+
# else
|
39
|
+
# # we are the last child, we need to ascend until we are
|
40
|
+
# # either done or find a sibling to continue on to
|
41
|
+
# n = current
|
42
|
+
# while !n.is_a?(Nokogiri::HTML::Document) and n.parent.next.nil?
|
43
|
+
# n = n.parent
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# # we've reached the top and found no more text nodes, break
|
47
|
+
# if n.is_a?(Nokogiri::HTML::Document)
|
48
|
+
# break;
|
49
|
+
# else
|
50
|
+
# current = n.parent.next
|
51
|
+
# end
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# if count >= num_words
|
56
|
+
# unless count == num_words
|
57
|
+
# new_content = current.text.split
|
58
|
+
#
|
59
|
+
# # If we're here, the last text node we counted eclipsed the number of words
|
60
|
+
# # that we want, so we need to cut down on words. The easiest way to think about
|
61
|
+
# # this is that without this node we'd have fewer words than the limit, so all
|
62
|
+
# # the previous words plus a limited number of words from this node are needed.
|
63
|
+
# # We simply need to figure out how many words are needed and grab that many.
|
64
|
+
# # Then we need to -subtract- an index, because the first word would be index zero.
|
65
|
+
#
|
66
|
+
# # For example, given:
|
67
|
+
# # <p>Testing this HTML truncater.</p><p>To see if its working.</p>
|
68
|
+
# # Let's say I want 6 words. The correct returned string would be:
|
69
|
+
# # <p>Testing this HTML truncater.</p><p>To see...</p>
|
70
|
+
# # All the words in both paragraphs = 9
|
71
|
+
# # The last paragraph is the one that breaks the limit. How many words would we
|
72
|
+
# # have without it? 4. But we want up to 6, so we might as well get that many.
|
73
|
+
# # 6 - 4 = 2, so we get 2 words from this node, but words #1-2 are indices #0-1, so
|
74
|
+
# # we subtract 1. If this gives us -1, we want nothing from this node. So go back to
|
75
|
+
# # the previous node instead.
|
76
|
+
# index = num_words-(count-new_content.length)-1
|
77
|
+
# if index >= 0
|
78
|
+
# new_content = new_content[0..index]
|
79
|
+
# current.inner_html = new_content.join(' ') + truncate_string
|
80
|
+
# #require 'ruby-debug'; debugger
|
81
|
+
# #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
|
82
|
+
# #current.content.inner_html = current.content.inner_html + truncate_string
|
83
|
+
# else
|
84
|
+
# current = previous
|
85
|
+
# #current.content = current.content + truncate_string
|
86
|
+
# current.inner_html = current.content + truncate_string
|
87
|
+
# #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
|
88
|
+
# #current.inner_html = current.content.inner_html + truncate_string
|
89
|
+
# end
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
# # remove everything else
|
93
|
+
# while !current.is_a?(Nokogiri::HTML::Document)
|
94
|
+
# while !current.next.nil?
|
95
|
+
# current.next.remove
|
96
|
+
# end
|
97
|
+
# current = current.parent
|
98
|
+
# end
|
99
|
+
# end
|
100
|
+
#
|
101
|
+
# # now we grab the html and not the text.
|
102
|
+
# # we do first because nokogiri adds html and body tags
|
103
|
+
# # which we don't want
|
104
|
+
# doc.root.children.first.inner_html
|
105
|
+
#end
|
106
|
+
|
107
|
+
# from http://blog.leshill.org/blog/2009/06/03/truncating-html.html
|
108
|
+
|
109
|
+
# Like the Rails _truncate_ helper but doesn't break HTML tags or entities.
|
110
|
+
#def truncate_html(text, max_length = 30, ellipsis = "...")
|
111
|
+
# return if text.nil?
|
112
|
+
# doc = Hpricot(text.to_s)
|
113
|
+
# doc.inner_text.chars.length > max_length ? doc.truncate(max_length, ellipsis).inner_html : text.to_s
|
114
|
+
#end
|
115
|
+
#
|
116
|
+
#def self.truncate_at_space(text, max_length, ellipsis = '...')
|
117
|
+
# l = [max_length - ellipsis.length, 0].max
|
118
|
+
# stop = text.rindex(' ', l) || 0
|
119
|
+
# (text.length > max_length ? text[0...stop] + ellipsis : text).to_s
|
120
|
+
#end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
#module HpricotTruncator
|
125
|
+
# module NodeWithChildren
|
126
|
+
# def truncate(max_length, ellipsis)
|
127
|
+
# return self if inner_text.chars.length <= max_length
|
128
|
+
# truncated_node = dup
|
129
|
+
# truncated_node.name = name
|
130
|
+
# truncated_node.raw_attributes = raw_attributes
|
131
|
+
# truncated_node.children = []
|
132
|
+
# each_child do |node|
|
133
|
+
# break if max_length <= 0
|
134
|
+
# node_length = node.inner_text.chars.length
|
135
|
+
# truncated_node.children << node.truncate(max_length, ellipsis)
|
136
|
+
# max_length = max_length - node_length
|
137
|
+
# end
|
138
|
+
# truncated_node
|
139
|
+
# end
|
140
|
+
# end
|
141
|
+
#
|
142
|
+
# module TextNode
|
143
|
+
# def truncate(max_length, ellipsis)
|
144
|
+
# self.content = TextHelper.truncate_at_space(content, max_length, ellipsis)
|
145
|
+
# self
|
146
|
+
# end
|
147
|
+
# end
|
148
|
+
#
|
149
|
+
# module IgnoredTag
|
150
|
+
# def truncate(max_length, ellipsis)
|
151
|
+
# self
|
152
|
+
# end
|
153
|
+
# end
|
154
|
+
#end
|
155
|
+
#
|
156
|
+
#Hpricot::Doc.send(:include, HpricotTruncator::NodeWithChildren)
|
157
|
+
#Hpricot::Elem.send(:include, HpricotTruncator::NodeWithChildren)
|
158
|
+
#Hpricot::Text.send(:include, HpricotTruncator::TextNode)
|
159
|
+
#Hpricot::BogusETag.send(:include, HpricotTruncator::IgnoredTag)
|
160
|
+
#Hpricot::Comment.send(:include, HpricotTruncator::IgnoredTag)
|
161
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
gem 'nokogiri'; require 'nokogiri'
|
2
|
+
require 'buzzcore/misc_utils'
|
3
|
+
|
4
|
+
module XmlUtils2
|
5
|
+
|
6
|
+
BASIC_HEADER = '<?xml version="1.0"?>'
|
7
|
+
|
8
|
+
# for yore, need to convert
|
9
|
+
# XmlUtils.add_xml_from_string : node
|
10
|
+
# XmlUtils.get_file_root : node
|
11
|
+
# XmlUtils.read_simple_items : hash
|
12
|
+
# XmlUtils.single_node : node
|
13
|
+
# XmlUtils.peek_node_value : String
|
14
|
+
|
15
|
+
# Parses aXmlString, strips noise from the tree and returns it re-serialised.
#
# Removed from the document:
# * every comment node
# * every whitespace-only text node that has at least one sibling
#
# Returns the document as an XML string produced with :indent => 0.
def self.clean_data(aXmlString)
  doc = Nokogiri::XML(aXmlString) do |c|
    # OR the flag into the existing option bits. The previous `||=` never
    # assigned anything because c.options is always a (truthy) Integer, and
    # NOBLANKS is a constant of ParseOptions, not a method.
    c.options |= Nokogiri::XML::ParseOptions::NOBLANKS
  end
  doc.traverse do |n|
    case n
    when Nokogiri::XML::Comment
      n.remove
    when Nokogiri::XML::Text
      # keep blank text that is the sole child of its parent
      n.remove if (n.next || n.previous) && n.content.strip.empty?
    end
  end
  doc.to_xml(:indent => 0)
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
@@ -62,5 +62,22 @@ module StringUtils
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
+
# truncates a string to the given length by looking for the previous space.
|
66
|
+
# aString    : text to shorten; nil is returned as nil
# aMaxLength : maximum number of characters to keep
#
# Strings no longer than aMaxLength are returned unchanged. Otherwise the
# string is cut at the last whitespace character (space, tab or newline) found
# at or before index aMaxLength, and trailing whitespace is dropped from the
# result. When no whitespace is found the string is hard-cut at aMaxLength.
def self.word_safe_truncate(aString,aMaxLength)
  return nil if !aString
  return aString if aString.length <= aMaxLength
  posLastSpace = aString.rindex(/\s/,aMaxLength)
  return aString[0,aMaxLength] if !posLastSpace
  # rstrip so a run of blanks before the cut point does not leave the
  # result ending in whitespace
  aString[0,posLastSpace].rstrip
end
|
73
|
+
|
74
|
+
# replaces all tabs with spaces, and reduces multiple spaces to a single space
|
75
|
+
# aText : text to normalise; nil is returned as nil
#
# Returns a new string in which tabs have become spaces, leading and trailing
# whitespace is removed, and runs of spaces are collapsed to a single space.
# The argument itself is never modified.
def self.reduce_whitespace(aText)
  return nil if !aText
  aText.tr("\t",' ').strip.squeeze(' ')
end
|
81
|
+
|
65
82
|
end
|
66
83
|
|
data/lib/buzzcore.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore
|
1
|
+
# Require every .rb file directly under buzzcore/ (subdirectories such as
# extra/ are not matched by this glob). The list is sorted so the load order
# does not depend on the order the filesystem happens to return entries in.
Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore/*.rb'] }.sort.each {|f| require f }
|
2
2
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buzzcore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- buzzware
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-15 00:00:00 +08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -56,6 +56,8 @@ files:
|
|
56
56
|
- lib/buzzcore/database_utils.rb
|
57
57
|
- lib/buzzcore/enum.rb
|
58
58
|
- lib/buzzcore/extend_base_classes.rb
|
59
|
+
- lib/buzzcore/extra/html_truncate.rb
|
60
|
+
- lib/buzzcore/extra/xml_utils2.rb
|
59
61
|
- lib/buzzcore/html_utils.rb
|
60
62
|
- lib/buzzcore/logging.rb
|
61
63
|
- lib/buzzcore/misc_utils.rb
|