buzzcore 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/buzzcore.gemspec +4 -2
- data/lib/buzzcore/extra/html_truncate.rb +161 -0
- data/lib/buzzcore/extra/xml_utils2.rb +29 -0
- data/lib/buzzcore/string_utils.rb +17 -0
- data/lib/buzzcore.rb +1 -1
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.7
|
1
|
+
0.3.0
|
data/buzzcore.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{buzzcore}
|
8
|
-
s.version = "0.2.7"
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["buzzware"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-15}
|
13
13
|
s.description = %q{buzzcore is the ruby core library developed and used by Buzzware Solutions.}
|
14
14
|
s.email = %q{contact@buzzware.com.au}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -32,6 +32,8 @@ Gem::Specification.new do |s|
|
|
32
32
|
"lib/buzzcore/database_utils.rb",
|
33
33
|
"lib/buzzcore/enum.rb",
|
34
34
|
"lib/buzzcore/extend_base_classes.rb",
|
35
|
+
"lib/buzzcore/extra/html_truncate.rb",
|
36
|
+
"lib/buzzcore/extra/xml_utils2.rb",
|
35
37
|
"lib/buzzcore/html_utils.rb",
|
36
38
|
"lib/buzzcore/logging.rb",
|
37
39
|
"lib/buzzcore/misc_utils.rb",
|
@@ -0,0 +1,161 @@
|
|
1
|
+
gem 'sanitize'; require 'sanitize'
|
2
|
+
gem 'nokogiri'; require 'nokogiri'
|
3
|
+
|
4
|
+
module HtmlUtils
|
5
|
+
|
6
|
+
# Truncates HTML text. Breaks on word boundaries and closes tags.
|
7
|
+
# aSuffix will be encoded with entities
|
8
|
+
# Shortens HTML text on a word boundary, appends a suffix and sanitizes it.
#
# The text is cut by StringUtils.word_safe_truncate, then a single space and
# aSuffix are appended, and the combined string is passed through
# Sanitize.clean with Sanitize::Config::BASIC.
# NOTE(review): the suffix is appended even when the text was short enough
# to come back uncut - confirm callers rely on that before changing it.
#
# aHtmlText  - HTML String to shorten; nil yields nil.
# aMaxLength - length limit handed to StringUtils.word_safe_truncate.
# aSuffix    - String appended after a single space (default '...').
#
# Returns the String produced by Sanitize.clean, or nil for nil input.
def self.word_safe_truncate(aHtmlText,aMaxLength,aSuffix='...')
  truncated = StringUtils.word_safe_truncate(aHtmlText,aMaxLength)
  # StringUtils hands back nil for nil input; concatenating onto nil would
  # raise NoMethodError, so pass the nil through instead.
  return nil if truncated.nil?
  Sanitize.clean(truncated+' '+aSuffix,Sanitize::Config::BASIC)
end
|
12
|
+
|
13
|
+
#
|
14
|
+
#
|
15
|
+
## from http://gist.github.com/101410
|
16
|
+
#def self.html_truncate(input, num_words = 15, truncate_string = "...")
|
17
|
+
# doc = Nokogiri::HTML(input)
|
18
|
+
#
|
19
|
+
# current = doc.children.first
|
20
|
+
# count = 0
|
21
|
+
#
|
22
|
+
# while true
|
23
|
+
# # we found a text node
|
24
|
+
# if current.is_a?(Nokogiri::XML::Text)
|
25
|
+
# count += current.text.split.length
|
26
|
+
# # we reached our limit, let's get outta here!
|
27
|
+
# break if count > num_words
|
28
|
+
# previous = current
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# if current.children.length > 0
|
32
|
+
# # this node has children, can't be a text node,
|
33
|
+
# # lets descend and look for text nodes
|
34
|
+
# current = current.children.first
|
35
|
+
# elsif !current.next.nil?
|
36
|
+
# #this has no children, but has a sibling, let's check it out
|
37
|
+
# current = current.next
|
38
|
+
# else
|
39
|
+
# # we are the last child, we need to ascend until we are
|
40
|
+
# # either done or find a sibling to continue on to
|
41
|
+
# n = current
|
42
|
+
# while !n.is_a?(Nokogiri::HTML::Document) and n.parent.next.nil?
|
43
|
+
# n = n.parent
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# # we've reached the top and found no more text nodes, break
|
47
|
+
# if n.is_a?(Nokogiri::HTML::Document)
|
48
|
+
# break;
|
49
|
+
# else
|
50
|
+
# current = n.parent.next
|
51
|
+
# end
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# if count >= num_words
|
56
|
+
# unless count == num_words
|
57
|
+
# new_content = current.text.split
|
58
|
+
#
|
59
|
+
# # If we're here, the last text node we counted eclipsed the number of words
|
60
|
+
# # that we want, so we need to cut down on words. The easiest way to think about
|
61
|
+
# # this is that without this node we'd have fewer words than the limit, so all
|
62
|
+
# # the previous words plus a limited number of words from this node are needed.
|
63
|
+
# # We simply need to figure out how many words are needed and grab that many.
|
64
|
+
# # Then we need to -subtract- an index, because the first word would be index zero.
|
65
|
+
#
|
66
|
+
# # For example, given:
|
67
|
+
# # <p>Testing this HTML truncater.</p><p>To see if its working.</p>
|
68
|
+
# # Let's say I want 6 words. The correct returned string would be:
|
69
|
+
# # <p>Testing this HTML truncater.</p><p>To see...</p>
|
70
|
+
# # All the words in both paragraphs = 9
|
71
|
+
# # The last paragraph is the one that breaks the limit. How many words would we
|
72
|
+
# # have without it? 4. But we want up to 6, so we might as well get that many.
|
73
|
+
# # 6 - 4 = 2, so we get 2 words from this node, but words #1-2 are indices #0-1, so
|
74
|
+
# # we subtract 1. If this gives us -1, we want nothing from this node. So go back to
|
75
|
+
# # the previous node instead.
|
76
|
+
# index = num_words-(count-new_content.length)-1
|
77
|
+
# if index >= 0
|
78
|
+
# new_content = new_content[0..index]
|
79
|
+
# current.inner_html = new_content.join(' ') + truncate_string
|
80
|
+
# #require 'ruby-debug'; debugger
|
81
|
+
# #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
|
82
|
+
# #current.content.inner_html = current.content.inner_html + truncate_string
|
83
|
+
# else
|
84
|
+
# current = previous
|
85
|
+
# #current.content = current.content + truncate_string
|
86
|
+
# current.inner_html = current.content + truncate_string
|
87
|
+
# #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
|
88
|
+
# #current.inner_html = current.content.inner_html + truncate_string
|
89
|
+
# end
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
# # remove everything else
|
93
|
+
# while !current.is_a?(Nokogiri::HTML::Document)
|
94
|
+
# while !current.next.nil?
|
95
|
+
# current.next.remove
|
96
|
+
# end
|
97
|
+
# current = current.parent
|
98
|
+
# end
|
99
|
+
# end
|
100
|
+
#
|
101
|
+
# # now we grab the html and not the text.
|
102
|
+
# # we do first because nokogiri adds html and body tags
|
103
|
+
# # which we don't want
|
104
|
+
# doc.root.children.first.inner_html
|
105
|
+
#end
|
106
|
+
|
107
|
+
# from http://blog.leshill.org/blog/2009/06/03/truncating-html.html
|
108
|
+
|
109
|
+
# Like the Rails _truncate_ helper but doesn't break HTML tags or entities.
|
110
|
+
#def truncate_html(text, max_length = 30, ellipsis = "...")
|
111
|
+
# return if text.nil?
|
112
|
+
# doc = Hpricot(text.to_s)
|
113
|
+
# doc.inner_text.chars.length > max_length ? doc.truncate(max_length, ellipsis).inner_html : text.to_s
|
114
|
+
#end
|
115
|
+
#
|
116
|
+
#def self.truncate_at_space(text, max_length, ellipsis = '...')
|
117
|
+
# l = [max_length - ellipsis.length, 0].max
|
118
|
+
# stop = text.rindex(' ', l) || 0
|
119
|
+
# (text.length > max_length ? text[0...stop] + ellipsis : text).to_s
|
120
|
+
#end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
#module HpricotTruncator
|
125
|
+
# module NodeWithChildren
|
126
|
+
# def truncate(max_length, ellipsis)
|
127
|
+
# return self if inner_text.chars.length <= max_length
|
128
|
+
# truncated_node = dup
|
129
|
+
# truncated_node.name = name
|
130
|
+
# truncated_node.raw_attributes = raw_attributes
|
131
|
+
# truncated_node.children = []
|
132
|
+
# each_child do |node|
|
133
|
+
# break if max_length <= 0
|
134
|
+
# node_length = node.inner_text.chars.length
|
135
|
+
# truncated_node.children << node.truncate(max_length, ellipsis)
|
136
|
+
# max_length = max_length - node_length
|
137
|
+
# end
|
138
|
+
# truncated_node
|
139
|
+
# end
|
140
|
+
# end
|
141
|
+
#
|
142
|
+
# module TextNode
|
143
|
+
# def truncate(max_length, ellipsis)
|
144
|
+
# self.content = TextHelper.truncate_at_space(content, max_length, ellipsis)
|
145
|
+
# self
|
146
|
+
# end
|
147
|
+
# end
|
148
|
+
#
|
149
|
+
# module IgnoredTag
|
150
|
+
# def truncate(max_length, ellipsis)
|
151
|
+
# self
|
152
|
+
# end
|
153
|
+
# end
|
154
|
+
#end
|
155
|
+
#
|
156
|
+
#Hpricot::Doc.send(:include, HpricotTruncator::NodeWithChildren)
|
157
|
+
#Hpricot::Elem.send(:include, HpricotTruncator::NodeWithChildren)
|
158
|
+
#Hpricot::Text.send(:include, HpricotTruncator::TextNode)
|
159
|
+
#Hpricot::BogusETag.send(:include, HpricotTruncator::IgnoredTag)
|
160
|
+
#Hpricot::Comment.send(:include, HpricotTruncator::IgnoredTag)
|
161
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
gem 'nokogiri'; require 'nokogiri'
|
2
|
+
require 'buzzcore/misc_utils'
|
3
|
+
|
4
|
+
module XmlUtils2
|
5
|
+
|
6
|
+
BASIC_HEADER = '<?xml version="1.0"?>'
|
7
|
+
|
8
|
+
# for yore, need to convert
|
9
|
+
# XmlUtils.add_xml_from_string : node
|
10
|
+
# XmlUtils.get_file_root : node
|
11
|
+
# XmlUtils.read_simple_items : hash
|
12
|
+
# XmlUtils.single_node : node
|
13
|
+
# XmlUtils.peek_node_value : String
|
14
|
+
|
15
|
+
# Normalizes an XML string by dropping comments and ignorable whitespace.
#
# The string is parsed with Nokogiri using the NOBLANKS parse option. Every
# comment node is then removed, along with every whitespace-only text node
# that has a sibling on either side.
#
# aXmlString - XML source to parse.
#
# Returns the document serialized with to_xml(:indent => 0).
def self.clean_data(aXmlString)
  doc = Nokogiri::XML(aXmlString) do |config|
    # OR the flag into the existing option bits. `||=` would never assign
    # because the options value is already set, and NOBLANKS is a constant,
    # so it must be reached with `::`.
    config.options |= Nokogiri::XML::ParseOptions::NOBLANKS
  end
  doc.traverse do |node|
    case node
    when Nokogiri::XML::Comment
      node.remove
    when Nokogiri::XML::Text
      # Only drop blank text that sits beside another node; a blank text
      # node that is the sole child is kept.
      node.remove if (node.next || node.previous) && node.content.strip.empty?
    end
  end
  doc.to_xml(:indent => 0)
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
@@ -62,5 +62,22 @@ module StringUtils
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
+
# truncates a string to the given length by looking for the previous space.
|
66
|
+
# Truncates a string to at most aMaxLength characters without splitting a word.
#
# The cut is made at the last space or tab found at or before index
# aMaxLength, and any run of spaces/tabs immediately before the cut is
# dropped so the result never ends in break whitespace. When no usable break
# exists (no space/tab, or only leading whitespace precedes it) the string is
# hard-cut to aMaxLength characters.
#
# aString    - String to shorten; nil yields nil.
# aMaxLength - maximum length of the result; zero or negative yields ''
#              for any string longer than the limit.
#
# Returns aString itself when it already fits, otherwise a new String.
def self.word_safe_truncate(aString,aMaxLength)
  return nil unless aString
  return aString if aString.length <= aMaxLength
  # A non-positive limit would make rindex count from the end of the string.
  return '' if aMaxLength <= 0
  break_pos = aString.rindex(/[ \t]/,aMaxLength)
  head = break_pos ? aString[0,break_pos].sub(/[ \t]+\z/,'') : ''
  # An empty head means there was no word boundary worth keeping.
  head.empty? ? aString[0,aMaxLength] : head
end
|
73
|
+
|
74
|
+
# replaces all tabs with spaces, and reduces multiple spaces to a single space
|
75
|
+
# Normalizes horizontal whitespace in a string.
#
# Tabs become spaces, leading and trailing whitespace is stripped, and runs
# of spaces collapse to a single space. Newlines inside the text are left
# alone. The argument itself is never modified.
#
# aText - String to normalize; nil yields nil.
#
# Returns a new String, or nil for nil input.
def self.reduce_whitespace(aText)
  return nil unless aText
  aText.tr("\t",' ').strip.squeeze(' ')
end
|
81
|
+
|
65
82
|
end
|
66
83
|
|
data/lib/buzzcore.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore
|
1
|
+
# Requires every .rb file that sits directly inside buzzcore/, with the glob resolved relative to this file's directory. The pattern is not recursive, so files under buzzcore/extra/ are not loaded by this line.
Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore/*.rb'] }.each {|f| require f }
|
2
2
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buzzcore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- buzzware
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-15 00:00:00 +08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -56,6 +56,8 @@ files:
|
|
56
56
|
- lib/buzzcore/database_utils.rb
|
57
57
|
- lib/buzzcore/enum.rb
|
58
58
|
- lib/buzzcore/extend_base_classes.rb
|
59
|
+
- lib/buzzcore/extra/html_truncate.rb
|
60
|
+
- lib/buzzcore/extra/xml_utils2.rb
|
59
61
|
- lib/buzzcore/html_utils.rb
|
60
62
|
- lib/buzzcore/logging.rb
|
61
63
|
- lib/buzzcore/misc_utils.rb
|