buzzcore 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.7
1
+ 0.3.0
data/buzzcore.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{buzzcore}
8
- s.version = "0.2.7"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["buzzware"]
12
- s.date = %q{2010-01-14}
12
+ s.date = %q{2010-01-15}
13
13
  s.description = %q{buzzcore is the ruby core library developed and used by Buzzware Solutions.}
14
14
  s.email = %q{contact@buzzware.com.au}
15
15
  s.extra_rdoc_files = [
@@ -32,6 +32,8 @@ Gem::Specification.new do |s|
32
32
  "lib/buzzcore/database_utils.rb",
33
33
  "lib/buzzcore/enum.rb",
34
34
  "lib/buzzcore/extend_base_classes.rb",
35
+ "lib/buzzcore/extra/html_truncate.rb",
36
+ "lib/buzzcore/extra/xml_utils2.rb",
35
37
  "lib/buzzcore/html_utils.rb",
36
38
  "lib/buzzcore/logging.rb",
37
39
  "lib/buzzcore/misc_utils.rb",
@@ -0,0 +1,161 @@
1
+ gem 'sanitize'; require 'sanitize'
2
+ gem 'nokogiri'; require 'nokogiri'
3
+
4
+ module HtmlUtils
5
+
6
# Truncates HTML text on a word boundary, appends aSuffix, then sanitizes the result.
#
# aHtmlText  - HTML string to shorten; nil (or false) yields nil.
# aMaxLength - maximum length passed to StringUtils.word_safe_truncate.
# aSuffix    - text appended after a single space; it is passed through
#              Sanitize.clean together with the rest of the output.
#
# Returns the output of Sanitize.clean using Sanitize::Config::BASIC, or nil
# when there is no input text.
#
# NOTE(review): the truncation works on the raw markup, so Sanitize is relied on
# to repair any tag cut short -- confirm against the Sanitize version in use.
# NOTE(review): the suffix is appended even when the text was short enough to be
# returned untruncated -- confirm that is intended.
def self.word_safe_truncate(aHtmlText,aMaxLength,aSuffix='...')
  # StringUtils.word_safe_truncate returns nil for missing input; stop here
  # instead of failing on nil + ' ' below.
  return nil unless aHtmlText
  result = StringUtils.word_safe_truncate(aHtmlText,aMaxLength)+' '+aSuffix
  Sanitize.clean(result,Sanitize::Config::BASIC)
end
12
+
13
+ #
14
+ #
15
+ ## from http://gist.github.com/101410
16
+ #def self.html_truncate(input, num_words = 15, truncate_string = "...")
17
+ # doc = Nokogiri::HTML(input)
18
+ #
19
+ # current = doc.children.first
20
+ # count = 0
21
+ #
22
+ # while true
23
+ # # we found a text node
24
+ # if current.is_a?(Nokogiri::XML::Text)
25
+ # count += current.text.split.length
26
+ # # we reached our limit, let's get outta here!
27
+ # break if count > num_words
28
+ # previous = current
29
+ # end
30
+ #
31
+ # if current.children.length > 0
32
+ # # this node has children, can't be a text node,
33
+ # # lets descend and look for text nodes
34
+ # current = current.children.first
35
+ # elsif !current.next.nil?
36
+ # #this has no children, but has a sibling, let's check it out
37
+ # current = current.next
38
+ # else
39
+ # # we are the last child, we need to ascend until we are
40
+ # # either done or find a sibling to continue on to
41
+ # n = current
42
+ # while !n.is_a?(Nokogiri::HTML::Document) and n.parent.next.nil?
43
+ # n = n.parent
44
+ # end
45
+ #
46
+ # # we've reached the top and found no more text nodes, break
47
+ # if n.is_a?(Nokogiri::HTML::Document)
48
+ # break;
49
+ # else
50
+ # current = n.parent.next
51
+ # end
52
+ # end
53
+ # end
54
+ #
55
+ # if count >= num_words
56
+ # unless count == num_words
57
+ # new_content = current.text.split
58
+ #
59
+ # # If we're here, the last text node we counted eclipsed the number of words
60
+ # # that we want, so we need to cut down on words. The easiest way to think about
61
+ # # this is that without this node we'd have fewer words than the limit, so all
62
+ # # the previous words plus a limited number of words from this node are needed.
63
+ # # We simply need to figure out how many words are needed and grab that many.
64
+ # # Then we need to -subtract- an index, because the first word would be index zero.
65
+ #
66
+ # # For example, given:
67
+ # # <p>Testing this HTML truncater.</p><p>To see if its working.</p>
68
+ # # Let's say I want 6 words. The correct returned string would be:
69
+ # # <p>Testing this HTML truncater.</p><p>To see...</p>
70
+ # # All the words in both paragraphs = 9
71
+ # # The last paragraph is the one that breaks the limit. How many words would we
72
+ # # have without it? 4. But we want up to 6, so we might as well get that many.
73
+ # # 6 - 4 = 2, so we get 2 words from this node, but words #1-2 are indices #0-1, so
74
+ # # we subtract 1. If this gives us -1, we want nothing from this node. So go back to
75
+ # # the previous node instead.
76
+ # index = num_words-(count-new_content.length)-1
77
+ # if index >= 0
78
+ # new_content = new_content[0..index]
79
+ # current.inner_html = new_content.join(' ') + truncate_string
80
+ # #require 'ruby-debug'; debugger
81
+ # #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
82
+ # #current.content.inner_html = current.content.inner_html + truncate_string
83
+ # else
84
+ # current = previous
85
+ # #current.content = current.content + truncate_string
86
+ # current.inner_html = current.content + truncate_string
87
+ # #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
88
+ # #current.inner_html = current.content.inner_html + truncate_string
89
+ # end
90
+ # end
91
+ #
92
+ # # remove everything else
93
+ # while !current.is_a?(Nokogiri::HTML::Document)
94
+ # while !current.next.nil?
95
+ # current.next.remove
96
+ # end
97
+ # current = current.parent
98
+ # end
99
+ # end
100
+ #
101
+ # # now we grab the html and not the text.
102
+ # # we do first because nokogiri adds html and body tags
103
+ # # which we don't want
104
+ # doc.root.children.first.inner_html
105
+ #end
106
+
107
+ # from http://blog.leshill.org/blog/2009/06/03/truncating-html.html
108
+
109
+ # Like the Rails _truncate_ helper but doesn't break HTML tags or entities.
110
+ #def truncate_html(text, max_length = 30, ellipsis = "...")
111
+ # return if text.nil?
112
+ # doc = Hpricot(text.to_s)
113
+ # doc.inner_text.chars.length > max_length ? doc.truncate(max_length, ellipsis).inner_html : text.to_s
114
+ #end
115
+ #
116
+ #def self.truncate_at_space(text, max_length, ellipsis = '...')
117
+ # l = [max_length - ellipsis.length, 0].max
118
+ # stop = text.rindex(' ', l) || 0
119
+ # (text.length > max_length ? text[0...stop] + ellipsis : text).to_s
120
+ #end
121
+
122
+ end
123
+
124
+ #module HpricotTruncator
125
+ # module NodeWithChildren
126
+ # def truncate(max_length, ellipsis)
127
+ # return self if inner_text.chars.length <= max_length
128
+ # truncated_node = dup
129
+ # truncated_node.name = name
130
+ # truncated_node.raw_attributes = raw_attributes
131
+ # truncated_node.children = []
132
+ # each_child do |node|
133
+ # break if max_length <= 0
134
+ # node_length = node.inner_text.chars.length
135
+ # truncated_node.children << node.truncate(max_length, ellipsis)
136
+ # max_length = max_length - node_length
137
+ # end
138
+ # truncated_node
139
+ # end
140
+ # end
141
+ #
142
+ # module TextNode
143
+ # def truncate(max_length, ellipsis)
144
+ # self.content = TextHelper.truncate_at_space(content, max_length, ellipsis)
145
+ # self
146
+ # end
147
+ # end
148
+ #
149
+ # module IgnoredTag
150
+ # def truncate(max_length, ellipsis)
151
+ # self
152
+ # end
153
+ # end
154
+ #end
155
+ #
156
+ #Hpricot::Doc.send(:include, HpricotTruncator::NodeWithChildren)
157
+ #Hpricot::Elem.send(:include, HpricotTruncator::NodeWithChildren)
158
+ #Hpricot::Text.send(:include, HpricotTruncator::TextNode)
159
+ #Hpricot::BogusETag.send(:include, HpricotTruncator::IgnoredTag)
160
+ #Hpricot::Comment.send(:include, HpricotTruncator::IgnoredTag)
161
+
@@ -0,0 +1,29 @@
1
+ gem 'nokogiri'; require 'nokogiri'
2
+ require 'buzzcore/misc_utils'
3
+
4
+ module XmlUtils2
5
+
6
+ BASIC_HEADER = '<?xml version="1.0"?>'
7
+
8
+ # for yore, need to convert
9
+ # XmlUtils.add_xml_from_string : node
10
+ # XmlUtils.get_file_root : node
11
+ # XmlUtils.read_simple_items : hash
12
+ # XmlUtils.single_node : node
13
+ # XmlUtils.peek_node_value : String
14
+
15
# Parses aXmlString and returns it re-serialized with comments and
# whitespace-only text nodes stripped out.
#
# aXmlString - XML source handed to Nokogiri::XML.
#
# Returns the document serialized by to_xml with :indent => 0.
def self.clean_data(aXmlString)
  doc = Nokogiri::XML(aXmlString) do |config|
    # OR the flag into the existing option bits. The previous `||=` never
    # assigned anything because options is always a (truthy) Integer, and
    # NOBLANKS is a constant, so it must be reached with `::` rather than `.`.
    config.options |= Nokogiri::XML::ParseOptions::NOBLANKS
  end
  doc.traverse do |node|
    case node
    when Nokogiri::XML::Comment
      node.remove
    when Nokogiri::XML::Text
      # Only drop blank text that sits next to a sibling; a blank node that is
      # the sole child of its parent is kept.
      node.remove if (node.next || node.previous) && node.content.strip.empty?
    end
  end
  doc.to_xml(:indent => 0)
end
27
+
28
+ end
29
+
@@ -62,5 +62,22 @@ module StringUtils
62
62
  end
63
63
  end
64
64
 
65
# Shortens aString to fit aMaxLength by cutting at the last space or tab found
# at or before index aMaxLength; the whitespace character itself is dropped.
# When no such whitespace exists the string is cut hard at aMaxLength.
# Returns nil for nil/false input and the string itself when it already fits.
def self.word_safe_truncate(aString,aMaxLength)
  return nil unless aString
  return aString unless aString.length > aMaxLength
  cutAt = aString.rindex(/[ \t]/,aMaxLength) || aMaxLength
  aString[0,cutAt]
end
73
+
74
# Returns a copy of aText with every tab turned into a space, leading and
# trailing whitespace stripped, and each run of spaces collapsed to one space.
# The argument itself is not modified.
def self.reduce_whitespace(aText)
  aText.gsub("\t"," ").strip.squeeze(' ')
end
81
+
65
82
  end
66
83
 
data/lib/buzzcore.rb CHANGED
@@ -1,2 +1,2 @@
1
- Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore/*'] }.each {|f| require f }
1
+ Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore/*.rb'] }.each {|f| require f }
2
2
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buzzcore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - buzzware
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-14 00:00:00 +08:00
12
+ date: 2010-01-15 00:00:00 +08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -56,6 +56,8 @@ files:
56
56
  - lib/buzzcore/database_utils.rb
57
57
  - lib/buzzcore/enum.rb
58
58
  - lib/buzzcore/extend_base_classes.rb
59
+ - lib/buzzcore/extra/html_truncate.rb
60
+ - lib/buzzcore/extra/xml_utils2.rb
59
61
  - lib/buzzcore/html_utils.rb
60
62
  - lib/buzzcore/logging.rb
61
63
  - lib/buzzcore/misc_utils.rb