buzzcore 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.7
1
+ 0.3.0
data/buzzcore.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{buzzcore}
8
- s.version = "0.2.7"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["buzzware"]
12
- s.date = %q{2010-01-14}
12
+ s.date = %q{2010-01-15}
13
13
  s.description = %q{buzzcore is the ruby core library developed and used by Buzzware Solutions.}
14
14
  s.email = %q{contact@buzzware.com.au}
15
15
  s.extra_rdoc_files = [
@@ -32,6 +32,8 @@ Gem::Specification.new do |s|
32
32
  "lib/buzzcore/database_utils.rb",
33
33
  "lib/buzzcore/enum.rb",
34
34
  "lib/buzzcore/extend_base_classes.rb",
35
+ "lib/buzzcore/extra/html_truncate.rb",
36
+ "lib/buzzcore/extra/xml_utils2.rb",
35
37
  "lib/buzzcore/html_utils.rb",
36
38
  "lib/buzzcore/logging.rb",
37
39
  "lib/buzzcore/misc_utils.rb",
@@ -0,0 +1,161 @@
1
+ gem 'sanitize'; require 'sanitize'
2
+ gem 'nokogiri'; require 'nokogiri'
3
+
4
+ module HtmlUtils
5
+
6
# Truncates HTML text to at most aMaxLength characters, cutting on a word
# boundary, then passes the result through Sanitize with the BASIC config
# (per the original author's note this closes any tags left open by the cut
# and entity-encodes aSuffix).
#
# aHtmlText  - the HTML String to shorten; nil is returned unchanged as nil,
#              matching StringUtils.word_safe_truncate.
# aMaxLength - maximum number of characters kept from aHtmlText.
# aSuffix    - marker appended (after a single space) only when text was
#              actually removed.
#
# Returns the sanitized String, or nil when aHtmlText is nil.
def self.word_safe_truncate(aHtmlText,aMaxLength,aSuffix='...')
  # StringUtils.word_safe_truncate returns nil for nil input; bail out early
  # instead of failing on nil + ' '.
  return nil if aHtmlText.nil?
  truncated = StringUtils.word_safe_truncate(aHtmlText,aMaxLength)
  # A shorter result means something was cut off; only then is the suffix meaningful.
  truncated = truncated+' '+aSuffix if truncated.length < aHtmlText.length
  Sanitize.clean(truncated,Sanitize::Config::BASIC)
end
12
+
13
+ #
14
+ #
15
+ ## from http://gist.github.com/101410
16
+ #def self.html_truncate(input, num_words = 15, truncate_string = "...")
17
+ # doc = Nokogiri::HTML(input)
18
+ #
19
+ # current = doc.children.first
20
+ # count = 0
21
+ #
22
+ # while true
23
+ # # we found a text node
24
+ # if current.is_a?(Nokogiri::XML::Text)
25
+ # count += current.text.split.length
26
+ # # we reached our limit, let's get outta here!
27
+ # break if count > num_words
28
+ # previous = current
29
+ # end
30
+ #
31
+ # if current.children.length > 0
32
+ # # this node has children, can't be a text node,
33
+ # # lets descend and look for text nodes
34
+ # current = current.children.first
35
+ # elsif !current.next.nil?
36
+ # #this has no children, but has a sibling, let's check it out
37
+ # current = current.next
38
+ # else
39
+ # # we are the last child, we need to ascend until we are
40
+ # # either done or find a sibling to continue on to
41
+ # n = current
42
+ # while !n.is_a?(Nokogiri::HTML::Document) and n.parent.next.nil?
43
+ # n = n.parent
44
+ # end
45
+ #
46
+ # # we've reached the top and found no more text nodes, break
47
+ # if n.is_a?(Nokogiri::HTML::Document)
48
+ # break;
49
+ # else
50
+ # current = n.parent.next
51
+ # end
52
+ # end
53
+ # end
54
+ #
55
+ # if count >= num_words
56
+ # unless count == num_words
57
+ # new_content = current.text.split
58
+ #
59
+ # # If we're here, the last text node we counted eclipsed the number of words
60
+ # # that we want, so we need to cut down on words. The easiest way to think about
61
+ # # this is that without this node we'd have fewer words than the limit, so all
62
+ # # the previous words plus a limited number of words from this node are needed.
63
+ # # We simply need to figure out how many words are needed and grab that many.
64
+ # # Then we need to -subtract- an index, because the first word would be index zero.
65
+ #
66
+ # # For example, given:
67
+ # # <p>Testing this HTML truncater.</p><p>To see if its working.</p>
68
+ # # Let's say I want 6 words. The correct returned string would be:
69
+ # # <p>Testing this HTML truncater.</p><p>To see...</p>
70
+ # # All the words in both paragraphs = 9
71
+ # # The last paragraph is the one that breaks the limit. How many words would we
72
+ # # have without it? 4. But we want up to 6, so we might as well get that many.
73
+ # # 6 - 4 = 2, so we get 2 words from this node, but words #1-2 are indices #0-1, so
74
+ # # we subtract 1. If this gives us -1, we want nothing from this node. So go back to
75
+ # # the previous node instead.
76
+ # index = num_words-(count-new_content.length)-1
77
+ # if index >= 0
78
+ # new_content = new_content[0..index]
79
+ # current.inner_html = new_content.join(' ') + truncate_string
80
+ # #require 'ruby-debug'; debugger
81
+ # #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
82
+ # #current.content.inner_html = current.content.inner_html + truncate_string
83
+ # else
84
+ # current = previous
85
+ # #current.content = current.content + truncate_string
86
+ # current.inner_html = current.content + truncate_string
87
+ # #current = current.parent.add_child(Nokogiri::XML::Node.new(truncate_string,current.document))
88
+ # #current.inner_html = current.content.inner_html + truncate_string
89
+ # end
90
+ # end
91
+ #
92
+ # # remove everything else
93
+ # while !current.is_a?(Nokogiri::HTML::Document)
94
+ # while !current.next.nil?
95
+ # current.next.remove
96
+ # end
97
+ # current = current.parent
98
+ # end
99
+ # end
100
+ #
101
+ # # now we grab the html and not the text.
102
+ # # we do first because nokogiri adds html and body tags
103
+ # # which we don't want
104
+ # doc.root.children.first.inner_html
105
+ #end
106
+
107
+ # from http://blog.leshill.org/blog/2009/06/03/truncating-html.html
108
+
109
+ # Like the Rails _truncate_ helper but doesn't break HTML tags or entities.
110
+ #def truncate_html(text, max_length = 30, ellipsis = "...")
111
+ # return if text.nil?
112
+ # doc = Hpricot(text.to_s)
113
+ # doc.inner_text.chars.length > max_length ? doc.truncate(max_length, ellipsis).inner_html : text.to_s
114
+ #end
115
+ #
116
+ #def self.truncate_at_space(text, max_length, ellipsis = '...')
117
+ # l = [max_length - ellipsis.length, 0].max
118
+ # stop = text.rindex(' ', l) || 0
119
+ # (text.length > max_length ? text[0...stop] + ellipsis : text).to_s
120
+ #end
121
+
122
+ end
123
+
124
+ #module HpricotTruncator
125
+ # module NodeWithChildren
126
+ # def truncate(max_length, ellipsis)
127
+ # return self if inner_text.chars.length <= max_length
128
+ # truncated_node = dup
129
+ # truncated_node.name = name
130
+ # truncated_node.raw_attributes = raw_attributes
131
+ # truncated_node.children = []
132
+ # each_child do |node|
133
+ # break if max_length <= 0
134
+ # node_length = node.inner_text.chars.length
135
+ # truncated_node.children << node.truncate(max_length, ellipsis)
136
+ # max_length = max_length - node_length
137
+ # end
138
+ # truncated_node
139
+ # end
140
+ # end
141
+ #
142
+ # module TextNode
143
+ # def truncate(max_length, ellipsis)
144
+ # self.content = TextHelper.truncate_at_space(content, max_length, ellipsis)
145
+ # self
146
+ # end
147
+ # end
148
+ #
149
+ # module IgnoredTag
150
+ # def truncate(max_length, ellipsis)
151
+ # self
152
+ # end
153
+ # end
154
+ #end
155
+ #
156
+ #Hpricot::Doc.send(:include, HpricotTruncator::NodeWithChildren)
157
+ #Hpricot::Elem.send(:include, HpricotTruncator::NodeWithChildren)
158
+ #Hpricot::Text.send(:include, HpricotTruncator::TextNode)
159
+ #Hpricot::BogusETag.send(:include, HpricotTruncator::IgnoredTag)
160
+ #Hpricot::Comment.send(:include, HpricotTruncator::IgnoredTag)
161
+
@@ -0,0 +1,29 @@
1
+ gem 'nokogiri'; require 'nokogiri'
2
+ require 'buzzcore/misc_utils'
3
+
4
+ module XmlUtils2
5
+
6
+ BASIC_HEADER = '<?xml version="1.0"?>'
7
+
8
+ # for yore, need to convert
9
+ # XmlUtils.add_xml_from_string : node
10
+ # XmlUtils.get_file_root : node
11
+ # XmlUtils.read_simple_items : hash
12
+ # XmlUtils.single_node : node
13
+ # XmlUtils.peek_node_value : String
14
+
15
# Parses aXmlString with Nokogiri, strips comments and blank text nodes that
# sit next to other nodes, and returns the document serialised with no
# indentation.
#
# aXmlString - XML source as a String.
#
# Returns the cleaned XML as a String.
def self.clean_data(aXmlString)
  # options is always an Integer (truthy), so `||=` would never assign; OR the
  # NOBLANKS flag into the existing option bits instead. Constants need `::`.
  doc = Nokogiri::XML(aXmlString) {|c| c.options |= Nokogiri::XML::ParseOptions::NOBLANKS}
  doc.traverse do |n|
    case n
    when Nokogiri::XML::Comment
      n.remove
    when Nokogiri::XML::Text
      # Keep whitespace that is an element's only content; drop blank text
      # that merely separates sibling nodes.
      n.remove if (n.next || n.previous) && n.content.strip.empty?
    end
  end
  doc.to_xml(:indent => 0)
end
27
+
28
+ end
29
+
@@ -62,5 +62,22 @@ module StringUtils
62
62
  end
63
63
  end
64
64
 
65
# Truncates a string to at most aMaxLength characters, cutting at the last
# whitespace character (space, tab or newline) found at or before that
# position so that words are not split.
#
# aString    - the String to shorten (nil is passed through as nil).
# aMaxLength - maximum number of characters to keep.
#
# Returns aString itself when it already fits, the text before the last
# whitespace otherwise, or a hard cut at aMaxLength when no whitespace exists
# in range.
def self.word_safe_truncate(aString,aMaxLength)
  return nil if aString.nil?
  return aString if aString.length <= aMaxLength
  # \s rather than [ \t] so that newline-separated words are also respected.
  cut = aString.rindex(/\s/,aMaxLength)
  # No whitespace in range: a single long word, so cut it hard at the limit.
  return aString[0,aMaxLength] unless cut
  aString[0,cut]
end
73
+
74
# Replaces every tab with a space, strips leading/trailing whitespace and
# collapses runs of spaces into a single space. Newlines inside the text are
# left untouched. The argument itself is never modified.
#
# aText - the String to normalise (nil is passed through as nil, matching
#         word_safe_truncate).
#
# Returns a new normalised String, or nil when aText is nil.
def self.reduce_whitespace(aText)
  return nil if aText.nil?
  # tr builds a new string, so the non-bang strip/squeeze chain is safe even
  # for frozen input and never returns nil the way the bang variants can.
  aText.tr("\t",' ').strip.squeeze(' ')
end
81
+
65
82
  end
66
83
 
data/lib/buzzcore.rb CHANGED
@@ -1,2 +1,2 @@
1
- Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore/*'] }.each {|f| require f }
1
+ Dir.chdir(File.dirname(__FILE__)) { Dir['buzzcore/*.rb'] }.each {|f| require f }
2
2
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buzzcore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - buzzware
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-14 00:00:00 +08:00
12
+ date: 2010-01-15 00:00:00 +08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -56,6 +56,8 @@ files:
56
56
  - lib/buzzcore/database_utils.rb
57
57
  - lib/buzzcore/enum.rb
58
58
  - lib/buzzcore/extend_base_classes.rb
59
+ - lib/buzzcore/extra/html_truncate.rb
60
+ - lib/buzzcore/extra/xml_utils2.rb
59
61
  - lib/buzzcore/html_utils.rb
60
62
  - lib/buzzcore/logging.rb
61
63
  - lib/buzzcore/misc_utils.rb