webget_ruby_ramp 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/lib/webget_ruby_ramp.rb +250 -0
- data/lib/webget_ruby_ramp/active_record.rb +119 -0
- data/lib/webget_ruby_ramp/active_record/connection_adapters/abstract/schema_statements.rb +24 -0
- data/lib/webget_ruby_ramp/active_record/save_extensions.rb +35 -0
- data/lib/webget_ruby_ramp/array.rb +370 -0
- data/lib/webget_ruby_ramp/csv.rb +53 -0
- data/lib/webget_ruby_ramp/date.rb +90 -0
- data/lib/webget_ruby_ramp/enumerable.rb +385 -0
- data/lib/webget_ruby_ramp/file.rb +15 -0
- data/lib/webget_ruby_ramp/hash.rb +223 -0
- data/lib/webget_ruby_ramp/integer.rb +22 -0
- data/lib/webget_ruby_ramp/io.rb +65 -0
- data/lib/webget_ruby_ramp/kernel.rb +36 -0
- data/lib/webget_ruby_ramp/math.rb +20 -0
- data/lib/webget_ruby_ramp/nil.rb +17 -0
- data/lib/webget_ruby_ramp/numeric.rb +98 -0
- data/lib/webget_ruby_ramp/object.rb +20 -0
- data/lib/webget_ruby_ramp/process.rb +153 -0
- data/lib/webget_ruby_ramp/string.rb +221 -0
- data/lib/webget_ruby_ramp/symbol.rb +11 -0
- data/lib/webget_ruby_ramp/time.rb +11 -0
- data/lib/webget_ruby_ramp/xml.rb +193 -0
- data/lib/webget_ruby_ramp/yaml.rb +34 -0
- data/test/webget_ruby_ramp/active_record/connection_adapters/abstract/schema_statements_test.rb +9 -0
- data/test/webget_ruby_ramp/active_record/save_extensions_test.rb +7 -0
- data/test/webget_ruby_ramp/active_record_test.rb +64 -0
- data/test/webget_ruby_ramp/array_test.rb +171 -0
- data/test/webget_ruby_ramp/csv_test.rb +18 -0
- data/test/webget_ruby_ramp/date_test.rb +60 -0
- data/test/webget_ruby_ramp/enumerable_test.rb +275 -0
- data/test/webget_ruby_ramp/file_test.rb +15 -0
- data/test/webget_ruby_ramp/hash_test.rb +105 -0
- data/test/webget_ruby_ramp/integer_test.rb +19 -0
- data/test/webget_ruby_ramp/io_test.rb +31 -0
- data/test/webget_ruby_ramp/io_test.txt +1 -0
- data/test/webget_ruby_ramp/kernel_test.rb +15 -0
- data/test/webget_ruby_ramp/math_test.rb +17 -0
- data/test/webget_ruby_ramp/nil_test.rb +15 -0
- data/test/webget_ruby_ramp/numeric_test.rb +28 -0
- data/test/webget_ruby_ramp/object_test.rb +12 -0
- data/test/webget_ruby_ramp/process_test.rb +24 -0
- data/test/webget_ruby_ramp/string_test.rb +125 -0
- data/test/webget_ruby_ramp/symbol_test.rb +26 -0
- data/test/webget_ruby_ramp/time_test.rb +12 -0
- data/test/webget_ruby_ramp/xml_test.rb +93 -0
- data/test/webget_ruby_ramp/xml_test_1.xml +5 -0
- data/test/webget_ruby_ramp/xml_test_2.xml +5 -0
- data/test/webget_ruby_ramp/xml_test_msword_clean.html +1 -0
- data/test/webget_ruby_ramp/xml_test_msword_dirty.html +148 -0
- data/test/webget_ruby_ramp/yaml_test.rb +32 -0
- data/test/webget_ruby_ramp/yaml_test_1.yml +38 -0
- data/test/webget_ruby_ramp/yaml_test_2.yml +38 -0
- metadata +128 -0
- metadata.gz.sig +1 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
# Object extensions
|
2
|
+
|
3
|
+
class Object

  # Membership test written from the element's point of view.
  #
  # ==Definition
  #   object.in? collection  ===  collection.include? object
  #
  # ==Example
  #   letters=['a','b','c']
  #   'b'.in? letters
  #   => true
  #
  # The argument only has to respond to #include?; this method returns
  # whatever that call returns.

  def in?(array)
    array.include? self
  end

end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# Process extensions to help debug Ruby programs.
|
2
|
+
#
|
3
|
+
# ==Examples
|
4
|
+
#
|
5
|
+
# p = Process.ps
|
6
|
+
# puts p
|
7
|
+
# => the results of the 'ps' command for the current process id
|
8
|
+
#
|
9
|
+
# p = Process.ps(1234)
|
10
|
+
# puts p
|
11
|
+
# => the results of the 'ps' command for process id 1234
|
12
|
+
#
|
13
|
+
# p = Process.pss
|
14
|
+
# p['%cpu'] => percentage of cpu use, as a float
|
15
|
+
# p['%mem'] => percentage of memory use, as a float
|
16
|
+
##
|
17
|
+
|
18
|
+
module Process


  # Run the configured 'ps' command for one process and return its
  # output as one long text string.
  #
  # This is typically useful for logging to a text file.
  #
  # pid:: the process id to inspect; defaults to the current process.
  #       It is passed through #to_i before being appended to the
  #       command line.

  def self.ps(pid=Process.pid)
    `#{ps_command} #{pid.to_i}`
  end


  # Get the 'ps' command output as a hash of keys and values.
  #
  # The output of Process.ps is split on whitespace and paired, in
  # order, with Process.ps_keys. Known numeric columns are then
  # converted with #to_i or #to_f, and finally every alias listed in
  # Process.ps_aliases is added as an extra key.
  #
  # A numeric key that is not present in the output ends up as 0
  # (or 0.0) because nil.to_i is 0.
  #
  # NOTE(review): splitting on whitespace assumes no column value
  # contains a space -- confirm for the keys in use.
  # NOTE(review): 'egroup' is converted with #to_i like the original
  # code did; if 'ps' prints a group name this yields 0 -- confirm.
  # -
  # OPTIMIZE: add dates, times

  def self.pss(pid=Process.pid)
    h=Hash[ps_keys.zip(ps(pid).split)]
    # 'euid' is the column that 'ps' actually reports; 'uid' is kept in
    # the list so a custom key list containing 'uid' is still converted.
    %w[c egid egroup euid uid fgid lwp ni nlwp pgid pid ppid rgid rss ruid sid sgid suid].each{|key| h[key]=h[key].to_i}
    %w[cp pcpu pmem].each{|key| h[key]=h[key].to_f}
    ps_aliases.each_pair{|key,val| h[key]=h[val]}
    h
  end


  # Get the list of process alias keywords as typically defined by the shell.
  #
  # The result is a hash from alias name to the key it stands for.
  # For example, a shell may consider "%cpu" and "pcpu" to be identical.
  #
  # The default is built on first use and then cached. When an alias
  # is listed twice (see 'cls') the later pair wins.

  def self.ps_aliases
    @@ps_aliases||=Hash[*%w'
      %cpu pcpu
      %mem pmem
      sig_block blocked
      sigmask blocked
      sig_catch caught
      sigcatch caught
      cls class
      cls policy
      cputime time
      gid egid
      group egroup
      uid euid
      uname euser
      user euser
      flag f
      flags f
      fsuid fuid
      sig_ignore ignored
      sigignore ignored
      spid lwp
      tid lwp
      nice ni
      thcount nlwp
      sig pending
      sig_pend pending
      pgrp pgid
      rssize rss
      rsz rss
      state s
      sess sid
      session sid
      svgid sgid
      tt tname
      tty tname
      vsz vsize
    ']
  end


  # Set the list of process alias keywords.
  #
  # aliases:: a hash from alias name to the key it stands for.

  def self.ps_aliases=(aliases)
    @@ps_aliases=aliases
  end


  # Get the list of process keywords.
  #
  # The default list is built on first use and then cached.
  #
  # ==Example
  #   Process.ps_keys => ["blocked","group","pending","size"]

  def self.ps_keys
    @@ps_keys||=%w'blocked bsdtime c caught class cp egid egroup eip esp etime euid euser f fgid fgroup fuid fuser group ignored label lwp ni nlwp nwchan pending pcpu pgid pid pmem ppid pri psr rgid rgroup rss rtprio ruid ruser s sched sgi_p sgid sgroup sid sig size stackp start_time stat suid suser sz time tname tpgid vsize wchan'
  end


  # Set the list of process keywords.
  #
  # Note that a command already returned by Process.ps_command is
  # cached and is not rebuilt when the keys change.
  #
  # ==Example
  #   Process.ps_keys = ["blocked","group","pending","size"]

  def self.ps_keys=(keys)
    @@ps_keys=keys
  end


  # Get the process command, i.e. what the system will call for the "ps" command.
  #
  # The default is built from Process.ps_keys the first time this
  # method is called, and is cached from then on.
  #
  # ==Example
  #   Process.ps_command => "ps h ww -o blocked,group,pending,size"

  def self.ps_command
    @@ps_command||="ps h ww -o \"#{ps_keys.join(',')}\""
  end


  # Set the process command, i.e. what the system will call for the "ps" command.
  #
  # The process id is appended to this text by Process.ps.
  #
  # ==Example
  #   Process.ps_command = "ps h ww -o blocked,group,pending,size"

  def self.ps_command=(command)
    # Must write the same variable that Process.ps_command reads.
    @@ps_command=command
  end


end
|
@@ -0,0 +1,221 @@
|
|
1
|
+
# String extensions
|
2
|
+
|
3
|
+
class String

  # Lookup table from an accented lowercase letter to a plain
  # replacement, built from whitespace-separated "accented plain" pairs.
  #
  # NOTE(review): a few pairs look unusual (the replacements 'oek' and
  # 'q', and several letters on the 'j' row); the data is kept exactly
  # as published -- confirm the intent before relying on those entries.

  ACCENTS = Hash[*'
    à a á a â a ã a ä a å a ā a ă a
    æ ae
    ď d đ d
    ç c ć c č c ĉ c ċ c
    è e é e ê e ë e ē e ę e ě e ĕ e ė e
    ƒ f
    ĝ g ğ g ġ g ģ g
    ĥ h ħ h
    ì i ì i í i î i ï i ī i ĩ i ĭ i
    į j ı j ij j ĵ j
    ķ k ĸ k
    ł l ľ l ĺ l ļ l ŀ l
    ñ n ń n ň n ņ n ʼn n ŋ n
    ò o ó o ô o õ o ö o ø o ō o ő o ŏ o ŏ o
    œ oek
    ą q
    ŕ r ř r ŗ r
    ś s š s ş s ŝ s ș s
    ť t ţ t ŧ t ț t
    ù u ú u û u ü u ū u ů u ű u ŭ u ũ u ų u
    ŵ w
    ý y ÿ y ŷ y
    ž z ż z ź z
  '.split]


  # Return the string with words capitalized.
  #
  # The string is split at word boundaries and String#capitalize is
  # applied to every piece, so the rest of each word is lowercased.

  def capitalize_words
    split(/\b/).map{|word| word.capitalize }.join
  end


  # Return an array that is the string split into words, i.e. split(/\W*\b\W*/)

  def words
    split(/\W*\b\W*/)
  end


  # Return an array that is the string split at tabs, i.e. split(/\t/)

  def split_tab
    split(/\t/)
  end


  # Return an array that is the string split at newlines, then tabs.
  # This is useful to split a TSV (Tab Separated Values) string
  # into an array of rows, and each row into an array of fields.

  def split_tsv
    split(/\n/).map{|line| line.split(/\t/)}
  end


  # Return the string in lowercase, with each run of underscores and
  # non-word characters replaced by a single underscore (aka low dash).
  #
  # ==Example
  #   'Foo Goo Hoo' => 'foo_goo_hoo'
  #   'Foo***Goo***Hoo' => 'foo_goo_hoo'

  def lowcase
    downcase.gsub(/[_\W]+/,'_')
  end


  # Return the string as an XML id, which is the same as #lowcase
  #
  # ==Example
  #   "Foo Hoo Goo" => 'foo_hoo_goo'
  #   "Foo***Goo***Hoo" => 'foo_goo_hoo'

  def to_xid
    lowcase
  end


  # Ruby String#to_class method to convert from a String to a class
  #
  # The string is split at '::' and each part is resolved with
  # const_get, starting from Kernel.
  #
  # From Mirage at http://infovore.org/archives/2006/08/02/getting-a-class-object-in-ruby-from-a-string-containing-that-classes-name/

  def to_class
    split('::').inject(Kernel) {|scope, const_name| scope.const_get(const_name)}
  end


  # Increment the rightmost natural number
  #
  # Returns a new string, or self when the string contains no digits.
  #
  # ==Example
  #   'foo5bar'.increment => 'foo6bar'
  #   'foo5bar'.increment(3) => 'foo8bar'
  #   'foo9bar'.increment => 'foo10bar'
  #   'a1b2'.increment => 'a1b3'
  #
  # - see String#decrement

  def increment(step=1)
    # The lookahead accepts a digit run only when no digit follows it,
    # which selects the rightmost number rather than the first one.
    found=match(/\d+(?=\D*\z)/)
    return self unless found
    found.pre_match+(found[0].to_i+step).to_s+found.post_match
  end


  # Decrement the rightmost natural number
  #
  # Returns a new string, or self when the string contains no digits.
  #
  # ==Example
  #   'foo5bar'.decrement => 'foo4bar'
  #   'foo5bar'.decrement(3) => 'foo2bar'
  #   'foo10bar'.decrement => 'foo9bar'
  #
  # - see String#increment

  def decrement(step=1)
    increment(-step)
  end


  # Return the previous character, with a changed flag and carry flag
  #
  # ==Examples
  #   String.prev_char('n') => 'm', true, false   # change
  #   String.prev_char('a') => 'z', true, true    # change & carry
  #   String.prev_char('6') => '5', true, false   # change
  #   String.prev_char('0') => '9', true, true    # change & carry
  #   String.prev_char('-') => '-', false, false  # unchanged

  def self.prev_char(c) #=> prev_char, changed_flag, carry_flag
    case c
    when '1'..'9', 'B'..'Z', 'b'..'z'
      # Older rubies have no String#ord; there c[0] is the character code.
      code=(c.respond_to?(:ord) ? c.ord : c[0])
      return (code-1).chr, true, false
    when '0'
      return '9', true, true
    when 'A'
      return 'Z', true, true
    when 'a'
      return 'z', true, true
    else
      return c, false, false
    end
  end


  # Return the previous string
  #
  # c.f. String#next
  #
  # The receiver is left untouched; the work is done on a copy, so
  # this also works on frozen strings. Returns nil in the same case
  # as String#prev!.
  #
  # ==Examples
  #   '888'.prev => '887'
  #   'n'.prev => 'm'
  #   'N'.prev => 'M'
  #
  # ==Examples with carry
  #   '880'.prev => '879'
  #   'nna'.prev => 'nmz'
  #   'NNA'.prev => 'NMZ'
  #   'nn0aA'.prev => 'nm9zZ'

  def prev
    # dup (unlike clone) does not copy the frozen state.
    dup.prev!
  end


  # Do String#prev in place
  #
  # Works from the rightmost character leftwards while a carry is
  # pending, and stops at the first character that String.prev_char
  # leaves unchanged.
  #
  # Returns self, or nil when the carry runs past the leftmost
  # character (e.g. 'a'); the string has already been modified then.

  def prev!
    return self if length==0
    index=length-1 # rightmost
    loop do
      prev_chr,changed_flag,carry_flag=String.prev_char(self[index].chr)
      return self unless changed_flag
      self[index]=prev_chr
      return self unless carry_flag
      index-=1
      return nil if index<0
    end
  end

  alias pred  prev   # String#pred : predecessor :: String#succ : successor
  alias pred! prev!

  class << self
    alias_method :pred_char, :prev_char
  end


  # Helpful constants

  LOWERCASE_ENGLISH_CHARS = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
  UPPERCASE_ENGLISH_CHARS = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']


  ##
  #
  #  Lorem Ipsum random text generator
  #
  ##

  # Return a random length suitable for a "lorem ipsum" string.
  #
  # This method uses 1+rand(10)

  def self.lorem_length
    1+rand(10)
  end


  # Return a random string suitable for "lorem ipsum" text.
  #
  # This method chooses from lowercase letters a-z.
  #
  # This method defaults to length = self.lorem_length.
  #
  # It relies on Array#choices, which is not defined in this class --
  # presumably it comes from this package's Array extensions.

  def self.lorem(length=self.lorem_length)
    LOWERCASE_ENGLISH_CHARS.choices(length).join
  end


end
|
221
|
+
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
# XML extensions
|
4
|
+
|
5
|
+
module XML


  # Specify one or more directory patterns and pass each XML file in the matching directories to a block.
  #
  # See [Dir#glob](http://www.ruby-doc.org/core/classes/Dir.html#M002347) for pattern details.
  #
  # Patterns may be given as separate arguments or as (nested) arrays.
  # The files matching each pattern are visited in sorted order, and
  # every file is parsed into a REXML::Document that is yielded to
  # the block. A block is required.
  #
  # ==Example
  #   XML.load_dir('/tmp/*.xml'){|xml_document|
  #     #...whatever you want to do with each xml document
  #   }
  #
  # ==Example to load xml documents in files beginning in "foo" or "bar"
  #   XML.load_dir('/tmp/foo*.xml','/tmp/bar*.xml'){|xml_document|
  #     #...whatever you want to do with the xml document
  #   }

  def XML.load_dir(*dirpaths)
    dirpaths.flatten.each do |dirpath|
      Dir[dirpath].sort.each do |filename|
        File.open(filename) do |file|
          yield REXML::Document.new(file)
        end
      end
    end
  end


  # Sugar to load elements from a file.
  #
  # Every element matching xpath, in every document found by
  # XML.load_dir(dirpath), is yielded to the block.
  #
  # ==Example
  #   XML.load_elements('config.xml','userlist/user'){|element| pp element.attributes['first_name'] }

  def XML.load_elements(dirpath,xpath)
    XML.load_dir(dirpath) do |doc|
      doc.elements.each(xpath){|elem| yield elem }
    end
  end


  # Sugar to load attributes from a file.
  #
  # Yields the attributes object of every element found by
  # XML.load_elements.
  #
  # ==Example
  #   XML.load_attributes('config.xml','userlist/user'){|attributes| pp attributes['first_name'] }

  def XML.load_attributes(dirpath,xpath)
    XML.load_elements(dirpath,xpath){|elem| yield elem.attributes }
  end


  # Sugar to load attributes hash from a file.
  #
  # Like XML.load_attributes, but yields the result of calling
  # to_hash on each attributes object.
  #
  # ==Example
  #   XML.load_attributes_hash('config.xml','userlist/user'){|attributes| pp attributes['first_name'] }

  def XML.load_attributes_hash(dirpath,xpath)
    XML.load_elements(dirpath,xpath){|elem| yield elem.attributes.to_hash }
  end


  # Sanitize dirty xml by removing unprintables, bad tags,
  # comments, and generally anything else we might need
  # to enable the XML parser to handle a dirty document.
  #
  # ==Example
  #   s="<foo a=b c=d><!--comment-->Hello<!-[if bar]>Microsoft<![endif]>World</foo>"
  #   XML.strip_all(s) => "<foo>HelloWorld</foo>"
  #
  # This method calls these in order:
  #   - XML.strip_unprintables
  #   - XML.strip_microsoft
  #   - XML.strip_comments
  #   - XML.strip_attributes

  def XML.strip_all(xml_text)
    text=XML.strip_unprintables(xml_text)
    text=XML.strip_microsoft(text)
    text=XML.strip_comments(text)
    XML.strip_attributes(text)
  end


  # Strip out all attributes from the xml text's tags.
  #
  # Only the leading word characters of the tag name are kept. The
  # slash of a self-closing tag is preserved, so the tag stays closed.
  #
  # ==Example
  #   s="<foo a=b c=d e=f>Hello</foo>"
  #   XML.strip_attributes(s) => "<foo>Hello</foo>"
  #
  #   XML.strip_attributes('<br class="x" />') => "<br/>"

  def XML.strip_attributes(xml_text)
    # \1 is the optional leading slash plus the name; \2 is the
    # optional slash directly before the closing angle bracket.
    xml_text.gsub(/<(\/?\w+)[^>]*?(\/?)>/m,'<\1\2>')
  end


  # Strip out all comments from the xml text.
  #
  # A well-formed comment is removed as a whole, even when its text
  # contains ">". Any other "<!...>" construct is removed up to its
  # first ">".
  #
  # ==Example
  #   s="Hello<!--comment-->World"
  #   XML.strip_comments(s) => "HelloWorld"

  def XML.strip_comments(xml_text)
    xml_text.gsub(/<!--.*?-->|<!.*?>/m,'')
  end


  # Strip out all microsoft proprietary codes.
  #
  # This removes everything from an opening "[if ...]" marker through
  # the matching "[endif]" marker, including the text in between.
  #
  # ==Example
  #   s="Hello<!-[if foo]>Microsoft<![endif]->World"
  #   XML.strip_microsoft(s) => "HelloWorld"

  def XML.strip_microsoft(xml_text)
    xml_text.gsub(/<!-*\[if\b.*?<!\[endif\]-*>/im,'')
  end


  # Strip out all unprintable characters from the input string.
  #
  # Every character outside the [:print:] class is deleted; this
  # includes control characters such as tabs and newlines.
  #
  # ==Example
  #   s="Hello\XXXWorld"  # where XXX is unprintable
  #   XML.strip_unprintables(s) => "HelloWorld"

  def XML.strip_unprintables(xml_text)
    xml_text.gsub(/[^[:print:]]/, "")
  end


end
|
137
|
+
|
138
|
+
|
139
|
+
# REXML::Attributes extensions
|
140
|
+
|
141
|
+
class REXML::Attributes

  # Build a plain Hash from this attribute collection.
  #
  # Each entry of #keys becomes a key of the result, and its value
  # is whatever self[key] returns for it.
  #
  # ==Example
  #   attributes.to_hash => {"src"=>"pic.jpg", "height" => "100", "width" => "200"}

  def to_hash
    keys.inject({}) do |plain,name|
      plain[name]=self[name]
      plain
    end
  end

end
|
155
|
+
|
156
|
+
|
157
|
+
# REXML::Document extensions
|
158
|
+
|
159
|
+
class REXML::Document

  # Strip the attributes from the elements of this document.
  #
  # Every element selected by the "//" path has each of its
  # attributes removed.
  #
  # Return the document.
  #
  # cf. Element#remove_attributes

  def remove_attributes
    elements.each("//") do |element|
      element.attributes.each_attribute(&:remove)
    end
    self
  end

end
|
173
|
+
|
174
|
+
|
175
|
+
# REXML::Element extensions
|
176
|
+
|
177
|
+
class REXML::Element

  # Strip every attribute from this element.
  #
  # Each attribute of the element is asked to remove itself.
  #
  # Return the element.
  #
  # cf. Document#remove_attributes

  def remove_attributes
    attributes.each_attribute(&:remove)
    self
  end

end
|
191
|
+
|
192
|
+
|
193
|
+
|