rexle 0.9.4 → 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/rexle.rb +418 -157
  2. metadata +30 -27
data/lib/rexle.rb CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  # file: rexle.rb
4
4
 
@@ -6,20 +6,48 @@ require 'rexml/document'
6
6
  require 'rexleparser'
7
7
  require 'dynarex-parser'
8
8
  require 'polyrex-parser'
9
+ require 'cgi'
9
10
  include REXML
10
11
 
12
+ # modifications:
13
+ # 20-Oct-2012: feature: added Rexle::Element#texts which is the equivalent
14
+ # of REXML::Element#texts
15
+ # 10-Sep-2012: bug fix: Removed code from method pretty_print in order to
16
+ # get the XML displayed properly
17
+ # 23-Aug-2012: feature: implemented xpath function contains()
18
+ # 17-Aug-2012: bug fix: pretty print now ignores text containing only whitespace
19
+ # 16-Aug-2012: the current element's text (if it's not empty) is now returned
20
+ # from its children method
21
+ # 15-Aug-2012: feature: xpath containing child:: now supported
22
+ # 13-Aug-2012: bug fix: xpath can now handle the name() function
23
+ # 11-Aug-2012: bug fix: separated the max() method from 1 line into 3
24
+ # and that fixed it
25
+ # 08-Aug-2012: feature: added Element#insert_before and Element#insert_after
26
+ # 19-Jul-2012: Changed children to elements where appropriate
27
+ # 15-Jul-2012: bug fix: self.root.value is no longer appended
28
+ # to the body if there are no child elements
29
+ # 19-Jun-2012: a bug fix for .//*[@class]
30
+ # 17-Jun-2012: a couple of new xpath things are supported '.' and '|'
31
+ # 15-Apr-2012: bug fix: New element names are typecast as string
32
+ # 16-Mar-2012: bug fix: Element names which contain a colon can now be selected
33
+ # in the xpath.
34
+ # 22-Feb-2012: bug resolution: Deactivated the PolyrexParser; using RexleParser instead
35
+ # 14-Jan-2012: Implemented Rexle::Elements#each
36
+ # 21-Dec-2011: Bug fix: xpath modified to allow querying from the actual
37
+ # root rather than the 1st child element from the root
38
+
11
39
  module XMLhelper
12
40
 
13
41
  def doc_print(children)
14
-
15
- body = children.empty? ? self.root.value : scan_print(children).join
42
+
43
+ body = (children.nil? or children.empty? or children.is_an_empty_string? ) ? '' : scan_print(children).join
16
44
  a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
17
45
  "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), body, self.root.name]
18
46
  end
19
47
 
20
48
  def doc_pretty_print(children)
21
49
 
22
- body = children.empty? ? self.value : pretty_print(children,2).join
50
+ body = pretty_print(children,2).join
23
51
  a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
24
52
  ind = "\n "
25
53
  "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), ind, body, "\n", self.root.name]
@@ -28,16 +56,27 @@ module XMLhelper
28
56
  def scan_print(nodes)
29
57
 
30
58
  nodes.map do |x|
31
- unless x.name == '![' then
32
- a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
33
- tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
34
-
35
- out = ["<%s>" % tag]
36
- out << x.value unless x.value.nil? || x.value.empty?
37
- out << scan_print(x.children)
38
- out << "</%s>" % x.name
39
- else
40
- "<![CDATA[%s]]>" % x.value
59
+
60
+ if x.is_a? Rexle::Element then
61
+ if x.name.chr != '!' then
62
+ a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
63
+ tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
64
+
65
+ if x.value.length > 0 or (x.children.length > 0 and not x.children.is_an_empty_string?) then
66
+ out = ["<%s>" % tag]
67
+ #out << x.value unless x.value.nil? || x.value.empty?
68
+ out << scan_print(x.children)
69
+ out << "</%s>" % x.name
70
+ else
71
+ out = ["<%s/>" % tag]
72
+ end
73
+ elsif x.name == '!-' then
74
+ "<!--%s-->" % x.value
75
+ else
76
+ "<![CDATA[%s]]>" % x.value
77
+ end
78
+ elsif x.is_a? String then
79
+ x
41
80
  end
42
81
  end
43
82
 
@@ -45,21 +84,30 @@ module XMLhelper
45
84
 
46
85
  def pretty_print(nodes, indent='0')
47
86
  indent = indent.to_i
48
- nodes.map.with_index do |x, i|
49
- unless x.name == '![' then
50
- a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
51
- tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
52
-
53
- ind1 = x.children.length > 0 ? ("\n" + ' ' * indent) : ''
54
- start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
55
- out = ["%s<%s>%s" % [start, tag, ind1]]
56
-
57
- out << x.value.sub(/^[\n\s]+$/,'') unless x.value.nil? || x.value.empty?
58
- out << pretty_print(x.children, (indent + 1).to_s.clone)
59
- ind2 = x.children.length > 0 ? ("\n" + ' ' * (indent - 1)) : ''
60
- out << "%s</%s>" % [ind2, x.name]
61
- else
62
- "<![CDATA[%s]]>" % x.value
87
+
88
+ nodes.select(){|x| x.is_a? Rexle::Element or x.strip.length > 0}
89
+ .map.with_index do |x, i|
90
+
91
+ if x.is_a? Rexle::Element then
92
+ unless x.name == '![' then
93
+ #return ["<%s/>" % x.name] if x.value = ''
94
+ a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
95
+ a ||= []
96
+ tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
97
+
98
+ start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
99
+ ind1 = x.children.grep(Rexle::Element).length > 0 ?
100
+ ("\n" + ' ' * indent) : ''
101
+ out = ["%s<%s>%s" % [start, tag, ind1]]
102
+
103
+ out << pretty_print(x.children, (indent + 1).to_s.clone)
104
+ ind2 = ind1.length > 0 ? ("\n" + ' ' * (indent - 1)) : ''
105
+ out << "%s</%s>" % [ind2, x.name]
106
+ else
107
+ "<![CDATA[%s]]>" % x.value
108
+ end
109
+ elsif x.is_a? String then
110
+ x.sub(/^[\n\s]+$/,'')
63
111
  end
64
112
  end
65
113
 
@@ -70,108 +118,224 @@ end
70
118
  class Rexle
71
119
  include XMLhelper
72
120
 
121
+ attr_reader :prefixes
122
+
123
+ def self.version()
124
+ '0.9.xx'
125
+ end
126
+
73
127
  def initialize(x=nil)
74
128
  super()
75
129
 
130
+ # what type of input is it? Is it a string, array, or REXML doc?
76
131
  if x then
77
132
  procs = {
78
- String: proc {|x| parse_string(x)},
79
- Array: proc {|x| x},
80
- :"REXML::Document" => proc {|x| scan_doc x.root}
133
+ String: proc {|x| parse_string(x)},
134
+ Array: proc {|x| x},
135
+ :"REXML::Document" => proc {|x| scan_doc x.root}
81
136
  }
137
+
138
+ doc_node = ['doc','',{}]
139
+
82
140
 
83
- a = procs[x.class.to_s.to_sym].call(x)
84
- @doc = scan_element(*a)
85
- end
86
-
87
- end
88
-
89
- def xpath(path, &blk)
90
-
91
- # is it a function
92
- fn_match = path.match(/^(\w+)\(([^\)]+)\)$/)
141
+ @a = procs[x.class.to_s.to_sym].call(x)
142
+ @doc = scan_element(*(doc_node << @a))
143
+
144
+ # fetch the namespaces
145
+ @prefixes = []
146
+ if @doc.root.attributes then
93
147
 
94
- # Array: proc {|x| x.flatten.compact},
95
- if fn_match.nil? then
96
- procs = {
97
- Array: proc {|x| block_given? ? x : x.flatten },
98
- String: proc {|x| x},
99
- :"Rexle::Element" => proc {|x| [x]}
100
- }
101
- bucket = []
102
- result = @doc.xpath(path, bucket, &blk)
148
+ xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
149
+ @prefixes = xmlns.keys.map{|x| x[/\w+$/]}
150
+ end
103
151
 
104
- procs[result.class.to_s.to_sym].call(result)
105
-
106
- else
107
- m, xpath_value = fn_match.captures
108
- method(m.to_sym).call(xpath_value)
109
152
  end
110
153
 
111
154
  end
155
+
156
+ def xpath(path, &blk)
157
+ @doc.xpath(path, &blk)
158
+ end
112
159
 
113
160
  class Element
114
161
  include XMLhelper
115
162
 
116
163
  attr_accessor :name, :value, :parent
117
- attr_reader :child_lookup
164
+ attr_reader :child_lookup, :child_elements
165
+
166
+ alias original_clone clone
118
167
 
119
- def initialize(name=nil, value='', attributes={})
168
+ def initialize(name=nil, value='', attributes={}, rexle=nil)
169
+ @rexle = rexle
120
170
  super()
121
- @name, @value, @attributes = name, value, attributes
171
+ @name, @value, @attributes = name.to_s, value, attributes
122
172
  raise "Element name must not be blank" unless name
123
173
  @child_elements = []
124
174
  @child_lookup = []
125
175
  end
126
176
 
127
- def xpath(xpath_value, rlist=[], &blk)
177
+ def contains(raw_args)
178
+ path, raw_val = raw_args.split(',',2)
179
+ val = raw_val.strip[/^["']?.*["']?$/]
180
+
181
+ anode = query_xpath(path)
182
+ return unless anode
183
+ a = scan_contents(anode.first)
184
+
185
+ [a.grep(/#{val}/).length > 0]
186
+ end
187
+
188
+ def count(path)
189
+ length = query_xpath(path).flatten.compact.length
190
+ length
191
+ end
192
+
193
+ def max(path)
194
+ a = query_xpath(path).flatten.compact.map(&:to_i)
195
+ a.max
196
+ end
197
+
198
+ def name()
199
+ if @rexle then
200
+ prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] } if @rexle.prefixes.is_a? Array
201
+ prefix ? @name.sub(prefix + ':', '') : @name
202
+ else
203
+ @name
204
+ end
205
+ end
206
+
207
+ def xpath(path, rlist=[], &blk)
208
+ r = filter_xpath(path, rlist=[], &blk)
209
+ r.is_a?(Array) ? r.compact : r
210
+ end
211
+
212
+ def filter_xpath(path, rlist=[], &blk)
128
213
 
129
- raw_path, raw_condition = xpath_value.sub(/^\/(?!\/)/,'').match(/([^\[]+)(\[[^\]]+\])?/).captures
130
- remaining_path = ($').to_s
131
- a_path = raw_path.split('/')
214
+ # is it a function
215
+ fn_match = path.match(/^(\w+)\(["']?([^\)]*)["']?\)$/)
216
+
217
+ # Array: proc {|x| x.flatten.compact},
218
+ if (fn_match and fn_match.captures.first[/^(attribute|@)/]) or fn_match.nil? then
219
+ procs = {
220
+ Array: proc {|x| block_given? ? x : x.flatten.uniq },
221
+ String: proc {|x| x},
222
+ Hash: proc {|x| x},
223
+ TrueClass: proc{|x| x},
224
+ FalseClass: proc{|x| x},
225
+ :"Rexle::Element" => proc {|x| [x]}
226
+ }
227
+ bucket = []
228
+ raw_results = path.split('|').map do |xp|
229
+ query_xpath(xp, bucket, &blk)
230
+ end
231
+
232
+ #results = raw_results.inject(&:+)
233
+ results = raw_results.last
234
+ procs[results.class.to_s.to_sym].call(results) if results
235
+
236
+ else
237
+ m, xpath_value = fn_match.captures
238
+ xpath_value.empty? ? method(m.to_sym).call : method(m.to_sym).call(xpath_value)
239
+ end
240
+
241
+ end
242
+
243
+ def query_xpath(raw_xpath_value, rlist=[], &blk)
244
+
245
+ #remove any prefixes
246
+ #@rexle.prefixes.each {|x| xpath_value.sub!(x + ':','') }
247
+ flag_func = false
248
+
249
+ xpath_value = raw_xpath_value.sub('child::','./')
250
+ #xpath_value.sub!(/\.\/(?=[\/])/,'')
251
+
252
+ if xpath_value[/^[\w\/]+\s*=.*/] then
253
+ flag_func = true
254
+
255
+ xpath_value.sub!(/^\w+\s*=.*/,'.[\0]')
256
+ xpath_value.sub!(/\/([\w]+\s*=.*)/,'[\1]')
132
257
 
258
+ #result = self.element xpath_value
259
+ #return [(result.is_a?(Rexle::Element) ? true : false)]
260
+ end
261
+
262
+ #xpath_value.sub!(/^attribute::/,'*/attribute::')
263
+ raw_path, raw_condition = xpath_value.sub(/^\.?\/(?!\/)/,'')\
264
+ .match(/([^\[]+)(\[[^\]]+\])?/).captures
265
+
266
+ remaining_path = ($').to_s
267
+
268
+ r = raw_path[/([^\/]+)(?=\/\/)/,1]
269
+ if r then
270
+ a_path = raw_path.split(/(?=\/\/)/,2)
271
+ else
272
+ a_path = raw_path.split('/',2)
273
+ end
274
+
133
275
  condition = raw_condition if a_path.length <= 1
134
276
 
135
277
  if raw_path[0,2] == '//' then
136
- s = a_path[2] || ''
137
- condition = raw_condition
138
- elsif raw_path == 'text()' then
278
+ s = ''
279
+ elsif raw_path == 'text()'
139
280
  a_path.shift
140
281
  return @value
141
282
  else
142
- attribute = xpath_value[/^attribute::(.*)/,1]
143
- return @attributes[attribute.to_sym] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
144
-
283
+
284
+ attribute = xpath_value[/^(attribute::|@)(.*)/,2]
285
+
286
+ return @attributes if attribute == '*'
287
+ return [@attributes[attribute.to_sym]] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
145
288
  s = a_path.shift
146
289
  end
147
290
 
148
291
  # isolate the xpath to return just the path to the current element
149
- elmnt_path = s[/^([\w\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
150
- element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
151
292
 
293
+ elmnt_path = s[/^([\w:\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
294
+ element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
295
+
152
296
  if element_part then
153
297
  unless element_part[/^@/] then
154
- element_name = element_part
298
+ element_name = element_part[/^[\w:\*\.]+/]
155
299
  else
156
- condition = element_part
300
+
301
+ condition = xpath_value[/^\[/] ? xpath_value : element_part
157
302
  element_name = nil
158
303
  end
159
304
 
160
305
  end
161
306
 
307
+ #element_name ||= '*'
162
308
  raw_condition = '' if condition
309
+ attr_search = format_condition(condition) if condition and condition.length > 0
310
+
311
+ attr_search2 = xpath_value[/^\[(.*)\]$/,1]
312
+ if attr_search2 then
313
+ r4 = attribute_search(attr_search, self, self.attributes)
314
+ return r4
315
+ end
316
+
317
+ return_elements = []
163
318
 
164
- attr_search = format_condition(condition) if condition and condition.length > 0
319
+ if raw_path[0,2] == '//' then
165
320
 
166
- if raw_path[0,2] == '//'
167
- return_elements = scan_match(self, element_name, attr_search, condition, rlist)
168
- return (xpath_value[/text\(\)$/] ? return_elements.map(&:value) : return_elements)
169
- end
321
+ regex = /\[(\d+)\]/
322
+ n = xpath_value[regex,1]
323
+ xpath_value.slice!(regex)
324
+
325
+ rs = scan_match(self, xpath_value).flatten.compact
326
+ return n ? rs[n.to_i-1] : rs
327
+
328
+ elsif (raw_path == '.' or raw_path == self.name) and attr_search.nil? then
329
+ return [self]
330
+ else
331
+
332
+ return_elements = @child_lookup.map.with_index.select do |x|
333
+ (x[0][0] == element_name || element_name == '.') or \
334
+ (element_name == '*' && x[0].is_a?(Array))
335
+ end
170
336
 
171
- return_elements = @child_lookup.map.with_index.select do |x|
172
- x[0][0] == element_name or element_name == '*'
173
337
  end
174
-
338
+
175
339
  if return_elements.length > 0 then
176
340
 
177
341
  if (a_path + [remaining_path]).join.empty? then
@@ -180,10 +344,13 @@ class Rexle
180
344
  else
181
345
 
182
346
  rlist << return_elements.map.with_index do |x,i|
347
+
183
348
  rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(a_path.join('/') + raw_condition.to_s + remaining_path, &blk); (r || e) }
184
349
  next if rtn_element.nil? or (rtn_element.is_a? Array and rtn_element.empty?)
185
350
 
186
- if rtn_element.is_a? Array then
351
+ if rtn_element.is_a? Hash then
352
+ rtn_element
353
+ elsif rtn_element.is_a? Array then
187
354
  rtn_element
188
355
  elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) and not(rtn_element[0].is_a? String))
189
356
  rtn_element
@@ -192,15 +359,17 @@ class Rexle
192
359
  end
193
360
  end
194
361
  #
362
+
195
363
  rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
196
364
 
197
365
  end
366
+
198
367
  rlist.compact! if rlist.is_a? Array
199
368
 
200
369
  else
201
370
 
202
371
  # strip off the 1st element from the XPath
203
- new_xpath = xpath_value[/^\/\/\w+\/(.*)/,1]
372
+ new_xpath = xpath_value[/^\/\/[\w:]+\/(.*)/,1]
204
373
 
205
374
  if new_xpath then
206
375
  self.xpath(new_xpath + raw_condition.to_s + remaining_path, rlist,&blk)
@@ -209,19 +378,31 @@ class Rexle
209
378
 
210
379
  rlist = rlist.flatten(1) unless not(rlist.is_a? Array) or (rlist.length > 1 and rlist[0].is_a? Array)
211
380
  rlist = [rlist] if rlist.is_a? Rexle::Element
381
+ rlist = (rlist.length > 0 ? true : false) if flag_func == true
212
382
  rlist
213
383
  end
214
384
 
215
385
  def add_element(item)
216
- @child_lookup << [item.name, item.attributes, item.value]
217
- @child_elements << item
218
- # add a reference from this element (the parent) to the child
219
- item.parent = self
220
- item
386
+ if item.is_a? Rexle::Element then
387
+ @child_lookup << [item.name, item.attributes, item.value]
388
+ @child_elements << item
389
+ # add a reference from this element (the parent) to the child
390
+ item.parent = self
391
+ item
392
+ elsif item.is_a? String then
393
+ @child_lookup << item
394
+ @child_elements << item
395
+ elsif item.is_a? Rexle then
396
+ self.add_element(item.root)
397
+ end
221
398
  end
222
399
 
223
400
  def inspect()
401
+ if self.xml.length > 30 then
224
402
  "%s ... </>" % self.xml[/<[^>]+>/]
403
+ else
404
+ self.xml
405
+ end
225
406
  end
226
407
 
227
408
  alias add add_element
@@ -240,11 +421,29 @@ class Rexle
240
421
  end
241
422
 
242
423
  def add_text(s) @value = s; self end
243
- def attribute(key) @attributes[key.to_sym] end
424
+
425
+ def attribute(key)
426
+ key = key.to_sym if key.is_a? String
427
+ @attributes[key].gsub('&lt;','<').gsub('&gt;','>')
428
+ end
429
+
244
430
  def attributes() @attributes end
245
- def children() @child_elements end
246
- def children=(a) @child_elements = a end
247
-
431
+
432
+ def children()
433
+ return unless @value
434
+ r = (@value.empty? ? [] : [@value]) + @child_elements
435
+ def r.is_an_empty_string?()
436
+ self.length == 1 and self.first == ''
437
+ end
438
+
439
+ return r
440
+ end
441
+
442
+ def children=(a) @child_elements = a end
443
+
444
+ def deep_clone() Rexle.new(self.xml).root end
445
+ def clone() Element.new(@name, @value, @attributes) end
446
+
248
447
  def delete(obj=nil)
249
448
  if obj then
250
449
  i = @child_elements.index(obj)
@@ -254,17 +453,30 @@ class Rexle
254
453
  end
255
454
  end
256
455
 
257
- def element(s) self.xpath(s).first end
456
+ def element(s)
457
+ r = self.xpath(s)
458
+ r.is_a?(Array) ? r.first : r
459
+ end
258
460
 
259
461
  def elements(s=nil)
260
462
  procs = {
261
- NilClass: proc {Elements.new(@child_elements)},
463
+ NilClass: proc {Elements.new(@child_elements.select{|x| x.is_a? Rexle::Element })},
262
464
  String: proc {|x| @child_elements[x]}
263
465
  }
466
+
264
467
  procs[s.class.to_s.to_sym].call(s)
265
468
  end
266
469
 
267
- def root() self end
470
+ def doc_root() @rexle.root end
471
+ def each(&blk)
472
+ @child_elements.each(&blk) #unless @child_elements.empty?
473
+ end
474
+ def has_elements?() !self.elements.empty? end
475
+
476
+ def insert_after(node) insert(node, 1) end
477
+ def insert_before(node) insert(node) end
478
+
479
+ def root() self end #@rexle.root end
268
480
 
269
481
  def text(s='')
270
482
 
@@ -274,6 +486,7 @@ class Rexle
274
486
  e = self.element(s)
275
487
  result = e.value if e
276
488
  end
489
+ result = CGI.unescape_html result.to_s
277
490
 
278
491
  def result.unescape()
279
492
  s = self.clone
@@ -283,10 +496,14 @@ class Rexle
283
496
 
284
497
  result
285
498
  end
499
+
500
+ def texts()
501
+ [@value] + @child_elements.select {|x| x.is_a? String}
502
+ end
286
503
 
287
504
  def value=(raw_s)
288
505
 
289
- @value = raw_s.to_s.clone
506
+ @value = String.new(raw_s.to_s.clone)
290
507
  escape_chars = %w(& &amp; < &lt; > &gt;).each_slice(2).to_a
291
508
  escape_chars.each{|x| @value.gsub!(*x)}
292
509
 
@@ -306,17 +523,27 @@ class Rexle
306
523
  method(msg).call(self.children)
307
524
  end
308
525
 
526
+ alias to_s xml
309
527
 
310
528
  private
529
+
530
+ def insert(node,offset=0)
531
+ i = parent.child_elements.index(self)
532
+ return unless i
533
+ parent.child_elements.insert(i+offset,node)
534
+ parent.child_lookup.insert(i+offset, [node.name, node.attributes, node.value])
535
+ self
536
+ end
311
537
 
312
538
  def format_condition(condition)
313
- #raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=]+|[@\w\.\/]+/)
314
- raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/]+/)
539
+
540
+ raw_items = condition[1..-1].scan(/\'[^\']*\'|\"[^\"]*\"|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/&;]+/)
315
541
 
316
542
  if raw_items[0][/^\d+$/] then
317
543
  return raw_items[0].to_i
318
544
  elsif raw_items[0] == 'position()' then
319
- return "i %s %s" % raw_items[1..-1]
545
+ rrr = "i %s %s" % [raw_items[1].gsub('&lt;','<').gsub('&gt;','>'), raw_items[-1]]
546
+ return rrr
320
547
  else
321
548
 
322
549
  andor_items = raw_items.map.with_index.select{|x,i| x[/\band\b|\bor\b/]}.map{|x| [x.last, x.last + 1]}.flatten
@@ -331,9 +558,10 @@ class Rexle
331
558
 
332
559
  if x.length >= 3 then
333
560
  x[1] = '==' if x[1] == '='
334
- "h[:%s] %s %s" % x
561
+ "h[:'%s'] %s %s" % x
335
562
  else
336
- x
563
+
564
+ x.join[/^(and|or)$/] ? x : ("h[:'%s']" % x)
337
565
  end
338
566
  end
339
567
 
@@ -344,6 +572,7 @@ class Rexle
344
572
  items = cons_items.map do |x|
345
573
 
346
574
  if x.length >= 3 then
575
+
347
576
  x[1] = '==' if x[1] == '='
348
577
  if x[0] != '.' then
349
578
  if x[0][/\//] then
@@ -351,7 +580,7 @@ class Rexle
351
580
 
352
581
  "e.xpath('#{path}').first.value == #{value}"
353
582
  else
354
- "(name == '%s' and value %s %s)" % [x[0], x[1], x[2]]
583
+ "(name == '%s' and value %s '%s')" % [x[0], x[1], x[2].sub(/^['"](.*)['"]$/,'\1')]
355
584
  end
356
585
  else
357
586
  "e.value %s %s" % [x[1], x[2]]
@@ -360,7 +589,7 @@ class Rexle
360
589
  x
361
590
  end
362
591
  end
363
-
592
+
364
593
  return items.join(' ')
365
594
  end
366
595
  end
@@ -368,59 +597,47 @@ class Rexle
368
597
 
369
598
  end
370
599
 
371
- def scan_match(nodes, element, attr_search, condition, rlist)
600
+
601
+ def scan_match(node, path)
372
602
 
373
- nodes.children.each.with_index do |x, i|
603
+ r = []
604
+ xpath2 = path[2..-1]
605
+ xpath2.sub!(/^\*\//,'')
606
+ xpath2.sub!(/^\*/,self.name)
607
+ xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
608
+
374
609
 
375
- h = x.attributes
610
+ r << node.xpath(xpath2)
611
+ r << node.elements.map {|n| scan_match(n, path) if n.is_a? Rexle::Element}
612
+ r
613
+ end
376
614
 
615
+ # used by xpath function contains()
616
+ #
617
+ def scan_contents(node)
377
618
 
378
- if element and not(element.empty?) then
379
- if x.name == element
380
- if attr_search then
381
- rlist << x if h and eval(attr_search)
382
- else
383
- rlist << x
384
- end
385
- end
386
- else
619
+ a = []
620
+ a << node.text
387
621
 
388
- if condition[/^@/] then
389
- attribute = condition[/@(.*)/,1]
390
- if h and h.has_key? attribute.to_sym then
391
- rlist << h[attribute.to_sym]
392
- end
393
- else
394
- rlist << x if h and eval(attr_search)
395
- end
396
- end
397
-
398
- x.xpath('//' + element.to_s + condition.to_s, rlist) unless x.children.empty?
622
+ node.elements.each do |child|
623
+ a.concat scan_contents(child)
399
624
  end
400
- rlist
625
+ a
401
626
  end
402
-
627
+
628
+
403
629
  def filter(raw_element, i, attr_search, &blk)
404
630
 
405
631
  x = raw_element
406
632
  e = @child_elements[x.last]
407
- h = x[0][1] # <-- fetch the attributes
408
633
 
634
+ return unless e.is_a? Rexle::Element
635
+ name, value = e.name, e.value if e.is_a? Rexle::Element
636
+
637
+ h = x[0][1] # <-- fetch the attributes
638
+
409
639
  if attr_search then
410
- if attr_search.is_a? Fixnum then
411
- block_given? ? blk.call(e) : e if i == attr_search
412
- elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
413
- block_given? ? blk.call(e) : e
414
- elsif h and attr_search[/^h\[/] and eval(attr_search)
415
- block_given? ? blk.call(e) : e
416
- elsif attr_search[/^\(name ==/] and \
417
- e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
418
- block_given? ? blk.call(e) : e
419
- elsif attr_search[/^e\.value/] and eval(attr_search)
420
- block_given? ? blk.call(e) : e
421
- elsif attr_search[/^e\.xpath/] and eval(attr_search)
422
- block_given? ? blk.call(e) : e
423
- end
640
+ attribute_search(attr_search,e, h, i, &blk)
424
641
  else
425
642
 
426
643
  block_given? ? blk.call(e) : e
@@ -428,9 +645,28 @@ class Rexle
428
645
 
429
646
  end
430
647
 
648
+ def attribute_search(attr_search, e, h, i=nil, &blk)
649
+ if attr_search.is_a? Fixnum then
650
+ block_given? ? blk.call(e) : e if i == attr_search
651
+ elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
652
+ block_given? ? blk.call(e) : e
653
+ elsif h and attr_search[/^h\[/] and eval(attr_search)
654
+ block_given? ? blk.call(e) : e
655
+ elsif attr_search[/^\(name ==/] and e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
656
+ block_given? ? blk.call(e) : e
657
+ elsif attr_search[/^\(name ==/] and eval(attr_search)
658
+ block_given? ? blk.call(e) : e
659
+ elsif attr_search[/^e\.value/] and eval(attr_search)
660
+ block_given? ? blk.call(e) : e
661
+ elsif attr_search[/^e\.xpath/] and eval(attr_search)
662
+ block_given? ? blk.call(e) : e
663
+ end
664
+ end
431
665
  end # -- end of element --
432
666
 
433
667
  class Elements
668
+ include Enumerable
669
+
434
670
  def initialize(elements=[])
435
671
  super()
436
672
  @elements = elements
@@ -439,6 +675,10 @@ class Rexle
439
675
  def [](i)
440
676
  @elements[i-1]
441
677
  end
678
+
679
+ def each(&blk) @elements.each(&blk) end
680
+ def to_a() @elements end
681
+
442
682
  end # -- end of elements --
443
683
 
444
684
 
@@ -448,16 +688,17 @@ class Rexle
448
688
 
449
689
  if x then
450
690
  procs = {
451
- String: proc {|x| parse_string(x)},
452
- Array: proc {|x| x},
453
- :"REXML::Document" => proc {|x| scan_doc x.root}
691
+ String: proc {|x| parse_string(x)},
692
+ Array: proc {|x| x},
693
+ :"REXML::Document" => proc {|x| scan_doc x.root}
454
694
  }
455
695
  a = procs[x.class.to_s.to_sym].call(x)
456
696
  else
457
697
  a = yield
458
698
  end
459
-
460
- @doc = scan_element(*a)
699
+ doc_node = ['doc','',{}]
700
+ @a = procs[x.class.to_s.to_sym].call(x)
701
+ @doc = scan_element(*(doc_node << @a))
461
702
  self
462
703
  end
463
704
 
@@ -465,15 +706,22 @@ class Rexle
465
706
  def attribute(key) @doc.attribute(key) end
466
707
  def attributes() @doc.attributes end
467
708
  def add_element(element) @doc.root.add_element(element) end
709
+ def add_text(s) end
468
710
 
469
711
  alias add add_element
470
712
 
471
- def delete(xpath) @doc.element(xpath).delete end
472
- def element(xpath) @doc.element(xpath) end
713
+ def delete(xpath)
714
+ e = @doc.element(xpath)
715
+ e.delete if e
716
+ end
717
+
718
+ def element(xpath) self.xpath(xpath).first end
473
719
  def elements(s=nil) @doc.elements(s) end
720
+ def name() @doc.root.name end
721
+ def to_a() @a end
474
722
  def to_s(options={}) self.xml options end
475
723
  def text(xpath) @doc.text(xpath) end
476
- def root() @doc end
724
+ def root() @doc.elements.first end
477
725
 
478
726
  def write(f)
479
727
  f.write xml
@@ -482,9 +730,11 @@ class Rexle
482
730
  def xml(options={})
483
731
  o = {pretty: false, declaration: true}.merge(options)
484
732
  msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
733
+
485
734
  r = ''
486
735
  r = "<?xml version='1.0' encoding='UTF-8'?>\n" if o[:declaration] == true
487
736
  r << method(msg).call(self.root.children)
737
+
488
738
  r
489
739
  end
490
740
 
@@ -501,26 +751,37 @@ class Rexle
501
751
  if recordx_type then
502
752
  procs = {
503
753
  'dynarex' => proc {|x| DynarexParser.new(x).to_a},
504
- 'polyrex' => proc {|x| PolyrexParser.new(x).to_a}
754
+ 'polyrex' => proc {|x| PolyrexParser.new(x).to_a},
755
+ 'polyrex' => proc {|x| RexleParser.new(x).to_a}
505
756
  }
506
757
  procs[recordx_type].call(x)
507
758
  else
759
+
508
760
  RexleParser.new(x).to_a
509
761
  end
510
762
  else
763
+
511
764
  RexleParser.new(x).to_a
512
765
  end
513
766
 
514
767
  end
768
+
769
+ def scan_element(name, value=nil, attributes=nil, *children)
515
770
 
516
- def scan_element(name, value, attributes, *children)
517
- element = Element.new(name, value, attributes)
518
- children.each{|x| element.add_element scan_element(*x)} if children
771
+ element = Element.new(name, value, attributes, self)
772
+
773
+ if children then
774
+ children.each do |x|
775
+ if x.is_a? Array then
776
+ element.add_element scan_element(*x)
777
+ elsif x.is_a? String
778
+ element.add_element x
779
+ end
780
+ end
781
+ end
519
782
  return element
520
783
  end
521
784
 
522
- def count(path) @doc.xpath(path).flatten.compact.length end
523
- def max(path) @doc.xpath(path).map(&:to_i).max end
524
785
 
525
786
  # scan a rexml doc
526
787
  #
metadata CHANGED
@@ -1,57 +1,61 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexle
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.4
4
+ prerelease:
5
+ version: 0.9.5
5
6
  platform: ruby
6
- authors: []
7
-
7
+ authors:
8
+ - James Robertson
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
12
 
12
- date: 2011-02-11 00:00:00 +00:00
13
- default_executable:
13
+ date: 2012-10-20 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rexleparser
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
- version:
24
+ type: :runtime
25
+ version_requirements: *id001
25
26
  - !ruby/object:Gem::Dependency
26
27
  name: dynarex-parser
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
30
31
  requirements:
31
32
  - - ">="
32
33
  - !ruby/object:Gem::Version
33
34
  version: "0"
34
- version:
35
+ type: :runtime
36
+ version_requirements: *id002
35
37
  - !ruby/object:Gem::Dependency
36
38
  name: polyrex-parser
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
40
42
  requirements:
41
43
  - - ">="
42
44
  - !ruby/object:Gem::Version
43
45
  version: "0"
44
- version:
45
- - !ruby/object:Gem::Dependency
46
- name: nokogiri
47
46
  type: :runtime
48
- version_requirement:
49
- version_requirements: !ruby/object:Gem::Requirement
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: rexle-builder
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
50
53
  requirements:
51
54
  - - ">="
52
55
  - !ruby/object:Gem::Version
53
56
  version: "0"
54
- version:
57
+ type: :runtime
58
+ version_requirements: *id004
55
59
  description:
56
60
  email:
57
61
  executables: []
@@ -62,7 +66,6 @@ extra_rdoc_files: []
62
66
 
63
67
  files:
64
68
  - lib/rexle.rb
65
- has_rdoc: true
66
69
  homepage:
67
70
  licenses: []
68
71
 
@@ -72,23 +75,23 @@ rdoc_options: []
72
75
  require_paths:
73
76
  - lib
74
77
  required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
75
79
  requirements:
76
80
  - - ">="
77
81
  - !ruby/object:Gem::Version
78
82
  version: "0"
79
- version:
80
83
  required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
81
85
  requirements:
82
86
  - - ">="
83
87
  - !ruby/object:Gem::Version
84
88
  version: "0"
85
- version:
86
89
  requirements: []
87
90
 
88
91
  rubyforge_project:
89
- rubygems_version: 1.3.5
92
+ rubygems_version: 1.8.23
90
93
  signing_key:
91
94
  specification_version: 3
92
- summary: rexle
95
+ summary: Rexle is a simple XML parser written purely in Ruby
93
96
  test_files: []
94
97