rexle 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/rexle.rb +418 -157
  2. metadata +30 -27
data/lib/rexle.rb CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  # file: rexle.rb
4
4
 
@@ -6,20 +6,48 @@ require 'rexml/document'
6
6
  require 'rexleparser'
7
7
  require 'dynarex-parser'
8
8
  require 'polyrex-parser'
9
+ require 'cgi'
9
10
  include REXML
10
11
 
12
+ # modifications:
13
+ # 20-Oct-2012: feature: added Rexle::Element#texts which is the equivalent
14
+ # of REXML::Element#texts
15
+ # 10-Sep-2012: bug fix: Removed code from method pretty_print in order to
16
+ # get the XML displayed properly
17
+ # 23-Aug-2012: feature: implemented xpath function contains()
18
+ # 17-Aug-2012: bug fix: pretty print now ignores text containing empty space
19
+ # 16-Aug-2012: the current element's text (if it's not empty) is now returned
20
+ # from its children method
21
+ # 15-Aug-2012: feature: xpath containing child:: now supported
22
+ # 13-Aug-2012: bug fix: xpath can now handle the name() function
23
+ # 11-Aug-2012: bug fix: separated the max() method from 1 line into 3
24
+ # and that fixed it
25
+ # 08-Aug-2012: feature: added Element#insert_before and Element#insert_after
26
+ # 19-Jul-2012: Changed children to elements where appropriate
27
+ # 15-Jul-2012: bug fix: self.root.value is no longer appended
28
+ # to the body if there are no child elements
29
+ # 19-Jun-2012: a bug fix for .//*[@class]
30
+ # 17-Jun-2012: a couple of new xpath things are supported '.' and '|'
31
+ # 15-Apr-2012: bug fix: New element names are typecast as string
32
+ # 16-Mar-2012: bug fix: Element names which contain a colon can now be selected
33
+ # in the xpath.
34
+ # 22-Feb-2012: bug resolution: Deactivated the PolyrexParser; using RexleParser instead
35
+ # 14-Jan-2012: Implemented Rexle::Elements#each
36
+ # 21-Dec-2011: Bug fix: xpath modified to allow querying from the actual
37
+ # root rather than the 1st child element from the root
38
+
11
39
  module XMLhelper
12
40
 
13
41
  def doc_print(children)
14
-
15
- body = children.empty? ? self.root.value : scan_print(children).join
42
+
43
+ body = (children.nil? or children.empty? or children.is_an_empty_string? ) ? '' : scan_print(children).join
16
44
  a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
17
45
  "<%s%s>%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), body, self.root.name]
18
46
  end
19
47
 
20
48
  def doc_pretty_print(children)
21
49
 
22
- body = children.empty? ? self.value : pretty_print(children,2).join
50
+ body = pretty_print(children,2).join
23
51
  a = self.root.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
24
52
  ind = "\n "
25
53
  "<%s%s>%s%s%s</%s>" % [self.root.name, a.empty? ? '' : ' ' + a.join(' '), ind, body, "\n", self.root.name]
@@ -28,16 +56,27 @@ module XMLhelper
28
56
  def scan_print(nodes)
29
57
 
30
58
  nodes.map do |x|
31
- unless x.name == '![' then
32
- a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
33
- tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
34
-
35
- out = ["<%s>" % tag]
36
- out << x.value unless x.value.nil? || x.value.empty?
37
- out << scan_print(x.children)
38
- out << "</%s>" % x.name
39
- else
40
- "<![CDATA[%s]]>" % x.value
59
+
60
+ if x.is_a? Rexle::Element then
61
+ if x.name.chr != '!' then
62
+ a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
63
+ tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
64
+
65
+ if x.value.length > 0 or (x.children.length > 0 and not x.children.is_an_empty_string?) then
66
+ out = ["<%s>" % tag]
67
+ #out << x.value unless x.value.nil? || x.value.empty?
68
+ out << scan_print(x.children)
69
+ out << "</%s>" % x.name
70
+ else
71
+ out = ["<%s/>" % tag]
72
+ end
73
+ elsif x.name == '!-' then
74
+ "<!--%s-->" % x.value
75
+ else
76
+ "<![CDATA[%s]]>" % x.value
77
+ end
78
+ elsif x.is_a? String then
79
+ x
41
80
  end
42
81
  end
43
82
 
@@ -45,21 +84,30 @@ module XMLhelper
45
84
 
46
85
  def pretty_print(nodes, indent='0')
47
86
  indent = indent.to_i
48
- nodes.map.with_index do |x, i|
49
- unless x.name == '![' then
50
- a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
51
- tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
52
-
53
- ind1 = x.children.length > 0 ? ("\n" + ' ' * indent) : ''
54
- start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
55
- out = ["%s<%s>%s" % [start, tag, ind1]]
56
-
57
- out << x.value.sub(/^[\n\s]+$/,'') unless x.value.nil? || x.value.empty?
58
- out << pretty_print(x.children, (indent + 1).to_s.clone)
59
- ind2 = x.children.length > 0 ? ("\n" + ' ' * (indent - 1)) : ''
60
- out << "%s</%s>" % [ind2, x.name]
61
- else
62
- "<![CDATA[%s]]>" % x.value
87
+
88
+ nodes.select(){|x| x.is_a? Rexle::Element or x.strip.length > 0}
89
+ .map.with_index do |x, i|
90
+
91
+ if x.is_a? Rexle::Element then
92
+ unless x.name == '![' then
93
+ #return ["<%s/>" % x.name] if x.value = ''
94
+ a = x.attributes.to_a.map{|k,v| "%s='%s'" % [k,v]}
95
+ a ||= []
96
+ tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
97
+
98
+ start = i > 0 ? ("\n" + ' ' * (indent - 1)) : ''
99
+ ind1 = x.children.grep(Rexle::Element).length > 0 ?
100
+ ("\n" + ' ' * indent) : ''
101
+ out = ["%s<%s>%s" % [start, tag, ind1]]
102
+
103
+ out << pretty_print(x.children, (indent + 1).to_s.clone)
104
+ ind2 = ind1.length > 0 ? ("\n" + ' ' * (indent - 1)) : ''
105
+ out << "%s</%s>" % [ind2, x.name]
106
+ else
107
+ "<![CDATA[%s]]>" % x.value
108
+ end
109
+ elsif x.is_a? String then
110
+ x.sub(/^[\n\s]+$/,'')
63
111
  end
64
112
  end
65
113
 
@@ -70,108 +118,224 @@ end
70
118
  class Rexle
71
119
  include XMLhelper
72
120
 
121
+ attr_reader :prefixes
122
+
123
+ def self.version()
124
+ '0.9.xx'
125
+ end
126
+
73
127
  def initialize(x=nil)
74
128
  super()
75
129
 
130
+ # what type of input is it? Is it a string, array, or REXML doc?
76
131
  if x then
77
132
  procs = {
78
- String: proc {|x| parse_string(x)},
79
- Array: proc {|x| x},
80
- :"REXML::Document" => proc {|x| scan_doc x.root}
133
+ String: proc {|x| parse_string(x)},
134
+ Array: proc {|x| x},
135
+ :"REXML::Document" => proc {|x| scan_doc x.root}
81
136
  }
137
+
138
+ doc_node = ['doc','',{}]
139
+
82
140
 
83
- a = procs[x.class.to_s.to_sym].call(x)
84
- @doc = scan_element(*a)
85
- end
86
-
87
- end
88
-
89
- def xpath(path, &blk)
90
-
91
- # is it a function
92
- fn_match = path.match(/^(\w+)\(([^\)]+)\)$/)
141
+ @a = procs[x.class.to_s.to_sym].call(x)
142
+ @doc = scan_element(*(doc_node << @a))
143
+
144
+ # fetch the namespaces
145
+ @prefixes = []
146
+ if @doc.root.attributes then
93
147
 
94
- # Array: proc {|x| x.flatten.compact},
95
- if fn_match.nil? then
96
- procs = {
97
- Array: proc {|x| block_given? ? x : x.flatten },
98
- String: proc {|x| x},
99
- :"Rexle::Element" => proc {|x| [x]}
100
- }
101
- bucket = []
102
- result = @doc.xpath(path, bucket, &blk)
148
+ xmlns = @doc.root.attributes.select {|k,v| k[/^xmlns:/]}
149
+ @prefixes = xmlns.keys.map{|x| x[/\w+$/]}
150
+ end
103
151
 
104
- procs[result.class.to_s.to_sym].call(result)
105
-
106
- else
107
- m, xpath_value = fn_match.captures
108
- method(m.to_sym).call(xpath_value)
109
152
  end
110
153
 
111
154
  end
155
+
156
+ def xpath(path, &blk)
157
+ @doc.xpath(path, &blk)
158
+ end
112
159
 
113
160
  class Element
114
161
  include XMLhelper
115
162
 
116
163
  attr_accessor :name, :value, :parent
117
- attr_reader :child_lookup
164
+ attr_reader :child_lookup, :child_elements
165
+
166
+ alias original_clone clone
118
167
 
119
- def initialize(name=nil, value='', attributes={})
168
+ def initialize(name=nil, value='', attributes={}, rexle=nil)
169
+ @rexle = rexle
120
170
  super()
121
- @name, @value, @attributes = name, value, attributes
171
+ @name, @value, @attributes = name.to_s, value, attributes
122
172
  raise "Element name must not be blank" unless name
123
173
  @child_elements = []
124
174
  @child_lookup = []
125
175
  end
126
176
 
127
- def xpath(xpath_value, rlist=[], &blk)
177
+ def contains(raw_args)
178
+ path, raw_val = raw_args.split(',',2)
179
+ val = raw_val.strip[/^["']?.*["']?$/]
180
+
181
+ anode = query_xpath(path)
182
+ return unless anode
183
+ a = scan_contents(anode.first)
184
+
185
+ [a.grep(/#{val}/).length > 0]
186
+ end
187
+
188
+ def count(path)
189
+ length = query_xpath(path).flatten.compact.length
190
+ length
191
+ end
192
+
193
+ def max(path)
194
+ a = query_xpath(path).flatten.compact.map(&:to_i)
195
+ a.max
196
+ end
197
+
198
+ def name()
199
+ if @rexle then
200
+ prefix = @rexle.prefixes.find {|x| x == @name[/^(\w+):/,1] } if @rexle.prefixes.is_a? Array
201
+ prefix ? @name.sub(prefix + ':', '') : @name
202
+ else
203
+ @name
204
+ end
205
+ end
206
+
207
+ def xpath(path, rlist=[], &blk)
208
+ r = filter_xpath(path, rlist=[], &blk)
209
+ r.is_a?(Array) ? r.compact : r
210
+ end
211
+
212
+ def filter_xpath(path, rlist=[], &blk)
128
213
 
129
- raw_path, raw_condition = xpath_value.sub(/^\/(?!\/)/,'').match(/([^\[]+)(\[[^\]]+\])?/).captures
130
- remaining_path = ($').to_s
131
- a_path = raw_path.split('/')
214
+ # is it a function
215
+ fn_match = path.match(/^(\w+)\(["']?([^\)]*)["']?\)$/)
216
+
217
+ # Array: proc {|x| x.flatten.compact},
218
+ if (fn_match and fn_match.captures.first[/^(attribute|@)/]) or fn_match.nil? then
219
+ procs = {
220
+ Array: proc {|x| block_given? ? x : x.flatten.uniq },
221
+ String: proc {|x| x},
222
+ Hash: proc {|x| x},
223
+ TrueClass: proc{|x| x},
224
+ FalseClass: proc{|x| x},
225
+ :"Rexle::Element" => proc {|x| [x]}
226
+ }
227
+ bucket = []
228
+ raw_results = path.split('|').map do |xp|
229
+ query_xpath(xp, bucket, &blk)
230
+ end
231
+
232
+ #results = raw_results.inject(&:+)
233
+ results = raw_results.last
234
+ procs[results.class.to_s.to_sym].call(results) if results
235
+
236
+ else
237
+ m, xpath_value = fn_match.captures
238
+ xpath_value.empty? ? method(m.to_sym).call : method(m.to_sym).call(xpath_value)
239
+ end
240
+
241
+ end
242
+
243
+ def query_xpath(raw_xpath_value, rlist=[], &blk)
244
+
245
+ # remove any prefixes
246
+ #@rexle.prefixes.each {|x| xpath_value.sub!(x + ':','') }
247
+ flag_func = false
248
+
249
+ xpath_value = raw_xpath_value.sub('child::','./')
250
+ #xpath_value.sub!(/\.\/(?=[\/])/,'')
251
+
252
+ if xpath_value[/^[\w\/]+\s*=.*/] then
253
+ flag_func = true
254
+
255
+ xpath_value.sub!(/^\w+\s*=.*/,'.[\0]')
256
+ xpath_value.sub!(/\/([\w]+\s*=.*)/,'[\1]')
132
257
 
258
+ #result = self.element xpath_value
259
+ #return [(result.is_a?(Rexle::Element) ? true : false)]
260
+ end
261
+
262
+ #xpath_value.sub!(/^attribute::/,'*/attribute::')
263
+ raw_path, raw_condition = xpath_value.sub(/^\.?\/(?!\/)/,'')\
264
+ .match(/([^\[]+)(\[[^\]]+\])?/).captures
265
+
266
+ remaining_path = ($').to_s
267
+
268
+ r = raw_path[/([^\/]+)(?=\/\/)/,1]
269
+ if r then
270
+ a_path = raw_path.split(/(?=\/\/)/,2)
271
+ else
272
+ a_path = raw_path.split('/',2)
273
+ end
274
+
133
275
  condition = raw_condition if a_path.length <= 1
134
276
 
135
277
  if raw_path[0,2] == '//' then
136
- s = a_path[2] || ''
137
- condition = raw_condition
138
- elsif raw_path == 'text()' then
278
+ s = ''
279
+ elsif raw_path == 'text()'
139
280
  a_path.shift
140
281
  return @value
141
282
  else
142
- attribute = xpath_value[/^attribute::(.*)/,1]
143
- return @attributes[attribute.to_sym] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
144
-
283
+
284
+ attribute = xpath_value[/^(attribute::|@)(.*)/,2]
285
+
286
+ return @attributes if attribute == '*'
287
+ return [@attributes[attribute.to_sym]] if attribute and @attributes and @attributes.has_key?(attribute.to_sym)
145
288
  s = a_path.shift
146
289
  end
147
290
 
148
291
  # isolate the xpath to return just the path to the current element
149
- elmnt_path = s[/^([\w\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
150
- element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
151
292
 
293
+ elmnt_path = s[/^([\w:\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/]
294
+ element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path
295
+
152
296
  if element_part then
153
297
  unless element_part[/^@/] then
154
- element_name = element_part
298
+ element_name = element_part[/^[\w:\*\.]+/]
155
299
  else
156
- condition = element_part
300
+
301
+ condition = xpath_value[/^\[/] ? xpath_value : element_part
157
302
  element_name = nil
158
303
  end
159
304
 
160
305
  end
161
306
 
307
+ #element_name ||= '*'
162
308
  raw_condition = '' if condition
309
+ attr_search = format_condition(condition) if condition and condition.length > 0
310
+
311
+ attr_search2 = xpath_value[/^\[(.*)\]$/,1]
312
+ if attr_search2 then
313
+ r4 = attribute_search(attr_search, self, self.attributes)
314
+ return r4
315
+ end
316
+
317
+ return_elements = []
163
318
 
164
- attr_search = format_condition(condition) if condition and condition.length > 0
319
+ if raw_path[0,2] == '//' then
165
320
 
166
- if raw_path[0,2] == '//'
167
- return_elements = scan_match(self, element_name, attr_search, condition, rlist)
168
- return (xpath_value[/text\(\)$/] ? return_elements.map(&:value) : return_elements)
169
- end
321
+ regex = /\[(\d+)\]/
322
+ n = xpath_value[regex,1]
323
+ xpath_value.slice!(regex)
324
+
325
+ rs = scan_match(self, xpath_value).flatten.compact
326
+ return n ? rs[n.to_i-1] : rs
327
+
328
+ elsif (raw_path == '.' or raw_path == self.name) and attr_search.nil? then
329
+ return [self]
330
+ else
331
+
332
+ return_elements = @child_lookup.map.with_index.select do |x|
333
+ (x[0][0] == element_name || element_name == '.') or \
334
+ (element_name == '*' && x[0].is_a?(Array))
335
+ end
170
336
 
171
- return_elements = @child_lookup.map.with_index.select do |x|
172
- x[0][0] == element_name or element_name == '*'
173
337
  end
174
-
338
+
175
339
  if return_elements.length > 0 then
176
340
 
177
341
  if (a_path + [remaining_path]).join.empty? then
@@ -180,10 +344,13 @@ class Rexle
180
344
  else
181
345
 
182
346
  rlist << return_elements.map.with_index do |x,i|
347
+
183
348
  rtn_element = filter(x, i+1, attr_search){|e| r = e.xpath(a_path.join('/') + raw_condition.to_s + remaining_path, &blk); (r || e) }
184
349
  next if rtn_element.nil? or (rtn_element.is_a? Array and rtn_element.empty?)
185
350
 
186
- if rtn_element.is_a? Array then
351
+ if rtn_element.is_a? Hash then
352
+ rtn_element
353
+ elsif rtn_element.is_a? Array then
187
354
  rtn_element
188
355
  elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) and not(rtn_element[0].is_a? String))
189
356
  rtn_element
@@ -192,15 +359,17 @@ class Rexle
192
359
  end
193
360
  end
194
361
  #
362
+
195
363
  rlist = rlist.flatten(1) unless rlist.length > 1 and rlist[0].is_a? Array
196
364
 
197
365
  end
366
+
198
367
  rlist.compact! if rlist.is_a? Array
199
368
 
200
369
  else
201
370
 
202
371
  # strip off the 1st element from the XPath
203
- new_xpath = xpath_value[/^\/\/\w+\/(.*)/,1]
372
+ new_xpath = xpath_value[/^\/\/[\w:]+\/(.*)/,1]
204
373
 
205
374
  if new_xpath then
206
375
  self.xpath(new_xpath + raw_condition.to_s + remaining_path, rlist,&blk)
@@ -209,19 +378,31 @@ class Rexle
209
378
 
210
379
  rlist = rlist.flatten(1) unless not(rlist.is_a? Array) or (rlist.length > 1 and rlist[0].is_a? Array)
211
380
  rlist = [rlist] if rlist.is_a? Rexle::Element
381
+ rlist = (rlist.length > 0 ? true : false) if flag_func == true
212
382
  rlist
213
383
  end
214
384
 
215
385
  def add_element(item)
216
- @child_lookup << [item.name, item.attributes, item.value]
217
- @child_elements << item
218
- # add a reference from this element (the parent) to the child
219
- item.parent = self
220
- item
386
+ if item.is_a? Rexle::Element then
387
+ @child_lookup << [item.name, item.attributes, item.value]
388
+ @child_elements << item
389
+ # add a reference from this element (the parent) to the child
390
+ item.parent = self
391
+ item
392
+ elsif item.is_a? String then
393
+ @child_lookup << item
394
+ @child_elements << item
395
+ elsif item.is_a? Rexle then
396
+ self.add_element(item.root)
397
+ end
221
398
  end
222
399
 
223
400
  def inspect()
401
+ if self.xml.length > 30 then
224
402
  "%s ... </>" % self.xml[/<[^>]+>/]
403
+ else
404
+ self.xml
405
+ end
225
406
  end
226
407
 
227
408
  alias add add_element
@@ -240,11 +421,29 @@ class Rexle
240
421
  end
241
422
 
242
423
  def add_text(s) @value = s; self end
243
- def attribute(key) @attributes[key.to_sym] end
424
+
425
+ def attribute(key)
426
+ key = key.to_sym if key.is_a? String
427
+ @attributes[key].gsub('&lt;','<').gsub('&gt;','>')
428
+ end
429
+
244
430
  def attributes() @attributes end
245
- def children() @child_elements end
246
- def children=(a) @child_elements = a end
247
-
431
+
432
+ def children()
433
+ return unless @value
434
+ r = (@value.empty? ? [] : [@value]) + @child_elements
435
+ def r.is_an_empty_string?()
436
+ self.length == 1 and self.first == ''
437
+ end
438
+
439
+ return r
440
+ end
441
+
442
+ def children=(a) @child_elements = a end
443
+
444
+ def deep_clone() Rexle.new(self.xml).root end
445
+ def clone() Element.new(@name, @value, @attributes) end
446
+
248
447
  def delete(obj=nil)
249
448
  if obj then
250
449
  i = @child_elements.index(obj)
@@ -254,17 +453,30 @@ class Rexle
254
453
  end
255
454
  end
256
455
 
257
- def element(s) self.xpath(s).first end
456
+ def element(s)
457
+ r = self.xpath(s)
458
+ r.is_a?(Array) ? r.first : r
459
+ end
258
460
 
259
461
  def elements(s=nil)
260
462
  procs = {
261
- NilClass: proc {Elements.new(@child_elements)},
463
+ NilClass: proc {Elements.new(@child_elements.select{|x| x.is_a? Rexle::Element })},
262
464
  String: proc {|x| @child_elements[x]}
263
465
  }
466
+
264
467
  procs[s.class.to_s.to_sym].call(s)
265
468
  end
266
469
 
267
- def root() self end
470
+ def doc_root() @rexle.root end
471
+ def each(&blk)
472
+ @child_elements.each(&blk) #unless @child_elements.empty?
473
+ end
474
+ def has_elements?() !self.elements.empty? end
475
+
476
+ def insert_after(node) insert(node, 1) end
477
+ def insert_before(node) insert(node) end
478
+
479
+ def root() self end #@rexle.root end
268
480
 
269
481
  def text(s='')
270
482
 
@@ -274,6 +486,7 @@ class Rexle
274
486
  e = self.element(s)
275
487
  result = e.value if e
276
488
  end
489
+ result = CGI.unescape_html result.to_s
277
490
 
278
491
  def result.unescape()
279
492
  s = self.clone
@@ -283,10 +496,14 @@ class Rexle
283
496
 
284
497
  result
285
498
  end
499
+
500
+ def texts()
501
+ [@value] + @child_elements.select {|x| x.is_a? String}
502
+ end
286
503
 
287
504
  def value=(raw_s)
288
505
 
289
- @value = raw_s.to_s.clone
506
+ @value = String.new(raw_s.to_s.clone)
290
507
  escape_chars = %w(& &amp; < &lt; > &gt;).each_slice(2).to_a
291
508
  escape_chars.each{|x| @value.gsub!(*x)}
292
509
 
@@ -306,17 +523,27 @@ class Rexle
306
523
  method(msg).call(self.children)
307
524
  end
308
525
 
526
+ alias to_s xml
309
527
 
310
528
  private
529
+
530
+ def insert(node,offset=0)
531
+ i = parent.child_elements.index(self)
532
+ return unless i
533
+ parent.child_elements.insert(i+offset,node)
534
+ parent.child_lookup.insert(i+offset, [node.name, node.attributes, node.value])
535
+ self
536
+ end
311
537
 
312
538
  def format_condition(condition)
313
- #raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=]+|[@\w\.\/]+/)
314
- raw_items = condition[1..-1].scan(/\'[^\']*\'|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/]+/)
539
+
540
+ raw_items = condition[1..-1].scan(/\'[^\']*\'|\"[^\"]*\"|and|or|\d+|[!=<>]+|position\(\)|[@\w\.\/&;]+/)
315
541
 
316
542
  if raw_items[0][/^\d+$/] then
317
543
  return raw_items[0].to_i
318
544
  elsif raw_items[0] == 'position()' then
319
- return "i %s %s" % raw_items[1..-1]
545
+ rrr = "i %s %s" % [raw_items[1].gsub('&lt;','<').gsub('&gt;','>'), raw_items[-1]]
546
+ return rrr
320
547
  else
321
548
 
322
549
  andor_items = raw_items.map.with_index.select{|x,i| x[/\band\b|\bor\b/]}.map{|x| [x.last, x.last + 1]}.flatten
@@ -331,9 +558,10 @@ class Rexle
331
558
 
332
559
  if x.length >= 3 then
333
560
  x[1] = '==' if x[1] == '='
334
- "h[:%s] %s %s" % x
561
+ "h[:'%s'] %s %s" % x
335
562
  else
336
- x
563
+
564
+ x.join[/^(and|or)$/] ? x : ("h[:'%s']" % x)
337
565
  end
338
566
  end
339
567
 
@@ -344,6 +572,7 @@ class Rexle
344
572
  items = cons_items.map do |x|
345
573
 
346
574
  if x.length >= 3 then
575
+
347
576
  x[1] = '==' if x[1] == '='
348
577
  if x[0] != '.' then
349
578
  if x[0][/\//] then
@@ -351,7 +580,7 @@ class Rexle
351
580
 
352
581
  "e.xpath('#{path}').first.value == #{value}"
353
582
  else
354
- "(name == '%s' and value %s %s)" % [x[0], x[1], x[2]]
583
+ "(name == '%s' and value %s '%s')" % [x[0], x[1], x[2].sub(/^['"](.*)['"]$/,'\1')]
355
584
  end
356
585
  else
357
586
  "e.value %s %s" % [x[1], x[2]]
@@ -360,7 +589,7 @@ class Rexle
360
589
  x
361
590
  end
362
591
  end
363
-
592
+
364
593
  return items.join(' ')
365
594
  end
366
595
  end
@@ -368,59 +597,47 @@ class Rexle
368
597
 
369
598
  end
370
599
 
371
- def scan_match(nodes, element, attr_search, condition, rlist)
600
+
601
+ def scan_match(node, path)
372
602
 
373
- nodes.children.each.with_index do |x, i|
603
+ r = []
604
+ xpath2 = path[2..-1]
605
+ xpath2.sub!(/^\*\//,'')
606
+ xpath2.sub!(/^\*/,self.name)
607
+ xpath2.sub!(/^\w+/,'').sub!(/^\//,'') if xpath2[/^\w+/] == self.name
608
+
374
609
 
375
- h = x.attributes
610
+ r << node.xpath(xpath2)
611
+ r << node.elements.map {|n| scan_match(n, path) if n.is_a? Rexle::Element}
612
+ r
613
+ end
376
614
 
615
+ # used by xpath function contains()
616
+ #
617
+ def scan_contents(node)
377
618
 
378
- if element and not(element.empty?) then
379
- if x.name == element
380
- if attr_search then
381
- rlist << x if h and eval(attr_search)
382
- else
383
- rlist << x
384
- end
385
- end
386
- else
619
+ a = []
620
+ a << node.text
387
621
 
388
- if condition[/^@/] then
389
- attribute = condition[/@(.*)/,1]
390
- if h and h.has_key? attribute.to_sym then
391
- rlist << h[attribute.to_sym]
392
- end
393
- else
394
- rlist << x if h and eval(attr_search)
395
- end
396
- end
397
-
398
- x.xpath('//' + element.to_s + condition.to_s, rlist) unless x.children.empty?
622
+ node.elements.each do |child|
623
+ a.concat scan_contents(child)
399
624
  end
400
- rlist
625
+ a
401
626
  end
402
-
627
+
628
+
403
629
  def filter(raw_element, i, attr_search, &blk)
404
630
 
405
631
  x = raw_element
406
632
  e = @child_elements[x.last]
407
- h = x[0][1] # <-- fetch the attributes
408
633
 
634
+ return unless e.is_a? Rexle::Element
635
+ name, value = e.name, e.value if e.is_a? Rexle::Element
636
+
637
+ h = x[0][1] # <-- fetch the attributes
638
+
409
639
  if attr_search then
410
- if attr_search.is_a? Fixnum then
411
- block_given? ? blk.call(e) : e if i == attr_search
412
- elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
413
- block_given? ? blk.call(e) : e
414
- elsif h and attr_search[/^h\[/] and eval(attr_search)
415
- block_given? ? blk.call(e) : e
416
- elsif attr_search[/^\(name ==/] and \
417
- e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
418
- block_given? ? blk.call(e) : e
419
- elsif attr_search[/^e\.value/] and eval(attr_search)
420
- block_given? ? blk.call(e) : e
421
- elsif attr_search[/^e\.xpath/] and eval(attr_search)
422
- block_given? ? blk.call(e) : e
423
- end
640
+ attribute_search(attr_search,e, h, i, &blk)
424
641
  else
425
642
 
426
643
  block_given? ? blk.call(e) : e
@@ -428,9 +645,28 @@ class Rexle
428
645
 
429
646
  end
430
647
 
648
+ def attribute_search(attr_search, e, h, i=nil, &blk)
649
+ if attr_search.is_a? Fixnum then
650
+ block_given? ? blk.call(e) : e if i == attr_search
651
+ elsif attr_search[/i\s[<>\=]\s\d+/] and eval(attr_search) then
652
+ block_given? ? blk.call(e) : e
653
+ elsif h and attr_search[/^h\[/] and eval(attr_search)
654
+ block_given? ? blk.call(e) : e
655
+ elsif attr_search[/^\(name ==/] and e.child_lookup.select{|name, attributes, value| eval(attr_search) }.length > 0
656
+ block_given? ? blk.call(e) : e
657
+ elsif attr_search[/^\(name ==/] and eval(attr_search)
658
+ block_given? ? blk.call(e) : e
659
+ elsif attr_search[/^e\.value/] and eval(attr_search)
660
+ block_given? ? blk.call(e) : e
661
+ elsif attr_search[/^e\.xpath/] and eval(attr_search)
662
+ block_given? ? blk.call(e) : e
663
+ end
664
+ end
431
665
  end # -- end of element --
432
666
 
433
667
  class Elements
668
+ include Enumerable
669
+
434
670
  def initialize(elements=[])
435
671
  super()
436
672
  @elements = elements
@@ -439,6 +675,10 @@ class Rexle
439
675
  def [](i)
440
676
  @elements[i-1]
441
677
  end
678
+
679
+ def each(&blk) @elements.each(&blk) end
680
+ def to_a() @elements end
681
+
442
682
  end # -- end of elements --
443
683
 
444
684
 
@@ -448,16 +688,17 @@ class Rexle
448
688
 
449
689
  if x then
450
690
  procs = {
451
- String: proc {|x| parse_string(x)},
452
- Array: proc {|x| x},
453
- :"REXML::Document" => proc {|x| scan_doc x.root}
691
+ String: proc {|x| parse_string(x)},
692
+ Array: proc {|x| x},
693
+ :"REXML::Document" => proc {|x| scan_doc x.root}
454
694
  }
455
695
  a = procs[x.class.to_s.to_sym].call(x)
456
696
  else
457
697
  a = yield
458
698
  end
459
-
460
- @doc = scan_element(*a)
699
+ doc_node = ['doc','',{}]
700
+ @a = procs[x.class.to_s.to_sym].call(x)
701
+ @doc = scan_element(*(doc_node << @a))
461
702
  self
462
703
  end
463
704
 
@@ -465,15 +706,22 @@ class Rexle
465
706
  def attribute(key) @doc.attribute(key) end
466
707
  def attributes() @doc.attributes end
467
708
  def add_element(element) @doc.root.add_element(element) end
709
+ def add_text(s) end
468
710
 
469
711
  alias add add_element
470
712
 
471
- def delete(xpath) @doc.element(xpath).delete end
472
- def element(xpath) @doc.element(xpath) end
713
+ def delete(xpath)
714
+ e = @doc.element(xpath)
715
+ e.delete if e
716
+ end
717
+
718
+ def element(xpath) self.xpath(xpath).first end
473
719
  def elements(s=nil) @doc.elements(s) end
720
+ def name() @doc.root.name end
721
+ def to_a() @a end
474
722
  def to_s(options={}) self.xml options end
475
723
  def text(xpath) @doc.text(xpath) end
476
- def root() @doc end
724
+ def root() @doc.elements.first end
477
725
 
478
726
  def write(f)
479
727
  f.write xml
@@ -482,9 +730,11 @@ class Rexle
482
730
  def xml(options={})
483
731
  o = {pretty: false, declaration: true}.merge(options)
484
732
  msg = o[:pretty] == false ? :doc_print : :doc_pretty_print
733
+
485
734
  r = ''
486
735
  r = "<?xml version='1.0' encoding='UTF-8'?>\n" if o[:declaration] == true
487
736
  r << method(msg).call(self.root.children)
737
+
488
738
  r
489
739
  end
490
740
 
@@ -501,26 +751,37 @@ class Rexle
501
751
  if recordx_type then
502
752
  procs = {
503
753
  'dynarex' => proc {|x| DynarexParser.new(x).to_a},
504
- 'polyrex' => proc {|x| PolyrexParser.new(x).to_a}
754
+ 'polyrex' => proc {|x| PolyrexParser.new(x).to_a},
755
+ 'polyrex' => proc {|x| RexleParser.new(x).to_a}
505
756
  }
506
757
  procs[recordx_type].call(x)
507
758
  else
759
+
508
760
  RexleParser.new(x).to_a
509
761
  end
510
762
  else
763
+
511
764
  RexleParser.new(x).to_a
512
765
  end
513
766
 
514
767
  end
768
+
769
+ def scan_element(name, value=nil, attributes=nil, *children)
515
770
 
516
- def scan_element(name, value, attributes, *children)
517
- element = Element.new(name, value, attributes)
518
- children.each{|x| element.add_element scan_element(*x)} if children
771
+ element = Element.new(name, value, attributes, self)
772
+
773
+ if children then
774
+ children.each do |x|
775
+ if x.is_a? Array then
776
+ element.add_element scan_element(*x)
777
+ elsif x.is_a? String
778
+ element.add_element x
779
+ end
780
+ end
781
+ end
519
782
  return element
520
783
  end
521
784
 
522
- def count(path) @doc.xpath(path).flatten.compact.length end
523
- def max(path) @doc.xpath(path).map(&:to_i).max end
524
785
 
525
786
  # scan a rexml doc
526
787
  #
metadata CHANGED
@@ -1,57 +1,61 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexle
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.4
4
+ prerelease:
5
+ version: 0.9.5
5
6
  platform: ruby
6
- authors: []
7
-
7
+ authors:
8
+ - James Robertson
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
12
 
12
- date: 2011-02-11 00:00:00 +00:00
13
- default_executable:
13
+ date: 2012-10-20 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rexleparser
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
- version:
24
+ type: :runtime
25
+ version_requirements: *id001
25
26
  - !ruby/object:Gem::Dependency
26
27
  name: dynarex-parser
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
30
31
  requirements:
31
32
  - - ">="
32
33
  - !ruby/object:Gem::Version
33
34
  version: "0"
34
- version:
35
+ type: :runtime
36
+ version_requirements: *id002
35
37
  - !ruby/object:Gem::Dependency
36
38
  name: polyrex-parser
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
40
42
  requirements:
41
43
  - - ">="
42
44
  - !ruby/object:Gem::Version
43
45
  version: "0"
44
- version:
45
- - !ruby/object:Gem::Dependency
46
- name: nokogiri
47
46
  type: :runtime
48
- version_requirement:
49
- version_requirements: !ruby/object:Gem::Requirement
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: rexle-builder
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
50
53
  requirements:
51
54
  - - ">="
52
55
  - !ruby/object:Gem::Version
53
56
  version: "0"
54
- version:
57
+ type: :runtime
58
+ version_requirements: *id004
55
59
  description:
56
60
  email:
57
61
  executables: []
@@ -62,7 +66,6 @@ extra_rdoc_files: []
62
66
 
63
67
  files:
64
68
  - lib/rexle.rb
65
- has_rdoc: true
66
69
  homepage:
67
70
  licenses: []
68
71
 
@@ -72,23 +75,23 @@ rdoc_options: []
72
75
  require_paths:
73
76
  - lib
74
77
  required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
75
79
  requirements:
76
80
  - - ">="
77
81
  - !ruby/object:Gem::Version
78
82
  version: "0"
79
- version:
80
83
  required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
81
85
  requirements:
82
86
  - - ">="
83
87
  - !ruby/object:Gem::Version
84
88
  version: "0"
85
- version:
86
89
  requirements: []
87
90
 
88
91
  rubyforge_project:
89
- rubygems_version: 1.3.5
92
+ rubygems_version: 1.8.23
90
93
  signing_key:
91
94
  specification_version: 3
92
- summary: rexle
95
+ summary: Rexle is a simple XML parser written purely in Ruby
93
96
  test_files: []
94
97