oga 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '0.1.3'
2
+ VERSION = '0.2.0'
3
3
  end # Oga
@@ -87,7 +87,9 @@ module Oga
87
87
  full_name = name
88
88
  end
89
89
 
90
- return %Q(#{full_name}="#{value}")
90
+ enc_value = value ? Entities.encode(value) : nil
91
+
92
+ return %Q(#{full_name}="#{enc_value}")
91
93
  end
92
94
 
93
95
  ##
@@ -138,6 +138,20 @@ module Oga
138
138
  end
139
139
  end
140
140
 
141
+ ##
142
+ # Removes an attribute from the element.
143
+ #
144
+ # @param [String] name The name (optionally including namespace prefix)
145
+ # of the attribute to remove.
146
+ #
147
+ # @return [Oga::XML::Attribute]
148
+ #
149
+ def unset(name)
150
+ found = attribute(name)
151
+
152
+ return attributes.delete(found) if found
153
+ end
154
+
141
155
  ##
142
156
  # Returns the namespace of the element.
143
157
  #
@@ -273,7 +287,7 @@ module Oga
273
287
  # @return [Hash]
274
288
  #
275
289
  def available_namespaces
276
- merged = namespaces
290
+ merged = namespaces.dup
277
291
  node = parent
278
292
 
279
293
  while node && node.respond_to?(:namespaces)
@@ -0,0 +1,60 @@
1
+ module Oga
2
+ module XML
3
+ module Entities
4
+ ##
5
+ # Hash containing XML entities and the corresponding characters.
6
+ #
7
+ # The `&` mapping must come last to ensure proper conversion of non
8
+ # encoded to encoded forms (see {Oga::XML::Text#to_xml}).
9
+ #
10
+ # @return [Hash]
11
+ #
12
+ DECODE_MAPPING = {
13
+ '&lt;' => '<',
14
+ '&gt;' => '>',
15
+ '&amp;' => '&'
16
+ }
17
+
18
+ ##
19
+ # Hash containing characters and the corresponding XML entities.
20
+ #
21
+ # @return [Hash]
22
+ #
23
+ ENCODE_MAPPING = {
24
+ '&' => '&amp;',
25
+ '>' => '&gt;',
26
+ '<' => '&lt;'
27
+ }
28
+
29
+ ##
30
+ # Decodes XML entities.
31
+ #
32
+ # @param [String] input
33
+ # @return [String]
34
+ #
35
+ def self.decode(input)
36
+ if input.include?('&')
37
+ DECODE_MAPPING.each do |find, replace|
38
+ input = input.gsub(find, replace)
39
+ end
40
+ end
41
+
42
+ return input
43
+ end
44
+
45
+ ##
46
+ # Encodes special characters as XML entities.
47
+ #
48
+ # @param [String] input
49
+ # @return [String]
50
+ #
51
+ def self.encode(input)
52
+ ENCODE_MAPPING.each do |from, to|
53
+ input = input.gsub(from, to) if input.include?(from)
54
+ end
55
+
56
+ return input
57
+ end
58
+ end # Entities
59
+ end # XML
60
+ end # Oga
@@ -24,5 +24,7 @@ module Oga
24
24
  'track',
25
25
  'wbr'
26
26
  ])
27
+
28
+ HTML_VOID_ELEMENTS.merge(HTML_VOID_ELEMENTS.map { |name| name.upcase })
27
29
  end # XML
28
30
  end # Oga
@@ -66,7 +66,7 @@ module Oga
66
66
  @line = 1
67
67
  @elements = []
68
68
 
69
- @data.rewind if io_input?
69
+ @data.rewind if @data.respond_to?(:rewind)
70
70
 
71
71
  reset_native
72
72
  end
@@ -78,25 +78,18 @@ module Oga
78
78
  # @yieldparam [String]
79
79
  #
80
80
  def read_data
81
- # We can't check for #each_line since String also defines that. Using
82
- # String#each_line has no benefit over just lexing the String in one
83
- # go.
84
- if io_input?
85
- @data.each_line do |line|
86
- yield line
87
- end
88
- else
81
+ if @data.is_a?(String)
89
82
  yield @data
90
- end
91
- end
92
83
 
93
- ##
94
- # Returns `true` if the input is an IO like object, false otherwise.
95
- #
96
- # @return [TrueClass|FalseClass]
97
- #
98
- def io_input?
99
- return @data.is_a?(IO) || @data.is_a?(StringIO)
84
+ # IO, StringIO, etc
85
+ # THINK: read(N) would be nice, but currently this screws up the C code
86
+ elsif @data.respond_to?(:each_line)
87
+ @data.each_line { |line| yield line }
88
+
89
+ # Enumerator, Array, etc
90
+ elsif @data.respond_to?(:each)
91
+ @data.each { |chunk| yield chunk }
92
+ end
100
93
  end
101
94
 
102
95
  ##
@@ -189,12 +182,26 @@ module Oga
189
182
  end
190
183
 
191
184
  ##
192
- # Called when processing single/double quoted strings.
185
+ # Called when processing a single quote.
186
+ #
187
+ def on_string_squote
188
+ add_token(:T_STRING_SQUOTE)
189
+ end
190
+
191
+ ##
192
+ # Called when processing a double quote.
193
+ #
194
+ def on_string_dquote
195
+ add_token(:T_STRING_DQUOTE)
196
+ end
197
+
198
+ ##
199
+ # Called when processing the body of a string.
193
200
  #
194
201
  # @param [String] value The data between the quotes.
195
202
  #
196
- def on_string(value)
197
- add_token(:T_STRING, value)
203
+ def on_string_body(value)
204
+ add_token(:T_STRING_BODY, Entities.decode(value))
198
205
  end
199
206
 
200
207
  ##
@@ -322,7 +329,12 @@ module Oga
322
329
  # Called on the closing `>` of the open tag of an element.
323
330
  #
324
331
  def on_element_open_end
325
- if html? and HTML_VOID_ELEMENTS.include?(current_element.downcase)
332
+ return unless html?
333
+
334
+ # Only downcase the name if we can't find an all lower/upper version of
335
+ # the element name. This can save us a *lot* of String allocations.
336
+ if HTML_VOID_ELEMENTS.include?(current_element) \
337
+ or HTML_VOID_ELEMENTS.include?(current_element.downcase)
326
338
  add_token(:T_ELEM_END)
327
339
  @elements.pop
328
340
  end
@@ -343,13 +355,9 @@ module Oga
343
355
  # @param [String] value
344
356
  #
345
357
  def on_text(value)
346
- unless value.empty?
347
- add_token(:T_TEXT, value)
358
+ return if value.empty?
348
359
 
349
- lines = value.count("\n")
350
-
351
- advance_line(lines) if lines > 0
352
- end
360
+ add_token(:T_TEXT, Entities.decode(value))
353
361
  end
354
362
 
355
363
  ##
@@ -197,6 +197,28 @@ module Oga
197
197
  return self.class.new(to_a | other.to_a)
198
198
  end
199
199
 
200
+ ##
201
+ # Returns `true` if the current node set and the one given in `other` are
202
+ # equal to each other.
203
+ #
204
+ # @param [Oga::XML::NodeSet] other
205
+ #
206
+ def ==(other)
207
+ return other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
208
+ end
209
+
210
+ ##
211
+ # Returns `true` if the nodes given in `nodes` are equal to those
212
+ # specified in the current `@nodes` variable. This method allows two
213
+ # NodeSet instances to compare each other without the need of exposing
214
+ # `@nodes` to the public.
215
+ #
216
+ # @param [Array<Oga::XML::Node>] nodes
217
+ #
218
+ def equal_nodes?(nodes)
219
+ return @nodes == nodes
220
+ end
221
+
200
222
  ##
201
223
  # Adds the nodes of the given node set to the current node set.
202
224
  #
@@ -81,43 +81,10 @@ module Oga
81
81
  # @raise [Racc::ParseError]
82
82
  #
83
83
  def on_error(type, value, stack)
84
- name = token_to_str(type)
85
- name = TOKEN_ERROR_MAPPING[name] || name
86
- index = @line - 1
87
- index_range = (index - 5)..(index + 5)
88
- code = ''
89
-
90
- # For IO we sadly have to re-read the input :<
91
- if @data.respond_to?(:rewind)
92
- @data.rewind
93
- end
94
-
95
- # Show up to 5 lines before and after the offending line (if they exist).
96
- @data.each_line.with_index do |line, line_index|
97
- next unless index_range.cover?(line_index)
98
-
99
- number = line_index + 1
100
-
101
- if line_index == index
102
- prefix = '=> '
103
- else
104
- prefix = ' '
105
- end
106
-
107
- line = line.strip
84
+ name = token_to_str(type)
85
+ name = TOKEN_ERROR_MAPPING[name] || name
108
86
 
109
- if line.length > 80
110
- line = line[0..79] + ' (more)'
111
- end
112
-
113
- code << "#{prefix}#{number}: #{line}\n"
114
- end
115
-
116
- raise Racc::ParseError, <<-EOF.strip
117
- Unexpected #{name} on line #{@line}:
118
-
119
- #{code}
120
- EOF
87
+ raise Racc::ParseError, "Unexpected #{name} on line #{@line}"
121
88
  end
122
89
 
123
90
  ##
@@ -254,118 +221,138 @@ Unexpected #{name} on line #{@line}:
254
221
  ##### State transition tables begin ###
255
222
 
256
223
  racc_action_table = [
257
- 40, 20, 12, 25, 26, 20, 12, 13, 14, 16,
258
- 41, 13, 14, 16, 21, 19, 35, 15, 39, 19,
259
- 43, 15, 20, 12, 52, 31, 32, 51, 13, 14,
260
- 16, 48, 36, 37, 47, 38, 19, 44, 15, 31,
261
- 32, 31, 32, 45, 46, 24, 49, 50, 23, 53 ]
224
+ 40, 20, 47, 46, 49, 12, 60, 64, 50, 63,
225
+ 13, 14, 16, 20, 48, 47, 46, 12, 19, 51,
226
+ 15, 39, 13, 14, 16, 20, 58, 53, 57, 12,
227
+ 19, 54, 15, 41, 13, 14, 16, 36, 37, 35,
228
+ 38, 24, 19, 23, 15, 31, 32, 62, 63, 31,
229
+ 32, 55, 57, 25, 26, 47, 46, 31, 32, 21,
230
+ 65 ]
262
231
 
263
232
  racc_action_check = [
264
- 24, 0, 0, 16, 16, 3, 3, 0, 0, 0,
265
- 26, 3, 3, 3, 1, 0, 21, 0, 24, 3,
266
- 30, 3, 18, 18, 48, 19, 19, 48, 18, 18,
267
- 18, 37, 23, 23, 37, 23, 18, 32, 18, 28,
268
- 28, 17, 17, 33, 34, 15, 38, 40, 12, 52 ]
233
+ 24, 0, 52, 52, 33, 0, 52, 59, 34, 59,
234
+ 0, 0, 0, 3, 32, 37, 37, 3, 0, 37,
235
+ 0, 24, 3, 3, 3, 18, 47, 38, 47, 18,
236
+ 3, 40, 3, 26, 18, 18, 18, 23, 23, 21,
237
+ 23, 15, 18, 12, 18, 19, 19, 56, 56, 17,
238
+ 17, 46, 46, 16, 16, 30, 30, 28, 28, 1,
239
+ 61 ]
269
240
 
270
241
  racc_action_pointer = [
271
- -2, 14, nil, 2, nil, nil, nil, nil, nil, nil,
272
- nil, nil, 41, nil, nil, 25, -9, 26, 19, 10,
273
- nil, 16, nil, 27, -3, nil, -2, nil, 24, nil,
274
- 18, nil, 22, 29, 26, nil, nil, 29, 41, nil,
275
- 26, nil, nil, nil, nil, nil, nil, nil, 22, nil,
276
- nil, nil, 44, nil ]
242
+ -1, 59, nil, 11, nil, nil, nil, nil, nil, nil,
243
+ nil, nil, 34, nil, nil, 19, 39, 32, 23, 28,
244
+ nil, 39, nil, 30, -2, nil, 19, nil, 40, nil,
245
+ 52, nil, -3, -12, -12, nil, nil, 12, 20, nil,
246
+ 8, nil, nil, nil, nil, nil, 47, 23, nil, nil,
247
+ nil, nil, -1, nil, nil, nil, 43, nil, nil, 4,
248
+ nil, 53, nil, nil, nil, nil ]
277
249
 
278
250
  racc_action_default = [
279
- -3, -36, -1, -2, -5, -6, -7, -8, -9, -10,
280
- -11, -12, -36, -18, -19, -36, -36, -27, -3, -27,
281
- -35, -36, -4, -36, -36, -22, -36, -24, -26, -29,
282
- -30, -32, -36, -36, -36, 54, -13, -36, -36, -20,
283
- -36, -23, -28, -31, -33, -25, -34, -14, -36, -17,
284
- -21, -15, -36, -16 ]
251
+ -3, -44, -1, -2, -5, -6, -7, -8, -9, -10,
252
+ -11, -12, -44, -18, -19, -44, -44, -27, -3, -27,
253
+ -35, -44, -4, -44, -44, -22, -44, -24, -26, -29,
254
+ -30, -32, -44, -44, -44, 66, -13, -44, -44, -20,
255
+ -44, -23, -28, -31, -36, -37, -44, -44, -33, -25,
256
+ -34, -14, -44, -17, -21, -38, -44, -42, -40, -44,
257
+ -15, -44, -39, -43, -41, -16 ]
285
258
 
286
259
  racc_goto_table = [
287
- 2, 27, 22, 34, 1, 42, nil, nil, nil, nil,
288
- nil, nil, nil, nil, nil, nil, nil, nil, 33 ]
260
+ 43, 2, 27, 22, 34, 56, 59, 52, 42, 1,
261
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, 33,
262
+ nil, nil, 61 ]
289
263
 
290
264
  racc_goto_check = [
291
- 2, 14, 4, 14, 1, 16, nil, nil, nil, nil,
292
- nil, nil, nil, nil, nil, nil, nil, nil, 2 ]
265
+ 12, 2, 15, 4, 15, 21, 21, 12, 17, 1,
266
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, 2,
267
+ nil, nil, 12 ]
293
268
 
294
269
  racc_goto_pointer = [
295
- nil, 4, 0, nil, -1, nil, nil, nil, nil, nil,
296
- nil, nil, nil, nil, -16, nil, -23, nil ]
270
+ nil, 9, 1, nil, 0, nil, nil, nil, nil, nil,
271
+ nil, nil, -30, nil, nil, -15, nil, -20, nil, nil,
272
+ nil, -41 ]
297
273
 
298
274
  racc_goto_default = [
299
275
  nil, nil, nil, 3, 4, 5, 6, 7, 8, 9,
300
- 10, 11, 17, 18, nil, 28, 29, 30 ]
276
+ 10, 11, nil, 17, 18, nil, 28, 29, 30, 44,
277
+ 45, nil ]
301
278
 
302
279
  racc_reduce_table = [
303
280
  0, 0, :racc_error,
304
- 1, 23, :_reduce_1,
305
- 1, 24, :_reduce_2,
306
- 0, 24, :_reduce_3,
307
- 2, 25, :_reduce_4,
308
- 1, 25, :_reduce_5,
309
- 1, 26, :_reduce_none,
310
- 1, 26, :_reduce_none,
311
- 1, 26, :_reduce_none,
312
- 1, 26, :_reduce_none,
313
- 1, 26, :_reduce_none,
314
- 1, 26, :_reduce_none,
315
- 1, 26, :_reduce_none,
316
- 3, 27, :_reduce_13,
317
- 4, 27, :_reduce_14,
318
- 5, 27, :_reduce_15,
319
- 6, 27, :_reduce_16,
320
- 4, 27, :_reduce_17,
321
- 1, 28, :_reduce_18,
322
- 1, 29, :_reduce_19,
323
- 3, 33, :_reduce_20,
324
- 4, 33, :_reduce_21,
325
- 2, 34, :_reduce_22,
326
- 3, 34, :_reduce_23,
327
- 2, 35, :_reduce_24,
328
- 3, 30, :_reduce_25,
329
- 1, 36, :_reduce_26,
330
- 0, 36, :_reduce_27,
331
- 2, 37, :_reduce_28,
332
- 1, 37, :_reduce_29,
333
- 1, 38, :_reduce_30,
334
- 2, 38, :_reduce_31,
335
- 1, 39, :_reduce_32,
336
- 2, 39, :_reduce_33,
337
- 3, 32, :_reduce_34,
338
- 1, 31, :_reduce_35 ]
339
-
340
- racc_reduce_n = 36
341
-
342
- racc_shift_n = 54
281
+ 1, 25, :_reduce_1,
282
+ 1, 26, :_reduce_2,
283
+ 0, 26, :_reduce_3,
284
+ 2, 27, :_reduce_4,
285
+ 1, 27, :_reduce_5,
286
+ 1, 28, :_reduce_none,
287
+ 1, 28, :_reduce_none,
288
+ 1, 28, :_reduce_none,
289
+ 1, 28, :_reduce_none,
290
+ 1, 28, :_reduce_none,
291
+ 1, 28, :_reduce_none,
292
+ 1, 28, :_reduce_none,
293
+ 3, 29, :_reduce_13,
294
+ 4, 29, :_reduce_14,
295
+ 5, 29, :_reduce_15,
296
+ 6, 29, :_reduce_16,
297
+ 4, 29, :_reduce_17,
298
+ 1, 30, :_reduce_18,
299
+ 1, 31, :_reduce_19,
300
+ 3, 35, :_reduce_20,
301
+ 4, 35, :_reduce_21,
302
+ 2, 37, :_reduce_22,
303
+ 3, 37, :_reduce_23,
304
+ 2, 38, :_reduce_24,
305
+ 3, 32, :_reduce_25,
306
+ 1, 39, :_reduce_26,
307
+ 0, 39, :_reduce_27,
308
+ 2, 40, :_reduce_28,
309
+ 1, 40, :_reduce_29,
310
+ 1, 41, :_reduce_30,
311
+ 2, 41, :_reduce_31,
312
+ 1, 42, :_reduce_32,
313
+ 2, 42, :_reduce_33,
314
+ 3, 34, :_reduce_34,
315
+ 1, 33, :_reduce_35,
316
+ 1, 36, :_reduce_none,
317
+ 1, 36, :_reduce_none,
318
+ 2, 43, :_reduce_38,
319
+ 3, 43, :_reduce_39,
320
+ 2, 44, :_reduce_40,
321
+ 3, 44, :_reduce_41,
322
+ 1, 45, :_reduce_42,
323
+ 2, 45, :_reduce_43 ]
324
+
325
+ racc_reduce_n = 44
326
+
327
+ racc_shift_n = 66
343
328
 
344
329
  racc_token_table = {
345
330
  false => 0,
346
331
  :error => 1,
347
- :T_STRING => 2,
348
- :T_TEXT => 3,
349
- :T_DOCTYPE_START => 4,
350
- :T_DOCTYPE_END => 5,
351
- :T_DOCTYPE_TYPE => 6,
352
- :T_DOCTYPE_NAME => 7,
353
- :T_DOCTYPE_INLINE => 8,
354
- :T_CDATA => 9,
355
- :T_COMMENT => 10,
356
- :T_ELEM_START => 11,
357
- :T_ELEM_NAME => 12,
358
- :T_ELEM_NS => 13,
359
- :T_ELEM_END => 14,
360
- :T_ATTR => 15,
361
- :T_ATTR_NS => 16,
362
- :T_XML_DECL_START => 17,
363
- :T_XML_DECL_END => 18,
364
- :T_PROC_INS_START => 19,
365
- :T_PROC_INS_NAME => 20,
366
- :T_PROC_INS_END => 21 }
367
-
368
- racc_nt_base = 22
332
+ :T_TEXT => 2,
333
+ :T_STRING_SQUOTE => 3,
334
+ :T_STRING_DQUOTE => 4,
335
+ :T_STRING_BODY => 5,
336
+ :T_DOCTYPE_START => 6,
337
+ :T_DOCTYPE_END => 7,
338
+ :T_DOCTYPE_TYPE => 8,
339
+ :T_DOCTYPE_NAME => 9,
340
+ :T_DOCTYPE_INLINE => 10,
341
+ :T_CDATA => 11,
342
+ :T_COMMENT => 12,
343
+ :T_ELEM_START => 13,
344
+ :T_ELEM_NAME => 14,
345
+ :T_ELEM_NS => 15,
346
+ :T_ELEM_END => 16,
347
+ :T_ATTR => 17,
348
+ :T_ATTR_NS => 18,
349
+ :T_XML_DECL_START => 19,
350
+ :T_XML_DECL_END => 20,
351
+ :T_PROC_INS_START => 21,
352
+ :T_PROC_INS_NAME => 22,
353
+ :T_PROC_INS_END => 23 }
354
+
355
+ racc_nt_base = 24
369
356
 
370
357
  racc_use_result_var = false
371
358
 
@@ -388,8 +375,10 @@ Racc_arg = [
388
375
  Racc_token_to_s_table = [
389
376
  "$end",
390
377
  "error",
391
- "T_STRING",
392
378
  "T_TEXT",
379
+ "T_STRING_SQUOTE",
380
+ "T_STRING_DQUOTE",
381
+ "T_STRING_BODY",
393
382
  "T_DOCTYPE_START",
394
383
  "T_DOCTYPE_END",
395
384
  "T_DOCTYPE_TYPE",
@@ -420,12 +409,16 @@ Racc_token_to_s_table = [
420
409
  "text",
421
410
  "xmldecl",
422
411
  "proc_ins",
412
+ "string",
423
413
  "element_open",
424
414
  "element_start",
425
415
  "attributes",
426
416
  "attributes_",
427
417
  "attribute",
428
- "attribute_name" ]
418
+ "attribute_name",
419
+ "string_dquote",
420
+ "string_squote",
421
+ "string_body" ]
429
422
 
430
423
  Racc_debug_parser = false
431
424
 
@@ -579,6 +572,34 @@ def _reduce_35(val, _values)
579
572
  on_text(val[0])
580
573
  end
581
574
 
575
+ # reduce 36 omitted
576
+
577
+ # reduce 37 omitted
578
+
579
+ def _reduce_38(val, _values)
580
+ ''
581
+ end
582
+
583
+ def _reduce_39(val, _values)
584
+ val[1]
585
+ end
586
+
587
+ def _reduce_40(val, _values)
588
+ ''
589
+ end
590
+
591
+ def _reduce_41(val, _values)
592
+ val[1]
593
+ end
594
+
595
+ def _reduce_42(val, _values)
596
+ val[0]
597
+ end
598
+
599
+ def _reduce_43(val, _values)
600
+ val[0] + val[1]
601
+ end
602
+
582
603
  def _reduce_none(val, _values)
583
604
  val[0]
584
605
  end