oga 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '0.1.3'
2
+ VERSION = '0.2.0'
3
3
  end # Oga
@@ -87,7 +87,9 @@ module Oga
87
87
  full_name = name
88
88
  end
89
89
 
90
- return %Q(#{full_name}="#{value}")
90
+ enc_value = value ? Entities.encode(value) : nil
91
+
92
+ return %Q(#{full_name}="#{enc_value}")
91
93
  end
92
94
 
93
95
  ##
@@ -138,6 +138,20 @@ module Oga
138
138
  end
139
139
  end
140
140
 
141
+ ##
142
+ # Removes an attribute from the element.
143
+ #
144
+ # @param [String] name The name (optionally including namespace prefix)
145
+ # of the attribute to remove.
146
+ #
147
+ # @return [Oga::XML::Attribute]
148
+ #
149
+ def unset(name)
150
+ found = attribute(name)
151
+
152
+ return attributes.delete(found) if found
153
+ end
154
+
141
155
  ##
142
156
  # Returns the namespace of the element.
143
157
  #
@@ -273,7 +287,7 @@ module Oga
273
287
  # @return [Hash]
274
288
  #
275
289
  def available_namespaces
276
- merged = namespaces
290
+ merged = namespaces.dup
277
291
  node = parent
278
292
 
279
293
  while node && node.respond_to?(:namespaces)
@@ -0,0 +1,60 @@
1
+ module Oga
2
+ module XML
3
+ module Entities
4
+ ##
5
+ # Hash containing XML entities and the corresponding characters.
6
+ #
7
+ # The `&` mapping must come last to ensure proper conversion of non
8
+ # encoded to encoded forms (see {Oga::XML::Text#to_xml}).
9
+ #
10
+ # @return [Hash]
11
+ #
12
+ DECODE_MAPPING = {
13
+ '&lt;' => '<',
14
+ '&gt;' => '>',
15
+ '&amp;' => '&'
16
+ }
17
+
18
+ ##
19
+ # Hash containing characters and the corresponding XML entities.
20
+ #
21
+ # @return [Hash]
22
+ #
23
+ ENCODE_MAPPING = {
24
+ '&' => '&amp;',
25
+ '>' => '&gt;',
26
+ '<' => '&lt;'
27
+ }
28
+
29
+ ##
30
+ # Decodes XML entities.
31
+ #
32
+ # @param [String] input
33
+ # @return [String]
34
+ #
35
+ def self.decode(input)
36
+ if input.include?('&')
37
+ DECODE_MAPPING.each do |find, replace|
38
+ input = input.gsub(find, replace)
39
+ end
40
+ end
41
+
42
+ return input
43
+ end
44
+
45
+ ##
46
+ # Encodes special characters as XML entities.
47
+ #
48
+ # @param [String] input
49
+ # @return [String]
50
+ #
51
+ def self.encode(input)
52
+ ENCODE_MAPPING.each do |from, to|
53
+ input = input.gsub(from, to) if input.include?(from)
54
+ end
55
+
56
+ return input
57
+ end
58
+ end # Entities
59
+ end # XML
60
+ end # Oga
@@ -24,5 +24,7 @@ module Oga
24
24
  'track',
25
25
  'wbr'
26
26
  ])
27
+
28
+ HTML_VOID_ELEMENTS.merge(HTML_VOID_ELEMENTS.map { |name| name.upcase })
27
29
  end # XML
28
30
  end # Oga
@@ -66,7 +66,7 @@ module Oga
66
66
  @line = 1
67
67
  @elements = []
68
68
 
69
- @data.rewind if io_input?
69
+ @data.rewind if @data.respond_to?(:rewind)
70
70
 
71
71
  reset_native
72
72
  end
@@ -78,25 +78,18 @@ module Oga
78
78
  # @yieldparam [String]
79
79
  #
80
80
  def read_data
81
- # We can't check for #each_line since String also defines that. Using
82
- # String#each_line has no benefit over just lexing the String in one
83
- # go.
84
- if io_input?
85
- @data.each_line do |line|
86
- yield line
87
- end
88
- else
81
+ if @data.is_a?(String)
89
82
  yield @data
90
- end
91
- end
92
83
 
93
- ##
94
- # Returns `true` if the input is an IO like object, false otherwise.
95
- #
96
- # @return [TrueClass|FalseClass]
97
- #
98
- def io_input?
99
- return @data.is_a?(IO) || @data.is_a?(StringIO)
84
+ # IO, StringIO, etc
85
+ # THINK: read(N) would be nice, but currently this screws up the C code
86
+ elsif @data.respond_to?(:each_line)
87
+ @data.each_line { |line| yield line }
88
+
89
+ # Enumerator, Array, etc
90
+ elsif @data.respond_to?(:each)
91
+ @data.each { |chunk| yield chunk }
92
+ end
100
93
  end
101
94
 
102
95
  ##
@@ -189,12 +182,26 @@ module Oga
189
182
  end
190
183
 
191
184
  ##
192
- # Called when processing single/double quoted strings.
185
+ # Called when processing a single quote.
186
+ #
187
+ def on_string_squote
188
+ add_token(:T_STRING_SQUOTE)
189
+ end
190
+
191
+ ##
192
+ # Called when processing a double quote.
193
+ #
194
+ def on_string_dquote
195
+ add_token(:T_STRING_DQUOTE)
196
+ end
197
+
198
+ ##
199
+ # Called when processing the body of a string.
193
200
  #
194
201
  # @param [String] value The data between the quotes.
195
202
  #
196
- def on_string(value)
197
- add_token(:T_STRING, value)
203
+ def on_string_body(value)
204
+ add_token(:T_STRING_BODY, Entities.decode(value))
198
205
  end
199
206
 
200
207
  ##
@@ -322,7 +329,12 @@ module Oga
322
329
  # Called on the closing `>` of the open tag of an element.
323
330
  #
324
331
  def on_element_open_end
325
- if html? and HTML_VOID_ELEMENTS.include?(current_element.downcase)
332
+ return unless html?
333
+
334
+ # Only downcase the name if we can't find an all lower/upper version of
335
+ # the element name. This can save us a *lot* of String allocations.
336
+ if HTML_VOID_ELEMENTS.include?(current_element) \
337
+ or HTML_VOID_ELEMENTS.include?(current_element.downcase)
326
338
  add_token(:T_ELEM_END)
327
339
  @elements.pop
328
340
  end
@@ -343,13 +355,9 @@ module Oga
343
355
  # @param [String] value
344
356
  #
345
357
  def on_text(value)
346
- unless value.empty?
347
- add_token(:T_TEXT, value)
358
+ return if value.empty?
348
359
 
349
- lines = value.count("\n")
350
-
351
- advance_line(lines) if lines > 0
352
- end
360
+ add_token(:T_TEXT, Entities.decode(value))
353
361
  end
354
362
 
355
363
  ##
@@ -197,6 +197,28 @@ module Oga
197
197
  return self.class.new(to_a | other.to_a)
198
198
  end
199
199
 
200
+ ##
201
+ # Returns `true` if the current node set and the one given in `other` are
202
+ # equal to each other.
203
+ #
204
+ # @param [Oga::XML::NodeSet] other
205
+ #
206
+ def ==(other)
207
+ return other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
208
+ end
209
+
210
+ ##
211
+ # Returns `true` if the nodes given in `nodes` are equal to those
212
+ # specified in the current `@nodes` variable. This method allows two
213
+ # NodeSet instances to compare each other without the need of exposing
214
+ # `@nodes` to the public.
215
+ #
216
+ # @param [Array<Oga::XML::Node>] nodes
217
+ #
218
+ def equal_nodes?(nodes)
219
+ return @nodes == nodes
220
+ end
221
+
200
222
  ##
201
223
  # Adds the nodes of the given node set to the current node set.
202
224
  #
@@ -81,43 +81,10 @@ module Oga
81
81
  # @raise [Racc::ParseError]
82
82
  #
83
83
  def on_error(type, value, stack)
84
- name = token_to_str(type)
85
- name = TOKEN_ERROR_MAPPING[name] || name
86
- index = @line - 1
87
- index_range = (index - 5)..(index + 5)
88
- code = ''
89
-
90
- # For IO we sadly have to re-read the input :<
91
- if @data.respond_to?(:rewind)
92
- @data.rewind
93
- end
94
-
95
- # Show up to 5 lines before and after the offending line (if they exist).
96
- @data.each_line.with_index do |line, line_index|
97
- next unless index_range.cover?(line_index)
98
-
99
- number = line_index + 1
100
-
101
- if line_index == index
102
- prefix = '=> '
103
- else
104
- prefix = ' '
105
- end
106
-
107
- line = line.strip
84
+ name = token_to_str(type)
85
+ name = TOKEN_ERROR_MAPPING[name] || name
108
86
 
109
- if line.length > 80
110
- line = line[0..79] + ' (more)'
111
- end
112
-
113
- code << "#{prefix}#{number}: #{line}\n"
114
- end
115
-
116
- raise Racc::ParseError, <<-EOF.strip
117
- Unexpected #{name} on line #{@line}:
118
-
119
- #{code}
120
- EOF
87
+ raise Racc::ParseError, "Unexpected #{name} on line #{@line}"
121
88
  end
122
89
 
123
90
  ##
@@ -254,118 +221,138 @@ Unexpected #{name} on line #{@line}:
254
221
  ##### State transition tables begin ###
255
222
 
256
223
  racc_action_table = [
257
- 40, 20, 12, 25, 26, 20, 12, 13, 14, 16,
258
- 41, 13, 14, 16, 21, 19, 35, 15, 39, 19,
259
- 43, 15, 20, 12, 52, 31, 32, 51, 13, 14,
260
- 16, 48, 36, 37, 47, 38, 19, 44, 15, 31,
261
- 32, 31, 32, 45, 46, 24, 49, 50, 23, 53 ]
224
+ 40, 20, 47, 46, 49, 12, 60, 64, 50, 63,
225
+ 13, 14, 16, 20, 48, 47, 46, 12, 19, 51,
226
+ 15, 39, 13, 14, 16, 20, 58, 53, 57, 12,
227
+ 19, 54, 15, 41, 13, 14, 16, 36, 37, 35,
228
+ 38, 24, 19, 23, 15, 31, 32, 62, 63, 31,
229
+ 32, 55, 57, 25, 26, 47, 46, 31, 32, 21,
230
+ 65 ]
262
231
 
263
232
  racc_action_check = [
264
- 24, 0, 0, 16, 16, 3, 3, 0, 0, 0,
265
- 26, 3, 3, 3, 1, 0, 21, 0, 24, 3,
266
- 30, 3, 18, 18, 48, 19, 19, 48, 18, 18,
267
- 18, 37, 23, 23, 37, 23, 18, 32, 18, 28,
268
- 28, 17, 17, 33, 34, 15, 38, 40, 12, 52 ]
233
+ 24, 0, 52, 52, 33, 0, 52, 59, 34, 59,
234
+ 0, 0, 0, 3, 32, 37, 37, 3, 0, 37,
235
+ 0, 24, 3, 3, 3, 18, 47, 38, 47, 18,
236
+ 3, 40, 3, 26, 18, 18, 18, 23, 23, 21,
237
+ 23, 15, 18, 12, 18, 19, 19, 56, 56, 17,
238
+ 17, 46, 46, 16, 16, 30, 30, 28, 28, 1,
239
+ 61 ]
269
240
 
270
241
  racc_action_pointer = [
271
- -2, 14, nil, 2, nil, nil, nil, nil, nil, nil,
272
- nil, nil, 41, nil, nil, 25, -9, 26, 19, 10,
273
- nil, 16, nil, 27, -3, nil, -2, nil, 24, nil,
274
- 18, nil, 22, 29, 26, nil, nil, 29, 41, nil,
275
- 26, nil, nil, nil, nil, nil, nil, nil, 22, nil,
276
- nil, nil, 44, nil ]
242
+ -1, 59, nil, 11, nil, nil, nil, nil, nil, nil,
243
+ nil, nil, 34, nil, nil, 19, 39, 32, 23, 28,
244
+ nil, 39, nil, 30, -2, nil, 19, nil, 40, nil,
245
+ 52, nil, -3, -12, -12, nil, nil, 12, 20, nil,
246
+ 8, nil, nil, nil, nil, nil, 47, 23, nil, nil,
247
+ nil, nil, -1, nil, nil, nil, 43, nil, nil, 4,
248
+ nil, 53, nil, nil, nil, nil ]
277
249
 
278
250
  racc_action_default = [
279
- -3, -36, -1, -2, -5, -6, -7, -8, -9, -10,
280
- -11, -12, -36, -18, -19, -36, -36, -27, -3, -27,
281
- -35, -36, -4, -36, -36, -22, -36, -24, -26, -29,
282
- -30, -32, -36, -36, -36, 54, -13, -36, -36, -20,
283
- -36, -23, -28, -31, -33, -25, -34, -14, -36, -17,
284
- -21, -15, -36, -16 ]
251
+ -3, -44, -1, -2, -5, -6, -7, -8, -9, -10,
252
+ -11, -12, -44, -18, -19, -44, -44, -27, -3, -27,
253
+ -35, -44, -4, -44, -44, -22, -44, -24, -26, -29,
254
+ -30, -32, -44, -44, -44, 66, -13, -44, -44, -20,
255
+ -44, -23, -28, -31, -36, -37, -44, -44, -33, -25,
256
+ -34, -14, -44, -17, -21, -38, -44, -42, -40, -44,
257
+ -15, -44, -39, -43, -41, -16 ]
285
258
 
286
259
  racc_goto_table = [
287
- 2, 27, 22, 34, 1, 42, nil, nil, nil, nil,
288
- nil, nil, nil, nil, nil, nil, nil, nil, 33 ]
260
+ 43, 2, 27, 22, 34, 56, 59, 52, 42, 1,
261
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, 33,
262
+ nil, nil, 61 ]
289
263
 
290
264
  racc_goto_check = [
291
- 2, 14, 4, 14, 1, 16, nil, nil, nil, nil,
292
- nil, nil, nil, nil, nil, nil, nil, nil, 2 ]
265
+ 12, 2, 15, 4, 15, 21, 21, 12, 17, 1,
266
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, 2,
267
+ nil, nil, 12 ]
293
268
 
294
269
  racc_goto_pointer = [
295
- nil, 4, 0, nil, -1, nil, nil, nil, nil, nil,
296
- nil, nil, nil, nil, -16, nil, -23, nil ]
270
+ nil, 9, 1, nil, 0, nil, nil, nil, nil, nil,
271
+ nil, nil, -30, nil, nil, -15, nil, -20, nil, nil,
272
+ nil, -41 ]
297
273
 
298
274
  racc_goto_default = [
299
275
  nil, nil, nil, 3, 4, 5, 6, 7, 8, 9,
300
- 10, 11, 17, 18, nil, 28, 29, 30 ]
276
+ 10, 11, nil, 17, 18, nil, 28, 29, 30, 44,
277
+ 45, nil ]
301
278
 
302
279
  racc_reduce_table = [
303
280
  0, 0, :racc_error,
304
- 1, 23, :_reduce_1,
305
- 1, 24, :_reduce_2,
306
- 0, 24, :_reduce_3,
307
- 2, 25, :_reduce_4,
308
- 1, 25, :_reduce_5,
309
- 1, 26, :_reduce_none,
310
- 1, 26, :_reduce_none,
311
- 1, 26, :_reduce_none,
312
- 1, 26, :_reduce_none,
313
- 1, 26, :_reduce_none,
314
- 1, 26, :_reduce_none,
315
- 1, 26, :_reduce_none,
316
- 3, 27, :_reduce_13,
317
- 4, 27, :_reduce_14,
318
- 5, 27, :_reduce_15,
319
- 6, 27, :_reduce_16,
320
- 4, 27, :_reduce_17,
321
- 1, 28, :_reduce_18,
322
- 1, 29, :_reduce_19,
323
- 3, 33, :_reduce_20,
324
- 4, 33, :_reduce_21,
325
- 2, 34, :_reduce_22,
326
- 3, 34, :_reduce_23,
327
- 2, 35, :_reduce_24,
328
- 3, 30, :_reduce_25,
329
- 1, 36, :_reduce_26,
330
- 0, 36, :_reduce_27,
331
- 2, 37, :_reduce_28,
332
- 1, 37, :_reduce_29,
333
- 1, 38, :_reduce_30,
334
- 2, 38, :_reduce_31,
335
- 1, 39, :_reduce_32,
336
- 2, 39, :_reduce_33,
337
- 3, 32, :_reduce_34,
338
- 1, 31, :_reduce_35 ]
339
-
340
- racc_reduce_n = 36
341
-
342
- racc_shift_n = 54
281
+ 1, 25, :_reduce_1,
282
+ 1, 26, :_reduce_2,
283
+ 0, 26, :_reduce_3,
284
+ 2, 27, :_reduce_4,
285
+ 1, 27, :_reduce_5,
286
+ 1, 28, :_reduce_none,
287
+ 1, 28, :_reduce_none,
288
+ 1, 28, :_reduce_none,
289
+ 1, 28, :_reduce_none,
290
+ 1, 28, :_reduce_none,
291
+ 1, 28, :_reduce_none,
292
+ 1, 28, :_reduce_none,
293
+ 3, 29, :_reduce_13,
294
+ 4, 29, :_reduce_14,
295
+ 5, 29, :_reduce_15,
296
+ 6, 29, :_reduce_16,
297
+ 4, 29, :_reduce_17,
298
+ 1, 30, :_reduce_18,
299
+ 1, 31, :_reduce_19,
300
+ 3, 35, :_reduce_20,
301
+ 4, 35, :_reduce_21,
302
+ 2, 37, :_reduce_22,
303
+ 3, 37, :_reduce_23,
304
+ 2, 38, :_reduce_24,
305
+ 3, 32, :_reduce_25,
306
+ 1, 39, :_reduce_26,
307
+ 0, 39, :_reduce_27,
308
+ 2, 40, :_reduce_28,
309
+ 1, 40, :_reduce_29,
310
+ 1, 41, :_reduce_30,
311
+ 2, 41, :_reduce_31,
312
+ 1, 42, :_reduce_32,
313
+ 2, 42, :_reduce_33,
314
+ 3, 34, :_reduce_34,
315
+ 1, 33, :_reduce_35,
316
+ 1, 36, :_reduce_none,
317
+ 1, 36, :_reduce_none,
318
+ 2, 43, :_reduce_38,
319
+ 3, 43, :_reduce_39,
320
+ 2, 44, :_reduce_40,
321
+ 3, 44, :_reduce_41,
322
+ 1, 45, :_reduce_42,
323
+ 2, 45, :_reduce_43 ]
324
+
325
+ racc_reduce_n = 44
326
+
327
+ racc_shift_n = 66
343
328
 
344
329
  racc_token_table = {
345
330
  false => 0,
346
331
  :error => 1,
347
- :T_STRING => 2,
348
- :T_TEXT => 3,
349
- :T_DOCTYPE_START => 4,
350
- :T_DOCTYPE_END => 5,
351
- :T_DOCTYPE_TYPE => 6,
352
- :T_DOCTYPE_NAME => 7,
353
- :T_DOCTYPE_INLINE => 8,
354
- :T_CDATA => 9,
355
- :T_COMMENT => 10,
356
- :T_ELEM_START => 11,
357
- :T_ELEM_NAME => 12,
358
- :T_ELEM_NS => 13,
359
- :T_ELEM_END => 14,
360
- :T_ATTR => 15,
361
- :T_ATTR_NS => 16,
362
- :T_XML_DECL_START => 17,
363
- :T_XML_DECL_END => 18,
364
- :T_PROC_INS_START => 19,
365
- :T_PROC_INS_NAME => 20,
366
- :T_PROC_INS_END => 21 }
367
-
368
- racc_nt_base = 22
332
+ :T_TEXT => 2,
333
+ :T_STRING_SQUOTE => 3,
334
+ :T_STRING_DQUOTE => 4,
335
+ :T_STRING_BODY => 5,
336
+ :T_DOCTYPE_START => 6,
337
+ :T_DOCTYPE_END => 7,
338
+ :T_DOCTYPE_TYPE => 8,
339
+ :T_DOCTYPE_NAME => 9,
340
+ :T_DOCTYPE_INLINE => 10,
341
+ :T_CDATA => 11,
342
+ :T_COMMENT => 12,
343
+ :T_ELEM_START => 13,
344
+ :T_ELEM_NAME => 14,
345
+ :T_ELEM_NS => 15,
346
+ :T_ELEM_END => 16,
347
+ :T_ATTR => 17,
348
+ :T_ATTR_NS => 18,
349
+ :T_XML_DECL_START => 19,
350
+ :T_XML_DECL_END => 20,
351
+ :T_PROC_INS_START => 21,
352
+ :T_PROC_INS_NAME => 22,
353
+ :T_PROC_INS_END => 23 }
354
+
355
+ racc_nt_base = 24
369
356
 
370
357
  racc_use_result_var = false
371
358
 
@@ -388,8 +375,10 @@ Racc_arg = [
388
375
  Racc_token_to_s_table = [
389
376
  "$end",
390
377
  "error",
391
- "T_STRING",
392
378
  "T_TEXT",
379
+ "T_STRING_SQUOTE",
380
+ "T_STRING_DQUOTE",
381
+ "T_STRING_BODY",
393
382
  "T_DOCTYPE_START",
394
383
  "T_DOCTYPE_END",
395
384
  "T_DOCTYPE_TYPE",
@@ -420,12 +409,16 @@ Racc_token_to_s_table = [
420
409
  "text",
421
410
  "xmldecl",
422
411
  "proc_ins",
412
+ "string",
423
413
  "element_open",
424
414
  "element_start",
425
415
  "attributes",
426
416
  "attributes_",
427
417
  "attribute",
428
- "attribute_name" ]
418
+ "attribute_name",
419
+ "string_dquote",
420
+ "string_squote",
421
+ "string_body" ]
429
422
 
430
423
  Racc_debug_parser = false
431
424
 
@@ -579,6 +572,34 @@ def _reduce_35(val, _values)
579
572
  on_text(val[0])
580
573
  end
581
574
 
575
+ # reduce 36 omitted
576
+
577
+ # reduce 37 omitted
578
+
579
+ def _reduce_38(val, _values)
580
+ ''
581
+ end
582
+
583
+ def _reduce_39(val, _values)
584
+ val[1]
585
+ end
586
+
587
+ def _reduce_40(val, _values)
588
+ ''
589
+ end
590
+
591
+ def _reduce_41(val, _values)
592
+ val[1]
593
+ end
594
+
595
+ def _reduce_42(val, _values)
596
+ val[0]
597
+ end
598
+
599
+ def _reduce_43(val, _values)
600
+ val[0] + val[1]
601
+ end
602
+
582
603
  def _reduce_none(val, _values)
583
604
  val[0]
584
605
  end