oga 1.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/doc/css_selectors.md +1 -1
  3. data/lib/oga.rb +6 -1
  4. data/lib/oga/blacklist.rb +0 -10
  5. data/lib/oga/css/lexer.rb +530 -255
  6. data/lib/oga/css/parser.rb +232 -230
  7. data/lib/oga/entity_decoder.rb +0 -4
  8. data/lib/oga/html/entities.rb +0 -4
  9. data/lib/oga/html/parser.rb +0 -4
  10. data/lib/oga/html/sax_parser.rb +0 -4
  11. data/lib/oga/lru.rb +0 -26
  12. data/lib/oga/oga.rb +0 -8
  13. data/lib/oga/ruby/generator.rb +225 -0
  14. data/lib/oga/ruby/node.rb +189 -0
  15. data/lib/oga/version.rb +1 -1
  16. data/lib/oga/whitelist.rb +0 -6
  17. data/lib/oga/xml/attribute.rb +13 -20
  18. data/lib/oga/xml/cdata.rb +0 -4
  19. data/lib/oga/xml/character_node.rb +0 -8
  20. data/lib/oga/xml/comment.rb +0 -4
  21. data/lib/oga/xml/default_namespace.rb +0 -2
  22. data/lib/oga/xml/doctype.rb +0 -8
  23. data/lib/oga/xml/document.rb +10 -14
  24. data/lib/oga/xml/element.rb +1 -52
  25. data/lib/oga/xml/entities.rb +0 -26
  26. data/lib/oga/xml/expanded_name.rb +12 -0
  27. data/lib/oga/xml/html_void_elements.rb +0 -2
  28. data/lib/oga/xml/lexer.rb +0 -86
  29. data/lib/oga/xml/namespace.rb +0 -10
  30. data/lib/oga/xml/node.rb +18 -34
  31. data/lib/oga/xml/node_set.rb +0 -50
  32. data/lib/oga/xml/parser.rb +13 -50
  33. data/lib/oga/xml/processing_instruction.rb +0 -8
  34. data/lib/oga/xml/pull_parser.rb +0 -18
  35. data/lib/oga/xml/querying.rb +58 -19
  36. data/lib/oga/xml/sax_parser.rb +0 -18
  37. data/lib/oga/xml/text.rb +0 -12
  38. data/lib/oga/xml/traversal.rb +0 -4
  39. data/lib/oga/xml/xml_declaration.rb +0 -8
  40. data/lib/oga/xpath/compiler.rb +1568 -0
  41. data/lib/oga/xpath/conversion.rb +102 -0
  42. data/lib/oga/xpath/lexer.rb +1844 -1238
  43. data/lib/oga/xpath/parser.rb +182 -153
  44. metadata +7 -3
  45. data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -0,0 +1,12 @@
1
+ module Oga
2
+ module XML
3
+ module ExpandedName
4
+ # Returns the expanded name of the current Element or Attribute.
5
+ #
6
+ # @return [String]
7
+ def expanded_name
8
+ namespace_name ? "#{namespace_name}:#{name}" : name
9
+ end
10
+ end # ExpandedName
11
+ end # XML
12
+ end # Oga
@@ -1,12 +1,10 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Names of the HTML void elements that should be handled when HTML lexing
5
4
  # is enabled.
6
5
  #
7
6
  # @api private
8
7
  # @return [Oga::Whitelist]
9
- #
10
8
  HTML_VOID_ELEMENTS = Whitelist.new(%w{
11
9
  area base br col command embed hr img input keygen link meta param source
12
10
  track wbr
@@ -1,6 +1,5 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Low level lexer that supports both XML and HTML (using an extra option).
5
4
  # To lex HTML input set the `:html` option to `true` when creating an
6
5
  # instance of the lexer:
@@ -46,7 +45,6 @@ module Oga
46
45
  # Strict mode only applies to XML documents.
47
46
  #
48
47
  # @private
49
- #
50
48
  class Lexer
51
49
  # These are all constant/frozen to remove the need for String allocations
52
50
  # every time they are referenced in the lexer.
@@ -96,12 +94,9 @@ module Oga
96
94
  HTML_CLOSE_SELF[key.upcase] = HTML_CLOSE_SELF[key]
97
95
  end
98
96
 
99
- ##
100
97
  # Names of HTML tags of which the content should be lexed as-is.
101
- #
102
98
  LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE])
103
99
 
104
- ##
105
100
  # @param [String|IO] data The data to lex. This can either be a String or
106
101
  # an IO instance.
107
102
  #
@@ -113,7 +108,6 @@ module Oga
113
108
  #
114
109
  # @option options [TrueClass|FalseClass] :strict Enables/disables strict
115
110
  # parsing of XML documents, disabled by default.
116
- #
117
111
  def initialize(data, options = {})
118
112
  @data = data
119
113
  @html = options[:html]
@@ -122,11 +116,9 @@ module Oga
122
116
  reset
123
117
  end
124
118
 
125
- ##
126
119
  # Resets the internal state of the lexer. Typically you don't need to
127
120
  # call this method yourself as it's called by #lex after lexing a given
128
121
  # String.
129
- #
130
122
  def reset
131
123
  @line = 1
132
124
  @elements = []
@@ -136,12 +128,10 @@ module Oga
136
128
  reset_native
137
129
  end
138
130
 
139
- ##
140
131
  # Yields the data to lex to the supplied block.
141
132
  #
142
133
  # @return [String]
143
134
  # @yieldparam [String]
144
- #
145
135
  def read_data
146
136
  if @data.is_a?(String)
147
137
  yield @data
@@ -157,7 +147,6 @@ module Oga
157
147
  end
158
148
  end
159
149
 
160
- ##
161
150
  # Gathers all the tokens for the input and returns them as an Array.
162
151
  #
163
152
  # This method resets the internal state of the lexer after consuming the
@@ -165,7 +154,6 @@ module Oga
165
154
  #
166
155
  # @see #advance
167
156
  # @return [Array]
168
- #
169
157
  def lex
170
158
  tokens = []
171
159
 
@@ -178,7 +166,6 @@ module Oga
178
166
  tokens
179
167
  end
180
168
 
181
- ##
182
169
  # Advances through the input and generates the corresponding tokens. Each
183
170
  # token is yielded to the supplied block.
184
171
  #
@@ -196,7 +183,6 @@ module Oga
196
183
  # @yieldparam [Symbol] type
197
184
  # @yieldparam [String] value
198
185
  # @yieldparam [Fixnum] line
199
- #
200
186
  def advance(&block)
201
187
  @block = block
202
188
 
@@ -212,44 +198,33 @@ module Oga
212
198
  @block = nil
213
199
  end
214
200
 
215
- ##
216
201
  # @return [TrueClass|FalseClass]
217
- #
218
202
  def html?
219
203
  @html == true
220
204
  end
221
205
 
222
- ##
223
206
  # @return [TrueClass|FalseClass]
224
- #
225
207
  def strict?
226
208
  @strict
227
209
  end
228
210
 
229
- ##
230
211
  # @return [TrueClass|FalseClass]
231
- #
232
212
  def html_script?
233
213
  html? && current_element == HTML_SCRIPT
234
214
  end
235
215
 
236
- ##
237
216
  # @return [TrueClass|FalseClass]
238
- #
239
217
  def html_style?
240
218
  html? && current_element == HTML_STYLE
241
219
  end
242
220
 
243
221
  private
244
222
 
245
- ##
246
223
  # @param [Fixnum] amount The amount of lines to advance.
247
- #
248
224
  def advance_line(amount = 1)
249
225
  @line += amount
250
226
  end
251
227
 
252
- ##
253
228
  # Calls the supplied block with the information of the current token.
254
229
  #
255
230
  # @param [Symbol] type The token type.
@@ -258,192 +233,145 @@ module Oga
258
233
  # @yieldparam [String] type
259
234
  # @yieldparam [String] value
260
235
  # @yieldparam [Fixnum] line
261
- #
262
236
  def add_token(type, value = nil)
263
237
  @block.call(type, value, @line)
264
238
  end
265
239
 
266
- ##
267
240
  # Returns the name of the element we're currently in.
268
241
  #
269
242
  # @return [String]
270
- #
271
243
  def current_element
272
244
  @elements.last
273
245
  end
274
246
 
275
- ##
276
247
  # Called when processing a single quote.
277
- #
278
248
  def on_string_squote
279
249
  add_token(:T_STRING_SQUOTE)
280
250
  end
281
251
 
282
- ##
283
252
  # Called when processing a double quote.
284
- #
285
253
  def on_string_dquote
286
254
  add_token(:T_STRING_DQUOTE)
287
255
  end
288
256
 
289
- ##
290
257
  # Called when processing the body of a string.
291
258
  #
292
259
  # @param [String] value The data between the quotes.
293
- #
294
260
  def on_string_body(value)
295
261
  add_token(:T_STRING_BODY, value)
296
262
  end
297
263
 
298
- ##
299
264
  # Called when a doctype starts.
300
- #
301
265
  def on_doctype_start
302
266
  add_token(:T_DOCTYPE_START)
303
267
  end
304
268
 
305
- ##
306
269
  # Called on the identifier specifying the type of the doctype.
307
270
  #
308
271
  # @param [String] value
309
- #
310
272
  def on_doctype_type(value)
311
273
  add_token(:T_DOCTYPE_TYPE, value)
312
274
  end
313
275
 
314
- ##
315
276
  # Called on the identifier specifying the name of the doctype.
316
277
  #
317
278
  # @param [String] value
318
- #
319
279
  def on_doctype_name(value)
320
280
  add_token(:T_DOCTYPE_NAME, value)
321
281
  end
322
282
 
323
- ##
324
283
  # Called on the end of a doctype.
325
- #
326
284
  def on_doctype_end
327
285
  add_token(:T_DOCTYPE_END)
328
286
  end
329
287
 
330
- ##
331
288
  # Called on an inline doctype block.
332
289
  #
333
290
  # @param [String] value
334
- #
335
291
  def on_doctype_inline(value)
336
292
  add_token(:T_DOCTYPE_INLINE, value)
337
293
  end
338
294
 
339
- ##
340
295
  # Called on the open CDATA tag.
341
- #
342
296
  def on_cdata_start
343
297
  add_token(:T_CDATA_START)
344
298
  end
345
299
 
346
- ##
347
300
  # Called on the closing CDATA tag.
348
- #
349
301
  def on_cdata_end
350
302
  add_token(:T_CDATA_END)
351
303
  end
352
304
 
353
- ##
354
305
  # Called for the body of a CDATA tag.
355
306
  #
356
307
  # @param [String] value
357
- #
358
308
  def on_cdata_body(value)
359
309
  add_token(:T_CDATA_BODY, value)
360
310
  end
361
311
 
362
- ##
363
312
  # Called on the open comment tag.
364
- #
365
313
  def on_comment_start
366
314
  add_token(:T_COMMENT_START)
367
315
  end
368
316
 
369
- ##
370
317
  # Called on the closing comment tag.
371
- #
372
318
  def on_comment_end
373
319
  add_token(:T_COMMENT_END)
374
320
  end
375
321
 
376
- ##
377
322
  # Called on a comment.
378
323
  #
379
324
  # @param [String] value
380
- #
381
325
  def on_comment_body(value)
382
326
  add_token(:T_COMMENT_BODY, value)
383
327
  end
384
328
 
385
- ##
386
329
  # Called on the start of an XML declaration tag.
387
- #
388
330
  def on_xml_decl_start
389
331
  add_token(:T_XML_DECL_START)
390
332
  end
391
333
 
392
- ##
393
334
  # Called on the end of an XML declaration tag.
394
- #
395
335
  def on_xml_decl_end
396
336
  add_token(:T_XML_DECL_END)
397
337
  end
398
338
 
399
- ##
400
339
  # Called on the start of a processing instruction.
401
- #
402
340
  def on_proc_ins_start
403
341
  add_token(:T_PROC_INS_START)
404
342
  end
405
343
 
406
- ##
407
344
  # Called on a processing instruction name.
408
345
  #
409
346
  # @param [String] value
410
- #
411
347
  def on_proc_ins_name(value)
412
348
  add_token(:T_PROC_INS_NAME, value)
413
349
  end
414
350
 
415
- ##
416
351
  # Called on the body of a processing instruction.
417
352
  #
418
353
  # @param [String] value
419
- #
420
354
  def on_proc_ins_body(value)
421
355
  add_token(:T_PROC_INS_BODY, value)
422
356
  end
423
357
 
424
- ##
425
358
  # Called on the end of a processing instruction.
426
- #
427
359
  def on_proc_ins_end
428
360
  add_token(:T_PROC_INS_END)
429
361
  end
430
362
 
431
- ##
432
363
  # Called on the name of an element.
433
364
  #
434
365
  # @param [String] name The name of the element, including namespace.
435
- #
436
366
  def on_element_name(name)
437
367
  before_html_element_name(name) if html?
438
368
 
439
369
  add_element(name)
440
370
  end
441
371
 
442
- ##
443
372
  # Handles inserting of any missing tags whenever a new HTML tag is opened.
444
373
  #
445
374
  # @param [String] name
446
- #
447
375
  def before_html_element_name(name)
448
376
  close_current = HTML_CLOSE_SELF[current_element]
449
377
 
@@ -463,27 +391,21 @@ module Oga
463
391
  end
464
392
  end
465
393
 
466
- ##
467
394
  # @param [String] name
468
- #
469
395
  def add_element(name)
470
396
  @elements << name
471
397
 
472
398
  add_token(:T_ELEM_NAME, name)
473
399
  end
474
400
 
475
- ##
476
401
  # Called on the element namespace.
477
402
  #
478
403
  # @param [String] namespace
479
- #
480
404
  def on_element_ns(namespace)
481
405
  add_token(:T_ELEM_NS, namespace)
482
406
  end
483
407
 
484
- ##
485
408
  # Called on the closing `>` of the open tag of an element.
486
- #
487
409
  def on_element_open_end
488
410
  return unless html?
489
411
 
@@ -496,12 +418,10 @@ module Oga
496
418
  end
497
419
  end
498
420
 
499
- ##
500
421
  # Called on the closing tag of an element.
501
422
  #
502
423
  # @param [String] name The name of the element (minus namespace
503
424
  # prefix). This is not set for self closing tags.
504
- #
505
425
  def on_element_end(name = nil)
506
426
  return if @elements.empty?
507
427
 
@@ -520,31 +440,25 @@ module Oga
520
440
  @elements.pop
521
441
  end
522
442
 
523
- ##
524
443
  # Called on regular text values.
525
444
  #
526
445
  # @param [String] value
527
- #
528
446
  def on_text(value)
529
447
  return if value.empty?
530
448
 
531
449
  add_token(:T_TEXT, value)
532
450
  end
533
451
 
534
- ##
535
452
  # Called on attribute namespaces.
536
453
  #
537
454
  # @param [String] value
538
- #
539
455
  def on_attribute_ns(value)
540
456
  add_token(:T_ATTR_NS, value)
541
457
  end
542
458
 
543
- ##
544
459
  # Called on tag attributes.
545
460
  #
546
461
  # @param [String] value
547
- #
548
462
  def on_attribute(value)
549
463
  add_token(:T_ATTR, value)
550
464
  end
@@ -1,9 +1,7 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # The Namespace class contains information about XML namespaces such as the
5
4
  # name and URI.
6
- #
7
5
  class Namespace
8
6
  # @return [String]
9
7
  attr_accessor :name
@@ -11,35 +9,27 @@ module Oga
11
9
  # @return [String]
12
10
  attr_accessor :uri
13
11
 
14
- ##
15
12
  # @param [Hash] options
16
13
  #
17
14
  # @option options [String] :name
18
15
  # @option options [String] :uri
19
- #
20
16
  def initialize(options = {})
21
17
  @name = options[:name]
22
18
  @uri = options[:uri]
23
19
  end
24
20
 
25
- ##
26
21
  # @return [String]
27
- #
28
22
  def to_s
29
23
  name.to_s
30
24
  end
31
25
 
32
- ##
33
26
  # @return [String]
34
- #
35
27
  def inspect
36
28
  "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
37
29
  end
38
30
 
39
- ##
40
31
  # @param [Oga::XML::Namespace] other
41
32
  # @return [TrueClass|FalseClass]
42
- #
43
33
  def ==(other)
44
34
  other.is_a?(self.class) && name == other.name && uri == other.uri
45
35
  end