oga 1.2.3-java → 1.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/doc/css_selectors.md +1 -1
  3. data/lib/liboga.jar +0 -0
  4. data/lib/oga.rb +6 -1
  5. data/lib/oga/blacklist.rb +0 -10
  6. data/lib/oga/css/lexer.rb +530 -255
  7. data/lib/oga/css/parser.rb +232 -230
  8. data/lib/oga/entity_decoder.rb +0 -4
  9. data/lib/oga/html/entities.rb +0 -4
  10. data/lib/oga/html/parser.rb +0 -4
  11. data/lib/oga/html/sax_parser.rb +0 -4
  12. data/lib/oga/lru.rb +0 -26
  13. data/lib/oga/oga.rb +0 -8
  14. data/lib/oga/ruby/generator.rb +225 -0
  15. data/lib/oga/ruby/node.rb +189 -0
  16. data/lib/oga/version.rb +1 -1
  17. data/lib/oga/whitelist.rb +0 -6
  18. data/lib/oga/xml/attribute.rb +13 -20
  19. data/lib/oga/xml/cdata.rb +0 -4
  20. data/lib/oga/xml/character_node.rb +0 -8
  21. data/lib/oga/xml/comment.rb +0 -4
  22. data/lib/oga/xml/default_namespace.rb +0 -2
  23. data/lib/oga/xml/doctype.rb +0 -8
  24. data/lib/oga/xml/document.rb +10 -14
  25. data/lib/oga/xml/element.rb +1 -52
  26. data/lib/oga/xml/entities.rb +0 -26
  27. data/lib/oga/xml/expanded_name.rb +12 -0
  28. data/lib/oga/xml/html_void_elements.rb +0 -2
  29. data/lib/oga/xml/lexer.rb +0 -86
  30. data/lib/oga/xml/namespace.rb +0 -10
  31. data/lib/oga/xml/node.rb +18 -34
  32. data/lib/oga/xml/node_set.rb +0 -50
  33. data/lib/oga/xml/parser.rb +13 -50
  34. data/lib/oga/xml/processing_instruction.rb +0 -8
  35. data/lib/oga/xml/pull_parser.rb +0 -18
  36. data/lib/oga/xml/querying.rb +58 -19
  37. data/lib/oga/xml/sax_parser.rb +0 -18
  38. data/lib/oga/xml/text.rb +0 -12
  39. data/lib/oga/xml/traversal.rb +0 -4
  40. data/lib/oga/xml/xml_declaration.rb +0 -8
  41. data/lib/oga/xpath/compiler.rb +1568 -0
  42. data/lib/oga/xpath/conversion.rb +102 -0
  43. data/lib/oga/xpath/lexer.rb +1844 -1238
  44. data/lib/oga/xpath/parser.rb +182 -153
  45. metadata +7 -3
  46. data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -0,0 +1,12 @@
1
+ module Oga
2
+ module XML
3
+ module ExpandedName
4
+ # Returns the expanded name of the current Element or Attribute.
5
+ #
6
+ # @return [String]
7
+ def expanded_name
8
+ namespace_name ? "#{namespace_name}:#{name}" : name
9
+ end
10
+ end # ExpandedName
11
+ end # XML
12
+ end # Oga
@@ -1,12 +1,10 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Names of the HTML void elements that should be handled when HTML lexing
5
4
  # is enabled.
6
5
  #
7
6
  # @api private
8
7
  # @return [Oga::Whitelist]
9
- #
10
8
  HTML_VOID_ELEMENTS = Whitelist.new(%w{
11
9
  area base br col command embed hr img input keygen link meta param source
12
10
  track wbr
@@ -1,6 +1,5 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Low level lexer that supports both XML and HTML (using an extra option).
5
4
  # To lex HTML input set the `:html` option to `true` when creating an
6
5
  # instance of the lexer:
@@ -46,7 +45,6 @@ module Oga
46
45
  # Strict mode only applies to XML documents.
47
46
  #
48
47
  # @private
49
- #
50
48
  class Lexer
51
49
  # These are all constant/frozen to remove the need for String allocations
52
50
  # every time they are referenced in the lexer.
@@ -96,12 +94,9 @@ module Oga
96
94
  HTML_CLOSE_SELF[key.upcase] = HTML_CLOSE_SELF[key]
97
95
  end
98
96
 
99
- ##
100
97
  # Names of HTML tags of which the content should be lexed as-is.
101
- #
102
98
  LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE])
103
99
 
104
- ##
105
100
  # @param [String|IO] data The data to lex. This can either be a String or
106
101
  # an IO instance.
107
102
  #
@@ -113,7 +108,6 @@ module Oga
113
108
  #
114
109
  # @option options [TrueClass|FalseClass] :strict Enables/disables strict
115
110
  # parsing of XML documents, disabled by default.
116
- #
117
111
  def initialize(data, options = {})
118
112
  @data = data
119
113
  @html = options[:html]
@@ -122,11 +116,9 @@ module Oga
122
116
  reset
123
117
  end
124
118
 
125
- ##
126
119
  # Resets the internal state of the lexer. Typically you don't need to
127
120
  # call this method yourself as its called by #lex after lexing a given
128
121
  # String.
129
- #
130
122
  def reset
131
123
  @line = 1
132
124
  @elements = []
@@ -136,12 +128,10 @@ module Oga
136
128
  reset_native
137
129
  end
138
130
 
139
- ##
140
131
  # Yields the data to lex to the supplied block.
141
132
  #
142
133
  # @return [String]
143
134
  # @yieldparam [String]
144
- #
145
135
  def read_data
146
136
  if @data.is_a?(String)
147
137
  yield @data
@@ -157,7 +147,6 @@ module Oga
157
147
  end
158
148
  end
159
149
 
160
- ##
161
150
  # Gathers all the tokens for the input and returns them as an Array.
162
151
  #
163
152
  # This method resets the internal state of the lexer after consuming the
@@ -165,7 +154,6 @@ module Oga
165
154
  #
166
155
  # @see #advance
167
156
  # @return [Array]
168
- #
169
157
  def lex
170
158
  tokens = []
171
159
 
@@ -178,7 +166,6 @@ module Oga
178
166
  tokens
179
167
  end
180
168
 
181
- ##
182
169
  # Advances through the input and generates the corresponding tokens. Each
183
170
  # token is yielded to the supplied block.
184
171
  #
@@ -196,7 +183,6 @@ module Oga
196
183
  # @yieldparam [Symbol] type
197
184
  # @yieldparam [String] value
198
185
  # @yieldparam [Fixnum] line
199
- #
200
186
  def advance(&block)
201
187
  @block = block
202
188
 
@@ -212,44 +198,33 @@ module Oga
212
198
  @block = nil
213
199
  end
214
200
 
215
- ##
216
201
  # @return [TrueClass|FalseClass]
217
- #
218
202
  def html?
219
203
  @html == true
220
204
  end
221
205
 
222
- ##
223
206
  # @return [TrueClass|FalseClass]
224
- #
225
207
  def strict?
226
208
  @strict
227
209
  end
228
210
 
229
- ##
230
211
  # @return [TrueClass|FalseClass]
231
- #
232
212
  def html_script?
233
213
  html? && current_element == HTML_SCRIPT
234
214
  end
235
215
 
236
- ##
237
216
  # @return [TrueClass|FalseClass]
238
- #
239
217
  def html_style?
240
218
  html? && current_element == HTML_STYLE
241
219
  end
242
220
 
243
221
  private
244
222
 
245
- ##
246
223
  # @param [Fixnum] amount The amount of lines to advance.
247
- #
248
224
  def advance_line(amount = 1)
249
225
  @line += amount
250
226
  end
251
227
 
252
- ##
253
228
  # Calls the supplied block with the information of the current token.
254
229
  #
255
230
  # @param [Symbol] type The token type.
@@ -258,192 +233,145 @@ module Oga
258
233
  # @yieldparam [String] type
259
234
  # @yieldparam [String] value
260
235
  # @yieldparam [Fixnum] line
261
- #
262
236
  def add_token(type, value = nil)
263
237
  @block.call(type, value, @line)
264
238
  end
265
239
 
266
- ##
267
240
  # Returns the name of the element we're currently in.
268
241
  #
269
242
  # @return [String]
270
- #
271
243
  def current_element
272
244
  @elements.last
273
245
  end
274
246
 
275
- ##
276
247
  # Called when processing a single quote.
277
- #
278
248
  def on_string_squote
279
249
  add_token(:T_STRING_SQUOTE)
280
250
  end
281
251
 
282
- ##
283
252
  # Called when processing a double quote.
284
- #
285
253
  def on_string_dquote
286
254
  add_token(:T_STRING_DQUOTE)
287
255
  end
288
256
 
289
- ##
290
257
  # Called when processing the body of a string.
291
258
  #
292
259
  # @param [String] value The data between the quotes.
293
- #
294
260
  def on_string_body(value)
295
261
  add_token(:T_STRING_BODY, value)
296
262
  end
297
263
 
298
- ##
299
264
  # Called when a doctype starts.
300
- #
301
265
  def on_doctype_start
302
266
  add_token(:T_DOCTYPE_START)
303
267
  end
304
268
 
305
- ##
306
269
  # Called on the identifier specifying the type of the doctype.
307
270
  #
308
271
  # @param [String] value
309
- #
310
272
  def on_doctype_type(value)
311
273
  add_token(:T_DOCTYPE_TYPE, value)
312
274
  end
313
275
 
314
- ##
315
276
  # Called on the identifier specifying the name of the doctype.
316
277
  #
317
278
  # @param [String] value
318
- #
319
279
  def on_doctype_name(value)
320
280
  add_token(:T_DOCTYPE_NAME, value)
321
281
  end
322
282
 
323
- ##
324
283
  # Called on the end of a doctype.
325
- #
326
284
  def on_doctype_end
327
285
  add_token(:T_DOCTYPE_END)
328
286
  end
329
287
 
330
- ##
331
288
  # Called on an inline doctype block.
332
289
  #
333
290
  # @param [String] value
334
- #
335
291
  def on_doctype_inline(value)
336
292
  add_token(:T_DOCTYPE_INLINE, value)
337
293
  end
338
294
 
339
- ##
340
295
  # Called on the open CDATA tag.
341
- #
342
296
  def on_cdata_start
343
297
  add_token(:T_CDATA_START)
344
298
  end
345
299
 
346
- ##
347
300
  # Called on the closing CDATA tag.
348
- #
349
301
  def on_cdata_end
350
302
  add_token(:T_CDATA_END)
351
303
  end
352
304
 
353
- ##
354
305
  # Called for the body of a CDATA tag.
355
306
  #
356
307
  # @param [String] value
357
- #
358
308
  def on_cdata_body(value)
359
309
  add_token(:T_CDATA_BODY, value)
360
310
  end
361
311
 
362
- ##
363
312
  # Called on the open comment tag.
364
- #
365
313
  def on_comment_start
366
314
  add_token(:T_COMMENT_START)
367
315
  end
368
316
 
369
- ##
370
317
  # Called on the closing comment tag.
371
- #
372
318
  def on_comment_end
373
319
  add_token(:T_COMMENT_END)
374
320
  end
375
321
 
376
- ##
377
322
  # Called on a comment.
378
323
  #
379
324
  # @param [String] value
380
- #
381
325
  def on_comment_body(value)
382
326
  add_token(:T_COMMENT_BODY, value)
383
327
  end
384
328
 
385
- ##
386
329
  # Called on the start of an XML declaration tag.
387
- #
388
330
  def on_xml_decl_start
389
331
  add_token(:T_XML_DECL_START)
390
332
  end
391
333
 
392
- ##
393
334
  # Called on the end of an XML declaration tag.
394
- #
395
335
  def on_xml_decl_end
396
336
  add_token(:T_XML_DECL_END)
397
337
  end
398
338
 
399
- ##
400
339
  # Called on the start of a processing instruction.
401
- #
402
340
  def on_proc_ins_start
403
341
  add_token(:T_PROC_INS_START)
404
342
  end
405
343
 
406
- ##
407
344
  # Called on a processing instruction name.
408
345
  #
409
346
  # @param [String] value
410
- #
411
347
  def on_proc_ins_name(value)
412
348
  add_token(:T_PROC_INS_NAME, value)
413
349
  end
414
350
 
415
- ##
416
351
  # Called on the body of a processing instruction.
417
352
  #
418
353
  # @param [String] value
419
- #
420
354
  def on_proc_ins_body(value)
421
355
  add_token(:T_PROC_INS_BODY, value)
422
356
  end
423
357
 
424
- ##
425
358
  # Called on the end of a processing instruction.
426
- #
427
359
  def on_proc_ins_end
428
360
  add_token(:T_PROC_INS_END)
429
361
  end
430
362
 
431
- ##
432
363
  # Called on the name of an element.
433
364
  #
434
365
  # @param [String] name The name of the element, including namespace.
435
- #
436
366
  def on_element_name(name)
437
367
  before_html_element_name(name) if html?
438
368
 
439
369
  add_element(name)
440
370
  end
441
371
 
442
- ##
443
372
  # Handles inserting of any missing tags whenever a new HTML tag is opened.
444
373
  #
445
374
  # @param [String] name
446
- #
447
375
  def before_html_element_name(name)
448
376
  close_current = HTML_CLOSE_SELF[current_element]
449
377
 
@@ -463,27 +391,21 @@ module Oga
463
391
  end
464
392
  end
465
393
 
466
- ##
467
394
  # @param [String] name
468
- #
469
395
  def add_element(name)
470
396
  @elements << name
471
397
 
472
398
  add_token(:T_ELEM_NAME, name)
473
399
  end
474
400
 
475
- ##
476
401
  # Called on the element namespace.
477
402
  #
478
403
  # @param [String] namespace
479
- #
480
404
  def on_element_ns(namespace)
481
405
  add_token(:T_ELEM_NS, namespace)
482
406
  end
483
407
 
484
- ##
485
408
  # Called on the closing `>` of the open tag of an element.
486
- #
487
409
  def on_element_open_end
488
410
  return unless html?
489
411
 
@@ -496,12 +418,10 @@ module Oga
496
418
  end
497
419
  end
498
420
 
499
- ##
500
421
  # Called on the closing tag of an element.
501
422
  #
502
423
  # @param [String] name The name of the element (minus namespace
503
424
  # prefix). This is not set for self closing tags.
504
- #
505
425
  def on_element_end(name = nil)
506
426
  return if @elements.empty?
507
427
 
@@ -520,31 +440,25 @@ module Oga
520
440
  @elements.pop
521
441
  end
522
442
 
523
- ##
524
443
  # Called on regular text values.
525
444
  #
526
445
  # @param [String] value
527
- #
528
446
  def on_text(value)
529
447
  return if value.empty?
530
448
 
531
449
  add_token(:T_TEXT, value)
532
450
  end
533
451
 
534
- ##
535
452
  # Called on attribute namespaces.
536
453
  #
537
454
  # @param [String] value
538
- #
539
455
  def on_attribute_ns(value)
540
456
  add_token(:T_ATTR_NS, value)
541
457
  end
542
458
 
543
- ##
544
459
  # Called on tag attributes.
545
460
  #
546
461
  # @param [String] value
547
- #
548
462
  def on_attribute(value)
549
463
  add_token(:T_ATTR, value)
550
464
  end
@@ -1,9 +1,7 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # The Namespace class contains information about XML namespaces such as the
5
4
  # name and URI.
6
- #
7
5
  class Namespace
8
6
  # @return [String]
9
7
  attr_accessor :name
@@ -11,35 +9,27 @@ module Oga
11
9
  # @return [String]
12
10
  attr_accessor :uri
13
11
 
14
- ##
15
12
  # @param [Hash] options
16
13
  #
17
14
  # @option options [String] :name
18
15
  # @option options [String] :uri
19
- #
20
16
  def initialize(options = {})
21
17
  @name = options[:name]
22
18
  @uri = options[:uri]
23
19
  end
24
20
 
25
- ##
26
21
  # @return [String]
27
- #
28
22
  def to_s
29
23
  name.to_s
30
24
  end
31
25
 
32
- ##
33
26
  # @return [String]
34
- #
35
27
  def inspect
36
28
  "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
37
29
  end
38
30
 
39
- ##
40
31
  # @param [Oga::XML::Namespace] other
41
32
  # @return [TrueClass|FalseClass]
42
- #
43
33
  def ==(other)
44
34
  other.is_a?(self.class) && name == other.name && uri == other.uri
45
35
  end