oga 1.2.3-java → 1.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/css_selectors.md +1 -1
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +6 -1
- data/lib/oga/blacklist.rb +0 -10
- data/lib/oga/css/lexer.rb +530 -255
- data/lib/oga/css/parser.rb +232 -230
- data/lib/oga/entity_decoder.rb +0 -4
- data/lib/oga/html/entities.rb +0 -4
- data/lib/oga/html/parser.rb +0 -4
- data/lib/oga/html/sax_parser.rb +0 -4
- data/lib/oga/lru.rb +0 -26
- data/lib/oga/oga.rb +0 -8
- data/lib/oga/ruby/generator.rb +225 -0
- data/lib/oga/ruby/node.rb +189 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +0 -6
- data/lib/oga/xml/attribute.rb +13 -20
- data/lib/oga/xml/cdata.rb +0 -4
- data/lib/oga/xml/character_node.rb +0 -8
- data/lib/oga/xml/comment.rb +0 -4
- data/lib/oga/xml/default_namespace.rb +0 -2
- data/lib/oga/xml/doctype.rb +0 -8
- data/lib/oga/xml/document.rb +10 -14
- data/lib/oga/xml/element.rb +1 -52
- data/lib/oga/xml/entities.rb +0 -26
- data/lib/oga/xml/expanded_name.rb +12 -0
- data/lib/oga/xml/html_void_elements.rb +0 -2
- data/lib/oga/xml/lexer.rb +0 -86
- data/lib/oga/xml/namespace.rb +0 -10
- data/lib/oga/xml/node.rb +18 -34
- data/lib/oga/xml/node_set.rb +0 -50
- data/lib/oga/xml/parser.rb +13 -50
- data/lib/oga/xml/processing_instruction.rb +0 -8
- data/lib/oga/xml/pull_parser.rb +0 -18
- data/lib/oga/xml/querying.rb +58 -19
- data/lib/oga/xml/sax_parser.rb +0 -18
- data/lib/oga/xml/text.rb +0 -12
- data/lib/oga/xml/traversal.rb +0 -4
- data/lib/oga/xml/xml_declaration.rb +0 -8
- data/lib/oga/xpath/compiler.rb +1568 -0
- data/lib/oga/xpath/conversion.rb +102 -0
- data/lib/oga/xpath/lexer.rb +1844 -1238
- data/lib/oga/xpath/parser.rb +182 -153
- metadata +7 -3
- data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -0,0 +1,12 @@
|
|
1
|
+
module Oga
|
2
|
+
module XML
|
3
|
+
module ExpandedName
|
4
|
+
# Returns the expanded name of the current Element or Attribute.
|
5
|
+
#
|
6
|
+
# @return [String]
|
7
|
+
def expanded_name
|
8
|
+
namespace_name ? "#{namespace_name}:#{name}" : name
|
9
|
+
end
|
10
|
+
end # ExpandedName
|
11
|
+
end # XML
|
12
|
+
end # Oga
|
@@ -1,12 +1,10 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Names of the HTML void elements that should be handled when HTML lexing
|
5
4
|
# is enabled.
|
6
5
|
#
|
7
6
|
# @api private
|
8
7
|
# @return [Oga::Whitelist]
|
9
|
-
#
|
10
8
|
HTML_VOID_ELEMENTS = Whitelist.new(%w{
|
11
9
|
area base br col command embed hr img input keygen link meta param source
|
12
10
|
track wbr
|
data/lib/oga/xml/lexer.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Low level lexer that supports both XML and HTML (using an extra option).
|
5
4
|
# To lex HTML input set the `:html` option to `true` when creating an
|
6
5
|
# instance of the lexer:
|
@@ -46,7 +45,6 @@ module Oga
|
|
46
45
|
# Strict mode only applies to XML documents.
|
47
46
|
#
|
48
47
|
# @private
|
49
|
-
#
|
50
48
|
class Lexer
|
51
49
|
# These are all constant/frozen to remove the need for String allocations
|
52
50
|
# every time they are referenced in the lexer.
|
@@ -96,12 +94,9 @@ module Oga
|
|
96
94
|
HTML_CLOSE_SELF[key.upcase] = HTML_CLOSE_SELF[key]
|
97
95
|
end
|
98
96
|
|
99
|
-
##
|
100
97
|
# Names of HTML tags of which the content should be lexed as-is.
|
101
|
-
#
|
102
98
|
LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE])
|
103
99
|
|
104
|
-
##
|
105
100
|
# @param [String|IO] data The data to lex. This can either be a String or
|
106
101
|
# an IO instance.
|
107
102
|
#
|
@@ -113,7 +108,6 @@ module Oga
|
|
113
108
|
#
|
114
109
|
# @option options [TrueClass|FalseClass] :strict Enables/disables strict
|
115
110
|
# parsing of XML documents, disabled by default.
|
116
|
-
#
|
117
111
|
def initialize(data, options = {})
|
118
112
|
@data = data
|
119
113
|
@html = options[:html]
|
@@ -122,11 +116,9 @@ module Oga
|
|
122
116
|
reset
|
123
117
|
end
|
124
118
|
|
125
|
-
##
|
126
119
|
# Resets the internal state of the lexer. Typically you don't need to
|
127
120
|
# call this method yourself as it's called by #lex after lexing a given
|
128
121
|
# String.
|
129
|
-
#
|
130
122
|
def reset
|
131
123
|
@line = 1
|
132
124
|
@elements = []
|
@@ -136,12 +128,10 @@ module Oga
|
|
136
128
|
reset_native
|
137
129
|
end
|
138
130
|
|
139
|
-
##
|
140
131
|
# Yields the data to lex to the supplied block.
|
141
132
|
#
|
142
133
|
# @return [String]
|
143
134
|
# @yieldparam [String]
|
144
|
-
#
|
145
135
|
def read_data
|
146
136
|
if @data.is_a?(String)
|
147
137
|
yield @data
|
@@ -157,7 +147,6 @@ module Oga
|
|
157
147
|
end
|
158
148
|
end
|
159
149
|
|
160
|
-
##
|
161
150
|
# Gathers all the tokens for the input and returns them as an Array.
|
162
151
|
#
|
163
152
|
# This method resets the internal state of the lexer after consuming the
|
@@ -165,7 +154,6 @@ module Oga
|
|
165
154
|
#
|
166
155
|
# @see #advance
|
167
156
|
# @return [Array]
|
168
|
-
#
|
169
157
|
def lex
|
170
158
|
tokens = []
|
171
159
|
|
@@ -178,7 +166,6 @@ module Oga
|
|
178
166
|
tokens
|
179
167
|
end
|
180
168
|
|
181
|
-
##
|
182
169
|
# Advances through the input and generates the corresponding tokens. Each
|
183
170
|
# token is yielded to the supplied block.
|
184
171
|
#
|
@@ -196,7 +183,6 @@ module Oga
|
|
196
183
|
# @yieldparam [Symbol] type
|
197
184
|
# @yieldparam [String] value
|
198
185
|
# @yieldparam [Fixnum] line
|
199
|
-
#
|
200
186
|
def advance(&block)
|
201
187
|
@block = block
|
202
188
|
|
@@ -212,44 +198,33 @@ module Oga
|
|
212
198
|
@block = nil
|
213
199
|
end
|
214
200
|
|
215
|
-
##
|
216
201
|
# @return [TrueClass|FalseClass]
|
217
|
-
#
|
218
202
|
def html?
|
219
203
|
@html == true
|
220
204
|
end
|
221
205
|
|
222
|
-
##
|
223
206
|
# @return [TrueClass|FalseClass]
|
224
|
-
#
|
225
207
|
def strict?
|
226
208
|
@strict
|
227
209
|
end
|
228
210
|
|
229
|
-
##
|
230
211
|
# @return [TrueClass|FalseClass]
|
231
|
-
#
|
232
212
|
def html_script?
|
233
213
|
html? && current_element == HTML_SCRIPT
|
234
214
|
end
|
235
215
|
|
236
|
-
##
|
237
216
|
# @return [TrueClass|FalseClass]
|
238
|
-
#
|
239
217
|
def html_style?
|
240
218
|
html? && current_element == HTML_STYLE
|
241
219
|
end
|
242
220
|
|
243
221
|
private
|
244
222
|
|
245
|
-
##
|
246
223
|
# @param [Fixnum] amount The amount of lines to advance.
|
247
|
-
#
|
248
224
|
def advance_line(amount = 1)
|
249
225
|
@line += amount
|
250
226
|
end
|
251
227
|
|
252
|
-
##
|
253
228
|
# Calls the supplied block with the information of the current token.
|
254
229
|
#
|
255
230
|
# @param [Symbol] type The token type.
|
@@ -258,192 +233,145 @@ module Oga
|
|
258
233
|
# @yieldparam [String] type
|
259
234
|
# @yieldparam [String] value
|
260
235
|
# @yieldparam [Fixnum] line
|
261
|
-
#
|
262
236
|
def add_token(type, value = nil)
|
263
237
|
@block.call(type, value, @line)
|
264
238
|
end
|
265
239
|
|
266
|
-
##
|
267
240
|
# Returns the name of the element we're currently in.
|
268
241
|
#
|
269
242
|
# @return [String]
|
270
|
-
#
|
271
243
|
def current_element
|
272
244
|
@elements.last
|
273
245
|
end
|
274
246
|
|
275
|
-
##
|
276
247
|
# Called when processing a single quote.
|
277
|
-
#
|
278
248
|
def on_string_squote
|
279
249
|
add_token(:T_STRING_SQUOTE)
|
280
250
|
end
|
281
251
|
|
282
|
-
##
|
283
252
|
# Called when processing a double quote.
|
284
|
-
#
|
285
253
|
def on_string_dquote
|
286
254
|
add_token(:T_STRING_DQUOTE)
|
287
255
|
end
|
288
256
|
|
289
|
-
##
|
290
257
|
# Called when processing the body of a string.
|
291
258
|
#
|
292
259
|
# @param [String] value The data between the quotes.
|
293
|
-
#
|
294
260
|
def on_string_body(value)
|
295
261
|
add_token(:T_STRING_BODY, value)
|
296
262
|
end
|
297
263
|
|
298
|
-
##
|
299
264
|
# Called when a doctype starts.
|
300
|
-
#
|
301
265
|
def on_doctype_start
|
302
266
|
add_token(:T_DOCTYPE_START)
|
303
267
|
end
|
304
268
|
|
305
|
-
##
|
306
269
|
# Called on the identifier specifying the type of the doctype.
|
307
270
|
#
|
308
271
|
# @param [String] value
|
309
|
-
#
|
310
272
|
def on_doctype_type(value)
|
311
273
|
add_token(:T_DOCTYPE_TYPE, value)
|
312
274
|
end
|
313
275
|
|
314
|
-
##
|
315
276
|
# Called on the identifier specifying the name of the doctype.
|
316
277
|
#
|
317
278
|
# @param [String] value
|
318
|
-
#
|
319
279
|
def on_doctype_name(value)
|
320
280
|
add_token(:T_DOCTYPE_NAME, value)
|
321
281
|
end
|
322
282
|
|
323
|
-
##
|
324
283
|
# Called on the end of a doctype.
|
325
|
-
#
|
326
284
|
def on_doctype_end
|
327
285
|
add_token(:T_DOCTYPE_END)
|
328
286
|
end
|
329
287
|
|
330
|
-
##
|
331
288
|
# Called on an inline doctype block.
|
332
289
|
#
|
333
290
|
# @param [String] value
|
334
|
-
#
|
335
291
|
def on_doctype_inline(value)
|
336
292
|
add_token(:T_DOCTYPE_INLINE, value)
|
337
293
|
end
|
338
294
|
|
339
|
-
##
|
340
295
|
# Called on the open CDATA tag.
|
341
|
-
#
|
342
296
|
def on_cdata_start
|
343
297
|
add_token(:T_CDATA_START)
|
344
298
|
end
|
345
299
|
|
346
|
-
##
|
347
300
|
# Called on the closing CDATA tag.
|
348
|
-
#
|
349
301
|
def on_cdata_end
|
350
302
|
add_token(:T_CDATA_END)
|
351
303
|
end
|
352
304
|
|
353
|
-
##
|
354
305
|
# Called for the body of a CDATA tag.
|
355
306
|
#
|
356
307
|
# @param [String] value
|
357
|
-
#
|
358
308
|
def on_cdata_body(value)
|
359
309
|
add_token(:T_CDATA_BODY, value)
|
360
310
|
end
|
361
311
|
|
362
|
-
##
|
363
312
|
# Called on the open comment tag.
|
364
|
-
#
|
365
313
|
def on_comment_start
|
366
314
|
add_token(:T_COMMENT_START)
|
367
315
|
end
|
368
316
|
|
369
|
-
##
|
370
317
|
# Called on the closing comment tag.
|
371
|
-
#
|
372
318
|
def on_comment_end
|
373
319
|
add_token(:T_COMMENT_END)
|
374
320
|
end
|
375
321
|
|
376
|
-
##
|
377
322
|
# Called on a comment.
|
378
323
|
#
|
379
324
|
# @param [String] value
|
380
|
-
#
|
381
325
|
def on_comment_body(value)
|
382
326
|
add_token(:T_COMMENT_BODY, value)
|
383
327
|
end
|
384
328
|
|
385
|
-
##
|
386
329
|
# Called on the start of an XML declaration tag.
|
387
|
-
#
|
388
330
|
def on_xml_decl_start
|
389
331
|
add_token(:T_XML_DECL_START)
|
390
332
|
end
|
391
333
|
|
392
|
-
##
|
393
334
|
# Called on the end of an XML declaration tag.
|
394
|
-
#
|
395
335
|
def on_xml_decl_end
|
396
336
|
add_token(:T_XML_DECL_END)
|
397
337
|
end
|
398
338
|
|
399
|
-
##
|
400
339
|
# Called on the start of a processing instruction.
|
401
|
-
#
|
402
340
|
def on_proc_ins_start
|
403
341
|
add_token(:T_PROC_INS_START)
|
404
342
|
end
|
405
343
|
|
406
|
-
##
|
407
344
|
# Called on a processing instruction name.
|
408
345
|
#
|
409
346
|
# @param [String] value
|
410
|
-
#
|
411
347
|
def on_proc_ins_name(value)
|
412
348
|
add_token(:T_PROC_INS_NAME, value)
|
413
349
|
end
|
414
350
|
|
415
|
-
##
|
416
351
|
# Called on the body of a processing instruction.
|
417
352
|
#
|
418
353
|
# @param [String] value
|
419
|
-
#
|
420
354
|
def on_proc_ins_body(value)
|
421
355
|
add_token(:T_PROC_INS_BODY, value)
|
422
356
|
end
|
423
357
|
|
424
|
-
##
|
425
358
|
# Called on the end of a processing instruction.
|
426
|
-
#
|
427
359
|
def on_proc_ins_end
|
428
360
|
add_token(:T_PROC_INS_END)
|
429
361
|
end
|
430
362
|
|
431
|
-
##
|
432
363
|
# Called on the name of an element.
|
433
364
|
#
|
434
365
|
# @param [String] name The name of the element, including namespace.
|
435
|
-
#
|
436
366
|
def on_element_name(name)
|
437
367
|
before_html_element_name(name) if html?
|
438
368
|
|
439
369
|
add_element(name)
|
440
370
|
end
|
441
371
|
|
442
|
-
##
|
443
372
|
# Handles inserting of any missing tags whenever a new HTML tag is opened.
|
444
373
|
#
|
445
374
|
# @param [String] name
|
446
|
-
#
|
447
375
|
def before_html_element_name(name)
|
448
376
|
close_current = HTML_CLOSE_SELF[current_element]
|
449
377
|
|
@@ -463,27 +391,21 @@ module Oga
|
|
463
391
|
end
|
464
392
|
end
|
465
393
|
|
466
|
-
##
|
467
394
|
# @param [String] name
|
468
|
-
#
|
469
395
|
def add_element(name)
|
470
396
|
@elements << name
|
471
397
|
|
472
398
|
add_token(:T_ELEM_NAME, name)
|
473
399
|
end
|
474
400
|
|
475
|
-
##
|
476
401
|
# Called on the element namespace.
|
477
402
|
#
|
478
403
|
# @param [String] namespace
|
479
|
-
#
|
480
404
|
def on_element_ns(namespace)
|
481
405
|
add_token(:T_ELEM_NS, namespace)
|
482
406
|
end
|
483
407
|
|
484
|
-
##
|
485
408
|
# Called on the closing `>` of the open tag of an element.
|
486
|
-
#
|
487
409
|
def on_element_open_end
|
488
410
|
return unless html?
|
489
411
|
|
@@ -496,12 +418,10 @@ module Oga
|
|
496
418
|
end
|
497
419
|
end
|
498
420
|
|
499
|
-
##
|
500
421
|
# Called on the closing tag of an element.
|
501
422
|
#
|
502
423
|
# @param [String] name The name of the element (minus namespace
|
503
424
|
# prefix). This is not set for self closing tags.
|
504
|
-
#
|
505
425
|
def on_element_end(name = nil)
|
506
426
|
return if @elements.empty?
|
507
427
|
|
@@ -520,31 +440,25 @@ module Oga
|
|
520
440
|
@elements.pop
|
521
441
|
end
|
522
442
|
|
523
|
-
##
|
524
443
|
# Called on regular text values.
|
525
444
|
#
|
526
445
|
# @param [String] value
|
527
|
-
#
|
528
446
|
def on_text(value)
|
529
447
|
return if value.empty?
|
530
448
|
|
531
449
|
add_token(:T_TEXT, value)
|
532
450
|
end
|
533
451
|
|
534
|
-
##
|
535
452
|
# Called on attribute namespaces.
|
536
453
|
#
|
537
454
|
# @param [String] value
|
538
|
-
#
|
539
455
|
def on_attribute_ns(value)
|
540
456
|
add_token(:T_ATTR_NS, value)
|
541
457
|
end
|
542
458
|
|
543
|
-
##
|
544
459
|
# Called on tag attributes.
|
545
460
|
#
|
546
461
|
# @param [String] value
|
547
|
-
#
|
548
462
|
def on_attribute(value)
|
549
463
|
add_token(:T_ATTR, value)
|
550
464
|
end
|
data/lib/oga/xml/namespace.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# The Namespace class contains information about XML namespaces such as the
|
5
4
|
# name and URI.
|
6
|
-
#
|
7
5
|
class Namespace
|
8
6
|
# @return [String]
|
9
7
|
attr_accessor :name
|
@@ -11,35 +9,27 @@ module Oga
|
|
11
9
|
# @return [String]
|
12
10
|
attr_accessor :uri
|
13
11
|
|
14
|
-
##
|
15
12
|
# @param [Hash] options
|
16
13
|
#
|
17
14
|
# @option options [String] :name
|
18
15
|
# @option options [String] :uri
|
19
|
-
#
|
20
16
|
def initialize(options = {})
|
21
17
|
@name = options[:name]
|
22
18
|
@uri = options[:uri]
|
23
19
|
end
|
24
20
|
|
25
|
-
##
|
26
21
|
# @return [String]
|
27
|
-
#
|
28
22
|
def to_s
|
29
23
|
name.to_s
|
30
24
|
end
|
31
25
|
|
32
|
-
##
|
33
26
|
# @return [String]
|
34
|
-
#
|
35
27
|
def inspect
|
36
28
|
"Namespace(name: #{name.inspect} uri: #{uri.inspect})"
|
37
29
|
end
|
38
30
|
|
39
|
-
##
|
40
31
|
# @param [Oga::XML::Namespace] other
|
41
32
|
# @return [TrueClass|FalseClass]
|
42
|
-
#
|
43
33
|
def ==(other)
|
44
34
|
other.is_a?(self.class) && name == other.name && uri == other.uri
|
45
35
|
end
|