oga 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +171 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +7 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/oga.rb +43 -0
  20. data/lib/oga/html/parser.rb +25 -0
  21. data/lib/oga/oga.rb +27 -0
  22. data/lib/oga/version.rb +3 -0
  23. data/lib/oga/xml/attribute.rb +111 -0
  24. data/lib/oga/xml/cdata.rb +24 -0
  25. data/lib/oga/xml/character_node.rb +39 -0
  26. data/lib/oga/xml/comment.rb +24 -0
  27. data/lib/oga/xml/doctype.rb +91 -0
  28. data/lib/oga/xml/document.rb +99 -0
  29. data/lib/oga/xml/element.rb +340 -0
  30. data/lib/oga/xml/lexer.rb +399 -0
  31. data/lib/oga/xml/namespace.rb +42 -0
  32. data/lib/oga/xml/node.rb +175 -0
  33. data/lib/oga/xml/node_set.rb +313 -0
  34. data/lib/oga/xml/parser.rb +556 -0
  35. data/lib/oga/xml/processing_instruction.rb +39 -0
  36. data/lib/oga/xml/pull_parser.rb +166 -0
  37. data/lib/oga/xml/querying.rb +32 -0
  38. data/lib/oga/xml/text.rb +16 -0
  39. data/lib/oga/xml/traversal.rb +48 -0
  40. data/lib/oga/xml/xml_declaration.rb +76 -0
  41. data/lib/oga/xpath/evaluator.rb +1748 -0
  42. data/lib/oga/xpath/lexer.rb +2043 -0
  43. data/lib/oga/xpath/node.rb +10 -0
  44. data/lib/oga/xpath/parser.rb +535 -0
  45. data/oga.gemspec +45 -0
  46. metadata +221 -0
@@ -0,0 +1,556 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by Racc 1.4.12
4
+ # from Racc grammar file "".
5
+ #
6
+
7
+ require 'racc/parser.rb'
8
module Oga
  module XML
    ##
    # Racc based parser that turns the token stream produced by
    # {Oga::XML::Lexer} into a tree of XML node objects.
    #
    # NOTE(review): this class is emitted by Racc; hand edits made here should
    # be mirrored in the grammar source or they will be lost on regeneration.
    #
    class Parser < Racc::Parser

      ##
      # @param [String|IO] data The input to parse.
      # @param [Hash] options
      # @see [Oga::XML::Lexer#initialize]
      #
      def initialize(data, options = {})
        @data  = data
        @lexer = Lexer.new(data, options)

        reset
      end

      ##
      # Resets the internal state of the parser: the current line number goes
      # back to 1 and the lexer is reset as well.
      #
      def reset
        @line = 1

        @lexer.reset
      end

      ##
      # Yields every token produced by the lexer as a `[type, value]` pair,
      # followed by a final `[false, false]` pair that marks the end of the
      # input for Racc. The line number reported by the lexer is remembered so
      # that {#on_error} can point at it.
      #
      # @yieldparam [Array]
      #
      def yield_next_token
        @lexer.advance do |type, value, line|
          @line = line if line

          yield [type, value]
        end

        yield [false, false]
      end

      ##
      # Raises a parse error that shows the offending token together with up
      # to 5 lines of input before and after the line it was found on.
      #
      # @param [Integer] type The type of token the error occurred on.
      # @param [String] value The value of the token.
      # @param [Array] stack The current stack of parsed nodes.
      # @raise [Racc::ParseError]
      #
      def on_error(type, value, stack)
        name        = token_to_str(type)
        index       = @line - 1
        index_range = (index - 5)..(index + 5)
        code        = ''

        # IO input has already been consumed by the lexer, so it has to be
        # rewound before it can be read again.
        @data.rewind if @data.respond_to?(:rewind)

        @data.each_line.with_index do |line, line_index|
          # Nothing beyond the context window is displayed, so stop reading
          # instead of walking through the remainder of the input.
          break if line_index > index_range.last

          next unless index_range.cover?(line_index)

          number = line_index + 1

          # The filler is as wide as the marker so that the line numbers of
          # the surrounding lines line up with the offending one.
          prefix = line_index == index ? '=> ' : '   '

          line = line.strip
          line = line[0..79] + ' (more)' if line.length > 80

          code << "#{prefix}#{number}: #{line}\n"
        end

        message = "Unexpected #{name} with value #{value.inspect} " \
          "on line #{@line}:\n\n#{code}"

        raise Racc::ParseError, message.strip
      end

      ##
      # Parses the input and returns the resulting document.
      #
      # The parser state is reset afterwards, even when parsing fails, so a
      # failed run does not leave a stale line number or lexer state behind.
      #
      # @example
      #  parser = Oga::XML::Parser.new('<foo>bar</foo>')
      #  document = parser.parse
      #
      # @return [Oga::XML::Document]
      # @raise [Racc::ParseError]
      #
      def parse
        return yyparse(self, :yield_next_token)
      ensure
        reset
      end

      ##
      # Builds the document. A doctype or XML declaration found among the
      # children is assigned to its dedicated attribute, every other child is
      # appended to the children of the document.
      #
      # @param [Array] children
      # @return [Oga::XML::Document]
      #
      def on_document(children = [])
        document = Document.new

        children.each do |child|
          case child
          when Doctype
            document.doctype = child
          when XmlDeclaration
            document.xml_declaration = child
          else
            document.children << child
          end
        end

        return document
      end

      ##
      # @param [Hash] options
      # @return [Oga::XML::Doctype]
      #
      def on_doctype(options = {})
        return Doctype.new(options)
      end

      ##
      # @param [String] text
      # @return [Oga::XML::Cdata]
      #
      def on_cdata(text = nil)
        return Cdata.new(:text => text)
      end

      ##
      # @param [String] text
      # @return [Oga::XML::Comment]
      #
      def on_comment(text = nil)
        return Comment.new(:text => text)
      end

      ##
      # @param [String] name
      # @param [String] text
      # @return [Oga::XML::ProcessingInstruction]
      #
      def on_proc_ins(name, text = nil)
        return ProcessingInstruction.new(:name => name, :text => text)
      end

      ##
      # Builds an XML declaration, using the name of every attribute (as a
      # Symbol) as an option key and its value as the option value.
      #
      # @param [Array] attributes
      # @return [Oga::XML::XmlDeclaration]
      #
      def on_xml_decl(attributes = [])
        options = {}

        attributes.each do |attr|
          options[attr.name.to_sym] = attr.value
        end

        return XmlDeclaration.new(options)
      end

      ##
      # @param [String] text
      # @return [Oga::XML::Text]
      #
      def on_text(text)
        return Text.new(:text => text)
      end

      ##
      # @param [String] namespace
      # @param [String] name
      # @param [Array] attributes The attributes collected by the grammar,
      #  which always builds them as an Array.
      # @return [Oga::XML::Element]
      #
      def on_element(namespace, name, attributes = [])
        return Element.new(
          :namespace_name => namespace,
          :name           => name,
          :attributes     => attributes
        )
      end

      ##
      # @param [Oga::XML::Element] element
      # @param [Array] children
      # @return [Oga::XML::Element]
      #
      def on_element_children(element, children = [])
        element.children = children

        return element
      end

      ##
      # Called once an element has been parsed completely. Returns the element
      # unchanged.
      #
      # @param [Oga::XML::Element] element
      # @return [Oga::XML::Element]
      #
      def after_element(element)
        return element
      end

      # vim: set ft=racc:
      ##### State transition tables begin ###

      racc_action_table = [
          40,    20,    12,    25,    26,    20,    12,    13,    14,    16,
          41,    13,    14,    16,    21,    19,    35,    15,    39,    19,
          43,    15,    20,    12,    52,    31,    32,    51,    13,    14,
          16,    48,    36,    37,    47,    38,    19,    44,    15,    31,
          32,    31,    32,    45,    46,    24,    49,    50,    23,    53 ]

      racc_action_check = [
          24,     0,     0,    16,    16,     3,     3,     0,     0,     0,
          26,     3,     3,     3,     1,     0,    21,     0,    24,     3,
          30,     3,    18,    18,    48,    19,    19,    48,    18,    18,
          18,    37,    23,    23,    37,    23,    18,    32,    18,    28,
          28,    17,    17,    33,    34,    15,    38,    40,    12,    52 ]

      racc_action_pointer = [
          -2,    14,   nil,     2,   nil,   nil,   nil,   nil,   nil,   nil,
         nil,   nil,    41,   nil,   nil,    25,    -9,    26,    19,    10,
         nil,    16,   nil,    27,    -3,   nil,    -2,   nil,    24,   nil,
          18,   nil,    22,    29,    26,   nil,   nil,    29,    41,   nil,
          26,   nil,   nil,   nil,   nil,   nil,   nil,   nil,    22,   nil,
         nil,   nil,    44,   nil ]

      racc_action_default = [
          -3,   -36,    -1,    -2,    -5,    -6,    -7,    -8,    -9,   -10,
         -11,   -12,   -36,   -18,   -19,   -36,   -36,   -27,    -3,   -27,
         -35,   -36,    -4,   -36,   -36,   -22,   -36,   -24,   -26,   -29,
         -30,   -32,   -36,   -36,   -36,    54,   -13,   -36,   -36,   -20,
         -36,   -23,   -28,   -31,   -33,   -25,   -34,   -14,   -36,   -17,
         -21,   -15,   -36,   -16 ]

      racc_goto_table = [
           2,    27,    22,    34,     1,    42,   nil,   nil,   nil,   nil,
         nil,   nil,   nil,   nil,   nil,   nil,   nil,   nil,    33 ]

      racc_goto_check = [
           2,    14,     4,    14,     1,    16,   nil,   nil,   nil,   nil,
         nil,   nil,   nil,   nil,   nil,   nil,   nil,   nil,     2 ]

      racc_goto_pointer = [
         nil,     4,     0,   nil,    -1,   nil,   nil,   nil,   nil,   nil,
         nil,   nil,   nil,   nil,   -16,   nil,   -23,   nil ]

      racc_goto_default = [
         nil,   nil,   nil,     3,     4,     5,     6,     7,     8,     9,
          10,    11,    17,    18,   nil,    28,    29,    30 ]

      racc_reduce_table = [
        0, 0, :racc_error,
        1, 23, :_reduce_1,
        1, 24, :_reduce_2,
        0, 24, :_reduce_3,
        2, 25, :_reduce_4,
        1, 25, :_reduce_5,
        1, 26, :_reduce_none,
        1, 26, :_reduce_none,
        1, 26, :_reduce_none,
        1, 26, :_reduce_none,
        1, 26, :_reduce_none,
        1, 26, :_reduce_none,
        1, 26, :_reduce_none,
        3, 27, :_reduce_13,
        4, 27, :_reduce_14,
        5, 27, :_reduce_15,
        6, 27, :_reduce_16,
        4, 27, :_reduce_17,
        1, 28, :_reduce_18,
        1, 29, :_reduce_19,
        3, 33, :_reduce_20,
        4, 33, :_reduce_21,
        2, 34, :_reduce_22,
        3, 34, :_reduce_23,
        2, 35, :_reduce_24,
        3, 30, :_reduce_25,
        1, 36, :_reduce_26,
        0, 36, :_reduce_27,
        2, 37, :_reduce_28,
        1, 37, :_reduce_29,
        1, 38, :_reduce_30,
        2, 38, :_reduce_31,
        1, 39, :_reduce_32,
        2, 39, :_reduce_33,
        3, 32, :_reduce_34,
        1, 31, :_reduce_35 ]

      racc_reduce_n = 36

      racc_shift_n = 54

      racc_token_table = {
        false => 0,
        :error => 1,
        :T_STRING => 2,
        :T_TEXT => 3,
        :T_DOCTYPE_START => 4,
        :T_DOCTYPE_END => 5,
        :T_DOCTYPE_TYPE => 6,
        :T_DOCTYPE_NAME => 7,
        :T_DOCTYPE_INLINE => 8,
        :T_CDATA => 9,
        :T_COMMENT => 10,
        :T_ELEM_START => 11,
        :T_ELEM_NAME => 12,
        :T_ELEM_NS => 13,
        :T_ELEM_END => 14,
        :T_ATTR => 15,
        :T_ATTR_NS => 16,
        :T_XML_DECL_START => 17,
        :T_XML_DECL_END => 18,
        :T_PROC_INS_START => 19,
        :T_PROC_INS_NAME => 20,
        :T_PROC_INS_END => 21 }

      racc_nt_base = 22

      racc_use_result_var = false

      Racc_arg = [
        racc_action_table,
        racc_action_check,
        racc_action_default,
        racc_action_pointer,
        racc_goto_table,
        racc_goto_check,
        racc_goto_default,
        racc_goto_pointer,
        racc_nt_base,
        racc_reduce_table,
        racc_token_table,
        racc_shift_n,
        racc_reduce_n,
        racc_use_result_var ]

      Racc_token_to_s_table = [
        "$end",
        "error",
        "T_STRING",
        "T_TEXT",
        "T_DOCTYPE_START",
        "T_DOCTYPE_END",
        "T_DOCTYPE_TYPE",
        "T_DOCTYPE_NAME",
        "T_DOCTYPE_INLINE",
        "T_CDATA",
        "T_COMMENT",
        "T_ELEM_START",
        "T_ELEM_NAME",
        "T_ELEM_NS",
        "T_ELEM_END",
        "T_ATTR",
        "T_ATTR_NS",
        "T_XML_DECL_START",
        "T_XML_DECL_END",
        "T_PROC_INS_START",
        "T_PROC_INS_NAME",
        "T_PROC_INS_END",
        "$start",
        "document",
        "expressions",
        "expressions_",
        "expression",
        "doctype",
        "cdata",
        "comment",
        "element",
        "text",
        "xmldecl",
        "proc_ins",
        "element_open",
        "element_start",
        "attributes",
        "attributes_",
        "attribute",
        "attribute_name" ]

      Racc_debug_parser = false

      ##### State transition tables end #####

      # reduce 0 omitted

      def _reduce_1(val, _values)
        on_document(val[0])
      end

      def _reduce_2(val, _values)
        val[0]
      end

      def _reduce_3(val, _values)
        []
      end

      def _reduce_4(val, _values)
        val[0] << val[1]
      end

      def _reduce_5(val, _values)
        val
      end

      # reduce 6 omitted

      # reduce 7 omitted

      # reduce 8 omitted

      # reduce 9 omitted

      # reduce 10 omitted

      # reduce 11 omitted

      # reduce 12 omitted

      def _reduce_13(val, _values)
        on_doctype(:name => val[1])

      end

      def _reduce_14(val, _values)
        on_doctype(:name => val[1], :type => val[2])

      end

      def _reduce_15(val, _values)
        on_doctype(:name => val[1], :type => val[2], :public_id => val[3])

      end

      def _reduce_16(val, _values)
        on_doctype(
          :name      => val[1],
          :type      => val[2],
          :public_id => val[3],
          :system_id => val[4]
        )

      end

      def _reduce_17(val, _values)
        on_doctype(:name => val[1], :inline_rules => val[2])

      end

      def _reduce_18(val, _values)
        on_cdata(val[0])
      end

      def _reduce_19(val, _values)
        on_comment(val[0])
      end

      def _reduce_20(val, _values)
        on_proc_ins(val[1])

      end

      def _reduce_21(val, _values)
        on_proc_ins(val[1], val[2])

      end

      def _reduce_22(val, _values)
        [nil, val[1]]
      end

      def _reduce_23(val, _values)
        [val[1], val[2]]
      end

      def _reduce_24(val, _values)
        on_element(val[0][0], val[0][1], val[1])
      end

      def _reduce_25(val, _values)
        if val[0]
          on_element_children(val[0], val[1])
        end

        after_element(val[0])

      end

      def _reduce_26(val, _values)
        val[0]
      end

      def _reduce_27(val, _values)
        []
      end

      def _reduce_28(val, _values)
        val[0] << val[1]
      end

      def _reduce_29(val, _values)
        val
      end

      def _reduce_30(val, _values)
        val[0]
      end

      def _reduce_31(val, _values)
        val[0].value = val[1]
        val[0]

      end

      def _reduce_32(val, _values)
        Attribute.new(:name => val[0])
      end

      def _reduce_33(val, _values)
        Attribute.new(:namespace_name => val[0], :name => val[1])

      end

      def _reduce_34(val, _values)
        on_xml_decl(val[1])
      end

      def _reduce_35(val, _values)
        on_text(val[0])
      end

      def _reduce_none(val, _values)
        val[0]
      end

    end # class Parser
  end # module XML
end # module Oga