oga 0.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +179 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +20 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/liboga.jar +0 -0
  20. data/lib/oga.rb +43 -0
  21. data/lib/oga/html/parser.rb +25 -0
  22. data/lib/oga/oga.rb +27 -0
  23. data/lib/oga/version.rb +3 -0
  24. data/lib/oga/xml/attribute.rb +111 -0
  25. data/lib/oga/xml/cdata.rb +17 -0
  26. data/lib/oga/xml/character_node.rb +39 -0
  27. data/lib/oga/xml/comment.rb +17 -0
  28. data/lib/oga/xml/doctype.rb +84 -0
  29. data/lib/oga/xml/document.rb +99 -0
  30. data/lib/oga/xml/element.rb +331 -0
  31. data/lib/oga/xml/lexer.rb +399 -0
  32. data/lib/oga/xml/namespace.rb +42 -0
  33. data/lib/oga/xml/node.rb +168 -0
  34. data/lib/oga/xml/node_set.rb +313 -0
  35. data/lib/oga/xml/parser.rb +556 -0
  36. data/lib/oga/xml/processing_instruction.rb +39 -0
  37. data/lib/oga/xml/pull_parser.rb +180 -0
  38. data/lib/oga/xml/querying.rb +32 -0
  39. data/lib/oga/xml/text.rb +11 -0
  40. data/lib/oga/xml/traversal.rb +48 -0
  41. data/lib/oga/xml/xml_declaration.rb +69 -0
  42. data/lib/oga/xpath/evaluator.rb +1748 -0
  43. data/lib/oga/xpath/lexer.rb +2043 -0
  44. data/lib/oga/xpath/node.rb +10 -0
  45. data/lib/oga/xpath/parser.rb +537 -0
  46. data/oga.gemspec +45 -0
  47. metadata +221 -0
@@ -0,0 +1,556 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by Racc 1.4.12
4
+ # from Racc grammar file "".
5
+ #
6
+
7
+ require 'racc/parser.rb'
8
+ module Oga
9
+ module XML
10
+ class Parser < Racc::Parser
11
+
12
##
# Builds a parser for the given input. A lexer is created from the same
# input and options, after which the parser state is reset.
#
# @param [String|IO] data The input to parse.
# @param [Hash] options Options handed to the lexer as-is.
# @see [Oga::XML::Lexer#initialize]
#
def initialize(data, options = {})
  @data, @lexer = data, Lexer.new(data, options)

  reset
end
23
+
24
##
# Puts the parser back in its initial state: the tracked line number goes
# back to 1 and the lexer is reset as well.
#
def reset
  @line = 1

  @lexer.reset
end
32
+
33
##
# Feeds the tokens produced by the lexer to the given block, one
# `[type, value]` pair at a time, followed by a final `[false, false]`
# pair once the lexer has no more tokens.
#
# The line number reported by the lexer (if any) is remembered so that
# error messages can point at it.
#
# @yieldparam [Array]
#
def yield_next_token
  @lexer.advance do |token_type, token_value, token_line|
    @line = token_line if token_line

    token = [token_type, token_value]

    yield token
  end

  end_marker = [false, false]

  yield end_marker
end
47
+
48
##
# Raises a parse error that names the unexpected token and shows an excerpt
# of the input around the line the error occurred on.
#
# @param [Fixnum] type The type of token the error occurred on.
# @param [String] value The value of the token.
# @param [Array] stack The current stack of parsed nodes (not used here).
# @raise [Racc::ParseError]
#
def on_error(type, value, stack)
  name  = token_to_str(type)
  index = @line - 1
  first = [index - 5, 0].max
  last  = index + 5
  code  = ''

  # For IO we sadly have to re-read the input, so rewind it first.
  @data.rewind if @data.respond_to?(:rewind)

  # Show up to 5 lines before and after the offending line (if they exist).
  @data.each_line.with_index do |line, line_index|
    next if line_index < first

    # Nothing beyond the window is displayed, so stop reading the input
    # instead of walking through the remainder of a potentially large IO.
    break if line_index > last

    # Both prefixes have the same width so the excerpt lines up.
    prefix = line_index == index ? '=> ' : '   '
    line   = line.strip
    line   = line[0..79] + ' (more)' if line.length > 80

    code << "#{prefix}#{line_index + 1}: #{line}\n"
  end

  raise Racc::ParseError, <<-EOF.strip
Unexpected #{name} with value #{value.inspect} on line #{@line}:

#{code}
  EOF
end
92
+
93
##
# Parses the input and returns the resulting document.
#
# The parser state is reset once parsing is done. This also happens when
# parsing fails, so a parser that raised is not left with a stale lexer or
# line number.
#
# @example
#  parser   = Oga::XML::Parser.new('<foo>bar</foo>')
#  document = parser.parse
#
# @return [Oga::XML::Document]
#
def parse
  yyparse(self, :yield_next_token)
ensure
  # Runs for both the success and the error path; the return value of the
  # method is still whatever yyparse returned.
  reset
end
109
+
110
##
# Builds a document out of the parsed top-level nodes. A doctype and an XML
# declaration are stored in their dedicated slots, every other node is
# appended to the document's children.
#
# @param [Array] children
# @return [Oga::XML::Document]
#
def on_document(children = [])
  children.each_with_object(Document.new) do |node, doc|
    case node
    when Doctype        then doc.doctype = node
    when XmlDeclaration then doc.xml_declaration = node
    else                     doc.children << node
    end
  end
end
131
+
132
##
# Wraps the given options in a doctype node.
#
# @param [Hash] options Passed to `Doctype.new` unchanged.
# @return [Oga::XML::Doctype]
#
def on_doctype(options = {})
  Doctype.new(options)
end
138
+
139
##
# Creates a CDATA node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Cdata]
#
def on_cdata(text = nil)
  Cdata.new(text: text)
end
146
+
147
##
# Creates a comment node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Comment]
#
def on_comment(text = nil)
  Comment.new(text: text)
end
154
+
155
##
# Creates a processing instruction node with the given name and text.
#
# @param [String] name
# @param [String] text
# @return [Oga::XML::ProcessingInstruction]
#
def on_proc_ins(name, text = nil)
  ProcessingInstruction.new(name: name, text: text)
end
163
+
164
##
# Creates an XML declaration node. Every attribute is turned into an option
# keyed by the attribute's name (as a Symbol) with the attribute's value.
#
# @param [Array] attributes
# @return [Oga::XML::XmlDeclaration]
#
def on_xml_decl(attributes = [])
  settings = attributes.each_with_object({}) do |attribute, collected|
    collected[attribute.name.to_sym] = attribute.value
  end

  XmlDeclaration.new(settings)
end
177
+
178
##
# Creates a text node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Text]
#
def on_text(text)
  Text.new(text: text)
end
185
+
186
##
# Creates an element node.
#
# The attributes are a list: the grammar actions in this file hand over the
# Array built by the `attributes` rule, so the default is an empty Array
# (the same default `on_xml_decl` uses for that list) rather than a Hash.
#
# @param [String] namespace The namespace name of the element, if any.
# @param [String] name
# @param [Array] attributes
# @return [Oga::XML::Element]
#
def on_element(namespace, name, attributes = [])
  Element.new(
    :namespace_name => namespace,
    :name           => name,
    :attributes     => attributes
  )
end
201
+
202
##
# Assigns the parsed child nodes to the element and hands the element back.
#
# @param [Oga::XML::Element] element
# @param [Array] children
# @return [Oga::XML::Element]
#
def on_element_children(element, children = [])
  element.children = children

  element
end
212
+
213
##
# Called once an element has been parsed; returns the element unchanged.
#
# @param [Oga::XML::Element] element
# @return [Oga::XML::Element]
#
def after_element(element)
  element
end
220
+
221
+ # vim: set ft=racc:
222
+ ##### State transition tables begin ###
223
+
224
+ racc_action_table = [
225
+ 40, 20, 12, 31, 32, 20, 12, 13, 14, 16,
226
+ 35, 13, 14, 16, 41, 19, 24, 15, 39, 19,
227
+ 43, 15, 20, 12, 36, 37, 44, 38, 13, 14,
228
+ 16, 48, 52, 45, 47, 51, 19, 46, 15, 31,
229
+ 32, 25, 26, 31, 32, 23, 49, 50, 21, 53 ]
230
+
231
+ racc_action_check = [
232
+ 24, 18, 18, 19, 19, 0, 0, 18, 18, 18,
233
+ 21, 0, 0, 0, 26, 18, 15, 18, 24, 0,
234
+ 30, 0, 3, 3, 23, 23, 32, 23, 3, 3,
235
+ 3, 37, 48, 33, 37, 48, 3, 34, 3, 17,
236
+ 17, 16, 16, 28, 28, 12, 38, 40, 1, 52 ]
237
+
238
+ racc_action_pointer = [
239
+ 2, 48, nil, 19, nil, nil, nil, nil, nil, nil,
240
+ nil, nil, 38, nil, nil, -4, 29, 24, -2, -12,
241
+ nil, 10, nil, 19, -3, nil, 2, nil, 28, nil,
242
+ 18, nil, 11, 19, 19, nil, nil, 29, 41, nil,
243
+ 26, nil, nil, nil, nil, nil, nil, nil, 30, nil,
244
+ nil, nil, 44, nil ]
245
+
246
+ racc_action_default = [
247
+ -3, -36, -1, -2, -5, -6, -7, -8, -9, -10,
248
+ -11, -12, -36, -18, -19, -36, -36, -27, -3, -27,
249
+ -35, -36, -4, -36, -36, -22, -36, -24, -26, -29,
250
+ -30, -32, -36, -36, -36, 54, -13, -36, -36, -20,
251
+ -36, -23, -28, -31, -33, -25, -34, -14, -36, -17,
252
+ -21, -15, -36, -16 ]
253
+
254
+ racc_goto_table = [
255
+ 2, 27, 1, 34, 22, 42, nil, nil, nil, nil,
256
+ nil, nil, nil, nil, nil, nil, nil, nil, 33 ]
257
+
258
+ racc_goto_check = [
259
+ 2, 14, 1, 14, 4, 16, nil, nil, nil, nil,
260
+ nil, nil, nil, nil, nil, nil, nil, nil, 2 ]
261
+
262
+ racc_goto_pointer = [
263
+ nil, 2, 0, nil, 1, nil, nil, nil, nil, nil,
264
+ nil, nil, nil, nil, -16, nil, -23, nil ]
265
+
266
+ racc_goto_default = [
267
+ nil, nil, nil, 3, 4, 5, 6, 7, 8, 9,
268
+ 10, 11, 17, 18, nil, 28, 29, 30 ]
269
+
270
+ racc_reduce_table = [
271
+ 0, 0, :racc_error,
272
+ 1, 23, :_reduce_1,
273
+ 1, 24, :_reduce_2,
274
+ 0, 24, :_reduce_3,
275
+ 2, 25, :_reduce_4,
276
+ 1, 25, :_reduce_5,
277
+ 1, 26, :_reduce_none,
278
+ 1, 26, :_reduce_none,
279
+ 1, 26, :_reduce_none,
280
+ 1, 26, :_reduce_none,
281
+ 1, 26, :_reduce_none,
282
+ 1, 26, :_reduce_none,
283
+ 1, 26, :_reduce_none,
284
+ 3, 27, :_reduce_13,
285
+ 4, 27, :_reduce_14,
286
+ 5, 27, :_reduce_15,
287
+ 6, 27, :_reduce_16,
288
+ 4, 27, :_reduce_17,
289
+ 1, 28, :_reduce_18,
290
+ 1, 29, :_reduce_19,
291
+ 3, 33, :_reduce_20,
292
+ 4, 33, :_reduce_21,
293
+ 2, 34, :_reduce_22,
294
+ 3, 34, :_reduce_23,
295
+ 2, 35, :_reduce_24,
296
+ 3, 30, :_reduce_25,
297
+ 1, 36, :_reduce_26,
298
+ 0, 36, :_reduce_27,
299
+ 2, 37, :_reduce_28,
300
+ 1, 37, :_reduce_29,
301
+ 1, 38, :_reduce_30,
302
+ 2, 38, :_reduce_31,
303
+ 1, 39, :_reduce_32,
304
+ 2, 39, :_reduce_33,
305
+ 3, 32, :_reduce_34,
306
+ 1, 31, :_reduce_35 ]
307
+
308
+ racc_reduce_n = 36
309
+
310
+ racc_shift_n = 54
311
+
312
+ racc_token_table = {
313
+ false => 0,
314
+ :error => 1,
315
+ :T_STRING => 2,
316
+ :T_TEXT => 3,
317
+ :T_DOCTYPE_START => 4,
318
+ :T_DOCTYPE_END => 5,
319
+ :T_DOCTYPE_TYPE => 6,
320
+ :T_DOCTYPE_NAME => 7,
321
+ :T_DOCTYPE_INLINE => 8,
322
+ :T_CDATA => 9,
323
+ :T_COMMENT => 10,
324
+ :T_ELEM_START => 11,
325
+ :T_ELEM_NAME => 12,
326
+ :T_ELEM_NS => 13,
327
+ :T_ELEM_END => 14,
328
+ :T_ATTR => 15,
329
+ :T_ATTR_NS => 16,
330
+ :T_XML_DECL_START => 17,
331
+ :T_XML_DECL_END => 18,
332
+ :T_PROC_INS_START => 19,
333
+ :T_PROC_INS_NAME => 20,
334
+ :T_PROC_INS_END => 21 }
335
+
336
+ racc_nt_base = 22
337
+
338
+ racc_use_result_var = false
339
+
340
+ Racc_arg = [
341
+ racc_action_table,
342
+ racc_action_check,
343
+ racc_action_default,
344
+ racc_action_pointer,
345
+ racc_goto_table,
346
+ racc_goto_check,
347
+ racc_goto_default,
348
+ racc_goto_pointer,
349
+ racc_nt_base,
350
+ racc_reduce_table,
351
+ racc_token_table,
352
+ racc_shift_n,
353
+ racc_reduce_n,
354
+ racc_use_result_var ]
355
+
356
+ Racc_token_to_s_table = [
357
+ "$end",
358
+ "error",
359
+ "T_STRING",
360
+ "T_TEXT",
361
+ "T_DOCTYPE_START",
362
+ "T_DOCTYPE_END",
363
+ "T_DOCTYPE_TYPE",
364
+ "T_DOCTYPE_NAME",
365
+ "T_DOCTYPE_INLINE",
366
+ "T_CDATA",
367
+ "T_COMMENT",
368
+ "T_ELEM_START",
369
+ "T_ELEM_NAME",
370
+ "T_ELEM_NS",
371
+ "T_ELEM_END",
372
+ "T_ATTR",
373
+ "T_ATTR_NS",
374
+ "T_XML_DECL_START",
375
+ "T_XML_DECL_END",
376
+ "T_PROC_INS_START",
377
+ "T_PROC_INS_NAME",
378
+ "T_PROC_INS_END",
379
+ "$start",
380
+ "document",
381
+ "expressions",
382
+ "expressions_",
383
+ "expression",
384
+ "doctype",
385
+ "cdata",
386
+ "comment",
387
+ "element",
388
+ "text",
389
+ "xmldecl",
390
+ "proc_ins",
391
+ "element_open",
392
+ "element_start",
393
+ "attributes",
394
+ "attributes_",
395
+ "attribute",
396
+ "attribute_name" ]
397
+
398
+ Racc_debug_parser = false
399
+
400
+ ##### State transition tables end #####
401
+
402
+ # reduce 0 omitted
403
+
404
+ def _reduce_1(val, _values)
405
+ on_document(val[0])
406
+ end
407
+
408
+ def _reduce_2(val, _values)
409
+ val[0]
410
+ end
411
+
412
+ def _reduce_3(val, _values)
413
+ []
414
+ end
415
+
416
+ def _reduce_4(val, _values)
417
+ val[0] << val[1]
418
+ end
419
+
420
+ def _reduce_5(val, _values)
421
+ val
422
+ end
423
+
424
+ # reduce 6 omitted
425
+
426
+ # reduce 7 omitted
427
+
428
+ # reduce 8 omitted
429
+
430
+ # reduce 9 omitted
431
+
432
+ # reduce 10 omitted
433
+
434
+ # reduce 11 omitted
435
+
436
+ # reduce 12 omitted
437
+
438
+ def _reduce_13(val, _values)
439
+ on_doctype(:name => val[1])
440
+
441
+ end
442
+
443
+ def _reduce_14(val, _values)
444
+ on_doctype(:name => val[1], :type => val[2])
445
+
446
+ end
447
+
448
+ def _reduce_15(val, _values)
449
+ on_doctype(:name => val[1], :type => val[2], :public_id => val[3])
450
+
451
+ end
452
+
453
+ def _reduce_16(val, _values)
454
+ on_doctype(
455
+ :name => val[1],
456
+ :type => val[2],
457
+ :public_id => val[3],
458
+ :system_id => val[4]
459
+ )
460
+
461
+ end
462
+
463
+ def _reduce_17(val, _values)
464
+ on_doctype(:name => val[1], :inline_rules => val[2])
465
+
466
+ end
467
+
468
+ def _reduce_18(val, _values)
469
+ on_cdata(val[0])
470
+ end
471
+
472
+ def _reduce_19(val, _values)
473
+ on_comment(val[0])
474
+ end
475
+
476
+ def _reduce_20(val, _values)
477
+ on_proc_ins(val[1])
478
+
479
+ end
480
+
481
+ def _reduce_21(val, _values)
482
+ on_proc_ins(val[1], val[2])
483
+
484
+ end
485
+
486
+ def _reduce_22(val, _values)
487
+ [nil, val[1]]
488
+ end
489
+
490
+ def _reduce_23(val, _values)
491
+ [val[1], val[2]]
492
+ end
493
+
494
+ def _reduce_24(val, _values)
495
+ on_element(val[0][0], val[0][1], val[1])
496
+ end
497
+
498
+ def _reduce_25(val, _values)
499
+ if val[0]
500
+ on_element_children(val[0], val[1])
501
+ end
502
+
503
+ after_element(val[0])
504
+
505
+ end
506
+
507
+ def _reduce_26(val, _values)
508
+ val[0]
509
+ end
510
+
511
+ def _reduce_27(val, _values)
512
+ []
513
+ end
514
+
515
+ def _reduce_28(val, _values)
516
+ val[0] << val[1]
517
+ end
518
+
519
+ def _reduce_29(val, _values)
520
+ val
521
+ end
522
+
523
+ def _reduce_30(val, _values)
524
+ val[0]
525
+ end
526
+
527
+ def _reduce_31(val, _values)
528
+ val[0].value = val[1]
529
+ val[0]
530
+
531
+ end
532
+
533
+ def _reduce_32(val, _values)
534
+ Attribute.new(:name => val[0])
535
+ end
536
+
537
+ def _reduce_33(val, _values)
538
+ Attribute.new(:namespace_name => val[0], :name => val[1])
539
+
540
+ end
541
+
542
+ def _reduce_34(val, _values)
543
+ on_xml_decl(val[1])
544
+ end
545
+
546
+ def _reduce_35(val, _values)
547
+ on_text(val[0])
548
+ end
549
+
550
+ def _reduce_none(val, _values)
551
+ val[0]
552
+ end
553
+
554
+ end # class Parser
555
+ end # module XML
556
+ end # module Oga