oga 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -363,16 +363,17 @@
363
363
  # body of an element is lexed using the `main` machine.
364
364
  #
365
365
 
366
- element_start = '<' ident_char;
367
- element_end = '</' identifier (':' identifier)* '>';
368
-
369
366
  action start_element {
370
367
  fhold;
371
368
  fnext element_name;
372
369
  }
373
370
 
371
+ action start_close_element {
372
+ fnext element_close;
373
+ }
374
+
374
375
  action close_element {
375
- callback_simple(id_on_element_end);
376
+ callback(id_on_element_end, data, encoding, ts, te);
376
377
  }
377
378
 
378
379
  action close_element_fnext_main {
@@ -381,6 +382,9 @@
381
382
  fnext main;
382
383
  }
383
384
 
385
+ element_start = '<' ident_char;
386
+ element_end = '</';
387
+
384
388
  # Machine used for lexing the name/namespace of an element.
385
389
  element_name := |*
386
390
  identifier ':' => {
@@ -393,6 +397,28 @@
393
397
  };
394
398
  *|;
395
399
 
400
+ # Machine used for lexing the closing tag of an element
401
+ element_close := |*
402
+ # Namespace prefixes. These are currently not used, but matching them here
403
+ # allows the rule below to be used for the actual element name.
404
+ identifier ':';
405
+
406
+ identifier => close_element;
407
+
408
+ '>' => {
409
+ if ( lines > 0 )
410
+ {
411
+ advance_line(lines);
412
+
413
+ lines = 0;
414
+ }
415
+
416
+ fnext main;
417
+ };
418
+
419
+ any $count_newlines;
420
+ *|;
421
+
396
422
  # Characters that can be used for unquoted HTML attribute values.
397
423
  # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
398
424
  # for more info.
@@ -582,7 +608,7 @@
582
608
  cdata_start => start_cdata;
583
609
  proc_ins_start => start_proc_ins;
584
610
  element_start => start_element;
585
- element_end => close_element;
611
+ element_end => start_close_element;
586
612
  any => start_text;
587
613
  *|;
588
614
  }%%
data/lib/oga.rb CHANGED
@@ -3,19 +3,19 @@ require 'set'
3
3
  require 'stringio'
4
4
  require 'thread'
5
5
 
6
- require_relative 'oga/version'
7
- require_relative 'oga/oga'
8
- require_relative 'oga/lru'
9
- require_relative 'oga/entity_decoder'
10
- require_relative 'oga/blacklist'
11
- require_relative 'oga/whitelist'
6
+ require 'oga/version'
7
+ require 'oga/oga'
8
+ require 'oga/lru'
9
+ require 'oga/entity_decoder'
10
+ require 'oga/blacklist'
11
+ require 'oga/whitelist'
12
12
 
13
13
  # Load these first so that the native extensions don't have to define the
14
14
  # Oga::XML namespace.
15
- require_relative 'oga/xml/lexer'
16
- require_relative 'oga/xml/parser'
15
+ require 'oga/xml/lexer'
16
+ require 'oga/xml/parser'
17
17
 
18
- require_relative 'liboga'
18
+ require 'liboga'
19
19
 
20
20
  #:nocov:
21
21
  if RUBY_PLATFORM == 'java'
@@ -23,35 +23,35 @@ if RUBY_PLATFORM == 'java'
23
23
  end
24
24
  #:nocov:
25
25
 
26
- require_relative 'oga/xml/html_void_elements'
27
- require_relative 'oga/xml/entities'
28
- require_relative 'oga/xml/querying'
29
- require_relative 'oga/xml/traversal'
30
- require_relative 'oga/xml/node'
31
- require_relative 'oga/xml/document'
32
- require_relative 'oga/xml/character_node'
33
- require_relative 'oga/xml/text'
34
- require_relative 'oga/xml/comment'
35
- require_relative 'oga/xml/cdata'
36
- require_relative 'oga/xml/xml_declaration'
37
- require_relative 'oga/xml/processing_instruction'
38
- require_relative 'oga/xml/doctype'
39
- require_relative 'oga/xml/namespace'
40
- require_relative 'oga/xml/default_namespace'
41
- require_relative 'oga/xml/attribute'
42
- require_relative 'oga/xml/element'
43
- require_relative 'oga/xml/node_set'
44
-
45
- require_relative 'oga/xml/sax_parser'
46
- require_relative 'oga/xml/pull_parser'
47
-
48
- require_relative 'oga/html/parser'
49
- require_relative 'oga/html/sax_parser'
50
- require_relative 'oga/html/entities'
51
-
52
- require_relative 'oga/xpath/lexer'
53
- require_relative 'oga/xpath/parser'
54
- require_relative 'oga/xpath/evaluator'
55
-
56
- require_relative 'oga/css/lexer'
57
- require_relative 'oga/css/parser'
26
+ require 'oga/xml/html_void_elements'
27
+ require 'oga/xml/entities'
28
+ require 'oga/xml/querying'
29
+ require 'oga/xml/traversal'
30
+ require 'oga/xml/node'
31
+ require 'oga/xml/document'
32
+ require 'oga/xml/character_node'
33
+ require 'oga/xml/text'
34
+ require 'oga/xml/comment'
35
+ require 'oga/xml/cdata'
36
+ require 'oga/xml/xml_declaration'
37
+ require 'oga/xml/processing_instruction'
38
+ require 'oga/xml/doctype'
39
+ require 'oga/xml/namespace'
40
+ require 'oga/xml/default_namespace'
41
+ require 'oga/xml/attribute'
42
+ require 'oga/xml/element'
43
+ require 'oga/xml/node_set'
44
+
45
+ require 'oga/xml/sax_parser'
46
+ require 'oga/xml/pull_parser'
47
+
48
+ require 'oga/html/parser'
49
+ require 'oga/html/sax_parser'
50
+ require 'oga/html/entities'
51
+
52
+ require 'oga/xpath/lexer'
53
+ require 'oga/xpath/parser'
54
+ require 'oga/xpath/evaluator'
55
+
56
+ require 'oga/css/lexer'
57
+ require 'oga/css/parser'
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '1.0.1'
2
+ VERSION = '1.0.2'
3
3
  end # Oga
@@ -50,6 +50,10 @@ module Oga
50
50
  %w{thead tbody tfoot tr caption colgroup col}
51
51
  )
52
52
 
53
+ HTML_SCRIPT_ELEMENTS = Whitelist.new(%w{script template})
54
+
55
+ HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr}) + HTML_SCRIPT_ELEMENTS
56
+
53
57
  # Elements that should be closed automatically before a new opening tag is
54
58
  # processed.
55
59
  HTML_CLOSE_SELF = {
@@ -59,8 +63,9 @@ module Oga
59
63
  'dt' => Blacklist.new(%w{dt dd}),
60
64
  'dd' => Blacklist.new(%w{dt dd}),
61
65
  'p' => Blacklist.new(%w{
62
- address article aside blockquote div dl fieldset footer form h1 h2 h3
63
- h4 h5 h6 header hgroup hr main nav ol p pre section table ul
66
+ address article aside blockquote details div dl fieldset figcaption
67
+ figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav
68
+ ol p pre section table ul
64
69
  }),
65
70
  'rb' => Blacklist.new(%w{rb rt rtc rp}),
66
71
  'rt' => Blacklist.new(%w{rb rt rtc rp}),
@@ -70,11 +75,11 @@ module Oga
70
75
  'option' => Blacklist.new(%w{optgroup option}),
71
76
  'colgroup' => Whitelist.new(%w{col template}),
72
77
  'caption' => HTML_TABLE_ALLOWED.to_blacklist,
73
- 'table' => HTML_TABLE_ALLOWED,
74
- 'thead' => Whitelist.new(%w{tr}),
75
- 'tbody' => Whitelist.new(%w{tr}),
76
- 'tfoot' => Whitelist.new(%w{tr}),
77
- 'tr' => Whitelist.new(%w{td th}),
78
+ 'table' => HTML_TABLE_ALLOWED + HTML_SCRIPT_ELEMENTS,
79
+ 'thead' => HTML_TABLE_ROW_ELEMENTS,
80
+ 'tbody' => HTML_TABLE_ROW_ELEMENTS,
81
+ 'tfoot' => HTML_TABLE_ROW_ELEMENTS,
82
+ 'tr' => Whitelist.new(%w{td th}) + HTML_SCRIPT_ELEMENTS,
78
83
  'td' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED,
79
84
  'th' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED
80
85
  }
@@ -475,9 +480,19 @@ module Oga
475
480
  ##
476
481
  # Called on the closing tag of an element.
477
482
  #
478
- def on_element_end
483
+ # @param [String] name The name of the element (minus namespace
484
+ # prefix). This is not set for self-closing tags.
485
+ #
486
+ def on_element_end(name = nil)
479
487
  return if @elements.empty?
480
488
 
489
+ if html? and name and @elements.include?(name)
490
+ while current_element != name
491
+ add_token(:T_ELEM_END)
492
+ @elements.pop
493
+ end
494
+ end
495
+
481
496
  add_token(:T_ELEM_END)
482
497
 
483
498
  @elements.pop
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-21 00:00:00.000000000 Z
11
+ date: 2015-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ast
@@ -219,7 +219,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
219
219
  version: '0'
220
220
  requirements: []
221
221
  rubyforge_project:
222
- rubygems_version: 2.4.5
222
+ rubygems_version: 2.4.7
223
223
  signing_key:
224
224
  specification_version: 4
225
225
  summary: Oga is an XML/HTML parser written in Ruby.