oga 1.0.1-java → 1.0.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -363,16 +363,17 @@
363
363
  # body of an element is lexed using the `main` machine.
364
364
  #
365
365
 
366
- element_start = '<' ident_char;
367
- element_end = '</' identifier (':' identifier)* '>';
368
-
369
366
  action start_element {
370
367
  fhold;
371
368
  fnext element_name;
372
369
  }
373
370
 
371
+ action start_close_element {
372
+ fnext element_close;
373
+ }
374
+
374
375
  action close_element {
375
- callback_simple(id_on_element_end);
376
+ callback(id_on_element_end, data, encoding, ts, te);
376
377
  }
377
378
 
378
379
  action close_element_fnext_main {
@@ -381,6 +382,9 @@
381
382
  fnext main;
382
383
  }
383
384
 
385
+ element_start = '<' ident_char;
386
+ element_end = '</';
387
+
384
388
  # Machine used for lexing the name/namespace of an element.
385
389
  element_name := |*
386
390
  identifier ':' => {
@@ -393,6 +397,28 @@
393
397
  };
394
398
  *|;
395
399
 
400
+ # Machine used for lexing the closing tag of an element
401
+ element_close := |*
402
+ # Namespace prefixes. These are currently not used, but matching them here
403
+ # lets the rule below match only the actual element name.
404
+ identifier ':';
405
+
406
+ identifier => close_element;
407
+
408
+ '>' => {
409
+ if ( lines > 0 )
410
+ {
411
+ advance_line(lines);
412
+
413
+ lines = 0;
414
+ }
415
+
416
+ fnext main;
417
+ };
418
+
419
+ any $count_newlines;
420
+ *|;
421
+
396
422
  # Characters that can be used for unquoted HTML attribute values.
397
423
  # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
398
424
  # for more info.
@@ -582,7 +608,7 @@
582
608
  cdata_start => start_cdata;
583
609
  proc_ins_start => start_proc_ins;
584
610
  element_start => start_element;
585
- element_end => close_element;
611
+ element_end => start_close_element;
586
612
  any => start_text;
587
613
  *|;
588
614
  }%%
data/lib/liboga.jar CHANGED
Binary file
data/lib/oga.rb CHANGED
@@ -3,19 +3,19 @@ require 'set'
3
3
  require 'stringio'
4
4
  require 'thread'
5
5
 
6
- require_relative 'oga/version'
7
- require_relative 'oga/oga'
8
- require_relative 'oga/lru'
9
- require_relative 'oga/entity_decoder'
10
- require_relative 'oga/blacklist'
11
- require_relative 'oga/whitelist'
6
+ require 'oga/version'
7
+ require 'oga/oga'
8
+ require 'oga/lru'
9
+ require 'oga/entity_decoder'
10
+ require 'oga/blacklist'
11
+ require 'oga/whitelist'
12
12
 
13
13
  # Load these first so that the native extensions don't have to define the
14
14
  # Oga::XML namespace.
15
- require_relative 'oga/xml/lexer'
16
- require_relative 'oga/xml/parser'
15
+ require 'oga/xml/lexer'
16
+ require 'oga/xml/parser'
17
17
 
18
- require_relative 'liboga'
18
+ require 'liboga'
19
19
 
20
20
  #:nocov:
21
21
  if RUBY_PLATFORM == 'java'
@@ -23,35 +23,35 @@ if RUBY_PLATFORM == 'java'
23
23
  end
24
24
  #:nocov:
25
25
 
26
- require_relative 'oga/xml/html_void_elements'
27
- require_relative 'oga/xml/entities'
28
- require_relative 'oga/xml/querying'
29
- require_relative 'oga/xml/traversal'
30
- require_relative 'oga/xml/node'
31
- require_relative 'oga/xml/document'
32
- require_relative 'oga/xml/character_node'
33
- require_relative 'oga/xml/text'
34
- require_relative 'oga/xml/comment'
35
- require_relative 'oga/xml/cdata'
36
- require_relative 'oga/xml/xml_declaration'
37
- require_relative 'oga/xml/processing_instruction'
38
- require_relative 'oga/xml/doctype'
39
- require_relative 'oga/xml/namespace'
40
- require_relative 'oga/xml/default_namespace'
41
- require_relative 'oga/xml/attribute'
42
- require_relative 'oga/xml/element'
43
- require_relative 'oga/xml/node_set'
44
-
45
- require_relative 'oga/xml/sax_parser'
46
- require_relative 'oga/xml/pull_parser'
47
-
48
- require_relative 'oga/html/parser'
49
- require_relative 'oga/html/sax_parser'
50
- require_relative 'oga/html/entities'
51
-
52
- require_relative 'oga/xpath/lexer'
53
- require_relative 'oga/xpath/parser'
54
- require_relative 'oga/xpath/evaluator'
55
-
56
- require_relative 'oga/css/lexer'
57
- require_relative 'oga/css/parser'
26
+ require 'oga/xml/html_void_elements'
27
+ require 'oga/xml/entities'
28
+ require 'oga/xml/querying'
29
+ require 'oga/xml/traversal'
30
+ require 'oga/xml/node'
31
+ require 'oga/xml/document'
32
+ require 'oga/xml/character_node'
33
+ require 'oga/xml/text'
34
+ require 'oga/xml/comment'
35
+ require 'oga/xml/cdata'
36
+ require 'oga/xml/xml_declaration'
37
+ require 'oga/xml/processing_instruction'
38
+ require 'oga/xml/doctype'
39
+ require 'oga/xml/namespace'
40
+ require 'oga/xml/default_namespace'
41
+ require 'oga/xml/attribute'
42
+ require 'oga/xml/element'
43
+ require 'oga/xml/node_set'
44
+
45
+ require 'oga/xml/sax_parser'
46
+ require 'oga/xml/pull_parser'
47
+
48
+ require 'oga/html/parser'
49
+ require 'oga/html/sax_parser'
50
+ require 'oga/html/entities'
51
+
52
+ require 'oga/xpath/lexer'
53
+ require 'oga/xpath/parser'
54
+ require 'oga/xpath/evaluator'
55
+
56
+ require 'oga/css/lexer'
57
+ require 'oga/css/parser'
data/lib/oga/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '1.0.1'
2
+ VERSION = '1.0.2'
3
3
  end # Oga
data/lib/oga/xml/lexer.rb CHANGED
@@ -50,6 +50,10 @@ module Oga
50
50
  %w{thead tbody tfoot tr caption colgroup col}
51
51
  )
52
52
 
53
+ HTML_SCRIPT_ELEMENTS = Whitelist.new(%w{script template})
54
+
55
+ HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr}) + HTML_SCRIPT_ELEMENTS
56
+
53
57
  # Elements that should be closed automatically before a new opening tag is
54
58
  # processed.
55
59
  HTML_CLOSE_SELF = {
@@ -59,8 +63,9 @@ module Oga
59
63
  'dt' => Blacklist.new(%w{dt dd}),
60
64
  'dd' => Blacklist.new(%w{dt dd}),
61
65
  'p' => Blacklist.new(%w{
62
- address article aside blockquote div dl fieldset footer form h1 h2 h3
63
- h4 h5 h6 header hgroup hr main nav ol p pre section table ul
66
+ address article aside blockquote details div dl fieldset figcaption
67
+ figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav
68
+ ol p pre section table ul
64
69
  }),
65
70
  'rb' => Blacklist.new(%w{rb rt rtc rp}),
66
71
  'rt' => Blacklist.new(%w{rb rt rtc rp}),
@@ -70,11 +75,11 @@ module Oga
70
75
  'option' => Blacklist.new(%w{optgroup option}),
71
76
  'colgroup' => Whitelist.new(%w{col template}),
72
77
  'caption' => HTML_TABLE_ALLOWED.to_blacklist,
73
- 'table' => HTML_TABLE_ALLOWED,
74
- 'thead' => Whitelist.new(%w{tr}),
75
- 'tbody' => Whitelist.new(%w{tr}),
76
- 'tfoot' => Whitelist.new(%w{tr}),
77
- 'tr' => Whitelist.new(%w{td th}),
78
+ 'table' => HTML_TABLE_ALLOWED + HTML_SCRIPT_ELEMENTS,
79
+ 'thead' => HTML_TABLE_ROW_ELEMENTS,
80
+ 'tbody' => HTML_TABLE_ROW_ELEMENTS,
81
+ 'tfoot' => HTML_TABLE_ROW_ELEMENTS,
82
+ 'tr' => Whitelist.new(%w{td th}) + HTML_SCRIPT_ELEMENTS,
78
83
  'td' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED,
79
84
  'th' => Blacklist.new(%w{td th}) + HTML_TABLE_ALLOWED
80
85
  }
@@ -475,9 +480,19 @@ module Oga
475
480
  ##
476
481
  # Called on the closing tag of an element.
477
482
  #
478
- def on_element_end
483
+ # @param [String] name The name of the element (minus namespace
484
+ # prefix). This is not set for self-closing tags.
485
+ #
486
+ def on_element_end(name = nil)
479
487
  return if @elements.empty?
480
488
 
489
+ if html? and name and @elements.include?(name)
490
+ while current_element != name
491
+ add_token(:T_ELEM_END)
492
+ @elements.pop
493
+ end
494
+ end
495
+
481
496
  add_token(:T_ELEM_END)
482
497
 
483
498
  @elements.pop
metadata CHANGED
@@ -1,199 +1,199 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: java
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-21 00:00:00.000000000 Z
11
+ date: 2015-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: ast
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - '>='
23
17
  - !ruby/object:Gem::Version
24
18
  version: '0'
19
+ name: ast
25
20
  prerelease: false
26
21
  type: :runtime
27
- - !ruby/object:Gem::Dependency
28
- name: ruby-ll
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - ~>
24
+ - - '>='
32
25
  - !ruby/object:Gem::Version
33
- version: '2.1'
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - ~>
37
31
  - !ruby/object:Gem::Version
38
32
  version: '2.1'
33
+ name: ruby-ll
39
34
  prerelease: false
40
35
  type: :runtime
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
36
  version_requirements: !ruby/object:Gem::Requirement
44
37
  requirements:
45
- - - '>='
38
+ - - ~>
46
39
  - !ruby/object:Gem::Version
47
- version: '0'
40
+ version: '2.1'
41
+ - !ruby/object:Gem::Dependency
48
42
  requirement: !ruby/object:Gem::Requirement
49
43
  requirements:
50
44
  - - '>='
51
45
  - !ruby/object:Gem::Version
52
46
  version: '0'
47
+ name: rake
53
48
  prerelease: false
54
49
  type: :development
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
50
  version_requirements: !ruby/object:Gem::Requirement
58
51
  requirements:
59
- - - ~>
52
+ - - '>='
60
53
  - !ruby/object:Gem::Version
61
- version: '3.0'
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
62
56
  requirement: !ruby/object:Gem::Requirement
63
57
  requirements:
64
58
  - - ~>
65
59
  - !ruby/object:Gem::Version
66
60
  version: '3.0'
61
+ name: rspec
67
62
  prerelease: false
68
63
  type: :development
69
- - !ruby/object:Gem::Dependency
70
- name: yard
71
64
  version_requirements: !ruby/object:Gem::Requirement
72
65
  requirements:
73
- - - '>='
66
+ - - ~>
74
67
  - !ruby/object:Gem::Version
75
- version: '0'
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
76
70
  requirement: !ruby/object:Gem::Requirement
77
71
  requirements:
78
72
  - - '>='
79
73
  - !ruby/object:Gem::Version
80
74
  version: '0'
75
+ name: yard
81
76
  prerelease: false
82
77
  type: :development
83
- - !ruby/object:Gem::Dependency
84
- name: simplecov
85
78
  version_requirements: !ruby/object:Gem::Requirement
86
79
  requirements:
87
80
  - - '>='
88
81
  - !ruby/object:Gem::Version
89
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
90
84
  requirement: !ruby/object:Gem::Requirement
91
85
  requirements:
92
86
  - - '>='
93
87
  - !ruby/object:Gem::Version
94
88
  version: '0'
89
+ name: simplecov
95
90
  prerelease: false
96
91
  type: :development
97
- - !ruby/object:Gem::Dependency
98
- name: kramdown
99
92
  version_requirements: !ruby/object:Gem::Requirement
100
93
  requirements:
101
94
  - - '>='
102
95
  - !ruby/object:Gem::Version
103
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
104
98
  requirement: !ruby/object:Gem::Requirement
105
99
  requirements:
106
100
  - - '>='
107
101
  - !ruby/object:Gem::Version
108
102
  version: '0'
103
+ name: kramdown
109
104
  prerelease: false
110
105
  type: :development
111
- - !ruby/object:Gem::Dependency
112
- name: benchmark-ips
113
106
  version_requirements: !ruby/object:Gem::Requirement
114
107
  requirements:
115
- - - ~>
108
+ - - '>='
116
109
  - !ruby/object:Gem::Version
117
- version: '2.0'
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
118
112
  requirement: !ruby/object:Gem::Requirement
119
113
  requirements:
120
114
  - - ~>
121
115
  - !ruby/object:Gem::Version
122
116
  version: '2.0'
117
+ name: benchmark-ips
123
118
  prerelease: false
124
119
  type: :development
125
- - !ruby/object:Gem::Dependency
126
- name: rake-compiler
127
120
  version_requirements: !ruby/object:Gem::Requirement
128
121
  requirements:
129
- - - '>='
122
+ - - ~>
130
123
  - !ruby/object:Gem::Version
131
- version: '0'
124
+ version: '2.0'
125
+ - !ruby/object:Gem::Dependency
132
126
  requirement: !ruby/object:Gem::Requirement
133
127
  requirements:
134
128
  - - '>='
135
129
  - !ruby/object:Gem::Version
136
130
  version: '0'
131
+ name: rake-compiler
137
132
  prerelease: false
138
133
  type: :development
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
139
  description: Oga is an XML/HTML parser written in Ruby.
140
140
  email: yorickpeterse@gmail.com
141
141
  executables: []
142
142
  extensions: []
143
143
  extra_rdoc_files: []
144
144
  files:
145
- - doc/manually_creating_documents.md
146
145
  - doc/xml_namespaces.md
146
+ - doc/manually_creating_documents.md
147
147
  - doc/css_selectors.md
148
148
  - doc/migrating_from_nokogiri.md
149
149
  - doc/css/common.css
150
150
  - lib/oga.rb
151
- - lib/oga/oga.rb
152
- - lib/oga/version.rb
151
+ - lib/oga/whitelist.rb
153
152
  - lib/oga/blacklist.rb
154
- - lib/oga/lru.rb
153
+ - lib/oga/version.rb
155
154
  - lib/oga/entity_decoder.rb
156
- - lib/oga/whitelist.rb
155
+ - lib/oga/lru.rb
156
+ - lib/oga/oga.rb
157
157
  - lib/oga/css/lexer.rb
158
158
  - lib/oga/css/parser.rb
159
- - lib/oga/html/sax_parser.rb
160
- - lib/oga/html/parser.rb
161
- - lib/oga/html/entities.rb
162
- - lib/oga/xml/lexer.rb
163
159
  - lib/oga/xml/namespace.rb
164
- - lib/oga/xml/processing_instruction.rb
165
- - lib/oga/xml/character_node.rb
166
- - lib/oga/xml/sax_parser.rb
167
- - lib/oga/xml/doctype.rb
168
- - lib/oga/xml/document.rb
169
- - lib/oga/xml/comment.rb
170
- - lib/oga/xml/default_namespace.rb
171
- - lib/oga/xml/text.rb
160
+ - lib/oga/xml/lexer.rb
172
161
  - lib/oga/xml/querying.rb
173
- - lib/oga/xml/attribute.rb
174
- - lib/oga/xml/pull_parser.rb
175
162
  - lib/oga/xml/parser.rb
176
- - lib/oga/xml/entities.rb
177
- - lib/oga/xml/html_void_elements.rb
163
+ - lib/oga/xml/traversal.rb
164
+ - lib/oga/xml/text.rb
178
165
  - lib/oga/xml/node.rb
166
+ - lib/oga/xml/document.rb
167
+ - lib/oga/xml/pull_parser.rb
179
168
  - lib/oga/xml/node_set.rb
169
+ - lib/oga/xml/sax_parser.rb
170
+ - lib/oga/xml/cdata.rb
180
171
  - lib/oga/xml/element.rb
172
+ - lib/oga/xml/character_node.rb
173
+ - lib/oga/xml/doctype.rb
174
+ - lib/oga/xml/html_void_elements.rb
175
+ - lib/oga/xml/entities.rb
176
+ - lib/oga/xml/default_namespace.rb
177
+ - lib/oga/xml/attribute.rb
181
178
  - lib/oga/xml/xml_declaration.rb
182
- - lib/oga/xml/cdata.rb
183
- - lib/oga/xml/traversal.rb
179
+ - lib/oga/xml/processing_instruction.rb
180
+ - lib/oga/xml/comment.rb
181
+ - lib/oga/html/parser.rb
182
+ - lib/oga/html/sax_parser.rb
183
+ - lib/oga/html/entities.rb
184
184
  - lib/oga/xpath/lexer.rb
185
- - lib/oga/xpath/evaluator.rb
186
185
  - lib/oga/xpath/parser.rb
187
- - ext/c/lexer.c
188
- - ext/c/lexer.rl
189
- - ext/c/lexer.h
190
- - ext/c/liboga.c
191
- - ext/c/extconf.rb
192
- - ext/c/liboga.h
186
+ - lib/oga/xpath/evaluator.rb
193
187
  - ext/ragel/base_lexer.rl
194
188
  - ext/java/Liboga.java
195
189
  - ext/java/org/liboga/xml/Lexer.java
196
190
  - ext/java/org/liboga/xml/Lexer.rl
191
+ - ext/c/extconf.rb
192
+ - ext/c/lexer.rl
193
+ - ext/c/lexer.h
194
+ - ext/c/liboga.c
195
+ - ext/c/lexer.c
196
+ - ext/c/liboga.h
197
197
  - README.md
198
198
  - LICENSE
199
199
  - oga.gemspec