oga 1.0.2-java → 1.0.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/ext/c/lexer.c +394 -312
- data/ext/c/lexer.rl +3 -3
- data/ext/java/org/liboga/xml/Lexer.java +216 -172
- data/ext/java/org/liboga/xml/Lexer.rl +1 -1
- data/ext/ragel/base_lexer.rl +30 -11
- data/lib/liboga.jar +0 -0
- data/lib/oga/blacklist.rb +2 -2
- data/lib/oga/css/parser.rb +26 -28
- data/lib/oga/entity_decoder.rb +2 -2
- data/lib/oga/html/entities.rb +1 -1
- data/lib/oga/lru.rb +6 -6
- data/lib/oga/oga.rb +14 -14
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +2 -2
- data/lib/oga/xml/attribute.rb +16 -18
- data/lib/oga/xml/cdata.rb +1 -1
- data/lib/oga/xml/character_node.rb +3 -5
- data/lib/oga/xml/comment.rb +1 -1
- data/lib/oga/xml/doctype.rb +21 -23
- data/lib/oga/xml/document.rb +11 -17
- data/lib/oga/xml/element.rb +19 -29
- data/lib/oga/xml/entities.rb +3 -3
- data/lib/oga/xml/lexer.rb +34 -15
- data/lib/oga/xml/namespace.rb +8 -10
- data/lib/oga/xml/node.rb +8 -10
- data/lib/oga/xml/node_set.rb +16 -18
- data/lib/oga/xml/parser.rb +1 -1
- data/lib/oga/xml/processing_instruction.rb +3 -5
- data/lib/oga/xml/pull_parser.rb +6 -9
- data/lib/oga/xml/querying.rb +4 -4
- data/lib/oga/xml/sax_parser.rb +4 -4
- data/lib/oga/xml/text.rb +4 -4
- data/lib/oga/xml/xml_declaration.rb +11 -15
- data/lib/oga/xpath/evaluator.rb +81 -81
- metadata +66 -66
data/lib/oga/xml/lexer.rb
CHANGED
@@ -34,12 +34,20 @@ module Oga
|
|
34
34
|
# However, it is perfectly safe to use different instances per thread.
|
35
35
|
# There is no _global_ state used by this lexer.
|
36
36
|
#
|
37
|
-
#
|
38
|
-
#
|
37
|
+
# ## Strict Mode
|
38
|
+
#
|
39
|
+
# By default the lexer is rather permissive regarding the input. For
|
40
|
+
# example, missing closing tags are inserted by default. To disable this
|
41
|
+
# behaviour the lexer can be run in "strict mode" by setting `:strict` to
|
42
|
+
# `true`:
|
43
|
+
#
|
44
|
+
# lexer = Oga::XML::Lexer.new('...', :strict => true)
|
45
|
+
#
|
46
|
+
# Strict mode only applies to XML documents.
|
47
|
+
#
|
48
|
+
# @private
|
39
49
|
#
|
40
50
|
class Lexer
|
41
|
-
attr_reader :html
|
42
|
-
|
43
51
|
# These are all constant/frozen to remove the need for String allocations
|
44
52
|
# every time they are referenced in the lexer.
|
45
53
|
HTML_SCRIPT = 'script'.freeze
|
@@ -99,13 +107,17 @@ module Oga
|
|
99
107
|
#
|
100
108
|
# @param [Hash] options
|
101
109
|
#
|
102
|
-
# @option options [
|
103
|
-
# the input as HTML instead of
|
104
|
-
# HTML void elements such as `<link href="">`.
|
110
|
+
# @option options [TrueClass|FalseClass] :html When set to `true` the
|
111
|
+
# lexer will treat the input as HTML instead of XML. This makes it
|
112
|
+
# possible to lex HTML void elements such as `<link href="">`.
|
113
|
+
#
|
114
|
+
# @option options [TrueClass|FalseClass] :strict Enables/disables strict
|
115
|
+
# parsing of XML documents, disabled by default.
|
105
116
|
#
|
106
117
|
def initialize(data, options = {})
|
107
|
-
@data
|
108
|
-
@html
|
118
|
+
@data = data
|
119
|
+
@html = options[:html]
|
120
|
+
@strict = options[:strict] || false
|
109
121
|
|
110
122
|
reset
|
111
123
|
end
|
@@ -163,7 +175,7 @@ module Oga
|
|
163
175
|
|
164
176
|
reset
|
165
177
|
|
166
|
-
|
178
|
+
tokens
|
167
179
|
end
|
168
180
|
|
169
181
|
##
|
@@ -193,7 +205,7 @@ module Oga
|
|
193
205
|
end
|
194
206
|
|
195
207
|
# Add any missing closing tags
|
196
|
-
|
208
|
+
if !strict? and !@elements.empty?
|
197
209
|
@elements.length.times { on_element_end }
|
198
210
|
end
|
199
211
|
ensure
|
@@ -204,21 +216,28 @@ module Oga
|
|
204
216
|
# @return [TrueClass|FalseClass]
|
205
217
|
#
|
206
218
|
def html?
|
207
|
-
|
219
|
+
@html == true
|
220
|
+
end
|
221
|
+
|
222
|
+
##
|
223
|
+
# @return [TrueClass|FalseClass]
|
224
|
+
#
|
225
|
+
def strict?
|
226
|
+
@strict
|
208
227
|
end
|
209
228
|
|
210
229
|
##
|
211
230
|
# @return [TrueClass|FalseClass]
|
212
231
|
#
|
213
232
|
def html_script?
|
214
|
-
|
233
|
+
html? && current_element == HTML_SCRIPT
|
215
234
|
end
|
216
235
|
|
217
236
|
##
|
218
237
|
# @return [TrueClass|FalseClass]
|
219
238
|
#
|
220
239
|
def html_style?
|
221
|
-
|
240
|
+
html? && current_element == HTML_STYLE
|
222
241
|
end
|
223
242
|
|
224
243
|
private
|
@@ -250,7 +269,7 @@ module Oga
|
|
250
269
|
# @return [String]
|
251
270
|
#
|
252
271
|
def current_element
|
253
|
-
|
272
|
+
@elements.last
|
254
273
|
end
|
255
274
|
|
256
275
|
##
|
data/lib/oga/xml/namespace.rb
CHANGED
@@ -4,14 +4,12 @@ module Oga
|
|
4
4
|
# The Namespace class contains information about XML namespaces such as the
|
5
5
|
# name and URI.
|
6
6
|
#
|
7
|
-
# @!attribute [r] name
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
|
-
# @!attribute [r] uri
|
11
|
-
# @return [String]
|
12
|
-
#
|
13
7
|
class Namespace
|
14
|
-
|
8
|
+
# @return [String]
|
9
|
+
attr_accessor :name
|
10
|
+
|
11
|
+
# @return [String]
|
12
|
+
attr_accessor :uri
|
15
13
|
|
16
14
|
##
|
17
15
|
# @param [Hash] options
|
@@ -28,14 +26,14 @@ module Oga
|
|
28
26
|
# @return [String]
|
29
27
|
#
|
30
28
|
def to_s
|
31
|
-
|
29
|
+
name.to_s
|
32
30
|
end
|
33
31
|
|
34
32
|
##
|
35
33
|
# @return [String]
|
36
34
|
#
|
37
35
|
def inspect
|
38
|
-
|
36
|
+
"Namespace(name: #{name.inspect} uri: #{uri.inspect})"
|
39
37
|
end
|
40
38
|
|
41
39
|
##
|
@@ -43,7 +41,7 @@ module Oga
|
|
43
41
|
# @return [TrueClass|FalseClass]
|
44
42
|
#
|
45
43
|
def ==(other)
|
46
|
-
|
44
|
+
other.is_a?(self.class) && name == other.name && uri == other.uri
|
47
45
|
end
|
48
46
|
end # Namespace
|
49
47
|
end # XML
|
data/lib/oga/xml/node.rb
CHANGED
@@ -5,12 +5,10 @@ module Oga
|
|
5
5
|
# {Oga::XML::NodeSet} and can be used to query surrounding and parent
|
6
6
|
# nodes.
|
7
7
|
#
|
8
|
-
# @!attribute [r] node_set
|
9
|
-
# @return [Oga::XML::NodeSet]
|
10
|
-
#
|
11
8
|
class Node
|
12
9
|
include Traversal
|
13
10
|
|
11
|
+
# @return [Oga::XML::NodeSet]
|
14
12
|
attr_reader :node_set
|
15
13
|
|
16
14
|
##
|
@@ -42,7 +40,7 @@ module Oga
|
|
42
40
|
# @return [Oga::XML::NodeSet]
|
43
41
|
#
|
44
42
|
def children
|
45
|
-
|
43
|
+
@children ||= NodeSet.new([], self)
|
46
44
|
end
|
47
45
|
|
48
46
|
##
|
@@ -65,7 +63,7 @@ module Oga
|
|
65
63
|
# @return [Oga::XML::Node]
|
66
64
|
#
|
67
65
|
def parent
|
68
|
-
|
66
|
+
node_set ? node_set.owner : nil
|
69
67
|
end
|
70
68
|
|
71
69
|
##
|
@@ -76,7 +74,7 @@ module Oga
|
|
76
74
|
def previous
|
77
75
|
index = node_set.index(self) - 1
|
78
76
|
|
79
|
-
|
77
|
+
index >= 0 ? node_set[index] : nil
|
80
78
|
end
|
81
79
|
|
82
80
|
##
|
@@ -88,7 +86,7 @@ module Oga
|
|
88
86
|
index = node_set.index(self) + 1
|
89
87
|
length = node_set.length
|
90
88
|
|
91
|
-
|
89
|
+
index <= length ? node_set[index] : nil
|
92
90
|
end
|
93
91
|
|
94
92
|
##
|
@@ -142,7 +140,7 @@ module Oga
|
|
142
140
|
@root_node = node
|
143
141
|
end
|
144
142
|
|
145
|
-
|
143
|
+
@root_node
|
146
144
|
end
|
147
145
|
|
148
146
|
##
|
@@ -186,14 +184,14 @@ module Oga
|
|
186
184
|
@html_p = root.is_a?(Document) && root.html?
|
187
185
|
end
|
188
186
|
|
189
|
-
|
187
|
+
@html_p
|
190
188
|
end
|
191
189
|
|
192
190
|
##
|
193
191
|
# @return [TrueClass|FalseClass]
|
194
192
|
#
|
195
193
|
def xml?
|
196
|
-
|
194
|
+
!html?
|
197
195
|
end
|
198
196
|
end # Node
|
199
197
|
end # XML
|
data/lib/oga/xml/node_set.rb
CHANGED
@@ -31,12 +31,10 @@ module Oga
|
|
31
31
|
# If ownership was not handled then you'd have to manually set the
|
32
32
|
# `element` variable's `node_set` attribute after pushing it into a set.
|
33
33
|
#
|
34
|
-
# @!attribute [rw] owner
|
35
|
-
# @return [Oga::XML::Node]
|
36
|
-
#
|
37
34
|
class NodeSet
|
38
35
|
include Enumerable
|
39
36
|
|
37
|
+
# @return [Oga::XML::Node]
|
40
38
|
attr_accessor :owner
|
41
39
|
|
42
40
|
##
|
@@ -65,7 +63,7 @@ module Oga
|
|
65
63
|
# @return [Oga::XML::Node]
|
66
64
|
#
|
67
65
|
def last
|
68
|
-
|
66
|
+
@nodes[-1]
|
69
67
|
end
|
70
68
|
|
71
69
|
##
|
@@ -74,7 +72,7 @@ module Oga
|
|
74
72
|
# @return [TrueClass|FalseClass]
|
75
73
|
#
|
76
74
|
def empty?
|
77
|
-
|
75
|
+
@nodes.empty?
|
78
76
|
end
|
79
77
|
|
80
78
|
##
|
@@ -83,7 +81,7 @@ module Oga
|
|
83
81
|
# @return [Fixnum]
|
84
82
|
#
|
85
83
|
def length
|
86
|
-
|
84
|
+
@nodes.length
|
87
85
|
end
|
88
86
|
|
89
87
|
alias_method :count, :length
|
@@ -96,7 +94,7 @@ module Oga
|
|
96
94
|
# @return [Fixnum]
|
97
95
|
#
|
98
96
|
def index(node)
|
99
|
-
|
97
|
+
@nodes.index(node)
|
100
98
|
end
|
101
99
|
|
102
100
|
##
|
@@ -137,7 +135,7 @@ module Oga
|
|
137
135
|
|
138
136
|
remove_ownership(node)
|
139
137
|
|
140
|
-
|
138
|
+
node
|
141
139
|
end
|
142
140
|
|
143
141
|
##
|
@@ -150,7 +148,7 @@ module Oga
|
|
150
148
|
|
151
149
|
remove_ownership(node)
|
152
150
|
|
153
|
-
|
151
|
+
node
|
154
152
|
end
|
155
153
|
|
156
154
|
##
|
@@ -174,7 +172,7 @@ module Oga
|
|
174
172
|
# @return [Oga::XML::Node]
|
175
173
|
#
|
176
174
|
def [](index)
|
177
|
-
|
175
|
+
@nodes[index]
|
178
176
|
end
|
179
177
|
|
180
178
|
##
|
@@ -183,7 +181,7 @@ module Oga
|
|
183
181
|
# @return [Array]
|
184
182
|
#
|
185
183
|
def to_a
|
186
|
-
|
184
|
+
@nodes
|
187
185
|
end
|
188
186
|
|
189
187
|
##
|
@@ -194,7 +192,7 @@ module Oga
|
|
194
192
|
# @return [Oga::XML::NodeSet]
|
195
193
|
#
|
196
194
|
def +(other)
|
197
|
-
|
195
|
+
self.class.new(to_a | other.to_a)
|
198
196
|
end
|
199
197
|
|
200
198
|
##
|
@@ -204,7 +202,7 @@ module Oga
|
|
204
202
|
# @param [Oga::XML::NodeSet] other
|
205
203
|
#
|
206
204
|
def ==(other)
|
207
|
-
|
205
|
+
other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
|
208
206
|
end
|
209
207
|
|
210
208
|
##
|
@@ -216,7 +214,7 @@ module Oga
|
|
216
214
|
# @param [Array<Oga::XML::Node>] nodes
|
217
215
|
#
|
218
216
|
def equal_nodes?(nodes)
|
219
|
-
|
217
|
+
@nodes == nodes
|
220
218
|
end
|
221
219
|
|
222
220
|
##
|
@@ -261,7 +259,7 @@ module Oga
|
|
261
259
|
|
262
260
|
remove_ownership(removed) if removed
|
263
261
|
|
264
|
-
|
262
|
+
removed
|
265
263
|
end
|
266
264
|
|
267
265
|
##
|
@@ -279,7 +277,7 @@ module Oga
|
|
279
277
|
end
|
280
278
|
end
|
281
279
|
|
282
|
-
|
280
|
+
values
|
283
281
|
end
|
284
282
|
|
285
283
|
alias_method :attr, :attribute
|
@@ -298,7 +296,7 @@ module Oga
|
|
298
296
|
end
|
299
297
|
end
|
300
298
|
|
301
|
-
|
299
|
+
text
|
302
300
|
end
|
303
301
|
|
304
302
|
##
|
@@ -307,7 +305,7 @@ module Oga
|
|
307
305
|
def inspect
|
308
306
|
values = @nodes.map(&:inspect).join(', ')
|
309
307
|
|
310
|
-
|
308
|
+
"NodeSet(#{values})"
|
311
309
|
end
|
312
310
|
|
313
311
|
private
|
data/lib/oga/xml/parser.rb
CHANGED
@@ -291,7 +291,7 @@ class Parser < LL::Driver
|
|
291
291
|
# @return [Oga::XML::Document]
|
292
292
|
#
|
293
293
|
def on_document(children = [])
|
294
|
-
document = Document.new(:type => @lexer.html ? :html : :xml)
|
294
|
+
document = Document.new(:type => @lexer.html? ? :html : :xml)
|
295
295
|
|
296
296
|
children.each do |child|
|
297
297
|
if child.is_a?(Doctype)
|
data/lib/oga/xml/processing_instruction.rb
CHANGED
@@ -3,10 +3,8 @@ module Oga
|
|
3
3
|
##
|
4
4
|
# Class used for storing information about a single processing instruction.
|
5
5
|
#
|
6
|
-
# @!attribute [rw] name
|
7
|
-
# @return [String]
|
8
|
-
#
|
9
6
|
class ProcessingInstruction < CharacterNode
|
7
|
+
# @return [String]
|
10
8
|
attr_accessor :name
|
11
9
|
|
12
10
|
##
|
@@ -25,14 +23,14 @@ module Oga
|
|
25
23
|
# @return [String]
|
26
24
|
#
|
27
25
|
def to_xml
|
28
|
-
|
26
|
+
"<?#{name}#{text}?>"
|
29
27
|
end
|
30
28
|
|
31
29
|
##
|
32
30
|
# @return [String]
|
33
31
|
#
|
34
32
|
def inspect
|
35
|
-
|
33
|
+
"ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
|
36
34
|
end
|
37
35
|
end # ProcessingInstruction
|
38
36
|
end # XML
|
data/lib/oga/xml/pull_parser.rb
CHANGED
@@ -19,16 +19,13 @@ module Oga
|
|
19
19
|
# This parser yields proper XML instances such as {Oga::XML::Element}.
|
20
20
|
# Doctypes and XML declarations are ignored by this parser.
|
21
21
|
#
|
22
|
-
# @!attribute [r] node
|
23
|
-
# The current node.
|
24
|
-
# @return [Oga::XML::Node]
|
25
|
-
#
|
26
|
-
# @!attribute [r] nesting
|
27
|
-
# Array containing the names of the currently nested elements.
|
28
|
-
# @return [Array]
|
29
|
-
#
|
30
22
|
class PullParser < Parser
|
31
|
-
|
23
|
+
# @return [Oga::XML::Node]
|
24
|
+
attr_reader :node
|
25
|
+
|
26
|
+
# Array containing the names of the currently nested elements.
|
27
|
+
# @return [Array]
|
28
|
+
attr_reader :nesting
|
32
29
|
|
33
30
|
##
|
34
31
|
# @return [Array]
|
data/lib/oga/xml/querying.rb
CHANGED
@@ -13,7 +13,7 @@ module Oga
|
|
13
13
|
# @see [Oga::XPath::Evaluator#initialize]
|
14
14
|
#
|
15
15
|
def xpath(expression, variables = {})
|
16
|
-
|
16
|
+
XPath::Evaluator.new(self, variables).evaluate(expression)
|
17
17
|
end
|
18
18
|
|
19
19
|
##
|
@@ -25,7 +25,7 @@ module Oga
|
|
25
25
|
def at_xpath(*args)
|
26
26
|
result = xpath(*args)
|
27
27
|
|
28
|
-
|
28
|
+
result.is_a?(XML::NodeSet) ? result.first : result
|
29
29
|
end
|
30
30
|
|
31
31
|
##
|
@@ -37,7 +37,7 @@ module Oga
|
|
37
37
|
def css(expression)
|
38
38
|
ast = CSS::Parser.parse_with_cache(expression)
|
39
39
|
|
40
|
-
|
40
|
+
XPath::Evaluator.new(self).evaluate_ast(ast)
|
41
41
|
end
|
42
42
|
|
43
43
|
##
|
@@ -49,7 +49,7 @@ module Oga
|
|
49
49
|
def at_css(*args)
|
50
50
|
result = css(*args)
|
51
51
|
|
52
|
-
|
52
|
+
result.is_a?(XML::NodeSet) ? result.first : result
|
53
53
|
end
|
54
54
|
end # Querying
|
55
55
|
end # XML
|