oga 1.0.2-java → 1.0.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/ext/c/lexer.c +394 -312
- data/ext/c/lexer.rl +3 -3
- data/ext/java/org/liboga/xml/Lexer.java +216 -172
- data/ext/java/org/liboga/xml/Lexer.rl +1 -1
- data/ext/ragel/base_lexer.rl +30 -11
- data/lib/liboga.jar +0 -0
- data/lib/oga/blacklist.rb +2 -2
- data/lib/oga/css/parser.rb +26 -28
- data/lib/oga/entity_decoder.rb +2 -2
- data/lib/oga/html/entities.rb +1 -1
- data/lib/oga/lru.rb +6 -6
- data/lib/oga/oga.rb +14 -14
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +2 -2
- data/lib/oga/xml/attribute.rb +16 -18
- data/lib/oga/xml/cdata.rb +1 -1
- data/lib/oga/xml/character_node.rb +3 -5
- data/lib/oga/xml/comment.rb +1 -1
- data/lib/oga/xml/doctype.rb +21 -23
- data/lib/oga/xml/document.rb +11 -17
- data/lib/oga/xml/element.rb +19 -29
- data/lib/oga/xml/entities.rb +3 -3
- data/lib/oga/xml/lexer.rb +34 -15
- data/lib/oga/xml/namespace.rb +8 -10
- data/lib/oga/xml/node.rb +8 -10
- data/lib/oga/xml/node_set.rb +16 -18
- data/lib/oga/xml/parser.rb +1 -1
- data/lib/oga/xml/processing_instruction.rb +3 -5
- data/lib/oga/xml/pull_parser.rb +6 -9
- data/lib/oga/xml/querying.rb +4 -4
- data/lib/oga/xml/sax_parser.rb +4 -4
- data/lib/oga/xml/text.rb +4 -4
- data/lib/oga/xml/xml_declaration.rb +11 -15
- data/lib/oga/xpath/evaluator.rb +81 -81
- metadata +66 -66
data/lib/oga/version.rb
CHANGED
data/lib/oga/whitelist.rb
CHANGED
@@ -7,14 +7,14 @@ module Oga
|
|
7
7
|
# @return [TrueClass|FalseClass]
|
8
8
|
#
|
9
9
|
def allow?(name)
|
10
|
-
|
10
|
+
names.include?(name)
|
11
11
|
end
|
12
12
|
|
13
13
|
##
|
14
14
|
# @return [Oga::Blacklist]
|
15
15
|
#
|
16
16
|
def to_blacklist
|
17
|
-
|
17
|
+
Blacklist.new(names)
|
18
18
|
end
|
19
19
|
end # Whitelist
|
20
20
|
end # Oga
|
data/lib/oga/xml/attribute.rb
CHANGED
@@ -3,19 +3,17 @@ module Oga
|
|
3
3
|
##
|
4
4
|
# Class for storing information about a single XML attribute.
|
5
5
|
#
|
6
|
-
# @!attribute [rw] name
|
7
|
-
# The name of the attribute.
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
|
-
# @!attribute [rw] namespace_name
|
11
|
-
# @return [String]
|
12
|
-
#
|
13
|
-
# @!attribute [r] element
|
14
|
-
# The element this attribute belongs to.
|
15
|
-
# @return [Oga::XML::Element]
|
16
|
-
#
|
17
6
|
class Attribute
|
18
|
-
|
7
|
+
# The name of the attribute.
|
8
|
+
# @return [String]
|
9
|
+
attr_accessor :name
|
10
|
+
|
11
|
+
# @return [String]
|
12
|
+
attr_accessor :namespace_name
|
13
|
+
|
14
|
+
# The element this attribute belongs to.
|
15
|
+
# @return [Oga::XML::Element]
|
16
|
+
attr_accessor :element
|
19
17
|
|
20
18
|
##
|
21
19
|
# The default namespace available to all attributes. This namespace can
|
@@ -59,7 +57,7 @@ module Oga
|
|
59
57
|
end
|
60
58
|
end
|
61
59
|
|
62
|
-
|
60
|
+
@namespace
|
63
61
|
end
|
64
62
|
|
65
63
|
##
|
@@ -81,14 +79,14 @@ module Oga
|
|
81
79
|
@decoded = true
|
82
80
|
end
|
83
81
|
|
84
|
-
|
82
|
+
@value
|
85
83
|
end
|
86
84
|
|
87
85
|
##
|
88
86
|
# @return [String]
|
89
87
|
#
|
90
88
|
def text
|
91
|
-
|
89
|
+
value.to_s
|
92
90
|
end
|
93
91
|
|
94
92
|
alias_method :to_s, :text
|
@@ -105,7 +103,7 @@ module Oga
|
|
105
103
|
|
106
104
|
enc_value = value ? Entities.encode(value) : nil
|
107
105
|
|
108
|
-
|
106
|
+
%Q(#{full_name}="#{enc_value}")
|
109
107
|
end
|
110
108
|
|
111
109
|
##
|
@@ -122,7 +120,7 @@ module Oga
|
|
122
120
|
end
|
123
121
|
end
|
124
122
|
|
125
|
-
|
123
|
+
"Attribute(#{segments.join(' ')})"
|
126
124
|
end
|
127
125
|
|
128
126
|
private
|
@@ -131,7 +129,7 @@ module Oga
|
|
131
129
|
# @return [TrueClass|FalseClass]
|
132
130
|
#
|
133
131
|
def html?
|
134
|
-
|
132
|
+
!!@element && @element.html?
|
135
133
|
end
|
136
134
|
end # Attribute
|
137
135
|
end # XML
|
data/lib/oga/xml/cdata.rb
CHANGED
data/lib/oga/xml/character_node.rb
CHANGED
@@ -4,10 +4,8 @@ module Oga
|
|
4
4
|
# Base class for nodes that represent a text-like value such as Text and
|
5
5
|
# Comment nodes.
|
6
6
|
#
|
7
|
-
# @!attribute [rw] text
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
7
|
class CharacterNode < Node
|
8
|
+
# @return [String]
|
11
9
|
attr_accessor :text
|
12
10
|
|
13
11
|
##
|
@@ -25,14 +23,14 @@ module Oga
|
|
25
23
|
# @return [String]
|
26
24
|
#
|
27
25
|
def to_xml
|
28
|
-
|
26
|
+
text.to_s
|
29
27
|
end
|
30
28
|
|
31
29
|
##
|
32
30
|
# @return [String]
|
33
31
|
#
|
34
32
|
def inspect
|
35
|
-
|
33
|
+
"#{self.class.to_s.split('::').last}(#{text.inspect})"
|
36
34
|
end
|
37
35
|
end # CharacterNode
|
38
36
|
end # XML
|
data/lib/oga/xml/comment.rb
CHANGED
data/lib/oga/xml/doctype.rb
CHANGED
@@ -3,28 +3,26 @@ module Oga
|
|
3
3
|
##
|
4
4
|
# Class used for storing information about Doctypes.
|
5
5
|
#
|
6
|
-
# @!attribute [rw] name
|
7
|
-
# The name of the doctype (e.g. "HTML").
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
|
-
# @!attribute [rw] type
|
11
|
-
# The type of the doctype (e.g. "PUBLIC").
|
12
|
-
# @return [String]
|
13
|
-
#
|
14
|
-
# @!attribute [rw] public_id
|
15
|
-
# The public ID of the doctype.
|
16
|
-
# @return [String]
|
17
|
-
#
|
18
|
-
# @!attribute [rw] system_id
|
19
|
-
# The system ID of the doctype.
|
20
|
-
# @return [String]
|
21
|
-
#
|
22
|
-
# @!attribute [rw] inline_rules
|
23
|
-
# The inline doctype rules.
|
24
|
-
# @return [String]
|
25
|
-
#
|
26
6
|
class Doctype
|
27
|
-
|
7
|
+
# The name of the doctype (e.g. "HTML").
|
8
|
+
# @return [String]
|
9
|
+
attr_accessor :name
|
10
|
+
|
11
|
+
# The type of the doctype (e.g. "PUBLIC").
|
12
|
+
# @return [String]
|
13
|
+
attr_accessor :type
|
14
|
+
|
15
|
+
# The public ID of the doctype.
|
16
|
+
# @return [String]
|
17
|
+
attr_accessor :public_id
|
18
|
+
|
19
|
+
# The system ID of the doctype.
|
20
|
+
# @return [String]
|
21
|
+
attr_accessor :system_id
|
22
|
+
|
23
|
+
# The inline doctype rules.
|
24
|
+
# @return [String]
|
25
|
+
attr_accessor :inline_rules
|
28
26
|
|
29
27
|
##
|
30
28
|
# @example
|
@@ -58,7 +56,7 @@ module Oga
|
|
58
56
|
segments << %Q{ "#{system_id}"} if system_id
|
59
57
|
segments << " [#{inline_rules}]" if inline_rules
|
60
58
|
|
61
|
-
|
59
|
+
segments + '>'
|
62
60
|
end
|
63
61
|
|
64
62
|
##
|
@@ -77,7 +75,7 @@ module Oga
|
|
77
75
|
end
|
78
76
|
end
|
79
77
|
|
80
|
-
|
78
|
+
"Doctype(#{segments.join(' ')})"
|
81
79
|
end
|
82
80
|
end # Doctype
|
83
81
|
end # XML
|
data/lib/oga/xml/document.rb
CHANGED
@@ -4,24 +4,18 @@ module Oga
|
|
4
4
|
# Class used for storing information about an entire XML document. This
|
5
5
|
# includes the doctype, XML declaration, child nodes and more.
|
6
6
|
#
|
7
|
-
# @!attribute [rw] doctype
|
8
|
-
# The doctype of the document.
|
9
|
-
# @return [Oga::XML::Doctype]
|
10
|
-
#
|
11
|
-
# @!attribute [rw] xml_declaration
|
12
|
-
# The XML declaration of the document.
|
13
|
-
# @return [Oga::XML::XmlDeclaration]
|
14
|
-
#
|
15
|
-
# @!attribute [r] type
|
16
|
-
# The document type, either `:xml` or `:html`.
|
17
|
-
# @return [Symbol]
|
18
|
-
#
|
19
7
|
class Document
|
20
8
|
include Querying
|
21
9
|
include Traversal
|
22
10
|
|
23
|
-
|
11
|
+
# @return [Oga::XML::Doctype]
|
12
|
+
attr_accessor :doctype
|
13
|
+
|
14
|
+
# @return [Oga::XML::XmlDeclaration]
|
15
|
+
attr_accessor :xml_declaration
|
24
16
|
|
17
|
+
# The document type, either `:xml` or `:html`.
|
18
|
+
# @return [Symbol]
|
25
19
|
attr_reader :type
|
26
20
|
|
27
21
|
##
|
@@ -44,7 +38,7 @@ module Oga
|
|
44
38
|
# @return [Oga::XML::NodeSet]
|
45
39
|
#
|
46
40
|
def children
|
47
|
-
|
41
|
+
@children ||= NodeSet.new([], self)
|
48
42
|
end
|
49
43
|
|
50
44
|
##
|
@@ -76,14 +70,14 @@ module Oga
|
|
76
70
|
xml = xml_declaration.to_xml + "\n" + xml.strip
|
77
71
|
end
|
78
72
|
|
79
|
-
|
73
|
+
xml
|
80
74
|
end
|
81
75
|
|
82
76
|
##
|
83
77
|
# @return [TrueClass|FalseClass]
|
84
78
|
#
|
85
79
|
def html?
|
86
|
-
|
80
|
+
type.equal?(:html)
|
87
81
|
end
|
88
82
|
|
89
83
|
##
|
@@ -103,7 +97,7 @@ module Oga
|
|
103
97
|
end
|
104
98
|
end
|
105
99
|
|
106
|
-
|
100
|
+
<<-EOF.strip
|
107
101
|
Document(
|
108
102
|
#{segments.join("\n ")}
|
109
103
|
)
|
data/lib/oga/xml/element.rb
CHANGED
@@ -4,29 +4,19 @@ module Oga
|
|
4
4
|
# Class that contains information about an XML element such as the name,
|
5
5
|
# attributes and child nodes.
|
6
6
|
#
|
7
|
-
# @!attribute [rw] name
|
8
|
-
# The name of the element.
|
9
|
-
# @return [String]
|
10
|
-
#
|
11
|
-
# @!attribute [r] namespace_name
|
12
|
-
# The name of the namespace.
|
13
|
-
# @return [String]
|
14
|
-
#
|
15
|
-
# @!attribute [rw] attributes
|
16
|
-
# The attributes of the element.
|
17
|
-
# @return [Array<Oga::XML::Attribute>]
|
18
|
-
#
|
19
|
-
# @!attribute [rw] namespaces
|
20
|
-
# The registered namespaces.
|
21
|
-
# @return [Hash]
|
22
|
-
#
|
23
7
|
class Element < Node
|
24
8
|
include Querying
|
25
9
|
|
10
|
+
# @return [String]
|
26
11
|
attr_reader :namespace_name
|
27
12
|
|
28
|
-
|
13
|
+
# @return [String]
|
14
|
+
attr_accessor :name
|
15
|
+
|
16
|
+
# @return [Array<Oga::XML::Attribute>]
|
17
|
+
attr_accessor :attributes
|
29
18
|
|
19
|
+
# @return [Hash]
|
30
20
|
attr_writer :namespaces
|
31
21
|
|
32
22
|
##
|
@@ -105,7 +95,7 @@ module Oga
|
|
105
95
|
def get(name)
|
106
96
|
found = attribute(name)
|
107
97
|
|
108
|
-
|
98
|
+
found ? found.value : nil
|
109
99
|
end
|
110
100
|
|
111
101
|
##
|
@@ -175,7 +165,7 @@ module Oga
|
|
175
165
|
@namespace = available[namespace_name] || available[XMLNS_PREFIX]
|
176
166
|
end
|
177
167
|
|
178
|
-
|
168
|
+
@namespace
|
179
169
|
end
|
180
170
|
|
181
171
|
##
|
@@ -185,7 +175,7 @@ module Oga
|
|
185
175
|
# @return [Hash]
|
186
176
|
#
|
187
177
|
def namespaces
|
188
|
-
|
178
|
+
html? ? {} : @namespaces
|
189
179
|
end
|
190
180
|
|
191
181
|
##
|
@@ -195,7 +185,7 @@ module Oga
|
|
195
185
|
# @return [TrueClass|FalseClass]
|
196
186
|
#
|
197
187
|
def default_namespace?
|
198
|
-
|
188
|
+
namespace == DEFAULT_NAMESPACE || namespace.nil?
|
199
189
|
end
|
200
190
|
|
201
191
|
##
|
@@ -204,7 +194,7 @@ module Oga
|
|
204
194
|
# @return [String]
|
205
195
|
#
|
206
196
|
def text
|
207
|
-
|
197
|
+
children.text
|
208
198
|
end
|
209
199
|
|
210
200
|
##
|
@@ -219,7 +209,7 @@ module Oga
|
|
219
209
|
text << node.text
|
220
210
|
end
|
221
211
|
|
222
|
-
|
212
|
+
text
|
223
213
|
end
|
224
214
|
|
225
215
|
##
|
@@ -235,7 +225,7 @@ module Oga
|
|
235
225
|
nodes << child if child.is_a?(Text)
|
236
226
|
end
|
237
227
|
|
238
|
-
|
228
|
+
nodes
|
239
229
|
end
|
240
230
|
|
241
231
|
##
|
@@ -290,7 +280,7 @@ module Oga
|
|
290
280
|
segments << "#{attr}: #{value.inspect}"
|
291
281
|
end
|
292
282
|
|
293
|
-
|
283
|
+
"Element(#{segments.join(' ')})"
|
294
284
|
end
|
295
285
|
|
296
286
|
##
|
@@ -339,7 +329,7 @@ module Oga
|
|
339
329
|
end
|
340
330
|
end
|
341
331
|
|
342
|
-
|
332
|
+
@available_namespaces
|
343
333
|
end
|
344
334
|
|
345
335
|
##
|
@@ -356,7 +346,7 @@ module Oga
|
|
356
346
|
self_closing = false
|
357
347
|
end
|
358
348
|
|
359
|
-
|
349
|
+
self_closing
|
360
350
|
end
|
361
351
|
|
362
352
|
##
|
@@ -411,7 +401,7 @@ module Oga
|
|
411
401
|
def split_name(name)
|
412
402
|
segments = name.to_s.split(':')
|
413
403
|
|
414
|
-
|
404
|
+
[segments.pop, segments.pop]
|
415
405
|
end
|
416
406
|
|
417
407
|
##
|
@@ -431,7 +421,7 @@ module Oga
|
|
431
421
|
ns_matches = true
|
432
422
|
end
|
433
423
|
|
434
|
-
|
424
|
+
name_matches && ns_matches
|
435
425
|
end
|
436
426
|
end # Element
|
437
427
|
end # XML
|
data/lib/oga/xml/entities.rb
CHANGED
@@ -42,7 +42,7 @@ module Oga
|
|
42
42
|
#
|
43
43
|
# @return [Regexp]
|
44
44
|
#
|
45
|
-
REGULAR_ENTITY = /&[a-zA-
|
45
|
+
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
|
46
46
|
|
47
47
|
##
|
48
48
|
# Regexp for matching XML/HTML entities such as "&amp;".
|
@@ -74,7 +74,7 @@ module Oga
|
|
74
74
|
end
|
75
75
|
end
|
76
76
|
|
77
|
-
|
77
|
+
input
|
78
78
|
end
|
79
79
|
|
80
80
|
##
|
@@ -85,7 +85,7 @@ module Oga
|
|
85
85
|
# @return [String]
|
86
86
|
#
|
87
87
|
def self.encode(input, mapping = ENCODE_MAPPING)
|
88
|
-
|
88
|
+
input.gsub(ENCODE_REGEXP, mapping)
|
89
89
|
end
|
90
90
|
end # Entities
|
91
91
|
end # XML
|