oga 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/ext/c/lexer.c +394 -312
- data/ext/c/lexer.rl +3 -3
- data/ext/java/org/liboga/xml/Lexer.java +216 -172
- data/ext/java/org/liboga/xml/Lexer.rl +1 -1
- data/ext/ragel/base_lexer.rl +30 -11
- data/lib/oga/blacklist.rb +2 -2
- data/lib/oga/css/parser.rb +26 -28
- data/lib/oga/entity_decoder.rb +2 -2
- data/lib/oga/html/entities.rb +1 -1
- data/lib/oga/lru.rb +6 -6
- data/lib/oga/oga.rb +14 -14
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +2 -2
- data/lib/oga/xml/attribute.rb +16 -18
- data/lib/oga/xml/cdata.rb +1 -1
- data/lib/oga/xml/character_node.rb +3 -5
- data/lib/oga/xml/comment.rb +1 -1
- data/lib/oga/xml/doctype.rb +21 -23
- data/lib/oga/xml/document.rb +11 -17
- data/lib/oga/xml/element.rb +19 -29
- data/lib/oga/xml/entities.rb +3 -3
- data/lib/oga/xml/lexer.rb +34 -15
- data/lib/oga/xml/namespace.rb +8 -10
- data/lib/oga/xml/node.rb +8 -10
- data/lib/oga/xml/node_set.rb +16 -18
- data/lib/oga/xml/parser.rb +1 -1
- data/lib/oga/xml/processing_instruction.rb +3 -5
- data/lib/oga/xml/pull_parser.rb +6 -9
- data/lib/oga/xml/querying.rb +4 -4
- data/lib/oga/xml/sax_parser.rb +4 -4
- data/lib/oga/xml/text.rb +4 -4
- data/lib/oga/xml/xml_declaration.rb +11 -15
- data/lib/oga/xpath/evaluator.rb +81 -81
- metadata +3 -3
data/lib/oga/version.rb
CHANGED
data/lib/oga/whitelist.rb
CHANGED
@@ -7,14 +7,14 @@ module Oga
|
|
7
7
|
# @return [TrueClass|FalseClass]
|
8
8
|
#
|
9
9
|
def allow?(name)
|
10
|
-
|
10
|
+
names.include?(name)
|
11
11
|
end
|
12
12
|
|
13
13
|
##
|
14
14
|
# @return [Oga::Blacklist]
|
15
15
|
#
|
16
16
|
def to_blacklist
|
17
|
-
|
17
|
+
Blacklist.new(names)
|
18
18
|
end
|
19
19
|
end # Whitelist
|
20
20
|
end # Oga
|
data/lib/oga/xml/attribute.rb
CHANGED
@@ -3,19 +3,17 @@ module Oga
|
|
3
3
|
##
|
4
4
|
# Class for storing information about a single XML attribute.
|
5
5
|
#
|
6
|
-
# @!attribute [rw] name
|
7
|
-
# The name of the attribute.
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
|
-
# @!attribute [rw] namespace_name
|
11
|
-
# @return [String]
|
12
|
-
#
|
13
|
-
# @!attribute [r] element
|
14
|
-
# The element this attribute belongs to.
|
15
|
-
# @return [Oga::XML::Element]
|
16
|
-
#
|
17
6
|
class Attribute
|
18
|
-
|
7
|
+
# The name of the attribute.
|
8
|
+
# @return [String]
|
9
|
+
attr_accessor :name
|
10
|
+
|
11
|
+
# @return [String]
|
12
|
+
attr_accessor :namespace_name
|
13
|
+
|
14
|
+
# The element this attribute belongs to.
|
15
|
+
# @return [Oga::XML::Element]
|
16
|
+
attr_accessor :element
|
19
17
|
|
20
18
|
##
|
21
19
|
# The default namespace available to all attributes. This namespace can
|
@@ -59,7 +57,7 @@ module Oga
|
|
59
57
|
end
|
60
58
|
end
|
61
59
|
|
62
|
-
|
60
|
+
@namespace
|
63
61
|
end
|
64
62
|
|
65
63
|
##
|
@@ -81,14 +79,14 @@ module Oga
|
|
81
79
|
@decoded = true
|
82
80
|
end
|
83
81
|
|
84
|
-
|
82
|
+
@value
|
85
83
|
end
|
86
84
|
|
87
85
|
##
|
88
86
|
# @return [String]
|
89
87
|
#
|
90
88
|
def text
|
91
|
-
|
89
|
+
value.to_s
|
92
90
|
end
|
93
91
|
|
94
92
|
alias_method :to_s, :text
|
@@ -105,7 +103,7 @@ module Oga
|
|
105
103
|
|
106
104
|
enc_value = value ? Entities.encode(value) : nil
|
107
105
|
|
108
|
-
|
106
|
+
%Q(#{full_name}="#{enc_value}")
|
109
107
|
end
|
110
108
|
|
111
109
|
##
|
@@ -122,7 +120,7 @@ module Oga
|
|
122
120
|
end
|
123
121
|
end
|
124
122
|
|
125
|
-
|
123
|
+
"Attribute(#{segments.join(' ')})"
|
126
124
|
end
|
127
125
|
|
128
126
|
private
|
@@ -131,7 +129,7 @@ module Oga
|
|
131
129
|
# @return [TrueClass|FalseClass]
|
132
130
|
#
|
133
131
|
def html?
|
134
|
-
|
132
|
+
!!@element && @element.html?
|
135
133
|
end
|
136
134
|
end # Attribute
|
137
135
|
end # XML
|
data/lib/oga/xml/cdata.rb
CHANGED
@@ -4,10 +4,8 @@ module Oga
|
|
4
4
|
# Base class for nodes that represent a text-like value such as Text and
|
5
5
|
# Comment nodes.
|
6
6
|
#
|
7
|
-
# @!attribute [rw] text
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
7
|
class CharacterNode < Node
|
8
|
+
# @return [String]
|
11
9
|
attr_accessor :text
|
12
10
|
|
13
11
|
##
|
@@ -25,14 +23,14 @@ module Oga
|
|
25
23
|
# @return [String]
|
26
24
|
#
|
27
25
|
def to_xml
|
28
|
-
|
26
|
+
text.to_s
|
29
27
|
end
|
30
28
|
|
31
29
|
##
|
32
30
|
# @return [String]
|
33
31
|
#
|
34
32
|
def inspect
|
35
|
-
|
33
|
+
"#{self.class.to_s.split('::').last}(#{text.inspect})"
|
36
34
|
end
|
37
35
|
end # CharacterNode
|
38
36
|
end # XML
|
data/lib/oga/xml/comment.rb
CHANGED
data/lib/oga/xml/doctype.rb
CHANGED
@@ -3,28 +3,26 @@ module Oga
|
|
3
3
|
##
|
4
4
|
# Class used for storing information about Doctypes.
|
5
5
|
#
|
6
|
-
# @!attribute [rw] name
|
7
|
-
# The name of the doctype (e.g. "HTML").
|
8
|
-
# @return [String]
|
9
|
-
#
|
10
|
-
# @!attribute [rw] type
|
11
|
-
# The type of the doctype (e.g. "PUBLIC").
|
12
|
-
# @return [String]
|
13
|
-
#
|
14
|
-
# @!attribute [rw] public_id
|
15
|
-
# The public ID of the doctype.
|
16
|
-
# @return [String]
|
17
|
-
#
|
18
|
-
# @!attribute [rw] system_id
|
19
|
-
# The system ID of the doctype.
|
20
|
-
# @return [String]
|
21
|
-
#
|
22
|
-
# @!attribute [rw] inline_rules
|
23
|
-
# The inline doctype rules.
|
24
|
-
# @return [String]
|
25
|
-
#
|
26
6
|
class Doctype
|
27
|
-
|
7
|
+
# The name of the doctype (e.g. "HTML").
|
8
|
+
# @return [String]
|
9
|
+
attr_accessor :name
|
10
|
+
|
11
|
+
# The type of the doctype (e.g. "PUBLIC").
|
12
|
+
# @return [String]
|
13
|
+
attr_accessor :type
|
14
|
+
|
15
|
+
# The public ID of the doctype.
|
16
|
+
# @return [String]
|
17
|
+
attr_accessor :public_id
|
18
|
+
|
19
|
+
# The system ID of the doctype.
|
20
|
+
# @return [String]
|
21
|
+
attr_accessor :system_id
|
22
|
+
|
23
|
+
# The inline doctype rules.
|
24
|
+
# @return [String]
|
25
|
+
attr_accessor :inline_rules
|
28
26
|
|
29
27
|
##
|
30
28
|
# @example
|
@@ -58,7 +56,7 @@ module Oga
|
|
58
56
|
segments << %Q{ "#{system_id}"} if system_id
|
59
57
|
segments << " [#{inline_rules}]" if inline_rules
|
60
58
|
|
61
|
-
|
59
|
+
segments + '>'
|
62
60
|
end
|
63
61
|
|
64
62
|
##
|
@@ -77,7 +75,7 @@ module Oga
|
|
77
75
|
end
|
78
76
|
end
|
79
77
|
|
80
|
-
|
78
|
+
"Doctype(#{segments.join(' ')})"
|
81
79
|
end
|
82
80
|
end # Doctype
|
83
81
|
end # XML
|
data/lib/oga/xml/document.rb
CHANGED
@@ -4,24 +4,18 @@ module Oga
|
|
4
4
|
# Class used for storing information about an entire XML document. This
|
5
5
|
# includes the doctype, XML declaration, child nodes and more.
|
6
6
|
#
|
7
|
-
# @!attribute [rw] doctype
|
8
|
-
# The doctype of the document.
|
9
|
-
# @return [Oga::XML::Doctype]
|
10
|
-
#
|
11
|
-
# @!attribute [rw] xml_declaration
|
12
|
-
# The XML declaration of the document.
|
13
|
-
# @return [Oga::XML::XmlDeclaration]
|
14
|
-
#
|
15
|
-
# @!attribute [r] type
|
16
|
-
# The document type, either `:xml` or `:html`.
|
17
|
-
# @return [Symbol]
|
18
|
-
#
|
19
7
|
class Document
|
20
8
|
include Querying
|
21
9
|
include Traversal
|
22
10
|
|
23
|
-
|
11
|
+
# @return [Oga::XML::Doctype]
|
12
|
+
attr_accessor :doctype
|
13
|
+
|
14
|
+
# @return [Oga::XML::XmlDeclaration]
|
15
|
+
attr_accessor :xml_declaration
|
24
16
|
|
17
|
+
# The document type, either `:xml` or `:html`.
|
18
|
+
# @return [Symbol]
|
25
19
|
attr_reader :type
|
26
20
|
|
27
21
|
##
|
@@ -44,7 +38,7 @@ module Oga
|
|
44
38
|
# @return [Oga::XML::NodeSet]
|
45
39
|
#
|
46
40
|
def children
|
47
|
-
|
41
|
+
@children ||= NodeSet.new([], self)
|
48
42
|
end
|
49
43
|
|
50
44
|
##
|
@@ -76,14 +70,14 @@ module Oga
|
|
76
70
|
xml = xml_declaration.to_xml + "\n" + xml.strip
|
77
71
|
end
|
78
72
|
|
79
|
-
|
73
|
+
xml
|
80
74
|
end
|
81
75
|
|
82
76
|
##
|
83
77
|
# @return [TrueClass|FalseClass]
|
84
78
|
#
|
85
79
|
def html?
|
86
|
-
|
80
|
+
type.equal?(:html)
|
87
81
|
end
|
88
82
|
|
89
83
|
##
|
@@ -103,7 +97,7 @@ module Oga
|
|
103
97
|
end
|
104
98
|
end
|
105
99
|
|
106
|
-
|
100
|
+
<<-EOF.strip
|
107
101
|
Document(
|
108
102
|
#{segments.join("\n ")}
|
109
103
|
)
|
data/lib/oga/xml/element.rb
CHANGED
@@ -4,29 +4,19 @@ module Oga
|
|
4
4
|
# Class that contains information about an XML element such as the name,
|
5
5
|
# attributes and child nodes.
|
6
6
|
#
|
7
|
-
# @!attribute [rw] name
|
8
|
-
# The name of the element.
|
9
|
-
# @return [String]
|
10
|
-
#
|
11
|
-
# @!attribute [r] namespace_name
|
12
|
-
# The name of the namespace.
|
13
|
-
# @return [String]
|
14
|
-
#
|
15
|
-
# @!attribute [rw] attributes
|
16
|
-
# The attributes of the element.
|
17
|
-
# @return [Array<Oga::XML::Attribute>]
|
18
|
-
#
|
19
|
-
# @!attribute [rw] namespaces
|
20
|
-
# The registered namespaces.
|
21
|
-
# @return [Hash]
|
22
|
-
#
|
23
7
|
class Element < Node
|
24
8
|
include Querying
|
25
9
|
|
10
|
+
# @return [String]
|
26
11
|
attr_reader :namespace_name
|
27
12
|
|
28
|
-
|
13
|
+
# @return [String]
|
14
|
+
attr_accessor :name
|
15
|
+
|
16
|
+
# @return [Array<Oga::XML::Attribute>]
|
17
|
+
attr_accessor :attributes
|
29
18
|
|
19
|
+
# @return [Hash]
|
30
20
|
attr_writer :namespaces
|
31
21
|
|
32
22
|
##
|
@@ -105,7 +95,7 @@ module Oga
|
|
105
95
|
def get(name)
|
106
96
|
found = attribute(name)
|
107
97
|
|
108
|
-
|
98
|
+
found ? found.value : nil
|
109
99
|
end
|
110
100
|
|
111
101
|
##
|
@@ -175,7 +165,7 @@ module Oga
|
|
175
165
|
@namespace = available[namespace_name] || available[XMLNS_PREFIX]
|
176
166
|
end
|
177
167
|
|
178
|
-
|
168
|
+
@namespace
|
179
169
|
end
|
180
170
|
|
181
171
|
##
|
@@ -185,7 +175,7 @@ module Oga
|
|
185
175
|
# @return [Hash]
|
186
176
|
#
|
187
177
|
def namespaces
|
188
|
-
|
178
|
+
html? ? {} : @namespaces
|
189
179
|
end
|
190
180
|
|
191
181
|
##
|
@@ -195,7 +185,7 @@ module Oga
|
|
195
185
|
# @return [TrueClass|FalseClass]
|
196
186
|
#
|
197
187
|
def default_namespace?
|
198
|
-
|
188
|
+
namespace == DEFAULT_NAMESPACE || namespace.nil?
|
199
189
|
end
|
200
190
|
|
201
191
|
##
|
@@ -204,7 +194,7 @@ module Oga
|
|
204
194
|
# @return [String]
|
205
195
|
#
|
206
196
|
def text
|
207
|
-
|
197
|
+
children.text
|
208
198
|
end
|
209
199
|
|
210
200
|
##
|
@@ -219,7 +209,7 @@ module Oga
|
|
219
209
|
text << node.text
|
220
210
|
end
|
221
211
|
|
222
|
-
|
212
|
+
text
|
223
213
|
end
|
224
214
|
|
225
215
|
##
|
@@ -235,7 +225,7 @@ module Oga
|
|
235
225
|
nodes << child if child.is_a?(Text)
|
236
226
|
end
|
237
227
|
|
238
|
-
|
228
|
+
nodes
|
239
229
|
end
|
240
230
|
|
241
231
|
##
|
@@ -290,7 +280,7 @@ module Oga
|
|
290
280
|
segments << "#{attr}: #{value.inspect}"
|
291
281
|
end
|
292
282
|
|
293
|
-
|
283
|
+
"Element(#{segments.join(' ')})"
|
294
284
|
end
|
295
285
|
|
296
286
|
##
|
@@ -339,7 +329,7 @@ module Oga
|
|
339
329
|
end
|
340
330
|
end
|
341
331
|
|
342
|
-
|
332
|
+
@available_namespaces
|
343
333
|
end
|
344
334
|
|
345
335
|
##
|
@@ -356,7 +346,7 @@ module Oga
|
|
356
346
|
self_closing = false
|
357
347
|
end
|
358
348
|
|
359
|
-
|
349
|
+
self_closing
|
360
350
|
end
|
361
351
|
|
362
352
|
##
|
@@ -411,7 +401,7 @@ module Oga
|
|
411
401
|
def split_name(name)
|
412
402
|
segments = name.to_s.split(':')
|
413
403
|
|
414
|
-
|
404
|
+
[segments.pop, segments.pop]
|
415
405
|
end
|
416
406
|
|
417
407
|
##
|
@@ -431,7 +421,7 @@ module Oga
|
|
431
421
|
ns_matches = true
|
432
422
|
end
|
433
423
|
|
434
|
-
|
424
|
+
name_matches && ns_matches
|
435
425
|
end
|
436
426
|
end # Element
|
437
427
|
end # XML
|
data/lib/oga/xml/entities.rb
CHANGED
@@ -42,7 +42,7 @@ module Oga
|
|
42
42
|
#
|
43
43
|
# @return [Regexp]
|
44
44
|
#
|
45
|
-
REGULAR_ENTITY = /&[a-zA-
|
45
|
+
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
|
46
46
|
|
47
47
|
##
|
48
48
|
# Regexp for matching XML/HTML entities such as "&#38;".
|
@@ -74,7 +74,7 @@ module Oga
|
|
74
74
|
end
|
75
75
|
end
|
76
76
|
|
77
|
-
|
77
|
+
input
|
78
78
|
end
|
79
79
|
|
80
80
|
##
|
@@ -85,7 +85,7 @@ module Oga
|
|
85
85
|
# @return [String]
|
86
86
|
#
|
87
87
|
def self.encode(input, mapping = ENCODE_MAPPING)
|
88
|
-
|
88
|
+
input.gsub(ENCODE_REGEXP, mapping)
|
89
89
|
end
|
90
90
|
end # Entities
|
91
91
|
end # XML
|