oga 1.2.3-java → 1.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/css_selectors.md +1 -1
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +6 -1
- data/lib/oga/blacklist.rb +0 -10
- data/lib/oga/css/lexer.rb +530 -255
- data/lib/oga/css/parser.rb +232 -230
- data/lib/oga/entity_decoder.rb +0 -4
- data/lib/oga/html/entities.rb +0 -4
- data/lib/oga/html/parser.rb +0 -4
- data/lib/oga/html/sax_parser.rb +0 -4
- data/lib/oga/lru.rb +0 -26
- data/lib/oga/oga.rb +0 -8
- data/lib/oga/ruby/generator.rb +225 -0
- data/lib/oga/ruby/node.rb +189 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +0 -6
- data/lib/oga/xml/attribute.rb +13 -20
- data/lib/oga/xml/cdata.rb +0 -4
- data/lib/oga/xml/character_node.rb +0 -8
- data/lib/oga/xml/comment.rb +0 -4
- data/lib/oga/xml/default_namespace.rb +0 -2
- data/lib/oga/xml/doctype.rb +0 -8
- data/lib/oga/xml/document.rb +10 -14
- data/lib/oga/xml/element.rb +1 -52
- data/lib/oga/xml/entities.rb +0 -26
- data/lib/oga/xml/expanded_name.rb +12 -0
- data/lib/oga/xml/html_void_elements.rb +0 -2
- data/lib/oga/xml/lexer.rb +0 -86
- data/lib/oga/xml/namespace.rb +0 -10
- data/lib/oga/xml/node.rb +18 -34
- data/lib/oga/xml/node_set.rb +0 -50
- data/lib/oga/xml/parser.rb +13 -50
- data/lib/oga/xml/processing_instruction.rb +0 -8
- data/lib/oga/xml/pull_parser.rb +0 -18
- data/lib/oga/xml/querying.rb +58 -19
- data/lib/oga/xml/sax_parser.rb +0 -18
- data/lib/oga/xml/text.rb +0 -12
- data/lib/oga/xml/traversal.rb +0 -4
- data/lib/oga/xml/xml_declaration.rb +0 -8
- data/lib/oga/xpath/compiler.rb +1568 -0
- data/lib/oga/xpath/conversion.rb +102 -0
- data/lib/oga/xpath/lexer.rb +1844 -1238
- data/lib/oga/xpath/parser.rb +182 -153
- metadata +7 -3
- data/lib/oga/xpath/evaluator.rb +0 -1800
data/lib/oga/entity_decoder.rb
CHANGED
@@ -1,17 +1,13 @@
|
|
1
1
|
module Oga
|
2
2
|
module EntityDecoder
|
3
|
-
##
|
4
3
|
# @see [decode]
|
5
|
-
#
|
6
4
|
def self.try_decode(input, html = false)
|
7
5
|
input ? decode(input, html) : nil
|
8
6
|
end
|
9
7
|
|
10
|
-
##
|
11
8
|
# @param [String] input
|
12
9
|
# @param [TrueClass|FalseClass] html
|
13
10
|
# @return [String]
|
14
|
-
#
|
15
11
|
def self.decode(input, html = false)
|
16
12
|
decoder = html ? HTML::Entities : XML::Entities
|
17
13
|
|
data/lib/oga/html/entities.rb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
module Oga
|
2
2
|
module HTML
|
3
3
|
module Entities
|
4
|
-
##
|
5
4
|
# Hash mapping HTML entities to their Unicode character replacements.
|
6
5
|
#
|
7
6
|
# Based on the JSON output as listed at
|
8
7
|
# http://www.w3.org/TR/html5/syntax.html#named-character-references
|
9
8
|
#
|
10
9
|
# @return [Hash]
|
11
|
-
#
|
12
10
|
DECODE_MAPPING = {
|
13
11
|
'&Aacute;' => [193].pack('U*'),
|
14
12
|
'&aacute;' => [225].pack('U*'),
|
@@ -2137,11 +2135,9 @@ module Oga
|
|
2137
2135
|
'&zwnj;' => [8204].pack('U*'),
|
2138
2136
|
}
|
2139
2137
|
|
2140
|
-
##
|
2141
2138
|
# Decodes HTML entities.
|
2142
2139
|
#
|
2143
2140
|
# @see [decode]
|
2144
|
-
#
|
2145
2141
|
def self.decode(input)
|
2146
2142
|
XML::Entities.decode(input, DECODE_MAPPING)
|
2147
2143
|
end
|
data/lib/oga/html/parser.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Oga
|
2
2
|
module HTML
|
3
|
-
##
|
4
3
|
# Parser for processing HTML input. This parser is a small wrapper around
|
5
4
|
# {Oga::XML::Parser} and takes care of setting the various options required
|
6
5
|
# for parsing HTML documents.
|
@@ -8,13 +7,10 @@ module Oga
|
|
8
7
|
# A basic example:
|
9
8
|
#
|
10
9
|
# Oga::HTML::Parser.new('<meta charset="utf-8">').parse
|
11
|
-
#
|
12
10
|
class Parser < XML::Parser
|
13
|
-
##
|
14
11
|
# @param [String|IO] data
|
15
12
|
# @param [Hash] options
|
16
13
|
# @see [Oga::XML::Parser#initialize]
|
17
|
-
#
|
18
14
|
def initialize(data, options = {})
|
19
15
|
options = options.merge(:html => true)
|
20
16
|
|
data/lib/oga/html/sax_parser.rb
CHANGED
@@ -1,13 +1,9 @@
|
|
1
1
|
module Oga
|
2
2
|
module HTML
|
3
|
-
##
|
4
3
|
# SAX parser for HTML documents. See the documentation of
|
5
4
|
# {Oga::XML::SaxParser} for more information.
|
6
|
-
#
|
7
5
|
class SaxParser < XML::SaxParser
|
8
|
-
##
|
9
6
|
# @see [Oga::XML::SaxParser#initialize]
|
10
|
-
#
|
11
7
|
def initialize(handler, data, options = {})
|
12
8
|
options = options.merge(:html => true)
|
13
9
|
|
data/lib/oga/lru.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Oga
|
2
|
-
##
|
3
2
|
# Thread-safe LRU cache using a Hash as the underlying storage engine.
|
4
3
|
# Whenever the size of the cache exceeds the given limit the oldest keys are
|
5
4
|
# removed (based on insert order).
|
@@ -22,11 +21,8 @@ module Oga
|
|
22
21
|
# cache.keys # => [:b, :c, :d]
|
23
22
|
#
|
24
23
|
# @api private
|
25
|
-
#
|
26
24
|
class LRU
|
27
|
-
##
|
28
25
|
# @param [Fixnum] maximum
|
29
|
-
#
|
30
26
|
def initialize(maximum = 1024)
|
31
27
|
@maximum = maximum
|
32
28
|
@cache = {}
|
@@ -35,9 +31,7 @@ module Oga
|
|
35
31
|
@owner = Thread.current
|
36
32
|
end
|
37
33
|
|
38
|
-
##
|
39
34
|
# @param [Fixnum] value
|
40
|
-
#
|
41
35
|
def maximum=(value)
|
42
36
|
synchronize do
|
43
37
|
@maximum = value
|
@@ -46,30 +40,24 @@ module Oga
|
|
46
40
|
end
|
47
41
|
end
|
48
42
|
|
49
|
-
##
|
50
43
|
# @return [Fixnum]
|
51
|
-
#
|
52
44
|
def maximum
|
53
45
|
synchronize { @maximum }
|
54
46
|
end
|
55
47
|
|
56
|
-
##
|
57
48
|
# Returns the value of the key.
|
58
49
|
#
|
59
50
|
# @param [Mixed] key
|
60
51
|
# @return [Mixed]
|
61
|
-
#
|
62
52
|
def [](key)
|
63
53
|
synchronize { @cache[key] }
|
64
54
|
end
|
65
55
|
|
66
|
-
##
|
67
56
|
# Sets the key and its value. Old keys are discarded if the LRU size exceeds
|
68
57
|
# the limit.
|
69
58
|
#
|
70
59
|
# @param [Mixed] key
|
71
60
|
# @param [Mixed] value
|
72
|
-
#
|
73
61
|
def []=(key, value)
|
74
62
|
synchronize do
|
75
63
|
@cache[key] = value
|
@@ -82,35 +70,27 @@ module Oga
|
|
82
70
|
end
|
83
71
|
end
|
84
72
|
|
85
|
-
##
|
86
73
|
# Returns the value of a key if it exists, otherwise yields the supplied block and uses
|
87
74
|
# its return value as the key value.
|
88
75
|
#
|
89
76
|
# @param [Mixed] key
|
90
77
|
# @return [Mixed]
|
91
|
-
#
|
92
78
|
def get_or_set(key)
|
93
79
|
synchronize { self[key] ||= yield }
|
94
80
|
end
|
95
81
|
|
96
|
-
##
|
97
82
|
# @return [Array]
|
98
|
-
#
|
99
83
|
def keys
|
100
84
|
synchronize { @keys }
|
101
85
|
end
|
102
86
|
|
103
|
-
##
|
104
87
|
# @param [Mixed] key
|
105
88
|
# @return [TrueClass|FalseClass]
|
106
|
-
#
|
107
89
|
def key?(key)
|
108
90
|
synchronize { @cache.key?(key) }
|
109
91
|
end
|
110
92
|
|
111
|
-
##
|
112
93
|
# Removes all keys from the cache.
|
113
|
-
#
|
114
94
|
def clear
|
115
95
|
synchronize do
|
116
96
|
@keys.clear
|
@@ -118,9 +98,7 @@ module Oga
|
|
118
98
|
end
|
119
99
|
end
|
120
100
|
|
121
|
-
##
|
122
101
|
# @return [Fixnum]
|
123
|
-
#
|
124
102
|
def size
|
125
103
|
synchronize { @cache.size }
|
126
104
|
end
|
@@ -129,10 +107,8 @@ module Oga
|
|
129
107
|
|
130
108
|
private
|
131
109
|
|
132
|
-
##
|
133
110
|
# Yields the supplied block in a synchronized manner (if needed). This
|
134
111
|
# method is heavily based on `MonitorMixin#mon_enter`.
|
135
|
-
#
|
136
112
|
def synchronize
|
137
113
|
if @owner != Thread.current
|
138
114
|
@mutex.synchronize do
|
@@ -147,10 +123,8 @@ module Oga
|
|
147
123
|
end
|
148
124
|
end
|
149
125
|
|
150
|
-
##
|
151
126
|
# Removes old keys until the size of the hash no longer exceeds the maximum
|
152
127
|
# size.
|
153
|
-
#
|
154
128
|
def resize
|
155
129
|
return unless size > @maximum
|
156
130
|
|
data/lib/oga/oga.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Oga
|
2
|
-
##
|
3
2
|
# Parses the given XML document.
|
4
3
|
#
|
5
4
|
# @example
|
@@ -8,12 +7,10 @@ module Oga
|
|
8
7
|
# @see [Oga::XML::Lexer#initialize]
|
9
8
|
#
|
10
9
|
# @return [Oga::XML::Document]
|
11
|
-
#
|
12
10
|
def self.parse_xml(xml, options = {})
|
13
11
|
XML::Parser.new(xml, options).parse
|
14
12
|
end
|
15
13
|
|
16
|
-
##
|
17
14
|
# Parses the given HTML document.
|
18
15
|
#
|
19
16
|
# @example
|
@@ -22,12 +19,10 @@ module Oga
|
|
22
19
|
# @see [Oga::XML::Lexer#initialize]
|
23
20
|
#
|
24
21
|
# @return [Oga::XML::Document]
|
25
|
-
#
|
26
22
|
def self.parse_html(html, options = {})
|
27
23
|
HTML::Parser.new(html, options).parse
|
28
24
|
end
|
29
25
|
|
30
|
-
##
|
31
26
|
# Parses the given XML document using the SAX parser.
|
32
27
|
#
|
33
28
|
# @example
|
@@ -36,12 +31,10 @@ module Oga
|
|
36
31
|
# Oga.sax_parse_xml(handler, '<root>Hello</root>')
|
37
32
|
#
|
38
33
|
# @see [Oga::XML::SaxParser#initialize]
|
39
|
-
#
|
40
34
|
def self.sax_parse_xml(handler, xml, options = {})
|
41
35
|
XML::SaxParser.new(handler, xml, options).parse
|
42
36
|
end
|
43
37
|
|
44
|
-
##
|
45
38
|
# Parses the given HTML document using the SAX parser.
|
46
39
|
#
|
47
40
|
# @example
|
@@ -50,7 +43,6 @@ module Oga
|
|
50
43
|
# Oga.sax_parse_html(handler, '<script>foo()</script>')
|
51
44
|
#
|
52
45
|
# @see [Oga::XML::SaxParser#initialize]
|
53
|
-
#
|
54
46
|
def self.sax_parse_html(handler, html, options = {})
|
55
47
|
HTML::SaxParser.new(handler, html, options).parse
|
56
48
|
end
|
data/lib/oga/ruby/generator.rb
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
module Oga
|
2
|
+
module Ruby
|
3
|
+
# Class for converting a Ruby AST to a String.
|
4
|
+
#
|
5
|
+
# This class takes a {Oga::Ruby::Node} instance and converts it (and its
|
6
|
+
# child nodes) to a String that in turn can be passed to `eval` and the
|
7
|
+
# like.
|
8
|
+
class Generator
|
9
|
+
# @param [Oga::Ruby::Node] ast
|
10
|
+
# @return [String]
|
11
|
+
def process(ast)
|
12
|
+
send(:"on_#{ast.type}", ast)
|
13
|
+
end
|
14
|
+
|
15
|
+
# @param [Oga::Ruby::Node] ast
|
16
|
+
# @return [String]
|
17
|
+
def on_followed_by(ast)
|
18
|
+
ast.to_a.map { |child| process(child) }.join("\n\n")
|
19
|
+
end
|
20
|
+
|
21
|
+
# Processes an assignment node.
|
22
|
+
#
|
23
|
+
# @param [Oga::Ruby::Node] ast
|
24
|
+
# @return [String]
|
25
|
+
def on_assign(ast)
|
26
|
+
var, val = *ast
|
27
|
+
|
28
|
+
var_str = process(var)
|
29
|
+
val_str = process(val)
|
30
|
+
|
31
|
+
"#{var_str} = #{val_str}"
|
32
|
+
end
|
33
|
+
|
34
|
+
# Processes a mass assignment node.
|
35
|
+
#
|
36
|
+
# @param [Oga::Ruby::Node] ast
|
37
|
+
# @return [String]
|
38
|
+
def on_massign(ast)
|
39
|
+
vars, val = *ast
|
40
|
+
|
41
|
+
var_names = vars.map { |var| process(var) }
|
42
|
+
val_str = process(val)
|
43
|
+
|
44
|
+
"#{var_names.join(', ')} = #{val_str}"
|
45
|
+
end
|
46
|
+
|
47
|
+
# Processes a `begin` node.
|
48
|
+
#
|
49
|
+
# @param [Oga::Ruby::Node] ast
|
50
|
+
# @return [String]
|
51
|
+
def on_begin(ast)
|
52
|
+
body = process(ast.to_a[0])
|
53
|
+
|
54
|
+
<<-EOF
|
55
|
+
begin
|
56
|
+
#{body}
|
57
|
+
end
|
58
|
+
EOF
|
59
|
+
end
|
60
|
+
|
61
|
+
# Processes an equality node.
|
62
|
+
#
|
63
|
+
# @param [Oga::Ruby::Node] ast
|
64
|
+
# @return [String]
|
65
|
+
def on_eq(ast)
|
66
|
+
left, right = *ast
|
67
|
+
|
68
|
+
left_str = process(left)
|
69
|
+
right_str = process(right)
|
70
|
+
|
71
|
+
"#{left_str} == #{right_str}"
|
72
|
+
end
|
73
|
+
|
74
|
+
# Processes a boolean "and" node.
|
75
|
+
#
|
76
|
+
# @param [Oga::Ruby::Node] ast
|
77
|
+
# @return [String]
|
78
|
+
def on_and(ast)
|
79
|
+
left, right = *ast
|
80
|
+
|
81
|
+
left_str = process(left)
|
82
|
+
right_str = process(right)
|
83
|
+
|
84
|
+
"#{left_str} && #{right_str}"
|
85
|
+
end
|
86
|
+
|
87
|
+
# Processes a boolean "or" node.
|
88
|
+
#
|
89
|
+
# @param [Oga::Ruby::Node] ast
|
90
|
+
# @return [String]
|
91
|
+
def on_or(ast)
|
92
|
+
left, right = *ast
|
93
|
+
|
94
|
+
left_str = process(left)
|
95
|
+
right_str = process(right)
|
96
|
+
|
97
|
+
"(#{left_str} || #{right_str})"
|
98
|
+
end
|
99
|
+
|
100
|
+
# Processes an if statement node.
|
101
|
+
#
|
102
|
+
# @param [Oga::Ruby::Node] ast
|
103
|
+
# @return [String]
|
104
|
+
def on_if(ast)
|
105
|
+
cond, body, else_body = *ast
|
106
|
+
|
107
|
+
cond_str = process(cond)
|
108
|
+
body_str = process(body)
|
109
|
+
|
110
|
+
if else_body
|
111
|
+
else_str = process(else_body)
|
112
|
+
|
113
|
+
<<-EOF
|
114
|
+
if #{cond_str}
|
115
|
+
#{body_str}
|
116
|
+
else
|
117
|
+
#{else_str}
|
118
|
+
end
|
119
|
+
EOF
|
120
|
+
else
|
121
|
+
<<-EOF
|
122
|
+
if #{cond_str}
|
123
|
+
#{body_str}
|
124
|
+
end
|
125
|
+
EOF
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# Processes a while statement node.
|
130
|
+
#
|
131
|
+
# @param [Oga::Ruby::Node] ast
|
132
|
+
# @return [String]
|
133
|
+
def on_while(ast)
|
134
|
+
cond, body = *ast
|
135
|
+
|
136
|
+
cond_str = process(cond)
|
137
|
+
body_str = process(body)
|
138
|
+
|
139
|
+
<<-EOF
|
140
|
+
while #{cond_str}
|
141
|
+
#{body_str}
|
142
|
+
end
|
143
|
+
EOF
|
144
|
+
end
|
145
|
+
|
146
|
+
# Processes a method call node.
|
147
|
+
#
|
148
|
+
# @param [Oga::Ruby::Node] ast
|
149
|
+
# @return [String]
|
150
|
+
def on_send(ast)
|
151
|
+
receiver, name, *args = *ast
|
152
|
+
|
153
|
+
call = name
|
154
|
+
brackets = name == '[]'
|
155
|
+
|
156
|
+
unless args.empty?
|
157
|
+
arg_str = args.map { |arg| process(arg) }.join(', ')
|
158
|
+
call = brackets ? "[#{arg_str}]" : "#{call}(#{arg_str})"
|
159
|
+
end
|
160
|
+
|
161
|
+
if receiver
|
162
|
+
rec_str = process(receiver)
|
163
|
+
call = brackets ? "#{rec_str}#{call}" : "#{rec_str}.#{call}"
|
164
|
+
end
|
165
|
+
|
166
|
+
call
|
167
|
+
end
|
168
|
+
|
169
|
+
# Processes a block node.
|
170
|
+
#
|
171
|
+
# @param [Oga::Ruby::Node] ast
|
172
|
+
# @return [String]
|
173
|
+
def on_block(ast)
|
174
|
+
receiver, args, body = *ast
|
175
|
+
|
176
|
+
receiver_str = process(receiver)
|
177
|
+
body_str = body ? process(body) : nil
|
178
|
+
arg_strs = args.map { |arg| process(arg) }
|
179
|
+
|
180
|
+
<<-EOF
|
181
|
+
#{receiver_str} do |#{arg_strs.join(', ')}|
|
182
|
+
#{body_str}
|
183
|
+
end
|
184
|
+
EOF
|
185
|
+
end
|
186
|
+
|
187
|
+
# Processes a Range node.
|
188
|
+
#
|
189
|
+
# @param [Oga::Ruby::Node] ast
|
190
|
+
# @return [String]
|
191
|
+
def on_range(ast)
|
192
|
+
start, stop = *ast
|
193
|
+
|
194
|
+
start_str = process(start)
|
195
|
+
stop_str = process(stop)
|
196
|
+
|
197
|
+
"(#{start_str}..#{stop_str})"
|
198
|
+
end
|
199
|
+
|
200
|
+
# Processes a string node.
|
201
|
+
#
|
202
|
+
# @param [Oga::Ruby::Node] ast
|
203
|
+
# @return [String]
|
204
|
+
def on_string(ast)
|
205
|
+
ast.to_a[0].inspect
|
206
|
+
end
|
207
|
+
|
208
|
+
# Processes a Symbol node.
|
209
|
+
#
|
210
|
+
# @param [Oga::Ruby::Node] ast
|
211
|
+
# @return [String]
|
212
|
+
def on_symbol(ast)
|
213
|
+
ast.to_a[0].to_sym.inspect
|
214
|
+
end
|
215
|
+
|
216
|
+
# Processes a literal node.
|
217
|
+
#
|
218
|
+
# @param [Oga::Ruby::Node] ast
|
219
|
+
# @return [String]
|
220
|
+
def on_lit(ast)
|
221
|
+
ast.to_a[0]
|
222
|
+
end
|
223
|
+
end # Generator
|
224
|
+
end # Ruby
|
225
|
+
end # Oga
|