nokogiri 1.3.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
module Nokogiri
module CSS
class GeneratedTokenizer < GeneratedParser

macro
  nl \n|\r\n|\r|\f
  w [\s\r\n\f]*
  nonascii [^\\\\0-\\\\177]
  num -?([0-9]+|[0-9]*\.[0-9]+)
  unicode \\\\\\\\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?

  escape {unicode}|\\\\\\\[^\n\r\f0-9A-Fa-f]
  nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
  nmstart [_A-Za-z]|{nonascii}|{escape}
  ident [-@]?({nmstart})({nmchar})*
  name ({nmchar})+
  string1 "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
  string2 '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
  string {string1}|{string2}

rule

  # Rule format: [:state] pattern [actions]
  # Every action below yields a [token_type, matched_text] pair.

  {ident}\(\s* { [:FUNCTION, text] }
  {ident} { [:IDENT, text] }
  \#{name} { [:HASH, text] }
  {w}~={w} { [:INCLUDES, text] }
  {w}\|={w} { [:DASHMATCH, text] }
  {w}\^={w} { [:PREFIXMATCH, text] }
  {w}\$={w} { [:SUFFIXMATCH, text] }
  {w}\*={w} { [:SUBSTRINGMATCH, text] }
  {w}!={w} { [:NOT_EQUAL, text] }
  {w}={w} { [:EQUAL, text] }
  {w}\) { [:RPAREN, text] }
  {w}\[{w} { [:LSQUARE, text] }
  {w}\] { [:RSQUARE, text] }
  {w}\+{w} { [:PLUS, text] }
  {w}>{w} { [:GREATER, text] }
  {w},{w} { [:COMMA, text] }
  {w}~{w} { [:TILDE, text] }
  \:not\({w} { [:NOT, text] }
  {num} { [:NUMBER, text] }
  {w}\/\/{w} { [:DOUBLESLASH, text] }
  {w}\/{w} { [:SLASH, text] }

  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }

  [\s\t\r\n\f]+ { [:S, text] }
  {string} { [:STRING, text] }
  . { [text, text] }
end
end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module CSS
|
3
|
+
class XPathVisitor # :nodoc:
|
4
|
+
# Translate a CSS function node into an XPath expression.
#
# Dispatch order:
# 1. If this visitor responds to +visit_function_<name>+ (the function
#    token with its "(" removed), that method handles the node.
# 2. Otherwise the built-in functions below are matched by name.
# 3. Any other function is emitted as a call whose first argument is the
#    context node (".") followed by the function's own arguments.
#
# Note that nth-child and nth-last-child are preprocessed in css/node.rb.
def visit_function(node)
  function = node.value.first
  handler = :"visit_function_#{function.gsub(/[(]/, '')}"
  return send(handler, node) if respond_to?(handler)

  argument = node.value[1]

  case function
  when /^text\(/ then 'child::text()'
  when /^self\(/ then "self::#{argument}"
  when /^(eq|nth|nth-of-type|nth-child)\(/
    # An an+b argument needs the full positional formula; anything else is
    # used directly as the position.
    if argument.is_a?(Nokogiri::CSS::Node) && argument.type == :AN_PLUS_B
      an_plus_b(argument)
    else
      "position() = " + argument
    end
  when /^(first|first-of-type)\(/ then "position() = 1"
  when /^(last|last-of-type)\(/ then "position() = last()"
  when /^(nth-last-child|nth-last-of-type)\(/ then "position() = last() - #{argument}"
  when /^contains\(/ then "contains(., #{argument})"
  when /^gt\(/ then "position() > #{argument}"
  when /^only-child\(/ then "last() = 1"
  when /^comment\(/ then "comment()"
  else
    # +function+ still ends with "(", so only the closing paren is appended.
    call_args = ['.'] + node.value[1..-1]
    "#{function}#{call_args.join(', ')})"
  end
end
|
39
|
+
|
40
|
+
# Wrap the XPath produced for the negated selector in an XPath not() call.
def visit_not(node)
  negated = node.value.first.accept(self)
  'not(' + negated + ')'
end
|
43
|
+
|
44
|
+
# Build "<last>[preceding-sibling::<first>]": the last child of +node+ is
# the selected element, the first child is the sibling it must follow.
def visit_preceding_selector(node)
  subject = node.value.last.accept(self)
  sibling = node.value.first.accept(self)
  subject + '[preceding-sibling::' + sibling + ']'
end
|
50
|
+
|
51
|
+
# Build "<first>/following-sibling::*[1]/self::<last>": the last child of
# +node+ must be the sibling immediately after the first child.
def visit_direct_adjacent_selector(node)
  left = node.value.first.accept(self)
  right = node.value.last.accept(self)
  left + "/following-sibling::*[1]/self::" + right
end
|
56
|
+
|
57
|
+
# Turn an id token such as "#main" into the XPath test "@id = 'main'".
# A token without a leading "#" produces an empty id.
def visit_id(node)
  match = /^#(.*)$/.match(node.value.first)
  "@id = '#{match && match[1]}'"
end
|
61
|
+
|
62
|
+
# Translate an attribute condition such as [name], [name=value] or
# [name^=value] into an XPath predicate.
#
# +node.value+ holds either just the attribute/function node, or three
# items: that node, an operator symbol and the value to compare against.
def visit_attribute_condition(node)
  target = node.value.first

  # Functions and names that already carry an axis ("::") are used as-is;
  # everything else is addressed as an attribute.
  bare = target.type == :FUNCTION || target.value.first =~ /::/
  attribute = (bare ? '' : '@') + target.accept(self)

  # Support non-standard css
  attribute.gsub!(/^@@/, '@')

  return attribute unless node.value.length == 3

  operator = node.value[1]
  operand = node.value.last
  # Quote the value unless the selector already quoted it.
  operand = "'#{operand}'" unless operand =~ /^['"]/

  case operator
  when :equal then "#{attribute} = #{operand}"
  when :not_equal then "#{attribute} != #{operand}"
  when :substring_match then "contains(#{attribute}, #{operand})"
  when :prefix_match then "starts-with(#{attribute}, #{operand})"
  when :dash_match
    "#{attribute} = #{operand} or starts-with(#{attribute}, concat(#{operand}, '-'))"
  when :includes
    %Q[contains(concat(" ", #{attribute}, " "),concat(" ", #{operand}, " "))]
  when :suffix_match
    "substring(#{attribute}, string-length(#{attribute}) - " +
      "string-length(#{operand}) + 1, string-length(#{operand})) = #{operand}"
  else
    "#{attribute} #{operator} #{operand}"
  end
end
|
98
|
+
|
99
|
+
# Translate a pseudo-class into an XPath predicate.
#
# A pseudo-class that wraps a FUNCTION node is delegated to that node. A
# plain name is first offered to a +visit_pseudo_class_<name>+ method if
# this visitor responds to one, then matched against the built-in names;
# any other name becomes a call of that name on the context node.
def visit_pseudo_class(node)
  pseudo = node.value.first
  if pseudo.is_a?(Nokogiri::CSS::Node) && pseudo.type == :FUNCTION
    return pseudo.accept(self)
  end

  handler = :"visit_pseudo_class_#{pseudo.gsub(/[(]/, '')}"
  return send(handler, node) if respond_to?(handler)

  case pseudo
  when "first", "first-of-type" then "position() = 1"
  when "last", "last-of-type" then "position() = last()"
  when "only-of-type" then "last() = 1"
  when "empty" then "not(node())"
  when "parent" then "node()"
  when "root" then "not(parent::*)"
  else pseudo + "(.)"
  end
end
|
120
|
+
|
121
|
+
# Build an XPath test that the space-separated @class attribute contains
# the class name held in +node+ as a whole word.
def visit_class_condition(node)
  class_name = node.value.first
  "contains(concat(' ', @class, ' '), ' #{class_name} ')"
end
|
124
|
+
|
125
|
+
# Join the XPath of the first and last children of +node+ with " and ".
def visit_combinator(node)
  left = node.value.first.accept(self)
  right = node.value.last.accept(self)
  left + ' and ' + right
end
|
129
|
+
|
130
|
+
# Build "<first>[<last>]": the last child of +node+ becomes a predicate on
# the XPath of the first child.
def visit_conditional_selector(node)
  selector = node.value.first.accept(self)
  condition = node.value.last.accept(self)
  selector + '[' + condition + ']'
end
|
134
|
+
|
135
|
+
# Build "<first>//<last>" from the first and last children of +node+.
def visit_descendant_selector(node)
  ancestor = node.value.first.accept(self)
  descendant = node.value.last.accept(self)
  ancestor + '//' + descendant
end
|
140
|
+
|
141
|
+
# Build "<first>/<last>" from the first and last children of +node+.
def visit_child_selector(node)
  parent = node.value.first.accept(self)
  child = node.value.last.accept(self)
  parent + '/' + child
end
|
146
|
+
|
147
|
+
# An element name translates to itself: return the first value of +node+.
def visit_element_name(node)
  node.value.first
end
|
150
|
+
|
151
|
+
# Entry point: ask +node+ to accept this visitor and return what it returns.
def accept(node)
  node.accept(self)
end
|
154
|
+
|
155
|
+
private
|
156
|
+
# Translate an an+b node (for example "2n+1") into an XPath position test.
#
# +node.value+ must hold exactly four tokens; the first is read as the
# step +a+ and the fourth as the offset +b+ (both via to_i).
#
# Raises ArgumentError when the node does not hold four tokens.
def an_plus_b(node)
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

  a = node.value[0].to_i
  b = node.value[3].to_i

  # No offset: every a-th position.
  return "(position() mod #{a}) = 0" if b == 0

  # A zero step selects exactly position b. The general formula below would
  # emit "mod 0", which is NaN in XPath and therefore never matches.
  return "position() = #{b}" if a == 0

  # A negative step counts down from b, a positive step counts up from it.
  compare = a < 0 ? "<=" : ">="
  "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
end
|
169
|
+
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module Decorators
|
3
|
+
module Hpricot
|
4
|
+
module Node # :nodoc:
|
5
|
+
# Search with Hpricot-style rules. Each path is converted to XPath with
# convert_to_xpath, duplicates are dropped, and the result is handed to the
# inherited search. A trailing Hash argument is passed through as the last
# argument (an empty Hash is used when none is given).
def search(*paths)
  namespaces = paths.last.is_a?(Hash) ? paths.pop : {}
  xpaths = paths.map { |path| convert_to_xpath(path) }.flatten.uniq

  super(*(xpaths + [namespaces]))
end
|
13
|
+
# Operator shorthand for search with a single path.
def /(path)
  search(path)
end
|
14
|
+
|
15
|
+
# With arguments, behave like the inherited xpath. Without arguments,
# return +path+ for this node.
def xpath(*args)
  args.empty? ? path : super
end
|
19
|
+
|
20
|
+
# Returns the receiver itself.
def raw_attributes
  self
end
|
21
|
+
|
22
|
+
# Return the first result of searching for any element whose id attribute
# equals +element_id+.
def get_element_by_id(element_id)
  matches = search("//*[@id='#{element_id}']")
  matches.first
end
|
25
|
+
|
26
|
+
# Search for every +tag+ element using the path "//<tag>".
def get_elements_by_tag_name(tag)
  query = "//#{tag}"
  search(query)
end
|
29
|
+
|
30
|
+
# Convert a search rule into an array of XPath expressions.
#
# * Rules starting with "//" are made relative by prefixing ".".
# * Rules starting with "/" or "./" + "/" (that is, ".//") are kept as XPath.
# * Anything else is treated as CSS and translated with the Hpricot-aware
#   visitor, using ".//" as prefix.
#
# XPath rules are passed through Hpricot::XPathVisitor.xpath_namespace_helper.
def convert_to_xpath(rule)
  rule = rule.to_s
  case rule
  when %r{^//}
    [".#{Hpricot::XPathVisitor.xpath_namespace_helper(rule)}"]
  when %r{^/}, %r{^\.//}
    # The dot is escaped on purpose: an unescaped "." matched any character,
    # so CSS rules such as "p//a" were mistaken for XPath.
    [Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
  else
    visitor = CSS::XPathVisitor.new
    visitor.extend(Hpricot::XPathVisitor)
    CSS.xpath_for(rule, :prefix => ".//", :visitor => visitor)
  end
end
|
45
|
+
|
46
|
+
# Alias-style reader: returns whatever +name+ returns.
def target
  name
end
|
49
|
+
|
50
|
+
# Returns the same markup as +to_html+.
def to_original_html
  to_html
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module Decorators
|
3
|
+
module Hpricot
|
4
|
+
module NodeSet
|
5
|
+
|
6
|
+
# Select nodes matching the supplied rule.
|
7
|
+
# Note that positional rules (like <tt>:nth()</tt>) aren't currently supported.
|
8
|
+
#
|
9
|
+
# example:
|
10
|
+
# node_set.filter('.ohmy') # selects nodes from the set with class "ohmy"
|
11
|
+
# node_set.filter('a#link2') # selects nodes from the set with child node <a id='link2'>
|
12
|
+
# node_set.filter('a[@id="link2"]') # selects nodes from the set with child node <a id='link2'>
|
13
|
+
# Keep the nodes for which the rule matches: the match result is passed to
# filter_transformer unchanged (identity transformer).
def filter(rule)
  identity = lambda { |matched| matched }
  filter_transformer(identity, rule)
end
|
16
|
+
|
17
|
+
# The complement to filter, select nodes <em>not</em> matching the supplied rule.
|
18
|
+
# Note that positional rules (like <tt>:nth()</tt>) aren't currently supported.
|
19
|
+
#
|
20
|
+
# See filter for examples.
|
21
|
+
#
|
22
|
+
# Also note that you can pass a XML::Node object instead of a
|
23
|
+
# rule to remove that object from the node set (if it is
|
24
|
+
# present):
|
25
|
+
# node_set.not(node_to_exclude) # selects all nodes EXCEPT node_to_exclude
|
26
|
+
#
|
27
|
+
# Keep the nodes for which the rule does NOT match: the match result is
# negated before filter_transformer decides (negation transformer).
def not(rule)
  negation = lambda { |matched| !matched }
  filter_transformer(negation, rule)
end
|
30
|
+
|
31
|
+
private
|
32
|
+
# Shared implementation of filter and not.
#
# Builds a new decorated node set containing every node of this set for
# which +transformer+ returns a true value. +transformer+ is called with:
# * <tt>node == rule</tt> when +rule+ is an XML::Node, or
# * the result of <tt>node.at(xpath)</tt> when +rule+ is a CSS rule, where
#   +xpath+ is ".//self::" plus the rule translated by the Hpricot visitor.
def filter_transformer(transformer, rule) # :nodoc:
  sub_set = XML::NodeSet.new(document)
  document.decorate(sub_set)

  if rule.is_a?(XML::Node)
    each { |node| sub_set << node if transformer.call(node == rule) }
    return sub_set
  end

  ctx = CSS.parse(rule.to_s)
  visitor = CSS::XPathVisitor.new
  visitor.extend(Hpricot::XPathVisitor)

  # The translation does not depend on the node, and the Hpricot visitor
  # mixin modifies the parsed rule while visiting it, so translate only
  # once. It is done lazily so an empty set still never visits the rule.
  xpath = nil
  each do |node|
    xpath ||= ".//self::" + visitor.accept(ctx.first)
    sub_set << node if transformer.call(node.at(xpath))
  end
  sub_set
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module Decorators
|
3
|
+
module Hpricot
|
4
|
+
####
|
5
|
+
# This mixin does custom adjustments to deal with _whyML
|
6
|
+
module XPathVisitor
|
7
|
+
###
|
8
|
+
# Visit attribute condition nodes with +node+
|
9
|
+
# Visit attribute condition nodes with +node+
#
# Unless the condition's subject is a FUNCTION or starts with "@", its name
# is rewritten in place to use the child:: axis. The inherited translation
# then runs, and "child::text()" in its output is wrapped in
# normalize-space().
def visit_attribute_condition(node)
  subject = node.value.first

  # The rewrite mutates the parsed rule, so a name that already carries the
  # "child::" prefix is left alone; otherwise visiting the same node twice
  # would produce "child::child::...".
  unless subject.type == :FUNCTION || subject.value.first =~ /^(?:@|child::)/
    subject.value[0] = "child::" + subject.value[0]
  end

  super(node).gsub(/child::text\(\)/, 'normalize-space(child::text())')
end
|
16
|
+
|
17
|
+
# take a path like '//t:sam' and convert to xpath "*[name()='t:sam']"
|
18
|
+
# Rewrite every "/"-separated step of +rule+ that starts with a
# prefix:name pair into a name() test, e.g. '//t:sam' becomes
# "//*[name()='t:sam']". Whatever follows the pair in that step is kept;
# steps without such a pair are left unchanged.
def self.xpath_namespace_helper(rule)
  steps = rule.split(/\//).map do |step|
    qualified = /^(\w+:\w+)(.*)/.match(step)
    qualified ? "*[name()='#{qualified[1]}']#{qualified[2]}" : step
  end
  steps.join("/")
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module Decorators
|
3
|
+
###
|
4
|
+
# The Slop decorator implements method_missing so that method calls may be
|
5
|
+
# used instead of XPath or CSS. See Nokogiri.Slop
|
6
|
+
module Slop
|
7
|
+
###
|
8
|
+
# look for node with +name+. See Nokogiri.Slop
|
9
|
+
# Look for child nodes named +name+. See Nokogiri.Slop
#
# * No arguments: XPath "./<name>".
# * Hash with :css: CSS "<name><css>".
# * Hash with :xpath: XPath "./<name>[cond and cond ...]".
# * Any other argument: appended to +name+ and translated as CSS with the
#   prefix "./", with the CSS parser cache disabled.
#
# Returns the single match, or the whole list when there are several.
# Falls back to the inherited method_missing when nothing matches or when
# a Hash argument carries neither :css nor :xpath.
def method_missing(name, *args, &block)
  list = nil

  if args.empty?
    list = xpath("./#{name}")
  elsif args.first.is_a?(Hash)
    hash = args.first
    if hash[:css]
      list = css("#{name}#{hash[:css]}")
    elsif hash[:xpath]
      conds = Array(hash[:xpath]).join(' and ')
      list = xpath("./#{name}[#{conds}]")
    end
  else
    CSS::Parser.without_cache do
      list = xpath(
        *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
      )
    end
  end

  # +list+ stays nil when the Hash had no usable key; treat that like "no
  # match" so the caller gets the regular missing-method error for +name+.
  super if list.nil? || list.empty?
  list.length == 1 ? list.first : list
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module HTML
|
3
|
+
class Document < XML::Document
|
4
|
+
|
5
|
+
# Handle that meta_encoding and meta_encoding= pass to LibXML.
attr_accessor :cstruct # :nodoc:
|
6
|
+
|
7
|
+
# Create a document: the first two arguments are given to
# LibXML.htmlNewDoc, the result is wrapped, and +initialize+ is then
# invoked on the wrapper with all of the original arguments.
def self.new(*args) # :nodoc:
  uri, external_id = args[0], args[1]
  doc = wrap(LibXML.htmlNewDoc(uri, external_id))
  doc.send(:initialize, *args)
  doc
end
|
14
|
+
|
15
|
+
# Parse HTML from +io+ via LibXML.htmlReadIO, using the reader callback
# built by IoCallbacks.reader. The call runs inside
# wrap_with_error_handling.
def self.read_io(io, url, encoding, options) # :nodoc:
  wrap_with_error_handling do
    reader = IoCallbacks.reader(io)
    LibXML.htmlReadIO(reader, nil, nil, url, encoding, options)
  end
end
|
20
|
+
|
21
|
+
# Parse HTML from +string+ via LibXML.htmlReadMemory. The call runs inside
# wrap_with_error_handling.
def self.read_memory(string, url, encoding, options) # :nodoc:
  # libxml2 wants the buffer size in bytes. String#length counts characters
  # on Ruby 1.9+, which would cut multibyte documents short; fall back to
  # length only where bytesize is unavailable.
  size = string.respond_to?(:bytesize) ? string.bytesize : string.length

  wrap_with_error_handling do
    LibXML.htmlReadMemory(string, size, url, encoding, options)
  end
end
|
26
|
+
|
27
|
+
# Set the document's meta encoding through LibXML.htmlSetMetaEncoding and
# return +encoding+.
def meta_encoding=(encoding) # :nodoc:
  LibXML.htmlSetMetaEncoding(cstruct, encoding)
  encoding
end
|
31
|
+
|
32
|
+
# Return whatever LibXML.htmlGetMetaEncoding reports for this document.
def meta_encoding # :nodoc:
  LibXML.htmlGetMetaEncoding(cstruct)
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|