superfeedr-nokogiri 1.4.0.20091116183308
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module CSS
|
3
|
+
class GeneratedTokenizer < GeneratedParser
|
4
|
+
|
5
|
+
macro
|
6
|
+
nl \n|\r\n|\r|\f
|
7
|
+
w [\s\r\n\f]*
|
8
|
+
nonascii [^\0-\177]
|
9
|
+
num -?([0-9]+|[0-9]*\.[0-9]+)
|
10
|
+
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?
|
11
|
+
|
12
|
+
escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
|
13
|
+
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
|
14
|
+
nmstart [_A-Za-z]|{nonascii}|{escape}
|
15
|
+
ident [-@]?({nmstart})({nmchar})*
|
16
|
+
name ({nmchar})+
|
17
|
+
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
|
18
|
+
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
|
19
|
+
string {string1}|{string2}
|
20
|
+
|
21
|
+
rule
|
22
|
+
|
23
|
+
# [:state] pattern [actions]
# Every action below evaluates to a two-element array: [token type, text].
|
24
|
+
|
25
|
+
# FUNCTION (an identifier followed by "(") is listed ahead of plain IDENT;
# presumably rule order decides which pattern wins -- TODO confirm against rex.
{ident}\(\s* { [:FUNCTION, text] }
|
26
|
+
{ident} { [:IDENT, text] }
|
27
|
+
\#{name} { [:HASH, text] }
|
28
|
+
# The operator and combinator patterns below wrap their symbol in {w}, so any
# surrounding whitespace is part of the same match.
{w}~={w} { [:INCLUDES, text] }
|
29
|
+
{w}\|={w} { [:DASHMATCH, text] }
|
30
|
+
{w}\^={w} { [:PREFIXMATCH, text] }
|
31
|
+
{w}\$={w} { [:SUFFIXMATCH, text] }
|
32
|
+
{w}\*={w} { [:SUBSTRINGMATCH, text] }
|
33
|
+
{w}!={w} { [:NOT_EQUAL, text] }
|
34
|
+
{w}={w} { [:EQUAL, text] }
|
35
|
+
{w}\) { [:RPAREN, text] }
|
36
|
+
{w}\[{w} { [:LSQUARE, text] }
|
37
|
+
{w}\] { [:RSQUARE, text] }
|
38
|
+
{w}\+{w} { [:PLUS, text] }
|
39
|
+
{w}>{w} { [:GREATER, text] }
|
40
|
+
{w},{w} { [:COMMA, text] }
|
41
|
+
{w}~{w} { [:TILDE, text] }
|
42
|
+
\:not\({w} { [:NOT, text] }
|
43
|
+
{num} { [:NUMBER, text] }
|
44
|
+
{w}\/\/{w} { [:DOUBLESLASH, text] }
|
45
|
+
{w}\/{w} { [:SLASH, text] }
|
46
|
+
|
47
|
+
U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
|
48
|
+
|
49
|
+
[\s\t\r\n\f]+ { [:S, text] }
|
50
|
+
{string} { [:STRING, text] }
|
51
|
+
# Fallback: any other single character is emitted with the character itself
# as the token type.
. { [text, text] }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
module Nokogiri
  module CSS
    # Walks a parsed CSS selector tree and produces XPath fragments.
    # Every visit_* method takes one tree node (an object exposing +value+,
    # and for some checks +type+ and +accept+) and returns a String.
    class XPathVisitor # :nodoc:
      # Translates a function-style pseudo selector such as "nth(" or "text(".
      # A subclass may define visit_function_<name> to override the built-in
      # handling for a given function name.
      def visit_function node
        # note that nth-child and nth-last-child are preprocessed in css/node.rb.
        msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
        return self.send(msg, node) if self.respond_to?(msg)

        case node.value.first
        when /^text\(/
          'child::text()'
        when /^self\(/
          "self::#{node.value[1]}"
        when /^(eq|nth|nth-of-type|nth-child)\(/
          if node.value[1].is_a?(Nokogiri::CSS::Node) && node.value[1].type == :AN_PLUS_B
            an_plus_b(node.value[1])
          else
            "position() = " + node.value[1]
          end
        when /^(first|first-of-type)\(/
          "position() = 1"
        when /^(last|last-of-type)\(/
          "position() = last()"
        when /^(nth-last-child|nth-last-of-type)\(/
          "position() = last() - #{node.value[1]}"
        when /^contains\(/
          "contains(., #{node.value[1]})"
        when /^gt\(/
          "position() > #{node.value[1]}"
        when /^only-child\(/
          "last() = 1"
        when /^comment\(/
          "comment()"
        else
          # Unknown functions are passed through with the context node
          # prepended as the first argument.
          args = ['.'] + node.value[1..-1]
          "#{node.value.first}#{args.join(', ')})"
        end
      end

      # Wraps the translated operand in an XPath not().
      def visit_not node
        'not(' + node.value.first.accept(self) + ')'
      end

      # Emits "<last>[preceding-sibling::<first>]".
      def visit_preceding_selector node
        node.value.last.accept(self) +
          '[preceding-sibling::' +
          node.value.first.accept(self) +
          ']'
      end

      # Turns "#name" into an @id comparison. A value without a leading "#"
      # yields an empty id, exactly as before.
      def visit_id node
        id = node.value.first[/^#(.*)$/, 1]
        "@id = '#{id}'"
      end

      # Translates an attribute selector. With a single value it returns just
      # the attribute reference; with three values (name, operator, operand)
      # it returns the matching XPath comparison.
      def visit_attribute_condition node
        attribute = if (node.value.first.type == :FUNCTION) || (node.value.first.value.first =~ /::/)
                      ''
                    else
                      '@'
                    end
        attribute += node.value.first.accept(self)

        # Support non-standard css
        attribute.gsub!(/^@@/, '@')

        return attribute unless node.value.length == 3

        value = node.value.last
        # Quote the operand unless it already starts with a quote character.
        value = "'#{value}'" if value !~ /^['"]/

        case node.value[1]
        when :equal
          attribute + " = " + "#{value}"
        when :not_equal
          attribute + " != " + "#{value}"
        when :substring_match
          "contains(#{attribute}, #{value})"
        when :prefix_match
          "starts-with(#{attribute}, #{value})"
        when :dash_match
          "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
        when :includes
          "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
        when :suffix_match
          "substring(#{attribute}, string-length(#{attribute}) - " +
            "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
        else
          attribute + " #{node.value[1]} " + "#{value}"
        end
      end

      # Translates a pseudo class. Function nodes are delegated to their own
      # visitor; plain names may be overridden via visit_pseudo_class_<name>.
      def visit_pseudo_class node
        if node.value.first.is_a?(Nokogiri::CSS::Node) && node.value.first.type == :FUNCTION
          node.value.first.accept(self)
        else
          msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
          return self.send(msg, node) if self.respond_to?(msg)

          case node.value.first
          when "first" then "position() = 1"
          when "last" then "position() = last()"
          when "first-of-type" then "position() = 1"
          when "last-of-type" then "position() = last()"
          when "only-of-type" then "last() = 1"
          when "empty" then "not(node())"
          when "parent" then "node()"
          when "root" then "not(parent::*)"
          else
            node.value.first + "(.)"
          end
        end
      end

      # Emits a whitespace-delimited token test against @class.
      def visit_class_condition node
        "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
      end

      # Binary selector nodes all share one shape: translated first operand,
      # a fixed joiner, translated last operand. The methods are generated
      # with define_method so no source string has to be evaluated.
      {
        'combinator'               => ' and ',
        'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
        'descendant_selector'      => '//',
        'child_selector'           => '/',
      }.each do |kind, joiner|
        define_method("visit_#{kind}") do |node|
          "#{node.value.first.accept(self)}#{joiner}#{node.value.last.accept(self)}"
        end
      end

      # Emits "<first>[<last>]".
      def visit_conditional_selector node
        node.value.first.accept(self) + '[' +
          node.value.last.accept(self) + ']'
      end

      # An element name translates to itself.
      def visit_element_name node
        node.value.first
      end

      # Entry point: asks +node+ to dispatch back into this visitor.
      def accept node
        node.accept(self)
      end

      private

      # Translates an "an+b" node, whose value holds four tokens with the step
      # (a) first and the offset (b) last, into a position() test.
      # Raises ArgumentError when the node does not hold exactly four tokens.
      def an_plus_b node
        raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

        a = node.value[0].to_i
        b = node.value[3].to_i

        # A zero step selects only the b-th position. The general formulas
        # below would emit "mod 0", which is NaN in XPath and never matches.
        return "position() = #{b}" if a == 0

        if b == 0
          "(position() mod #{a}) = 0"
        else
          compare = a < 0 ? "<=" : ">="
          "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
        end
      end

    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method missing such that methods may be
    # used instead of XPath or CSS.  See Nokogiri.Slop
    module Slop
      ###
      # look for node with +name+.  See Nokogiri.Slop
      #
      # With no arguments the child elements called +name+ are searched via
      # xpath. A Hash argument may carry :css (appended to +name+ and run
      # through css) or :xpath (one or more conditions joined with "and").
      # Any other first argument is treated as a CSS suffix for +name+.
      #
      # Returns the single match, or the whole list when several nodes match.
      # When nothing matches, or the Hash holds neither :css nor :xpath, the
      # call is handed to +super+.
      def method_missing name, *args, &block
        list = nil

        if args.empty?
          list = xpath("./#{name}")
        elsif args.first.is_a? Hash
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(' and ')
            list = xpath("./#{name}[#{conds}]")
          end
        else
          # Assigned inside the block on purpose: the return value of
          # without_cache is not relied upon.
          CSS::Parser.without_cache do
            list = xpath(
              *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
            )
          end
        end

        # No query ran (Hash without :css or :xpath). Defer to super instead
        # of failing with a NoMethodError for empty? on nil.
        return super if list.nil?

        super if list.empty?
        list.length == 1 ? list.first : list
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Nokogiri
  module HTML
    # FFI-side construction and parsing entry points for HTML documents.
    # The work is delegated to the htmlNewDoc / htmlReadIO / htmlReadMemory
    # bindings on LibXML.
    class Document < XML::Document

      attr_accessor :cstruct # :nodoc:

      # Creates an HTML document through htmlNewDoc, wraps it, and then runs
      # the instance initializer with the same arguments.
      # args[0] is passed as the URI and args[1] as the external id.
      def self.new(*args) # :nodoc:
        uri         = args[0]
        external_id = args[1]
        doc = wrap(LibXML.htmlNewDoc(uri, external_id))
        doc.send :initialize, *args
        doc
      end

      # Parses HTML read from +io+ via a reader callback built by
      # IoCallbacks.reader.
      def self.read_io(io, url, encoding, options) # :nodoc:
        wrap_with_error_handling do
          LibXML.htmlReadIO(IoCallbacks.reader(io), nil, nil, url, encoding, options)
        end
      end

      # Parses HTML held in +string+.
      def self.read_memory(string, url, encoding, options) # :nodoc:
        wrap_with_error_handling do
          # htmlReadMemory takes the buffer size in bytes. String#length counts
          # characters on Ruby 1.9+, which truncates multibyte input.
          LibXML.htmlReadMemory(string, string.bytesize, url, encoding, options)
        end
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Nokogiri
  module HTML
    # Accessors over the HTML element description record held in +cstruct+.
    class ElementDescription

      attr_accessor :cstruct # :nodoc:

      # Looks +tag_name+ up through htmlTagLookup. Returns nil when the lookup
      # yields a null pointer, otherwise a description wrapping the record.
      def self.[](tag_name) # :nodoc:
        found = LibXML.htmlTagLookup(tag_name)
        return nil if found.null?

        allocate.tap do |wrapper|
          wrapper.cstruct = LibXML::HtmlElemDesc.new(found)
        end
      end

      def name # :nodoc:
        cstruct[:name]
      end

      def description # :nodoc:
        cstruct[:desc]
      end

      def default_sub_element # :nodoc:
        cstruct[:defaultsubelt]
      end

      def sub_elements # :nodoc:
        get_string_array_from(:subelts)
      end

      def required_attributes # :nodoc:
        get_string_array_from(:attrs_req)
      end

      def optional_attributes # :nodoc:
        get_string_array_from(:attrs_opt)
      end

      def deprecated_attributes # :nodoc:
        get_string_array_from(:attrs_depr)
      end

      def inline? # :nodoc:
        nonzero_field?(:isinline)
      end

      def deprecated? # :nodoc:
        nonzero_field?(:depr)
      end

      def empty? # :nodoc:
        nonzero_field?(:empty)
      end

      def save_end_tag? # :nodoc:
        nonzero_field?(:saveEndTag)
      end

      def implied_end_tag? # :nodoc:
        nonzero_field?(:endTag)
      end

      def implied_start_tag? # :nodoc:
        nonzero_field?(:startTag)
      end

      private

      # True when the record field +key+ is anything other than 0.
      def nonzero_field?(key) # :nodoc:
        cstruct[key] != 0
      end

      # Collects the strings from the pointer array stored in field +sym+,
      # reading pointer-sized slots until a null entry is reached. A null
      # array yields an empty list.
      def get_string_array_from(sym) # :nodoc:
        strings = []
        table = cstruct[sym]
        return strings if table.null?

        slot = 0
        loop do
          entry = table.get_pointer(slot * FFI.type_size(:pointer))
          break if !entry || entry.null?

          strings << entry.read_string
          slot += 1
        end

        strings
      end

    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Nokogiri
  module HTML
    class EntityLookup

      # Resolves +key+ (converted with to_s) through htmlEntityLookup.
      # Returns nil for a null result, otherwise an EntityDescription built
      # from the record's :value, :name and :desc fields.
      def get(key) # :nodoc:
        found = LibXML.htmlEntityLookup(key.to_s)

        unless found.null?
          record = LibXML::HtmlEntityDesc.new(found)
          EntityDescription.new(record[:value], record[:name], record[:desc])
        end
      end

    end
  end
end
|
16
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
module Nokogiri
  module HTML
    module SAX
      # FFI-side constructors for the HTML SAX parser context.
      class ParserContext < Nokogiri::XML::SAX::ParserContext
        attr_accessor :cstruct

        # Builds a context for parsing +filename+ through
        # htmlCreateFileParserCtxt with the given +encoding+.
        def self.file filename, encoding
          ctx = LibXML.htmlCreateFileParserCtxt filename, encoding
          pc = allocate
          pc.cstruct = LibXML::XmlParserContext.new ctx
          pc
        end

        # Builds a context for parsing the in-memory string +data+.
        # Raises ArgumentError when +data+ is nil or false, and a RuntimeError
        # ("data cannot be empty") when it has no content. When +encoding+ is
        # given and recognised by xmlParseCharEncoding, the context is
        # switched to that encoding.
        def self.memory data, encoding
          raise ArgumentError unless data
          raise "data cannot be empty" unless data.length > 0

          # The size argument is a byte count. String#length counts characters
          # on Ruby 1.9+, which would cut multibyte input short.
          ctx = LibXML.htmlCreateMemoryParserCtxt data, data.bytesize
          pc = allocate
          pc.cstruct = LibXML::XmlParserContext.new ctx
          if encoding
            enc = LibXML.xmlParseCharEncoding(encoding)
            if enc != LibXML::XML_CHAR_ENCODING_ERROR
              LibXML.xmlSwitchEncoding(ctx, enc)
            end
          end
          pc
        end

        # Same as the inherited parse_with, but +type+ defaults to :html.
        def parse_with sax_handler, type = :html
          super
        end
      end
    end
  end
end
|
38
|
+
# :startdoc:
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# :stopdoc:
|
2
|
+
module Nokogiri
  # Factories for the three-argument callbacks used to read from and write to
  # Ruby IO objects.
  module IoCallbacks

    class << self

      # Returns a callback that reads up to +len+ bytes from +io+, copies them
      # to the start of +buffer+ and answers the number copied, or 0 once
      # io.read returns nil.
      def plain_old_reader(io)
        lambda do |ctx, buffer, len|
          chunk = io.read(len)
          if chunk.nil?
            0
          else
            buffer.put_bytes(0, chunk, 0, chunk.length)
            chunk.length
          end
        end
      end

      if defined?(FFI::IO.native_read)
        # Variant that reads straight into +buffer+ with FFI::IO.native_read.
        # StringIO objects use the plain reader instead. A negative return
        # code is reported as 0.
        def ffi_io_native_reader(io)
          return plain_old_reader(io) if io.is_a?(StringIO)

          lambda do |ctx, buffer, len|
            bytes_read = FFI::IO.native_read(io, buffer, len)
            bytes_read < 0 ? 0 : bytes_read
          end
        end
        alias :reader :ffi_io_native_reader
      else
        alias :reader :plain_old_reader
      end

      # Returns a callback that writes +buffer+ to +io+ and answers +len+.
      def writer(io)
        lambda { |context, buffer, len|
          io.write buffer
          len
        }
      end
    end

  end
end
|
42
|
+
# :startdoc:
|