nokogiri 1.2.3-x86-mswin32-60 → 1.4.5-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +18 -7
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +297 -3
- data/CHANGELOG.rdoc +289 -0
- data/Manifest.txt +148 -37
- data/README.ja.rdoc +20 -20
- data/README.rdoc +53 -22
- data/Rakefile +127 -211
- data/bin/nokogiri +54 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +89 -54
- data/ext/nokogiri/html_document.c +34 -27
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +7 -5
- data/ext/nokogiri/html_entity_lookup.h +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/{native.c → nokogiri.c} +31 -7
- data/ext/nokogiri/{native.h → nokogiri.h} +68 -41
- data/ext/nokogiri/xml_attr.c +20 -9
- data/ext/nokogiri/xml_attr.h +1 -1
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +21 -9
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +18 -6
- data/ext/nokogiri/xml_comment.h +1 -1
- data/ext/nokogiri/xml_document.c +247 -68
- data/ext/nokogiri/xml_document.h +5 -3
- data/ext/nokogiri/xml_document_fragment.c +15 -7
- data/ext/nokogiri/xml_document_fragment.h +1 -1
- data/ext/nokogiri/xml_dtd.c +110 -10
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +16 -5
- data/ext/nokogiri/xml_entity_reference.h +1 -1
- data/ext/nokogiri/xml_io.c +40 -8
- data/ext/nokogiri/xml_io.h +2 -1
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +782 -225
- data/ext/nokogiri/xml_node.h +2 -4
- data/ext/nokogiri/xml_node_set.c +253 -34
- data/ext/nokogiri/xml_node_set.h +2 -2
- data/ext/nokogiri/xml_processing_instruction.c +17 -5
- data/ext/nokogiri/xml_processing_instruction.h +1 -1
- data/ext/nokogiri/xml_reader.c +277 -85
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +168 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +183 -111
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +42 -12
- data/ext/nokogiri/xml_sax_push_parser.h +1 -1
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +28 -173
- data/ext/nokogiri/xml_syntax_error.h +2 -1
- data/ext/nokogiri/xml_text.c +16 -6
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +104 -47
- data/ext/nokogiri/xml_xpath_context.h +1 -1
- data/ext/nokogiri/xslt_stylesheet.c +161 -19
- data/ext/nokogiri/xslt_stylesheet.h +1 -1
- data/lib/nokogiri.rb +47 -8
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +6 -3
- data/lib/nokogiri/css/node.rb +14 -12
- data/lib/nokogiri/css/parser.rb +665 -62
- data/lib/nokogiri/css/parser.y +20 -10
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +10 -9
- data/lib/nokogiri/css/xpath_visitor.rb +47 -44
- data/lib/nokogiri/decorators/slop.rb +8 -4
- data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +81 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +420 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +20 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +174 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +559 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +150 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +236 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +143 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +79 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
- data/lib/nokogiri/ffi/xml/schema.rb +109 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +153 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +77 -0
- data/lib/nokogiri/html.rb +13 -47
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +201 -7
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +2 -0
- data/lib/nokogiri/html/sax/parser.rb +34 -3
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/version.rb +40 -1
- data/lib/nokogiri/version_warning.rb +14 -0
- data/lib/nokogiri/xml.rb +32 -53
- data/lib/nokogiri/xml/attr.rb +5 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +349 -29
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +166 -14
- data/lib/nokogiri/xml/document_fragment.rb +76 -1
- data/lib/nokogiri/xml/dtd.rb +16 -3
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +561 -166
- data/lib/nokogiri/xml/node/save_options.rb +22 -2
- data/lib/nokogiri/xml/node_set.rb +202 -40
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -0
- data/lib/nokogiri/xml/reader.rb +93 -8
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +1 -7
- data/lib/nokogiri/xml/sax/document.rb +107 -2
- data/lib/nokogiri/xml/sax/parser.rb +57 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +13 -1
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath.rb +1 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -0
- data/lib/nokogiri/xslt.rb +26 -2
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/xsd/xmlparser/nokogiri.rb +45 -9
- data/tasks/cross_compile.rb +173 -0
- data/tasks/test.rb +25 -69
- data/test/css/test_nthiness.rb +3 -4
- data/test/css/test_parser.rb +75 -20
- data/test/css/test_tokenizer.rb +23 -1
- data/test/css/test_xpath_visitor.rb +10 -1
- data/test/decorators/test_slop.rb +16 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +101 -23
- data/test/html/sax/test_parser.rb +81 -2
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +39 -8
- data/test/html/test_document.rb +186 -23
- data/test/html/test_document_encoding.rb +78 -1
- data/test/html/test_document_fragment.rb +253 -0
- data/test/html/test_element_description.rb +98 -0
- data/test/html/test_named_characters.rb +1 -1
- data/test/html/test_node.rb +124 -36
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +1 -52
- data/test/test_css_cache.rb +2 -13
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +88 -19
- data/test/test_nokogiri.rb +38 -5
- data/test/test_reader.rb +188 -6
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +183 -83
- data/test/xml/node/test_save_options.rb +1 -1
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +175 -4
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +90 -2
- data/test/xml/test_attr.rb +35 -1
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +186 -1
- data/test/xml/test_cdata.rb +32 -1
- data/test/xml/test_comment.rb +13 -1
- data/test/xml/test_document.rb +415 -43
- data/test/xml/test_document_encoding.rb +1 -1
- data/test/xml/test_document_fragment.rb +173 -5
- data/test/xml/test_dtd.rb +61 -6
- data/test/xml/test_dtd_encoding.rb +3 -1
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +120 -0
- data/test/xml/test_entity_reference.rb +5 -1
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +546 -201
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +33 -3
- data/test/xml/test_node_reparenting.rb +321 -0
- data/test/xml/test_node_set.rb +538 -2
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +6 -1
- data/test/xml/test_reader_encoding.rb +1 -1
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +35 -1
- data/test/xml/test_unparented_node.rb +5 -5
- data/test/xml/test_xpath.rb +142 -11
- data/test/xslt/test_custom_functions.rb +94 -0
- metadata +328 -92
- data/ext/nokogiri/html_sax_parser.c +0 -57
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/native.so +0 -0
- data/ext/nokogiri/xml_xpath.c +0 -53
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +0 -30
- data/lib/nokogiri/css/generated_parser.rb +0 -713
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -144
- data/lib/nokogiri/decorators.rb +0 -2
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -28
- data/lib/nokogiri/hpricot.rb +0 -51
- data/lib/nokogiri/xml/comment.rb +0 -6
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/lib/nokogiri/xml/fragment_handler.rb +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -11
- data/test/hpricot/test_alter.rb +0 -68
- data/test/hpricot/test_builder.rb +0 -20
- data/test/hpricot/test_parser.rb +0 -426
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -77
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_gc.rb +0 -15
@@ -0,0 +1,18 @@
|
|
1
|
+
module Nokogiri
  module XML
    # FFI-backed construction of text nodes.
    class Text < CharacterData
      # Builds a text node holding +string+ and attaches it to +document+.
      #
      # The node is created with xmlNewText, its :doc field is pointed at the
      # document's own :doc pointer, and the wrapped Ruby object then has
      # +initialize+ invoked with the original arguments. The new node is
      # yielded when a block is given, and returned.
      def self.new(string, document, *rest) # :nodoc:
        cstruct = LibXML::XmlNode.new(LibXML.xmlNewText(string))
        cstruct[:doc] = document.cstruct[:doc]

        node = Node.wrap(cstruct, self)
        # +new+ is overridden here, so +initialize+ has to be triggered by hand.
        node.send(:initialize, string, document, *rest)
        yield node if block_given?
        node
      end
    end
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
module Nokogiri
  module XML
    # FFI implementation of the XPath evaluation context. Instances hold the
    # wrapped libxml2 XPath context struct in +cstruct+.
    class XPathContext
      attr_accessor :cstruct # :nodoc:

      # Registers the namespace +prefix+ => +uri+ on this context.
      def register_ns(prefix, uri) # :nodoc:
        LibXML.xmlXPathRegisterNs(cstruct, prefix, uri)
      end

      # Registers the XPath variable +name+ with the string +value+.
      def register_variable(name, value) # :nodoc:
        xml_value = LibXML.xmlXPathNewCString(value)
        LibXML.xmlXPathRegisterVariable(cstruct, name, xml_value)
      end

      # Evaluates +search_path+ against this context.
      #
      # When +xpath_handler+ is given, its public methods become callable as
      # XPath functions. Returns a NodeSet, String, Float or boolean depending
      # on the XPath result type. Raises XPath::SyntaxError when libxml2
      # reports an error or returns a null result.
      def evaluate(search_path, xpath_handler = nil) # :nodoc:
        lookup = nil # to keep lambda in scope long enough to avoid a possible GC tragedy
        query = search_path.to_s

        if xpath_handler
          lookup = lambda do |_ctx, name, _uri|
            xpath_handler.respond_to?(name) ? ruby_funcall(name, xpath_handler) : nil
          end
          LibXML.xmlXPathRegisterFuncLookup(cstruct, lookup, nil)
        end

        exception_handler = lambda do |_ctx, error|
          raise XPath::SyntaxError.wrap(error)
        end
        generic_exception_handler = lambda do |_ctx, msg|
          raise RuntimeError.new(msg) # TODO: varargs
        end

        LibXML.xmlResetLastError()
        LibXML.xmlSetStructuredErrorFunc(nil, exception_handler)
        LibXML.xmlSetGenericErrorFunc(nil, generic_exception_handler)

        begin
          xpath_ptr = LibXML.xmlXPathEvalExpression(query, cstruct)
        ensure
          # Always uninstall the callbacks, even when evaluation raised, so
          # the library is never left pointing at lambdas local to this call.
          LibXML.xmlSetStructuredErrorFunc(nil, nil)
          LibXML.xmlSetGenericErrorFunc(nil, nil)
        end

        if xpath_ptr.null?
          error = LibXML.xmlGetLastError()
          raise XPath::SyntaxError.wrap(error)
        end

        xpath = XML::XPath.new
        xpath.cstruct = LibXML::XmlXpathObject.new(xpath_ptr)
        xpath.document = cstruct.document.ruby_doc

        case xpath.cstruct[:type]
        when LibXML::XmlXpathObject::XPATH_NODESET
          if xpath.cstruct[:nodesetval].null?
            NodeSet.new(xpath.document)
          else
            NodeSet.wrap(xpath.cstruct[:nodesetval], xpath.document)
          end
        when LibXML::XmlXpathObject::XPATH_STRING
          xpath.cstruct[:stringval]
        when LibXML::XmlXpathObject::XPATH_NUMBER
          xpath.cstruct[:floatval]
        when LibXML::XmlXpathObject::XPATH_BOOLEAN
          0 != xpath.cstruct[:boolval]
        else
          NodeSet.new(xpath.document)
        end
      end

      # Creates a context for the document owning +node+, with +node+ as the
      # context node.
      def self.new(node) # :nodoc:
        LibXML.xmlXPathInit()

        ptr = LibXML.xmlXPathNewContext(node.cstruct[:doc])

        ctx = allocate
        ctx.cstruct = LibXML::XmlXpathContext.new(ptr)
        ctx.cstruct[:node] = node.cstruct
        ctx
      end

      private

      #
      #  returns a lambda that will call the handler function with marshalled parameters
      #
      def ruby_funcall(name, xpath_handler) # :nodoc:
        lambda do |ctx, nargs|
          parser_context = LibXML::XmlXpathParserContext.new(ctx)
          document = parser_context.context.document.ruby_doc

          # Arguments are popped last-first, so each one is unshifted to
          # restore call order.
          params = []
          nargs.times do
            obj = LibXML::XmlXpathObject.new(LibXML.valuePop(ctx))
            case obj[:type]
            when LibXML::XmlXpathObject::XPATH_STRING
              params.unshift obj[:stringval]
            when LibXML::XmlXpathObject::XPATH_BOOLEAN
              params.unshift(obj[:boolval] != 0)
            when LibXML::XmlXpathObject::XPATH_NUMBER
              params.unshift obj[:floatval]
            when LibXML::XmlXpathObject::XPATH_NODESET
              params.unshift NodeSet.wrap(obj[:nodesetval], document)
            else
              # Any other type is passed as its string cast. Read the string
              # before freeing the buffer returned by the cast.
              char_ptr = LibXML.xmlXPathCastToString(obj)
              params.unshift char_ptr.read_string
              LibXML.xmlFree(char_ptr)
            end
          end

          result = xpath_handler.send(name, *params)

          # Integer covers what older Rubies called Fixnum and Bignum.
          case result
          when Integer, Float
            LibXML.xmlXPathReturnNumber(ctx, result)
          when String
            LibXML.xmlXPathReturnString(ctx, LibXML.xmlXPathWrapCString(result))
          when true
            LibXML.xmlXPathReturnTrue(ctx)
          when false
            LibXML.xmlXPathReturnFalse(ctx)
          when nil
            # nothing is returned to the XPath engine for nil
          when XML::NodeSet
            LibXML.xmlXPathReturnNodeSet(
              ctx,
              LibXML.xmlXPathNodeSetMerge(nil, result.cstruct)
            )
          when Array
            node_set = XML::NodeSet.new(document, result)
            LibXML.xmlXPathReturnNodeSet(
              ctx,
              LibXML.xmlXPathNodeSetMerge(nil, node_set.cstruct)
            )
          else
            raise RuntimeError.new("Invalid return type #{result.class.inspect}")
          end

          nil
        end # lambda
      end # ruby_funcall
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Nokogiri
  module XSLT
    # Extension classes keyed by namespace URI. Nothing in this file
    # populates it yet; see register below.
    @modules = {}

    @method_caller = lambda do |context, nargs|
      # TODO
    end

    # Registers every instance method declared directly on the class stored
    # for +uri+ as an XSLT extension function, then returns a new instance.
    @init_func = lambda do |context, uri|
      klass = @modules[uri]
      klass.instance_methods(false).each do |method_name|
        LibXML.xsltRegisterExtFunction(context, method_name, uri, @method_caller)
      end
      klass.new
    end

    @shutdown_func = lambda do |context, uri, data|
    end

    # Not implemented in the FFI backend; always raises NotImplementedError.
    def self.register(uri, klass) # :nodoc:
      raise NotImplementedError.new("sorry, you should implement me.")
    end

    # FFI implementation of a compiled XSLT stylesheet. The wrapped
    # stylesheet struct is held in +cstruct+.
    class Stylesheet
      attr_accessor :cstruct # :nodoc:

      # Compiles a recursive copy of +document+ into a Stylesheet. A
      # RuntimeError is raised from the error callback when libxslt reports
      # a generic error.
      def self.parse_stylesheet_doc(document) # :nodoc:
        LibXML.exsltRegisterAll

        generic_exception_handler = lambda do |_ctx, msg|
          raise RuntimeError.new(msg) # TODO: varargs
        end
        LibXML.xsltSetGenericErrorFunc(nil, generic_exception_handler)

        begin
          ss = LibXML.xsltParseStylesheetDoc(LibXML.xmlCopyDoc(document.cstruct, 1)) # 1 => recursive
        ensure
          # Uninstall the callback even when parsing raised, so the library
          # never keeps a reference to a lambda local to this call.
          LibXML.xsltSetGenericErrorFunc(nil, nil)
        end

        obj = allocate
        obj.cstruct = LibXML::XsltStylesheet.new(ss)
        obj
      end

      # Serializes +document+ (a transform result) to a String using this
      # stylesheet's output settings.
      def serialize(document) # :nodoc:
        buf_ptr = FFI::Buffer.new :pointer
        buf_len = FFI::Buffer.new :int
        LibXML.xsltSaveResultToString(buf_ptr, buf_len, document.cstruct, cstruct)
        buf = Nokogiri::LibXML::XmlAlloc.new(buf_ptr.get_pointer(0))
        buf.pointer.read_string(buf_len.get_int(0))
      end

      # Applies the stylesheet to +document+ and returns the wrapped result.
      #
      # +params+ is either a flat Array of alternating names and values or a
      # Hash, which is flattened into that form. Raises ArgumentError for a
      # non-Document argument, TypeError for unsupported +params+, and
      # RuntimeError when the transform returns a null pointer.
      def transform(document, params = []) # :nodoc:
        unless document.kind_of? Nokogiri::XML::Document
          raise ArgumentError, "argument must be a Nokogiri::XML::Document"
        end

        params = params.to_a.flatten if params.is_a?(Hash)
        raise(TypeError) unless params.is_a?(Array)

        param_arr = FFI::MemoryPointer.new(:pointer, params.length + 1, false)

        # Keep the MemoryPointer instances alive until after the call
        ptrs = params.map { |param| FFI::MemoryPointer.from_string(param.to_s) }
        param_arr.put_array_of_pointer(0, ptrs)

        # Terminate the list with a NULL pointer
        param_arr.put_pointer(LibXML.pointer_offset(params.length), nil)

        ptr = LibXML.xsltApplyStylesheet(cstruct, document.cstruct, param_arr)
        raise(RuntimeError, "could not perform xslt transform on document") if ptr.null?

        XML::Document.wrap(ptr)
      end
    end
  end
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,69 +1,35 @@
|
|
1
1
|
require 'nokogiri/html/entity_lookup'
|
2
2
|
require 'nokogiri/html/document'
|
3
|
+
require 'nokogiri/html/document_fragment'
|
4
|
+
require 'nokogiri/html/sax/parser_context'
|
3
5
|
require 'nokogiri/html/sax/parser'
|
6
|
+
require 'nokogiri/html/element_description'
|
4
7
|
|
5
8
|
module Nokogiri
|
6
9
|
class << self
|
7
10
|
###
|
8
|
-
# Parse HTML.
|
9
|
-
|
10
|
-
|
11
|
-
# encoding that should be used when processing the document. +options+
|
12
|
-
# is a number that sets options in the parser, such as
|
13
|
-
# Nokogiri::XML::PARSE_RECOVER. See the constants in
|
14
|
-
# Nokogiri::XML.
|
15
|
-
def HTML thing, url = nil, encoding = nil, options = 2145
|
16
|
-
Nokogiri::HTML.parse(thing, url, encoding, options)
|
11
|
+
# Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
|
12
|
+
def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
|
13
|
+
Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
|
17
14
|
end
|
18
15
|
end
|
19
16
|
|
20
17
|
module HTML
|
21
|
-
# Parser options
|
22
|
-
PARSE_NOERROR = 1 << 5 # No error reports
|
23
|
-
PARSE_NOWARNING = 1 << 6 # No warnings
|
24
|
-
PARSE_PEDANTIC = 1 << 7 # Pedantic errors
|
25
|
-
PARSE_NOBLANKS = 1 << 8 # Remove blanks nodes
|
26
|
-
PARSE_NONET = 1 << 11 # No network access
|
27
|
-
|
28
18
|
class << self
|
29
19
|
###
|
30
|
-
# Parse HTML.
|
31
|
-
def parse
|
32
|
-
|
33
|
-
encoding ||= string_or_io.encoding.name
|
34
|
-
end
|
35
|
-
|
36
|
-
if string_or_io.respond_to?(:read)
|
37
|
-
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
38
|
-
return Document.read_io(string_or_io, url, encoding, options)
|
39
|
-
end
|
40
|
-
|
41
|
-
return Document.new if(string_or_io.length == 0)
|
42
|
-
Document.read_memory(string_or_io, url, encoding, options)
|
20
|
+
# Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
|
21
|
+
def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
|
22
|
+
Document.parse(thing, url, encoding, options, &block)
|
43
23
|
end
|
44
24
|
|
45
25
|
####
|
46
26
|
# Parse a fragment from +string+ in to a NodeSet.
|
47
|
-
def fragment string
|
48
|
-
|
49
|
-
fragment = XML::DocumentFragment.new(doc)
|
50
|
-
finder = lambda { |c, f|
|
51
|
-
c.each do |child|
|
52
|
-
if string == child.content && child.name == 'text'
|
53
|
-
fragment.add_child(child)
|
54
|
-
end
|
55
|
-
fragment.add_child(child) if string =~ /<#{child.name}/
|
56
|
-
end
|
57
|
-
return fragment if fragment.children.length > 0
|
58
|
-
|
59
|
-
c.each do |child|
|
60
|
-
finder.call(child.children, f)
|
61
|
-
end
|
62
|
-
}
|
63
|
-
finder.call(doc.children, finder)
|
64
|
-
fragment
|
27
|
+
def fragment string, encoding = nil
|
28
|
+
HTML::DocumentFragment.parse string, encoding
|
65
29
|
end
|
66
30
|
end
|
31
|
+
|
32
|
+
# Instance of Nokogiri::HTML::EntityLookup
|
67
33
|
NamedCharacters = EntityLookup.new
|
68
34
|
end
|
69
35
|
end
|
@@ -1,6 +1,32 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module HTML
|
3
|
-
|
3
|
+
###
|
4
|
+
# Nokogiri HTML builder is used for building HTML documents. It is very
|
5
|
+
# similar to the Nokogiri::XML::Builder. In fact, you should go read the
|
6
|
+
# documentation for Nokogiri::XML::Builder before reading this
|
7
|
+
# documentation.
|
8
|
+
#
|
9
|
+
# == Synopsis:
|
10
|
+
#
|
11
|
+
# Create an HTML document with a body that has an onload attribute, and a
|
12
|
+
# span tag with a class of "bold" that has content of "Hello world".
|
13
|
+
#
|
14
|
+
# builder = Nokogiri::HTML::Builder.new do |doc|
|
15
|
+
# doc.html {
|
16
|
+
# doc.body(:onload => 'some_func();') {
|
17
|
+
# doc.span.bold {
|
18
|
+
# doc.text "Hello world"
|
19
|
+
# }
|
20
|
+
# }
|
21
|
+
# }
|
22
|
+
# end
|
23
|
+
# puts builder.to_html
|
24
|
+
#
|
25
|
+
# The HTML builder inherits from the XML builder, so make sure to read the
|
26
|
+
# Nokogiri::XML::Builder documentation.
|
27
|
+
class Builder < Nokogiri::XML::Builder
|
28
|
+
###
|
29
|
+
# Convert the builder to HTML
|
4
30
|
def to_html
|
5
31
|
@doc.to_html
|
6
32
|
end
|
@@ -1,14 +1,208 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module HTML
|
3
|
-
class Document < XML::Document
|
3
|
+
class Document < Nokogiri::XML::Document
|
4
|
+
###
# Get the meta tag encoding for this document. If there is no meta tag,
# or its content attribute carries no charset, then nil is returned.
def meta_encoding
  meta = meta_content_type or return nil
  # to_s guards against a meta tag with no content attribute; the match is
  # checked before indexing so a missing charset yields nil, not an error.
  found = /charset\s*=\s*([\w-]+)/i.match(meta['content'].to_s)
  found && found[1]
end
|
11
|
+
|
12
|
+
###
# Set the meta tag encoding for this document. If there is no meta
# content tag, the encoding is not set.
def meta_encoding=(encoding)
  meta = meta_content_type
  meta['content'] = "text/html; charset=%s" % encoding if meta
end
|
19
|
+
|
20
|
+
# Returns the first <meta> node whose http-equiv attribute is
# "Content-Type" (compared case-insensitively), or nil when there is none.
def meta_content_type
  css('meta[@http-equiv]').find do |node|
    node['http-equiv'] =~ /\AContent-Type\z/i
  end
end
private :meta_content_type
|
26
|
+
|
27
|
+
###
# Get the title string of this document. Return nil if there is
# no title tag.
def title
  node = at('title')
  node && node.inner_text
end
|
33
|
+
|
34
|
+
###
# Set the title string of this document. If there is no head
# element, the title is not set.
def title=(text)
  node = at('title')
  unless node
    # No title yet: create one under <head>, or give up when there is no head.
    head = at('head') or return nil
    node = Nokogiri::XML::Node.new('title', self)
    head << node
  end
  node.children = XML::Text.new(text, self)
end
|
45
|
+
|
4
46
|
####
|
5
|
-
# Serialize
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
47
|
+
# Serialize Node using +options+.  Save options can also be set using a
# block. See SaveOptions.
#
# These two statements are equivalent:
#
#  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
# When no :save_with value is given, SaveOptions::DEFAULT_HTML is used.
def serialize(options = {})
  # Work on a copy so the default is not written into the caller's hash.
  options = options.dup
  options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
  super
end
|
64
|
+
|
65
|
+
####
# Create a Nokogiri::XML::DocumentFragment from +tags+
def fragment(tags = nil)
  DocumentFragment.new(self, tags, root)
end
|
70
|
+
|
71
|
+
class << self
  ###
  # Parse HTML.  +thing+ may be a String, or any object that
  # responds to _read_ and _close_ such as an IO, or StringIO.
  # +url+ is resource where this document is located.  +encoding+ is the
  # encoding that should be used when processing the document. +options+
  # is a number that sets options in the parser, such as
  # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
  # Nokogiri::XML::ParseOptions.
  def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
    # Integer also matches what older Rubies called Fixnum, a constant that
    # no longer exists on current Rubies.
    options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
    # Give the options to the user
    yield options if block_given?

    if string_or_io.respond_to?(:encoding)
      unless string_or_io.encoding.name == "ASCII-8BIT"
        encoding ||= string_or_io.encoding.name
      end
    end

    if string_or_io.respond_to?(:read)
      url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
      if !encoding
        # Perform further encoding detection that libxml2 does
        # not do.
        string_or_io = EncodingReader.new(string_or_io)
        begin
          return read_io(string_or_io, url, encoding, options.to_i)
        rescue EncodingFoundException => e
          # A retry is required because libxml2 has a problem in
          # that it cannot switch encoding well in the middle of
          # parsing, especially if it has already seen a
          # non-ASCII character when it finds an encoding hint.
          encoding = e.encoding
        end
      end
      return read_io(string_or_io, url, encoding, options.to_i)
    end

    # read_memory pukes on empty docs
    return new if string_or_io.nil? or string_or_io.empty?

    if !encoding
      encoding = EncodingReader.detect_encoding(string_or_io)
    end

    read_memory(string_or_io, url, encoding, options.to_i)
  end
end
|
121
|
+
|
122
|
+
# Raised from EncodingReader#read to hand a detected encoding back to the
# parse method, which rescues it and retries with that encoding.
# NOTE(review): derives from Exception rather than StandardError, presumably
# so a bare rescue in between does not swallow it; confirm before changing.
class EncodingFoundException < Exception # :nodoc:
  attr_reader :encoding

  def initialize(encoding)
    @encoding = encoding
    super("encoding found: %s" % encoding)
  end
end
|
130
|
+
|
131
|
+
# Wraps an IO so the first chunk read from it can be scanned for an
# encoding declaration before the parser consumes it.
class EncodingReader # :nodoc:
  # SAX handler that stops the scan, via throw :found, once the encoding
  # question is settled one way or the other.
  class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
    attr_reader :encoding

    # Records +encoding+ and unwinds to the enclosing catch(:found).
    def found(encoding)
      @encoding = encoding
      throw :found
    end

    # Ends the scan with no encoding. The argument is ignored; it is
    # optional because start_element calls this with no arguments.
    def not_found(encoding = nil)
      found nil
    end

    def start_element(name, attrs = [])
      case name
      when /\A(?:div|h1|img|p|br)\z/
        # One of these elements ends the search for a meta tag.
        not_found
      when 'meta'
        attr = Hash[attrs]
        http_equiv = attr['http-equiv'] and
          http_equiv.match(/\AContent-Type\z/i) and
          content = attr['content'] and
          m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
          found m[1]
      end
    end
  end

  # Returns the encoding declared in +chunk+, taken from a leading XML
  # declaration or else from a Content-Type meta tag. Returns nil when none
  # is found or when any StandardError is raised during detection.
  def self.detect_encoding(chunk)
    m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
      return Nokogiri.XML(m[1]).encoding

    handler = SAXHandler.new
    parser = Nokogiri::HTML::SAX::Parser.new(handler)
    catch(:found) {
      parser.parse(chunk)
    }
    handler.encoding
  rescue
    nil
  end

  def initialize(io)
    @io = io
    @firstchunk = nil
  end

  # Reads up to +len+ bytes. The first call raises EncodingFoundException
  # when an encoding is detected in the first chunk; that chunk is kept and
  # served again by later calls.
  def read(len)
    # no support for a call without len

    if !@firstchunk
      @firstchunk = @io.read(len) or return nil

      # This implementation expects and assumes that the first
      # call from htmlReadIO() is made with a length long enough
      # (~1KB) to achieve further encoding detection that
      # libxml2 does not do.
      if encoding = EncodingReader.detect_encoding(@firstchunk)
        raise EncodingFoundException, encoding
      end

      # This chunk is stored for the next read in retry.
      return @firstchunk
    end

    # Serve what remains of the stored chunk first, then top up from the IO.
    ret = @firstchunk.slice!(0, len)
    if (len -= ret.length) > 0
      rest = @io.read(len) and ret << rest
    end
    if ret.empty?
      nil
    else
      ret
    end
  end
end
|
13
207
|
end
|
14
208
|
end
|