nokogiri 1.0.0 → 1.6.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.cross_rubies +9 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +51 -0
- data/CHANGELOG.rdoc +1160 -0
- data/CONTRIBUTING.md +42 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +31 -0
- data/Manifest.txt +284 -40
- data/README.md +166 -0
- data/ROADMAP.md +111 -0
- data/Rakefile +310 -199
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/appveyor.yml +22 -0
- data/bin/nokogiri +118 -0
- data/build_all +45 -0
- data/dependencies.yml +29 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +664 -34
- data/ext/nokogiri/html_document.c +120 -33
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +145 -0
- data/ext/nokogiri/nokogiri.h +131 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +23 -19
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +501 -54
- data/ext/nokogiri/xml_document.h +14 -1
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +109 -24
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +60 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +117 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1285 -315
- data/ext/nokogiri/xml_node.h +4 -6
- data/ext/nokogiri/xml_node_set.c +415 -54
- data/ext/nokogiri/xml_node_set.h +6 -2
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +316 -77
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +215 -80
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +45 -175
- data/ext/nokogiri/xml_syntax_error.h +4 -2
- data/ext/nokogiri/xml_text.c +37 -14
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +230 -13
- data/ext/nokogiri/xml_xpath_context.h +2 -1
- data/ext/nokogiri/xslt_stylesheet.c +196 -34
- data/ext/nokogiri/xslt_stylesheet.h +6 -1
- data/lib/nokogiri/css/node.rb +18 -61
- data/lib/nokogiri/css/parser.rb +725 -17
- data/lib/nokogiri/css/parser.y +126 -63
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +31 -39
- data/lib/nokogiri/css/xpath_visitor.rb +109 -51
- data/lib/nokogiri/css.rb +24 -3
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +329 -3
- data/lib/nokogiri/html/document_fragment.rb +39 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +35 -4
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +36 -0
- data/lib/nokogiri/html.rb +18 -76
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -1
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +395 -31
- data/lib/nokogiri/xml/cdata.rb +4 -2
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +267 -12
- data/lib/nokogiri/xml/document_fragment.rb +149 -0
- data/lib/nokogiri/xml/dtd.rb +27 -1
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node.rb +748 -109
- data/lib/nokogiri/xml/node_set.rb +200 -72
- data/lib/nokogiri/xml/parse_options.rb +120 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +102 -4
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax/document.rb +114 -2
- data/lib/nokogiri/xml/sax/parser.rb +97 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/sax.rb +2 -7
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/lib/nokogiri/xml/syntax_error.rb +27 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +4 -0
- data/lib/nokogiri/xml/xpath_context.rb +3 -1
- data/lib/nokogiri/xml.rb +45 -38
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/nokogiri/xslt.rb +47 -2
- data/lib/nokogiri.rb +117 -24
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/patches/sort-patches-by-date +25 -0
- data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +73 -6
- data/test/css/test_parser.rb +184 -39
- data/test/css/test_tokenizer.rb +72 -19
- data/test/css/test_xpath_visitor.rb +44 -2
- data/test/decorators/test_slop.rb +20 -0
- data/test/files/2ch.html +108 -0
- data/test/files/GH_1042.html +18 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/slow-xpath.xml +25509 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +2 -1
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +124 -13
- data/test/html/sax/test_parser.rb +118 -4
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_builder.rb +94 -8
- data/test/html/test_document.rb +626 -11
- data/test/html/test_document_encoding.rb +145 -0
- data/test/html/test_document_fragment.rb +301 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +212 -0
- data/test/html/test_node_encoding.rb +85 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_convert_xpath.rb +2 -47
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +48 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +103 -1
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +293 -8
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +309 -8
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +67 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +327 -2
- data/test/xml/test_c14n.rb +180 -0
- data/test/xml/test_cdata.rb +32 -2
- data/test/xml/test_comment.rb +40 -0
- data/test/xml/test_document.rb +846 -35
- data/test/xml/test_document_encoding.rb +31 -0
- data/test/xml/test_document_fragment.rb +271 -0
- data/test/xml/test_dtd.rb +153 -9
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +251 -0
- data/test/xml/test_namespace.rb +96 -0
- data/test/xml/test_node.rb +1126 -105
- data/test/xml/test_node_attributes.rb +115 -0
- data/test/xml/test_node_encoding.rb +69 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +549 -0
- data/test/xml/test_node_set.rb +668 -9
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +134 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +142 -0
- data/test/xml/test_syntax_error.rb +30 -0
- data/test/xml/test_text.rb +49 -2
- data/test/xml/test_unparented_node.rb +440 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +445 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +107 -0
- metadata +459 -115
- data/History.txt +0 -6
- data/README.ja.txt +0 -86
- data/README.txt +0 -87
- data/ext/nokogiri/html_sax_parser.c +0 -32
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/native.c +0 -40
- data/ext/nokogiri/native.h +0 -51
- data/ext/nokogiri/xml_xpath.c +0 -46
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/css/generated_parser.rb +0 -653
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators.rb +0 -1
- data/lib/nokogiri/hpricot.rb +0 -47
- data/lib/nokogiri/xml/after_handler.rb +0 -18
- data/lib/nokogiri/xml/before_handler.rb +0 -32
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/nokogiri.gemspec +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -7
- data/test/hpricot/test_alter.rb +0 -67
- data/test/hpricot/test_builder.rb +0 -27
- data/test/hpricot/test_parser.rb +0 -423
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -78
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_reader.rb +0 -222
@@ -1,19 +1,50 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module HTML
|
3
|
+
###
|
4
|
+
# Nokogiri lets you write a SAX parser to process HTML but get HTML
|
5
|
+
# correction features.
|
6
|
+
#
|
7
|
+
# See Nokogiri::HTML::SAX::Parser for a basic example of using a
|
8
|
+
# SAX parser with HTML.
|
9
|
+
#
|
10
|
+
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
3
11
|
module SAX
|
4
|
-
|
12
|
+
###
|
13
|
+
# This class lets you perform SAX style parsing on HTML with HTML
|
14
|
+
# error correction.
|
15
|
+
#
|
16
|
+
# Here is a basic usage example:
|
17
|
+
#
|
18
|
+
# class MyDoc < Nokogiri::XML::SAX::Document
|
19
|
+
# def start_element name, attributes = []
|
20
|
+
# puts "found a #{name}"
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
|
25
|
+
# parser.parse(File.read(ARGV[0], mode: 'rb'))
|
26
|
+
#
|
27
|
+
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
28
|
+
class Parser < Nokogiri::XML::SAX::Parser
  ###
  # Parse html stored in +data+ using +encoding+
  #
  # Raises ArgumentError when +data+ is nil (or false).  Returns nil
  # without parsing when +data+ has zero length.  When a block is given,
  # the ParserContext built for +data+ is yielded to it before parsing
  # starts.
  def parse_memory data, encoding = 'UTF-8'
    raise ArgumentError, 'data must not be nil' unless data
    return unless data.length > 0

    ctx = ParserContext.memory(data, encoding)
    yield ctx if block_given?
    ctx.parse_with self
  end

  ###
  # Parse a file with +filename+
  #
  # Raises ArgumentError when +filename+ is nil (or false),
  # Errno::ENOENT when no such file exists and Errno::EISDIR when
  # +filename+ names a directory.  When a block is given, the
  # ParserContext built for the file is yielded to it before parsing
  # starts.
  def parse_file filename, encoding = 'UTF-8'
    raise ArgumentError, 'filename must not be nil' unless filename
    # Pass the path along so the resulting error message names the file.
    raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
    raise Errno::EISDIR, filename.to_s if File.directory?(filename)

    ctx = ParserContext.file(filename, encoding)
    yield ctx if block_given?
    ctx.parse_with self
  end
end
|
19
50
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Nokogiri
  module HTML
    module SAX
      ###
      # Context for HTML SAX parsers.  This class is usually not instantiated
      # by the user.  Instead, you should be looking at
      # Nokogiri::HTML::SAX::Parser
      class ParserContext < Nokogiri::XML::SAX::ParserContext
        ###
        # Build a context for +thing+ using +encoding+.
        #
        # When +thing+ responds to both +read+ and +close+ the call is
        # handed to the superclass implementation of +new+ with the same
        # arguments; anything else is passed to ParserContext.memory.
        def self.new thing, encoding = 'UTF-8'
          if [:read, :close].all? { |x| thing.respond_to?(x) }
            super
          else
            memory(thing, encoding)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Nokogiri
  module HTML
    module SAX
      ###
      # Push-style SAX parser for HTML: chunks of markup are handed to the
      # parser with #write (or #<<) and parsing is completed with #finish.
      class PushParser

        # The Nokogiri::HTML::SAX::Document on which the PushParser will be
        # operating
        attr_accessor :document

        ###
        # Create a push parser that reports to +doc+.  +file_name+ and
        # +encoding+ are passed on to the native push parser context.
        #
        # NOTE(review): the default +doc+ refers to HTML::SAX::Document,
        # which is not defined in this file -- confirm that constant exists.
        def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
          @document = doc
          @encoding = encoding
          @sax_parser = HTML::SAX::Parser.new(doc, @encoding)

          ## Create our push parser context
          initialize_native(@sax_parser, file_name, encoding)
        end

        ###
        # Write a +chunk+ of HTML to the PushParser.  Any callback methods
        # that can be called will be called immediately.  Pass +last_chunk+
        # as true to mark +chunk+ as the final piece of input.
        def write chunk, last_chunk = false
          native_write(chunk, last_chunk)
        end
        alias :<< :write

        ###
        # Finish the parsing.  This method is only necessary for
        # Nokogiri::HTML::SAX::Document#end_document to be called.
        def finish
          # An empty final chunk flags the end of the input.
          write '', true
        end
      end
    end
  end
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,95 +1,37 @@
|
|
1
|
+
require 'nokogiri/html/entity_lookup'
|
1
2
|
require 'nokogiri/html/document'
|
3
|
+
require 'nokogiri/html/document_fragment'
|
4
|
+
require 'nokogiri/html/sax/parser_context'
|
2
5
|
require 'nokogiri/html/sax/parser'
|
6
|
+
require 'nokogiri/html/sax/push_parser'
|
7
|
+
require 'nokogiri/html/element_description'
|
8
|
+
require 'nokogiri/html/element_description_defaults'
|
3
9
|
|
4
10
|
module Nokogiri
|
5
11
|
class << self
|
6
|
-
|
7
|
-
|
12
|
+
###
|
13
|
+
# Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
|
14
|
+
def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
|
15
|
+
Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
|
8
16
|
end
|
9
17
|
end
|
10
18
|
|
11
19
|
module HTML
|
12
|
-
# Parser options
|
13
|
-
PARSE_NOERROR = 1 << 5 # No error reports
|
14
|
-
PARSE_NOWARNING = 1 << 6 # No warnings
|
15
|
-
PARSE_PEDANTIC = 1 << 7 # Pedantic errors
|
16
|
-
PARSE_NOBLANKS = 1 << 8 # Remove blanks nodes
|
17
|
-
PARSE_NONET = 1 << 11 # No network access
|
18
|
-
|
19
20
|
class << self
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
Document.read_memory(string_or_io, url, encoding, options)
|
21
|
+
###
|
22
|
+
# Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
|
23
|
+
def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
|
24
|
+
Document.parse(thing, url, encoding, options, &block)
|
27
25
|
end
|
28
26
|
|
29
27
|
####
|
30
28
|
# Parse a fragment from +string+ into a NodeSet.
|
31
|
-
def fragment string
|
32
|
-
|
33
|
-
finder = lambda { |children, f|
|
34
|
-
children.each do |child|
|
35
|
-
return children if string =~ /<#{child.name}/
|
36
|
-
finder.call(child.children, f)
|
37
|
-
end
|
38
|
-
}
|
39
|
-
finder.call(doc.children, finder)
|
29
|
+
def fragment string, encoding = nil
|
30
|
+
HTML::DocumentFragment.parse string, encoding
|
40
31
|
end
|
41
32
|
end
|
42
33
|
|
43
|
-
|
44
|
-
|
45
|
-
"Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
|
46
|
-
"Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
|
47
|
-
"Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
|
48
|
-
"Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
|
49
|
-
"Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
|
50
|
-
"OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
|
51
|
-
"Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
|
52
|
-
"Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
|
53
|
-
"THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
|
54
|
-
"Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
|
55
|
-
"Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
|
56
|
-
"aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
|
57
|
-
"and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
|
58
|
-
"atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
|
59
|
-
"bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
|
60
|
-
"chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
|
61
|
-
"crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
|
62
|
-
"darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
|
63
|
-
"eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
|
64
|
-
"ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
|
65
|
-
"euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
|
66
|
-
"frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
|
67
|
-
"ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
|
68
|
-
"hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
|
69
|
-
"image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
|
70
|
-
"isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
|
71
|
-
"lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
|
72
|
-
"le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
|
73
|
-
"lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
|
74
|
-
"micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
|
75
|
-
"nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
|
76
|
-
"nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
|
77
|
-
"oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
|
78
|
-
"oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
|
79
|
-
"otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
|
80
|
-
"permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
|
81
|
-
"plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
|
82
|
-
"psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
|
83
|
-
"raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
|
84
|
-
"reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
|
85
|
-
"rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
|
86
|
-
"shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
|
87
|
-
"sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
|
88
|
-
"sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
|
89
|
-
"theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
|
90
|
-
"times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
|
91
|
-
"ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
|
92
|
-
"uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
|
93
|
-
"yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}
|
34
|
+
# Instance of Nokogiri::HTML::EntityLookup
|
35
|
+
NamedCharacters = EntityLookup.new
|
94
36
|
end
|
95
37
|
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -1,3 +1,108 @@
|
|
1
1
|
module Nokogiri
  # The version of Nokogiri you are using
  VERSION = '1.6.8.1'

  ###
  # Collects version details about Nokogiri, the running Ruby and the
  # underlying parser libraries.
  class VersionInfo # :nodoc:
    # JRUBY_VERSION when RUBY_PLATFORM is "java", nil otherwise.
    def jruby?
      ::JRUBY_VERSION if RUBY_PLATFORM == "java"
    end

    # RUBY_ENGINE when that constant is defined, 'mri' otherwise.
    def engine
      defined?(RUBY_ENGINE) ? RUBY_ENGINE : 'mri'
    end

    # LIBXML_PARSER_VERSION rewritten as a dotted version string, e.g.
    # "20904" becomes "2.9.4".  When the constant does not match the
    # expected digit layout it is returned unchanged rather than raising,
    # because #warnings calls this method while the class is being loaded.
    def loaded_parser_version
      match = /^(\d+)(\d\d)(\d\d)(?!\d)/.match(LIBXML_PARSER_VERSION)
      return LIBXML_PARSER_VERSION unless match

      match.captures.map { |j| j.to_i }.join(".")
    end

    # The value of the LIBXML_VERSION constant.
    def compiled_parser_version
      LIBXML_VERSION
    end

    # Truthy when the LIBXML_VERSION constant is defined.
    def libxml2?
      defined?(LIBXML_VERSION)
    end

    # Negation of #libxml2_using_packaged?
    def libxml2_using_system?
      ! libxml2_using_packaged?
    end

    # The value of the NOKOGIRI_USE_PACKAGED_LIBRARIES constant.
    def libxml2_using_packaged?
      NOKOGIRI_USE_PACKAGED_LIBRARIES
    end

    # Array of warning messages; contains one entry when the compiled and
    # loaded libxml versions differ and is empty otherwise (or when libxml2
    # is not in use).
    def warnings
      return [] unless libxml2?

      if compiled_parser_version != loaded_parser_version
        ["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
      else
        []
      end
    end

    # Hash describing Nokogiri, Ruby and the parser libraries in use.
    def to_hash
      hash_info = {}
      hash_info['warnings']              = []
      hash_info['nokogiri']              = Nokogiri::VERSION
      hash_info['ruby']                  = {}
      hash_info['ruby']['version']       = ::RUBY_VERSION
      hash_info['ruby']['platform']      = ::RUBY_PLATFORM
      hash_info['ruby']['description']   = ::RUBY_DESCRIPTION
      hash_info['ruby']['engine']        = engine
      hash_info['ruby']['jruby']         = jruby? if jruby?

      if libxml2?
        hash_info['libxml']              = {}
        hash_info['libxml']['binding']   = 'extension'
        if libxml2_using_packaged?
          hash_info['libxml']['source']  = "packaged"
          hash_info['libxml']['libxml2_path'] = NOKOGIRI_LIBXML2_PATH
          hash_info['libxml']['libxslt_path'] = NOKOGIRI_LIBXSLT_PATH
          hash_info['libxml']['libxml2_patches'] = NOKOGIRI_LIBXML2_PATCHES
          hash_info['libxml']['libxslt_patches'] = NOKOGIRI_LIBXSLT_PATCHES
        else
          hash_info['libxml']['source']  = "system"
        end
        hash_info['libxml']['compiled']  = compiled_parser_version
        hash_info['libxml']['loaded']    = loaded_parser_version
        hash_info['warnings']            = warnings
      elsif jruby?
        hash_info['xerces']   = Nokogiri::XERCES_VERSION
        hash_info['nekohtml'] = Nokogiri::NEKO_VERSION
      end

      hash_info
    end

    # Markdown report: a heading naming the Nokogiri version followed by
    # the YAML dump of #to_hash as an indented code block.
    def to_markdown
      # Load psych first when it is available; a missing psych is not an
      # error, so the LoadError is deliberately ignored.
      begin
        require 'psych'
      rescue LoadError
      end
      require 'yaml'
      # Markdown indented code blocks need four leading spaces per line.
      "# Nokogiri (#{Nokogiri::VERSION})\n" +
      YAML.dump(to_hash).each_line.map { |line| "    #{line}" }.join
    end

    # FIXME: maybe switch to singleton?
    @@instance = new
    # Report version mismatches as soon as this file is loaded.
    @@instance.warnings.each do |warning|
      warn "WARNING: #{warning}"
    end
    def self.instance; @@instance; end
  end

  # More complete version information about libxml
  VERSION_INFO = VersionInfo.instance.to_hash

  def self.uses_libxml? # :nodoc:
    VersionInfo.instance.libxml2?
  end

  def self.jruby? # :nodoc:
    VersionInfo.instance.jruby?
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Represents an attribute declaration in a DTD
    class AttributeDecl < Nokogiri::XML::Node
      # Methods inherited from Node that are removed from this class;
      # presumably they have no meaning for a declaration -- confirm
      # against Node before relying on that.
      undef_method :attribute_nodes
      undef_method :attributes
      undef_method :content
      undef_method :namespace
      undef_method :namespace_definitions
      # +line+ is only removed when it is defined on this class.
      undef_method :line if method_defined?(:line)

      ###
      # String of the form "#<ClassName:0x<object_id in hex> <to_s, inspected>>"
      def inspect
        "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
      end
    end
  end
end
|