superfeedr-nokogiri 1.4.0.20091116183308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
module Nokogiri
  module XML
    # FFI-backed part of Nokogiri::XML::XPath: holds the native XPath result
    # object in +cstruct+.
    class XPath

      attr_accessor :cstruct # :nodoc:

      # Wraps the +nodesetval+ field of the native result in an XML::NodeSet
      # bound to @document. When the field is nil or a null pointer, a new
      # native node set is created with xmlXPathNodeSetCreate(nil) and wrapped
      # instead.
      def node_set # :nodoc:
        ptr = cstruct[:nodesetval]
        # Test for nil before asking null?: previously a missing field left
        # +ptr+ as nil and the null? call raised NoMethodError.
        ptr = LibXML.xmlXPathNodeSetCreate(nil) if ptr.nil? || ptr.null?

        set = XML::NodeSet.new(@document)
        set.cstruct = LibXML::XmlNodeSet.new(ptr)
        set.document = @document
        set
      end

    end
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module Nokogiri
  module XML
    # FFI-backed part of Nokogiri::XML::XPathContext: holds the native XPath
    # context in +cstruct+.
    class XPathContext

      attr_accessor :cstruct # :nodoc:

      # Registers the namespace +prefix+ / +uri+ pair on the native context.
      def register_ns(prefix, uri) # :nodoc:
        LibXML.xmlXPathRegisterNs(cstruct, prefix, uri)
      end

      # Evaluates +search_path+ (converted with to_s) against the native
      # context and returns an XML::XPath wrapping the result.
      #
      # When +xpath_handler+ is given, a function lookup is registered that
      # resolves every function name the handler responds to into a callback
      # built by ruby_funcall.
      #
      # Raises XPath::SyntaxError when the structured error callback fires or
      # the evaluation returns a null pointer, and RuntimeError when the
      # generic error callback fires.
      def evaluate(search_path, xpath_handler=nil) # :nodoc:
        lookup = nil # to keep lambda in scope long enough to avoid a possible GC tragedy
        query = search_path.to_s

        if xpath_handler
          lookup = lambda do |ctx, name, uri|
            next nil unless xpath_handler.respond_to?(name)
            ruby_funcall name, xpath_handler
          end
          LibXML.xmlXPathRegisterFuncLookup(cstruct, lookup, nil)
        end

        exception_handler = lambda do |ctx, error|
          raise XPath::SyntaxError.wrap(error)
        end
        generic_exception_handler = lambda do |ctx, msg|
          raise RuntimeError.new(msg) # TODO: varargs
        end

        LibXML.xmlResetLastError()
        LibXML.xmlSetStructuredErrorFunc(nil, exception_handler)
        LibXML.xmlSetGenericErrorFunc(nil, generic_exception_handler)

        begin
          xpath_ptr = LibXML.xmlXPathEvalExpression(query, cstruct)
        ensure
          # Both handlers raise, so the callbacks must be cleared on the error
          # path as well; otherwise they stay installed after this call.
          LibXML.xmlSetStructuredErrorFunc(nil, nil)
          LibXML.xmlSetGenericErrorFunc(nil, nil)
        end

        if xpath_ptr.null?
          error = LibXML.xmlGetLastError()
          raise XPath::SyntaxError.wrap(error)
        end

        xpath = XML::XPath.new
        xpath.cstruct = LibXML::XmlXpathObject.new(xpath_ptr)
        xpath.document = cstruct[:doc]
        xpath
      end

      # Builds a context for +node+: creates a native XPath context for the
      # node's document and points its +node+ field at +node+ itself.
      def self.new(node) # :nodoc:
        LibXML.xmlXPathInit()

        ptr = LibXML.xmlXPathNewContext(node.cstruct[:doc])

        ctx = allocate
        ctx.cstruct = LibXML::XmlXpathContext.new(ptr)
        ctx.cstruct[:node] = node.cstruct
        ctx
      end

      private

      #
      #  returns a lambda that will call the handler function with marshalled parameters
      #
      #  The lambda pops +nargs+ values off the native XPath stack, converts
      #  them to Ruby values (String, true/false, Float, NodeSet, or the
      #  string cast of anything else), calls +name+ on +xpath_handler+ and
      #  hands the result back to libxml2.  Integer, Float, String, true,
      #  false, Array and XML::NodeSet results are supported; nil pushes
      #  nothing; any other result raises RuntimeError.
      #
      def ruby_funcall(name, xpath_handler) # :nodoc:
        lambda do |ctx, nargs|
          parser_context = LibXML::XmlXpathParserContext.new(ctx)
          context = parser_context.context
          doc = context.doc.ruby_doc

          params = []

          # Values come off the stack last-argument-first, hence unshift.
          nargs.times do
            obj = LibXML::XmlXpathObject.new(LibXML.valuePop(ctx))
            case obj[:type]
            when LibXML::XmlXpathObject::XPATH_STRING
              params.unshift obj[:stringval]
            when LibXML::XmlXpathObject::XPATH_BOOLEAN
              params.unshift obj[:boolval] == 1
            when LibXML::XmlXpathObject::XPATH_NUMBER
              params.unshift obj[:floatval]
            when LibXML::XmlXpathObject::XPATH_NODESET
              ns_ptr = LibXML::XmlNodeSet.new(obj[:nodesetval])
              set = NodeSet.allocate
              set.cstruct = ns_ptr
              params.unshift set
            else
              # Copy the cast string into Ruby before freeing the C buffer,
              # then pass the Ruby string (not the pointer) as the argument.
              char_ptr = LibXML.xmlXPathCastToString(obj)
              string = char_ptr.read_string
              LibXML.xmlFree(char_ptr)
              params.unshift string
            end
          end

          result = xpath_handler.send(name, *params)

          # Match on classes rather than class-name strings: Integer covers
          # the former Fixnum/Bignum constants, which no longer exist on
          # current Rubies.
          case result
          when Integer, Float
            LibXML.xmlXPathReturnNumber(ctx, result)
          when String
            LibXML.xmlXPathReturnString(
              ctx,
              LibXML.xmlXPathWrapCString(result)
            )
          when TrueClass
            LibXML.xmlXPathReturnTrue(ctx)
          when FalseClass
            LibXML.xmlXPathReturnFalse(ctx)
          when NilClass
            # a nil result pushes nothing back
          when Array
            node_set = XML::NodeSet.new(doc, result)
            LibXML.xmlXPathReturnNodeSet(
              ctx,
              LibXML.xmlXPathNodeSetMerge(nil, node_set.cstruct)
            )
          when XML::NodeSet
            LibXML.xmlXPathReturnNodeSet(
              ctx,
              LibXML.xmlXPathNodeSetMerge(nil, result.cstruct)
            )
          else
            raise RuntimeError.new("Invalid return type #{result.class.inspect}")
          end

          nil
        end # lambda
      end # ruby_funcall

    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Nokogiri
  module XSLT
    # FFI-backed part of Nokogiri::XSLT::Stylesheet: holds the native
    # stylesheet in +cstruct+.
    class Stylesheet

      attr_accessor :cstruct # :nodoc:

      # Builds a Stylesheet from +document+.  The native document is copied
      # recursively before being handed to xsltParseStylesheetDoc.  While
      # parsing, generic libxslt errors are raised as RuntimeError.
      def self.parse_stylesheet_doc(document) # :nodoc:
        LibXML.exsltRegisterAll

        generic_exception_handler = lambda do |ctx, msg|
          raise RuntimeError.new(msg) # TODO: varargs
        end
        LibXML.xsltSetGenericErrorFunc(nil, generic_exception_handler)

        begin
          ss = LibXML.xsltParseStylesheetDoc(LibXML.xmlCopyDoc(document.cstruct, 1)) # 1 => recursive
        ensure
          # The handler raises, so clear it on the error path too instead of
          # leaving it installed after this call.
          LibXML.xsltSetGenericErrorFunc(nil, nil)
        end

        obj = allocate
        obj.cstruct = LibXML::XsltStylesheet.new(ss)
        obj
      end

      # Serializes +document+ (a transform result) with this stylesheet's
      # output settings via xsltSaveResultToString and returns the bytes read
      # from the resulting buffer.
      def serialize(document) # :nodoc:
        buf_ptr = FFI::MemoryPointer.new :pointer
        buf_len = FFI::MemoryPointer.new :int
        LibXML.xsltSaveResultToString(buf_ptr, buf_len, document.cstruct, cstruct)
        # NOTE(review): XmlAlloc presumably owns and releases the native
        # buffer -- confirm against its definition.
        buf = Nokogiri::LibXML::XmlAlloc.new(buf_ptr.read_pointer)
        buf.pointer.read_string(buf_len.read_int)
      end

      # Applies the stylesheet to +document+ and returns the wrapped result
      # document.  +params+ is a flat list of values; each is converted with
      # to_s and passed to libxslt as a nil-terminated array of C strings.
      #
      # Raises RuntimeError when libxslt returns a null result.
      def transform(document, params=[]) # :nodoc:
        # Hold on to the string buffers in a local: a MemoryPointer releases
        # its memory once collected, and the pointer array below only stores
        # raw addresses, which do not keep the buffers alive.
        param_strings = params.map { |param| FFI::MemoryPointer.from_string(param.to_s) }

        param_arr = FFI::MemoryPointer.new(:pointer, params.length + 1)
        param_strings.each_with_index do |string_ptr, j|
          param_arr[j].put_pointer(0, string_ptr)
        end
        param_arr[params.length].put_pointer(0, nil)

        ptr = LibXML.xsltApplyStylesheet(cstruct, document.cstruct, param_arr)
        raise(RuntimeError, "could not perform xslt transform on document") if ptr.null?

        XML::Document.wrap(ptr)
      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'nokogiri/html/entity_lookup'
|
2
|
+
require 'nokogiri/html/document'
|
3
|
+
require 'nokogiri/html/document_fragment'
|
4
|
+
require 'nokogiri/html/sax/parser_context'
|
5
|
+
require 'nokogiri/html/sax/parser'
|
6
|
+
require 'nokogiri/html/element_description'
|
7
|
+
|
8
|
+
module Nokogiri
  ###
  # Shortcut for Nokogiri::HTML::Document.parse; all arguments and the
  # optional block are forwarded unchanged.
  def self.HTML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
    Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
  end

  module HTML
    ###
    # Shortcut for Nokogiri::HTML::Document.parse; all arguments and the
    # optional block are forwarded unchanged.
    def self.parse(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
      Document.parse(thing, url, encoding, options, &block)
    end

    ####
    # Parse a fragment from +string+ by delegating to
    # Nokogiri::HTML::DocumentFragment.parse.
    def self.fragment(string)
      HTML::DocumentFragment.parse(string)
    end

    # Shared instance of Nokogiri::HTML::EntityLookup
    NamedCharacters = EntityLookup.new
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Nokogiri
  module HTML
    ###
    # Builder for HTML documents.  It subclasses Nokogiri::XML::Builder and
    # works the same way, so read the Nokogiri::XML::Builder documentation
    # first.
    #
    # == Synopsis:
    #
    # Build an HTML document whose body carries an onload attribute and
    # contains a span with class "bold" and the text "Hello world".
    #
    #   builder = Nokogiri::HTML::Builder.new do |doc|
    #     doc.html {
    #       doc.body(:onload => 'some_func();') {
    #         doc.span.bold {
    #           doc.text "Hello world"
    #         }
    #       }
    #     }
    #   end
    #   puts builder.to_html
    #
    # Everything not described here is inherited from
    # Nokogiri::XML::Builder.
    class Builder < Nokogiri::XML::Builder
      ###
      # Convert the built document to HTML by delegating to its +to_html+
      def to_html
        @doc.to_html
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Nokogiri
  module HTML
    class Document < Nokogiri::XML::Document
      ###
      # Get the meta tag encoding for this document.  If there is no
      # Content-Type meta tag, or its content attribute carries no charset,
      # then nil is returned
      def meta_encoding
        return nil unless meta = css('meta').find { |node|
          node['http-equiv'] =~ /Content-Type/i
        }

        # A Content-Type meta tag may lack a charset (or a content attribute
        # altogether); guard the match instead of indexing into nil.
        charset = /charset\s*=\s*([\w\d-]+)/i.match(meta['content'].to_s)
        charset && charset[1]
      end

      ###
      # Set the meta tag encoding for this document.  If there is no meta
      # content tag, nil is returned and the encoding is not set.
      def meta_encoding= encoding
        return nil unless meta = css('meta').find { |node|
          node['http-equiv'] =~ /Content-Type/i
        }

        meta['content'] = "text/html; charset=%s" % encoding
        encoding
      end

      ####
      # Serialize Node using +options+.  Save options can also be set using a
      # block. See SaveOptions.
      #
      # When +options+ has no :save_with entry, it defaults to
      # FORMAT | AS_HTML | NO_DECLARATION | NO_EMPTY_TAGS.
      #
      # These two statements are equivalent:
      #
      #  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize(:encoding => 'UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      def serialize options = {}, &block
        options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
            XML::Node::SaveOptions::AS_HTML |
            XML::Node::SaveOptions::NO_DECLARATION |
            XML::Node::SaveOptions::NO_EMPTY_TAGS
        super
      end

      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      def fragment tags = nil
        DocumentFragment.new(self, tags)
      end

      class << self
        ###
        # Parse HTML.  +thing+ may be a String, or any object that
        # responds to _read_ and _close_ such as an IO, or StringIO.
        # +url+ is resource where this document is located.  +encoding+ is the
        # encoding that should be used when processing the document. +options+
        # is a number that sets options in the parser, such as
        # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
        # Nokogiri::XML::ParseOptions.
        #
        # A nil or empty +string_or_io+ yields a new, empty document.
        def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block

          # Integer covers what used to be Fixnum, a constant that no longer
          # exists on current Rubies.
          options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
          # Give the options to the user
          yield options if block_given?

          if string_or_io.respond_to?(:encoding)
            encoding ||= string_or_io.encoding.name
          end

          if string_or_io.respond_to?(:read)
            url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
            return read_io(string_or_io, url, encoding, options.to_i)
          end

          # read_memory pukes on empty docs
          return new if string_or_io.nil? || string_or_io.empty?

          read_memory(string_or_io, url, encoding, options.to_i)
        end
      end

    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Nokogiri
  module HTML
    class DocumentFragment < Nokogiri::XML::DocumentFragment

      ####
      # Build a fragment from +tags+, attached to a newly created
      # HTML::Document
      def self.parse(tags)
        new(HTML::Document.new, tags)
      end

    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Nokogiri
  module HTML
    class ElementDescription
      ###
      # True when this element is not an inline element
      def block?
        !inline?
      end

      ###
      # String form: the element name and its description, separated by ": "
      def to_s
        format('%s: %s', name, description)
      end

      ###
      # Inspection string: class name, element name and description
      def inspect
        format('#<%s: %s %s>', self.class.name, name, description)
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Nokogiri
  module HTML
    ###
    # Nokogiri lets you write a SAX parser to process HTML but get HTML
    # correction features.
    #
    # See Nokogiri::HTML::SAX::Parser for a basic example of using a
    # SAX parser with HTML.
    #
    # For more information on SAX parsers, see Nokogiri::XML::SAX
    module SAX
      ###
      # This class lets you perform SAX style parsing on HTML with HTML
      # error correction.
      #
      # Here is a basic usage example:
      #
      #   class MyDoc < Nokogiri::XML::SAX::Document
      #     def start_element name, attributes = []
      #       puts "found a #{name}"
      #     end
      #   end
      #
      #   parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
      #   parser.parse(File.open(ARGV[0], 'rb') { |f| f.read })
      #
      # For more information on SAX parsers, see Nokogiri::XML::SAX
      class Parser < Nokogiri::XML::SAX::Parser
        ###
        # Parse html stored in +data+ using +encoding+
        #
        # Raises ArgumentError when +data+ is nil; returns nil without
        # parsing when +data+ is empty.
        def parse_memory data, encoding = 'UTF-8'
          raise ArgumentError unless data
          return unless data.length > 0
          ParserContext.memory(data, encoding).parse_with self
        end

        ###
        # Parse a file with +filename+
        #
        # Raises ArgumentError when +filename+ is nil, Errno::ENOENT when the
        # file does not exist and Errno::EISDIR when it is a directory.
        def parse_file filename, encoding = 'UTF-8'
          raise ArgumentError unless filename
          # File.exist? is the supported spelling; the plural alias has been
          # removed from current Rubies.
          raise Errno::ENOENT unless File.exist?(filename)
          raise Errno::EISDIR if File.directory?(filename)
          ParserContext.file(filename, encoding).parse_with self
        end
      end
    end
  end
end
|