tenderlove-nokogiri 0.0.0-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +120 -0
- data/README.ja.txt +86 -0
- data/README.txt +87 -0
- data/Rakefile +264 -0
- data/ext/nokogiri/extconf.rb +59 -0
- data/ext/nokogiri/html_document.c +83 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +32 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +40 -0
- data/ext/nokogiri/native.h +51 -0
- data/ext/nokogiri/xml_cdata.c +52 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_document.c +159 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_dtd.c +117 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_node.c +709 -0
- data/ext/nokogiri/xml_node.h +15 -0
- data/ext/nokogiri/xml_node_set.c +124 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +429 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +174 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_syntax_error.c +194 -0
- data/ext/nokogiri/xml_syntax_error.h +11 -0
- data/ext/nokogiri/xml_text.c +29 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +46 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +81 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +108 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri/css/node.rb +95 -0
- data/lib/nokogiri/css/parser.rb +24 -0
- data/lib/nokogiri/css/parser.y +198 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +165 -0
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/hpricot.rb +47 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/html.rb +95 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/before_handler.rb +32 -0
- data/lib/nokogiri/xml/builder.rb +79 -0
- data/lib/nokogiri/xml/cdata.rb +9 -0
- data/lib/nokogiri/xml/document.rb +30 -0
- data/lib/nokogiri/xml/dtd.rb +6 -0
- data/lib/nokogiri/xml/node.rb +195 -0
- data/lib/nokogiri/xml/node_set.rb +183 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +14 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +33 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/syntax_error.rb +21 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +6 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/lib/nokogiri/xslt.rb +11 -0
- data/lib/nokogiri.rb +51 -0
- data/nokogiri.gemspec +34 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +224 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/css/test_xpath_visitor.rb +54 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +70 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +7 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +423 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +78 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +78 -0
- data/test/html/test_document.rb +86 -0
- data/test/test_convert_xpath.rb +180 -0
- data/test/test_nokogiri.rb +36 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +29 -0
- data/test/xml/sax/test_parser.rb +93 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_cdata.rb +18 -0
- data/test/xml/test_document.rb +171 -0
- data/test/xml/test_dtd.rb +43 -0
- data/test/xml/test_node.rb +223 -0
- data/test/xml/test_node_set.rb +116 -0
- data/test/xml/test_text.rb +13 -0
- metadata +214 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
|
|
3
|
+
module Nokogiri
  ###
  # Hpricot compatibility layer: exposes Hpricot-style constants and entry
  # points, and attaches the Hpricot decorator modules to parsed documents.
  module Hpricot
    # Hpricot-named aliases for existing classes / tables.
    STag = String
    Elem = XML::Node
    NamedCharacters = Nokogiri::HTML::NamedCharacters

    class << self
      ###
      # Parse +args+ with Nokogiri.parse and return the decorated document.
      def parse(*args)
        add_decorators(Nokogiri.parse(*args))
      end

      ###
      # Parse +string+ with Nokogiri::XML.parse and return the decorated
      # document.
      def XML(string)
        add_decorators(Nokogiri::XML.parse(string))
      end

      ###
      # Build a NodeSet that holds one Text node created from +string+.
      def make string
        node_set = XML::NodeSet.new
        node_set << XML::Text.new(string)
        node_set
      end

      ###
      # Register the Hpricot decorators on +doc+ (under the 'node',
      # 'element', 'document' and 'nodeset' keys), decorate +doc+ itself
      # and return it.
      def add_decorators(doc)
        %w[node element document].each do |key|
          doc.decorators[key] << Decorators::Hpricot::Node
        end
        doc.decorators['nodeset'] << Decorators::Hpricot::NodeSet
        doc.decorate!
        doc
      end
    end
  end

  class << self
    ###
    # Hpricot-style entry point.  With a block, the document is built with
    # Nokogiri::HTML::Builder; otherwise +args+ are handed to
    # Nokogiri::HTML.parse.  Either way the decorated document is returned.
    def Hpricot(*args, &block)
      doc =
        if block_given?
          Nokogiri::HTML::Builder.new(&block).doc
        else
          Nokogiri::HTML.parse(*args)
        end
      Nokogiri::Hpricot.add_decorators(doc)
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
module Nokogiri
  module HTML
    module SAX
      ###
      # SAX parser for HTML input.  The actual parsing is delegated to
      # +native_parse_memory+ / +native_parse_file+, which are not defined in
      # this file (presumably provided by the native extension).
      class Parser < XML::SAX::Parser
        ###
        # Parse html stored in +data+ using +encoding+ (defaults to 'UTF-8').
        def parse_memory data, encoding = 'UTF-8'
          native_parse_memory(data, encoding)
        end

        ###
        # Parse the file named +filename+ using +encoding+ (defaults to
        # 'UTF-8').
        #
        # Raises Errno::ENOENT when +filename+ does not exist and
        # Errno::EISDIR when it is a directory; both errors carry the
        # offending filename in their message.
        def parse_file filename, encoding = 'UTF-8'
          # File.exist? is used because File.exists? is deprecated and no
          # longer available on Ruby 3.2+.
          raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
          raise Errno::EISDIR, filename.to_s if File.directory?(filename)
          native_parse_file filename, encoding
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
require 'nokogiri/html/document'
|
|
2
|
+
require 'nokogiri/html/sax/parser'
|
|
3
|
+
|
|
4
|
+
module Nokogiri
  class << self
    ###
    # Shortcut for Nokogiri::HTML.parse; takes the same arguments and
    # defaults.
    def HTML thing, url = nil, encoding = nil, options = 2145
      Nokogiri::HTML.parse(thing, url, encoding, options)
    end
  end

  module HTML
    # Parser options
    PARSE_NOERROR   = 1 << 5 # No error reports
    PARSE_NOWARNING = 1 << 6 # No warnings
    PARSE_PEDANTIC  = 1 << 7 # Pedantic errors
    PARSE_NOBLANKS  = 1 << 8 # Remove blanks nodes
    PARSE_NONET     = 1 << 11 # No network access

    class << self
      ###
      # Parse HTML from +string_or_io+ and return the result of
      # Document.read_memory.
      #
      # +string_or_io+:: a String, or any object responding to +read+
      # +url+::          document URL; when nil and the IO responds to +path+,
      #                  its path is used
      # +encoding+::     passed through unchanged
      # +options+::      parser option bits.  The default 2145 equals
      #                  1 | PARSE_NOERROR | PARSE_NOWARNING | PARSE_NONET;
      #                  bit 0 has no named constant here (presumably
      #                  libxml2's "recover" flag -- TODO confirm).
      def parse string_or_io, url = nil, encoding = nil, options = 2145
        if string_or_io.respond_to?(:read)
          url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
          string_or_io = string_or_io.read
        end

        Document.read_memory(string_or_io, url, encoding, options)
      end

      ####
      # Parse a fragment from +string+ in to a NodeSet.
      #
      # The parsed document is searched depth-first for the first node whose
      # name appears in +string+ as an opening tag; the sibling list that
      # contains that node is returned.  When no node name is found in
      # +string+, the document's top-level children are returned.
      def fragment string
        top_level = parse(string).children
        find_fragment_children(top_level, string) || top_level
      end

      private

      # Depth-first search behind #fragment.  Returns the first +children+
      # list containing a node named in +string+, or nil.  The result of the
      # recursive call is propagated explicitly so a nested match reaches
      # the caller.
      def find_fragment_children(children, string)
        children.each do |child|
          # Escape the name so it is matched literally inside the pattern.
          return children if string =~ /<#{Regexp.escape(child.name.to_s)}/
          found = find_fragment_children(child.children, string)
          return found if found
        end
        nil
      end
    end

    # Named character references mapped to their numeric code points.
    # Frozen so the shared lookup table cannot be modified by accident.
    NamedCharacters =
      {"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913,
       "Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
       "Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
       "Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
       "Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
       "Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
       "OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
       "Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
       "Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
       "THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
       "Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
       "Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
       "aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
       "and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
       "atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
       "bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
       "chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
       "crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
       "darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
       "eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
       "ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
       "euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
       "frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
       "ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
       "hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
       "image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
       "isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
       "lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
       "le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
       "lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
       "micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
       "nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
       "nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
       "oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
       "oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
       "otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
       "permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
       "plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
       "psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
       "raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
       "reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
       "rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
       "shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
       "sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
       "sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
       "theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
       "times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
       "ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
       "uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
       "yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}.freeze
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # SAX handler that collects finished top-level nodes in +after_nodes+
    # instead of inserting them immediately.
    class AfterHandler < BeforeHandler
      # Nodes collected by #end_element, in the order they were closed.
      attr_accessor :after_nodes

      ###
      # +node+ and +original_html+ are handed to BeforeHandler#initialize;
      # the list of collected nodes starts out empty.
      def initialize node, original_html
        super(node, original_html)
        @after_nodes = []
      end

      ###
      # Called when element +name+ is closed.  Ignored unless "<name"
      # occurs (case-insensitively) in the original markup.  When exactly one
      # node is on the stack it is recorded in +after_nodes+; the top of the
      # stack is then popped.
      def end_element name
        if @original_html =~ /<#{name}/i
          @after_nodes << @stack.last if @stack.length == 1
          @stack.pop
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    # SAX handler that rebuilds nodes from parser events and inserts each
    # finished top-level node as the previous sibling of a target node.
    class BeforeHandler < Nokogiri::XML::SAX::Document # :nodoc:
      # +node+ is the insertion target; +original_html+ is the markup being
      # parsed, used to ignore elements that do not appear in it.
      def initialize node, original_html
        @original_html = original_html
        @node = node
        @stack = []
      end

      # Element +name+ was opened with the flat [key, value, ...] list
      # +attrs+.  Ignored unless "<name" occurs (case-insensitively) in the
      # original markup.  Otherwise a new Node is created, given its
      # attributes, attached to the current top of the stack (if any) and
      # pushed on the stack.
      def start_element name, attrs = []
        if @original_html =~ /<#{name}/i
          element = Node.new(name)
          Hash[*attrs].each do |key, value|
            element[key] = value
          end
          element.parent = @stack.last unless @stack.length == 0
          @stack << element
        end
      end

      # Append +string+ to the content of the node on top of the stack.
      # NOTE(review): when the stack is empty @stack.last is nil and this
      # raises NoMethodError -- confirm whether text outside a tracked
      # element can reach this handler.
      def characters string
        current = @stack.last
        current.content = current.content + string
      end

      # Element +name+ was closed.  Ignored unless "<name" occurs
      # (case-insensitively) in the original markup.  When exactly one node
      # is on the stack it is inserted before the target node; the top of the
      # stack is then popped.
      def end_element name
        if @original_html =~ /<#{name}/i
          @node.add_previous_sibling @stack.last if @stack.length == 1
          @stack.pop
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Block-based DSL for building a document.  Unknown method calls inside
    # the block create nodes named after the method; nested blocks create
    # child nodes.
    class Builder
      # +doc+ is the document being built; +parent+ is the node that newly
      # created nodes are attached to.
      attr_accessor :doc, :parent

      ###
      # Create a builder and, when a block is given, evaluate it in the
      # context of the builder.  The document class is the +Document+
      # constant found next to the builder's own class name (e.g.
      # Nokogiri::XML::Builder builds a Nokogiri::XML::Document).
      def initialize(&block)
        namespace = self.class.name.split('::')
        namespace[-1] = 'Document'
        # eval is kept on purpose: the name comes from our own class, and a
        # relative name is resolved in this lexical scope.
        @doc = eval(namespace.join('::')).new
        @parent = @doc
        # instance_eval raises without a block, so only run it when given.
        instance_eval(&block) if block_given?
        @parent = @doc
      end

      ###
      # Insert a Text node containing +string+ under the current parent.
      def text(string)
        node = Nokogiri::XML::Text.new(string)
        insert(node)
      end

      ###
      # Insert a CData node containing +string+ under the current parent.
      def cdata(string)
        node = Nokogiri::XML::CData.new(@doc, string)
        insert(node)
      end

      ###
      # Serialize the document being built via its +to_xml+.
      def to_xml
        @doc.to_xml
      end

      ###
      # Create a node named after +method+ and insert it under the current
      # parent.  If the first argument is a Hash it supplies the node's
      # attributes (keys and values are converted with +to_s+); any other
      # truthy first argument becomes the node's content.  A block is
      # evaluated with the new node as parent.  Returns a NodeBuilder.
      def method_missing(method, *args, &block)
        node = Nokogiri::XML::Node.new(method.to_s) { |n|
          content = args.first
          if content.is_a?(Hash)
            content.each { |k,v| n[k.to_s] = v.to_s }
          elsif content
            n.content = content
          end
        }
        insert(node, &block)
      end

      private

      # Attach +node+ to the current parent.  With a block, +node+ becomes
      # the parent while the block runs and the previous parent is restored
      # afterwards.  Returns a NodeBuilder wrapping +node+.
      def insert(node, &block)
        node.parent = @parent
        if block_given?
          @parent = node
          instance_eval(&block)
          @parent = node.parent
        end
        NodeBuilder.new(node, self)
      end

      # Wrapper returned for every inserted node; lets chained calls set the
      # node's id, attributes and classes.
      class NodeBuilder # :nodoc:
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        # Chained calls on an inserted node:
        #   name!   sets the id attribute to "name"
        #   name=   sets attribute "name" to the first argument
        #   name    appends "name" to the class attribute
        # For the first and last form a truthy first argument becomes the
        # node's content.  With a block, the block is evaluated in the
        # document builder with this node as parent and its value is
        # returned; otherwise self is returned to allow further chaining.
        def method_missing(method, *args, &block)
          case method.to_s
          when /^(.*)!$/
            @node['id'] = $1
            @node.content = args.first if args.first
          when /^(.*)=/
            @node[$1] = args.first
          else
            @node['class'] =
              ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
            @node.content = args.first if args.first
          end
          if block_given?
            # Remember the builder's parent and restore it once the block is
            # done; otherwise nodes created after this call would be nested
            # under @node instead of becoming its siblings.
            previous_parent = @doc_builder.parent
            @doc_builder.parent = @node
            begin
              return @doc_builder.instance_eval(&block)
            ensure
              @doc_builder.parent = previous_parent
            end
          end
          self
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Document node.  Besides the behaviour inherited from Node it keeps the
    # registry of decorator modules applied to nodes of this document.
    class Document < Node
      ###
      # Hash of decorator lists.  Looking up an unknown key stores and
      # returns a new empty Array for it.
      def decorators
        @decorators ||= Hash.new { |registry, key| registry[key] = [] }
      end

      ###
      # A document's name is always 'document'.
      def name
        'document'
      end

      ###
      # Apply any decorators to +node+.  The lookup key is the last segment
      # of the node's class name, downcased.
      def decorate(node)
        class_key = node.class.name.split('::').last.downcase
        decorators[class_key].each { |decorator| node.extend(decorator) }
      end

      ###
      # Serialized form of the document (delegates to +serialize+).
      def to_xml
        serialize
      end

      ###
      # Namespaces collected from the root node and its subtree, or an empty
      # Hash when the document has no root.
      def namespaces
        return {} unless root
        root.collect_namespaces
      end
    end
  end
end
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
module Nokogiri
  module XML
    ###
    # Ruby-level behaviour of a node.  Several methods used here (+child+,
    # +next_sibling+, +unlink+, +key?+, +get+, +type+, +path+, +document+,
    # +blank?+, +namespaces+, +encode_special_chars+, +native_content=+,
    # +add_next_sibling+, ...) are not defined in this file.
    class Node
      # Node type codes; compared against +type+ by the predicates below.
      CDATA_SECTION_NODE = 4
      COMMENT_NODE = 8
      DOCUMENT_NODE = 9
      HTML_DOCUMENT_NODE = 13
      DTD_NODE = 14
      ELEMENT_DECL = 15
      ATTRIBUTE_DECL = 16
      ENTITY_DECL = 17
      NAMESPACE_DECL = 18
      XINCLUDE_START = 19
      XINCLUDE_END = 20
      DOCB_DOCUMENT_NODE = 21

      # Not referenced in this file; kept because code elsewhere may use it.
      @@owned = {}

      ###
      # Decorate this node with the decorators set up in this node's Document
      def decorate!
        document.decorate(self) if document
      end

      ###
      # Get the list of children for this node as a NodeSet.  Children for
      # which +blank?+ is true are left out.
      def children
        list = NodeSet.new
        list.document = document
        document.decorate(list)

        node = self.child
        while node
          list << node unless node.blank?
          node = node.next
        end
        list
      end

      ###
      # Search this node for +paths+. +paths+ can be XPath or CSS, and an
      # optional hash of namespaces may be appended.
      # See Node#xpath and Node#css.
      def search *paths
        ns = paths.last.is_a?(Hash) ? paths.pop : {}
        xpaths = paths.map { |path|
          # A path starting with "/" or "./" is used as XPath as-is; anything
          # else is parsed as CSS and converted to XPath.
          path =~ /^(\.\/|\/)/ ? path : CSS::Parser.parse(path).map { |ast|
            ast.to_xpath
          }
        }.flatten.uniq
        xpath(*(xpaths + [ns]))
      end
      alias :/ :search

      ###
      # Evaluate each XPath expression in +paths+ against this node; an
      # optional hash of namespaces may be appended.  Returns an empty
      # NodeSet when the document has no root, the single result set when
      # one path is given, and otherwise a new NodeSet holding the nodes of
      # all result sets in order.
      def xpath *paths
        ns = paths.last.is_a?(Hash) ? paths.pop : {}

        return NodeSet.new unless document.root

        sets = paths.map { |path|
          ctx = XPathContext.new(self)
          ctx.register_namespaces(ns)
          set = ctx.evaluate(path).node_set
          set.document = document
          document.decorate(set)
          set
        }
        return sets.first if sets.length == 1

        NodeSet.new do |combined|
          document.decorate(combined)
          sets.each do |set|
            set.each do |node|
              combined << node
            end
          end
        end
      end

      ###
      # Search this node with the CSS selectors in +rules+.  Each selector is
      # converted to XPath and prefixed with "." before being passed to
      # Node#xpath.
      def css *rules
        xpath(*(rules.map { |rule|
          CSS::Parser.parse(rule).map { |ast| "." + ast.to_xpath }
        }.flatten.uniq))
      end

      ###
      # First result of searching for +path+ (see Node#search), with the
      # optional namespace hash +ns+.
      def at path, ns = {}
        search("#{path}", ns).first
      end

      ###
      # Value of attribute +property+, or nil when the node has no such
      # attribute.
      def [](property)
        return nil unless key?(property)
        get(property)
      end

      ###
      # The next sibling of this node.
      def next
        next_sibling
      end

      ###
      # Unlink this node (delegates to +unlink+).
      def remove
        unlink
      end

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before data
        parser = sax_parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after data
        handler = AfterHandler.new(self, data)
        parser = sax_parser_class.new(handler)
        parser.parse(data)
        # Each node is inserted directly after self, so walking the list in
        # reverse keeps the collected order.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      ###
      # True when the node has an attribute named +property+.
      def has_attribute?(property)
        key? property
      end

      alias :get_attribute :[]

      ###
      # Set attribute +name+ to +value+.
      def set_attribute(name, value)
        self[name] = value
      end

      ###
      # The content of this node (delegates to +content+).
      def text
        content
      end
      alias :inner_text :text

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first.  Plain assignment
      # (<tt>node.content = str</tt>) always encodes; +encode+ can only be
      # turned off through an explicit call such as
      # <tt>node.send(:content=, str, false)</tt>.
      def content= string, encode = true
        self.native_content = encode ? encode_special_chars(string) : string
      end

      ###
      # True when this node's type is COMMENT_NODE.
      def comment?
        type == COMMENT_NODE
      end

      ###
      # True when this node's type is CDATA_SECTION_NODE.
      def cdata?
        type == CDATA_SECTION_NODE
      end

      ###
      # True when this node's type is DOCUMENT_NODE.
      def xml?
        type == DOCUMENT_NODE
      end

      ###
      # True when this node's type is HTML_DOCUMENT_NODE.
      def html?
        type == HTML_DOCUMENT_NODE
      end

      ###
      # Same output as +to_xml+.
      def to_html
        to_xml
      end
      alias :to_s :to_html
      alias :inner_html :to_html

      ###
      # CSS-style path for this node, derived from +path+: the segments are
      # joined with " > " and every "[n]" index becomes ":nth-of-type(n)".
      def css_path
        path.split(/\//).map { |part|
          part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        }.compact.join(' > ')
      end

      # recursively get all namespaces from this node and its subtree
      def collect_namespaces
        # TODO: print warning message if a prefix refers to more than one URI in the document?
        ns = {}
        traverse {|j| ns.merge!(j.namespaces)}
        ns
      end

      ####
      # Yields self and all children to +block+ recursively.  Children are
      # visited before the node itself.
      def traverse(&block)
        children.each{|j| j.traverse(&block) }
        block.call(self)
      end

      private

      # SAX parser class matching this node's document class: the last
      # segment of the document's class name is replaced with "SAX::Parser"
      # (e.g. Nokogiri::HTML::Document -> Nokogiri::HTML::SAX::Parser).
      # eval is kept on purpose: the name is derived from a class name, and a
      # relative name is resolved in this lexical scope.
      def sax_parser_class
        classes = document.class.name.split('::')
        classes[-1] = 'SAX::Parser'
        eval(classes.join('::'))
      end
    end
  end
end
|