tenderlove-nokogiri 0.0.0.20081001111445
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +105 -0
- data/README.txt +51 -0
- data/Rakefile +70 -0
- data/ext/nokogiri/extconf.rb +24 -0
- data/ext/nokogiri/html_document.c +85 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +32 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +35 -0
- data/ext/nokogiri/native.h +32 -0
- data/ext/nokogiri/xml_cdata.c +36 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_document.c +159 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_node.c +573 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +90 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +420 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +161 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_text.c +25 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +39 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +69 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +83 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +45 -0
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/css/node.rb +95 -0
- data/lib/nokogiri/css/parser.rb +24 -0
- data/lib/nokogiri/css/parser.y +198 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +153 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
- data/lib/nokogiri/hpricot.rb +46 -0
- data/lib/nokogiri/html.rb +64 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml.rb +29 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/before_handler.rb +32 -0
- data/lib/nokogiri/xml/builder.rb +79 -0
- data/lib/nokogiri/xml/document.rb +22 -0
- data/lib/nokogiri/xml/node.rb +162 -0
- data/lib/nokogiri/xml/node_set.rb +136 -0
- data/lib/nokogiri/xml/reader.rb +14 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +33 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +6 -0
- data/lib/nokogiri/xslt.rb +11 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/nokogiri.gemspec +33 -0
- data/test/css/test_nthiness.rb +141 -0
- data/test/css/test_parser.rb +214 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/files/staff.xml +57 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +70 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +7 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +412 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +72 -0
- data/test/hpricot/test_xml.rb +26 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +78 -0
- data/test/html/test_document.rb +22 -0
- data/test/test_convert_xpath.rb +173 -0
- data/test/test_nokogiri.rb +36 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +29 -0
- data/test/xml/sax/test_parser.rb +93 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_document.rb +141 -0
- data/test/xml/test_node.rb +148 -0
- data/test/xml/test_node_set.rb +54 -0
- data/test/xml/test_text.rb +13 -0
- metadata +191 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
module Nokogiri
|
|
2
|
+
module CSS
|
|
3
|
+
class XPathVisitor
|
|
4
|
+
# Translates a CSS function node into an XPath expression fragment.
#
# +node.value.first+ is the function name including its opening
# parenthesis (for example "contains("); +node.value[1]+, when present,
# is used as the function's argument. Functions that are not recognised
# are emitted unchanged with a closing parenthesis appended.
#
# note that nth-child and nth-last-child are preprocessed in css/node.rb.
def visit_function(node)
  name = node.value.first
  arg  = node.value[1]

  case name
  when /^text\(/
    'child::text()'
  when /^self\(/
    "self::#{arg}"
  when /^(eq|nth|nth-of-type|nth-child)\(/
    # An an+b argument gets the full positional formula; anything else
    # is treated as a literal position.
    return an_plus_b(arg) if arg.is_a?(Nokogiri::CSS::Node) && arg.type == :AN_PLUS_B

    "position() = " + arg
  when /^(first|first-of-type)\(/
    "position() = 1"
  when /^(last|last-of-type)\(/
    "position() = last()"
  when /^(nth-last-child|nth-last-of-type)\(/
    "position() = last() - #{arg}"
  when /^contains\(/
    "contains(., #{arg})"
  when /^gt\(/
    "position() > #{arg}"
  when /^only-child\(/
    "last() = 1"
  else
    name + ')'
  end
end
|
|
33
|
+
|
|
34
|
+
# Wraps the XPath produced for the first child of +node+ in +not(...)+.
def visit_not(node)
  negated = node.value.first.accept(self)
  'not(' + negated + ')'
end
|
|
37
|
+
|
|
38
|
+
# Builds "<last>[preceding-sibling::<first>]": the last child of +node+ is
# the selected element and the first child must appear as one of its
# preceding siblings.
def visit_preceding_selector(node)
  # The last child is visited before the first, as in the output order.
  subject = node.value.last.accept(self)
  sibling = node.value.first.accept(self)
  subject + '[preceding-sibling::' + sibling + ']'
end
|
|
44
|
+
|
|
45
|
+
# Converts an id token such as "#main" into the XPath test "@id = 'main'".
# When the token does not start with "#", the id in the result is empty.
def visit_id(node)
  match = /^#(.*)$/.match(node.value.first)
  "@id = '#{match && match[1]}'"
end
|
|
49
|
+
|
|
50
|
+
# Translates an attribute condition (+[attr]+, +[attr=value]+, +[attr*=value]+
# and friends) into an XPath predicate body.
#
# +node.value+ holds either just the subject, or the subject, an operator
# string and a value. The subject is prefixed with "child::" unless it is
# a FUNCTION node or its first value starts with "@".
def visit_attribute_condition(node)
  subject = node.value.first
  bare = subject.type == :FUNCTION || subject.value.first =~ /^@/
  attribute = (bare ? '' : 'child::') + subject.accept(self)

  # Support non-standard css
  attribute = attribute.gsub(/^@@/, '@')

  # Only a three-part condition has an operator and a value to compare.
  return attribute unless node.value.length == 3

  operator = node.value[1]
  value = node.value.last
  # Quote the value unless it already starts with a quote character.
  value = "'#{value}'" unless value =~ /^['"]/

  case operator
  when '*='
    "contains(#{attribute}, #{value})"
  when '~='
    # Whole value, leading word, middle word, or trailing word.
    exact    = "#{attribute} = #{value}"
    leading  = "starts-with(#{attribute}, concat(#{value}, \" \"))"
    middle   = "contains(#{attribute}, concat(\" \", #{value}, \" \"))"
    trailing = "(substring(#{attribute}, string-length(#{attribute}) - " \
               "string-length(#{value}), string-length(#{value}) + 1) = concat(\" \", #{value}))"
    [exact, leading, middle, trailing].join(' or ')
  when '$='
    "substring(#{attribute}, string-length(#{attribute}) - " \
      "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
  else
    "#{attribute} #{operator} #{value}"
  end
end
|
|
83
|
+
|
|
84
|
+
# Translates a pseudo-class into an XPath predicate body.
#
# A FUNCTION node (e.g. +:nth-child(2)+) is delegated to its own visitor
# method. Plain names are looked up in a fixed table; names that are not
# in the table produce the always-true test '1 = 1'.
def visit_pseudo_class(node)
  pseudo = node.value.first
  return pseudo.accept(self) if pseudo.is_a?(Nokogiri::CSS::Node) && pseudo.type == :FUNCTION

  predicates = {
    "first"         => "position() = 1",
    "last"          => "position() = last()",
    "first-of-type" => "position() = 1",
    "last-of-type"  => "position() = last()",
    "only-of-type"  => "last() = 1",
    "empty"         => "not(node())",
    "parent"        => "node()"
  }
  predicates.fetch(pseudo, '1 = 1')
end
|
|
101
|
+
|
|
102
|
+
# Translates a class selector (+.name+) into an XPath predicate body.
#
# The class attribute is a whitespace-separated list, so the name must be
# matched as a whole token: the attribute is whitespace-normalised and
# padded with spaces, then searched for " name ". A plain substring test
# would wrongly let +.foo+ match +class="foobar"+.
def visit_class_condition(node)
  "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
end
|
|
105
|
+
|
|
106
|
+
# Joins the XPath of the first and last child of +node+ with " and ".
def visit_combinator(node)
  left  = node.value.first.accept(self)
  right = node.value.last.accept(self)
  left + ' and ' + right
end
|
|
110
|
+
|
|
111
|
+
# Builds "<first>[<last>]": the first child of +node+ is the selected
# element and the last child becomes its predicate.
def visit_conditional_selector(node)
  element   = node.value.first.accept(self)
  predicate = node.value.last.accept(self)
  element + '[' + predicate + ']'
end
|
|
115
|
+
|
|
116
|
+
# Builds "<first>//<last>", i.e. the last child of +node+ anywhere below
# the first.
def visit_descendant_selector(node)
  ancestor   = node.value.first.accept(self)
  descendant = node.value.last.accept(self)
  ancestor + '//' + descendant
end
|
|
121
|
+
|
|
122
|
+
# Builds "<first>/<last>", i.e. the last child of +node+ directly below
# the first.
def visit_child_selector(node)
  parent = node.value.first.accept(self)
  child  = node.value.last.accept(self)
  parent + '/' + child
end
|
|
127
|
+
|
|
128
|
+
# An element name is used as-is: returns the first value of +node+.
def visit_element_name(node)
  node.value.first
end
|
|
131
|
+
|
|
132
|
+
# Entry point of the visitor: asks +node+ to accept this visitor and
# returns whatever the node returns.
def accept(node)
  node.accept(self)
end
|
|
135
|
+
|
|
136
|
+
private
|
|
137
|
+
# Builds the positional XPath test for an "an+b" expression.
#
# +node.value+ must hold exactly four tokens; the first is converted to
# the integer a and the fourth to the integer b.
# Raises ArgumentError when the token count is not four.
def an_plus_b(node)
  tokens = node.value
  unless tokens.size == 4
    raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{tokens.inspect}"
  end

  step   = tokens[0].to_i
  offset = tokens[3].to_i

  # Without an offset, every step-th position matches.
  return "(position() mod #{step}) = 0" if offset == 0

  # A negative step counts down from the offset, a positive one counts up.
  comparison = step < 0 ? "<=" : ">="
  "(position() #{comparison} #{offset}) and (((position()-#{offset}) mod #{step.abs}) = 0)"
end
|
|
150
|
+
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require 'nokogiri/decorators/hpricot'
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
module Nokogiri
|
|
2
|
+
module Decorators
|
|
3
|
+
module Hpricot
|
|
4
|
+
module Node
|
|
5
|
+
# Searches with an Hpricot-style +path+ (XPath or CSS): the path is first
# converted to a list of XPath expressions, which are then handed to the
# underlying +search+ implementation.
def search(path)
  xpaths = convert_to_xpath(path)
  super(*xpaths)
end
|
|
8
|
+
# Operator form of +search+: <tt>node / path</tt> returns <tt>search(path)</tt>.
def /(path)
  search(path)
end
|
|
9
|
+
|
|
10
|
+
# Returns the receiver itself.
def raw_attributes
  self
end
|
|
11
|
+
|
|
12
|
+
# Returns the first element whose id attribute equals +element_id+.
#
# NOTE(review): +element_id+ is interpolated into a single-quoted XPath
# literal without escaping — confirm callers never pass ids containing "'".
def get_element_by_id(element_id)
  matches = search("//*[@id='#{element_id}']")
  matches.first
end
|
|
15
|
+
|
|
16
|
+
# Returns the result of searching for every +tag+ element ("//tag").
def get_elements_by_tag_name(tag)
  xpath = "//#{tag}"
  search(xpath)
end
|
|
19
|
+
|
|
20
|
+
# Converts +rule+ (anything responding to +to_s+) into an array of XPath
# expressions.
#
# * A rule starting with "//" is made relative by prefixing ".".
# * A rule starting with "/", or with one character followed by "//",
#   is returned unchanged.
# * Anything else is parsed as CSS; each parsed selector is preprocessed,
#   translated by an XPathVisitor extended with the Hpricot adjustments,
#   and prefixed with ".//".
#
# NOTE(review): the "." in %r{^.//} is unescaped, so it matches any single
# leading character (e.g. "a//b"), not only a literal dot — confirm
# whether that is intentional.
def convert_to_xpath(rule)
  selector = rule.to_s
  return [".#{selector}"] if selector =~ %r{^//}
  return [selector] if selector =~ %r{^/} || selector =~ %r{^.//}

  asts = CSS::Parser.parse(selector)
  visitor = CSS::XPathVisitor.new
  visitor.extend(Hpricot::XPathVisitor)
  asts.map { |ast| './/' + visitor.accept(ast.preprocess!) }
end
|
|
36
|
+
|
|
37
|
+
# Hpricot-compatible accessor: returns the receiver's +name+.
def target
  name
end
|
|
40
|
+
|
|
41
|
+
# Hpricot-compatible accessor: returns the result of +to_html+.
def to_original_html
  to_html
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module Nokogiri
|
|
2
|
+
module Decorators
|
|
3
|
+
module Hpricot
|
|
4
|
+
module NodeSet
|
|
5
|
+
# Searches with the CSS +rule+ applied to the context nodes themselves:
# the first selector parsed from +rule+ is translated to XPath and
# searched for as ".//self::<xpath>". Only the first parsed selector is
# used.
def filter(rule)
  selectors = CSS::Parser.parse(rule.to_s)
  visitor = CSS::XPathVisitor.new
  visitor.extend(Hpricot::XPathVisitor)
  xpath = visitor.accept(selectors.first)
  search('.//self::' + xpath)
end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
module Nokogiri
|
|
2
|
+
module Decorators
|
|
3
|
+
module Hpricot
|
|
4
|
+
####
|
|
5
|
+
# This mixin does custom adjustments to deal with _whyML
|
|
6
|
+
module XPathVisitor
|
|
7
|
+
# Runs the standard attribute-condition translation, then wraps every
# "child::text()" in normalize-space(...).
def visit_attribute_condition(node)
  xpath = super(node)
  xpath.gsub('child::text()', 'normalize-space(child::text())')
end
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
|
|
3
|
+
module Nokogiri
|
|
4
|
+
module Hpricot
|
|
5
|
+
STag = String
|
|
6
|
+
Elem = XML::Node
|
|
7
|
+
NamedCharacters = Nokogiri::HTML::NamedCharacters
|
|
8
|
+
class << self
|
|
9
|
+
# Parses a document with Nokogiri.parse (all arguments are forwarded) and
# returns it with the Hpricot decorators installed.
def parse(*args)
  add_decorators(Nokogiri.parse(*args))
end
|
|
13
|
+
|
|
14
|
+
# Parses +string+ with Nokogiri::XML.parse and returns the document with
# the Hpricot decorators installed.
def XML(string)
  add_decorators(Nokogiri::XML.parse(string))
end
|
|
18
|
+
|
|
19
|
+
# Returns a new XML::NodeSet holding a single XML::Text node built from
# +string+.
def make(string)
  XML::NodeSet.new.tap { |set| set << XML::Text.new(string) }
end
|
|
24
|
+
|
|
25
|
+
# Registers the Hpricot compatibility mixins on +doc+, applies them with
# +decorate!+ and returns +doc+.
#
# Nodes and the document both receive Decorators::Hpricot::Node; node
# sets receive Decorators::Hpricot::NodeSet.
def add_decorators(doc)
  [
    ['node',     Decorators::Hpricot::Node],
    ['document', Decorators::Hpricot::Node],
    ['nodeset',  Decorators::Hpricot::NodeSet]
  ].each { |key, decorator| doc.decorators[key] << decorator }

  doc.decorate!
  doc
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
class << self
|
|
36
|
+
# Hpricot-compatible entry point.
#
# With a block, the block is handed to Nokogiri::HTML::Builder and the
# builder's document is used; otherwise the arguments are forwarded to
# Nokogiri::HTML.parse. Either way the resulting document is returned
# with the Hpricot decorators installed.
def Hpricot(*args, &block)
  doc = if block_given?
          Nokogiri::HTML::Builder.new(&block).doc
        else
          Nokogiri::HTML.parse(*args)
        end
  Nokogiri::Hpricot.add_decorators(doc)
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'nokogiri/html/document'
|
|
2
|
+
require 'nokogiri/html/sax/parser'
|
|
3
|
+
|
|
4
|
+
module Nokogiri
|
|
5
|
+
module HTML
|
|
6
|
+
class << self
|
|
7
|
+
# Parses the HTML in +string+ by delegating to Document.read_memory and
# returns its result.
#
# +url+ and +encoding+ default to nil and +options+ to 32; all four
# values are passed through unchanged.
# NOTE(review): 32 is presumably a libxml2 parser option flag — confirm
# which one against the native extension.
def parse(string, url = nil, encoding = nil, options = 32)
  Document.read_memory(string, url, encoding, options)
end
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
NamedCharacters =
|
|
13
|
+
{"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913,
|
|
14
|
+
"Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
|
|
15
|
+
"Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
|
|
16
|
+
"Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
|
|
17
|
+
"Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
|
|
18
|
+
"Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
|
|
19
|
+
"OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
|
|
20
|
+
"Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
|
|
21
|
+
"Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
|
|
22
|
+
"THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
|
|
23
|
+
"Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
|
|
24
|
+
"Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
|
|
25
|
+
"aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
|
|
26
|
+
"and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
|
|
27
|
+
"atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
|
|
28
|
+
"bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
|
|
29
|
+
"chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
|
|
30
|
+
"crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
|
|
31
|
+
"darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
|
|
32
|
+
"eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
|
|
33
|
+
"ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
|
|
34
|
+
"euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
|
|
35
|
+
"frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
|
|
36
|
+
"ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
|
|
37
|
+
"hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
|
|
38
|
+
"image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
|
|
39
|
+
"isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
|
|
40
|
+
"lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
|
|
41
|
+
"le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
|
|
42
|
+
"lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
|
|
43
|
+
"micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
|
|
44
|
+
"nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
|
|
45
|
+
"nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
|
|
46
|
+
"oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
|
|
47
|
+
"oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
|
|
48
|
+
"otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
|
|
49
|
+
"permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
|
|
50
|
+
"plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
|
|
51
|
+
"psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
|
|
52
|
+
"raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
|
|
53
|
+
"reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
|
|
54
|
+
"rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
|
|
55
|
+
"shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
|
|
56
|
+
"sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
|
|
57
|
+
"sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
|
|
58
|
+
"theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
|
|
59
|
+
"times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
|
|
60
|
+
"ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
|
|
61
|
+
"uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
|
|
62
|
+
"yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
module Nokogiri
|
|
2
|
+
module HTML
|
|
3
|
+
module SAX
|
|
4
|
+
class Parser < XML::SAX::Parser
|
|
5
|
+
###
# Parse html stored in +data+ using +encoding+ (defaults to 'UTF-8').
# Delegates to +native_parse_memory+ and returns its result.
def parse_memory(data, encoding = 'UTF-8')
  native_parse_memory data, encoding
end
|
|
10
|
+
|
|
11
|
+
###
# Parse a file with +filename+ using +encoding+ (defaults to 'UTF-8').
# Delegates to +native_parse_file+ and returns its result.
#
# Raises Errno::ENOENT when +filename+ does not exist and Errno::EISDIR
# when it names a directory.
def parse_file(filename, encoding = 'UTF-8')
  # File.exist? replaces the deprecated File.exists?, which was removed in
  # Ruby 3.2 and would raise NoMethodError there.
  raise Errno::ENOENT unless File.exist?(filename)
  raise Errno::EISDIR if File.directory?(filename)

  native_parse_file filename, encoding
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
data/lib/nokogiri/xml.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'nokogiri/xml/sax'
|
|
2
|
+
require 'nokogiri/xml/before_handler'
|
|
3
|
+
require 'nokogiri/xml/after_handler'
|
|
4
|
+
require 'nokogiri/xml/node'
|
|
5
|
+
require 'nokogiri/xml/text'
|
|
6
|
+
require 'nokogiri/xml/document'
|
|
7
|
+
require 'nokogiri/xml/node_set'
|
|
8
|
+
require 'nokogiri/xml/xpath'
|
|
9
|
+
require 'nokogiri/xml/builder'
|
|
10
|
+
require 'nokogiri/xml/reader'
|
|
11
|
+
|
|
12
|
+
module Nokogiri
|
|
13
|
+
module XML
|
|
14
|
+
class << self
|
|
15
|
+
# Parses the XML in +string+ by delegating to Document.read_memory.
#
# A nil or empty +string+ yields a fresh, empty Document instead, because
# read_memory pukes on empty docs. +url+ and +encoding+ default to nil and
# +options+ to 1; they are passed through unchanged.
def parse(string, url = nil, encoding = nil, options = 1)
  blank = string.nil? || string.empty?
  blank ? Document.new : Document.read_memory(string, url, encoding, options)
end
|
|
19
|
+
|
|
20
|
+
# Forwards +value+ (true when omitted) to Document.substitute_entities=.
def substitute_entities=(value = true)
  Document.substitute_entities = value
end
|
|
23
|
+
|
|
24
|
+
# Forwards +value+ (true when omitted) to Document.load_external_subsets=.
def load_external_subsets=(value = true)
  Document.load_external_subsets = value
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module Nokogiri
|
|
2
|
+
module XML
|
|
3
|
+
class AfterHandler < BeforeHandler
|
|
4
|
+
attr_accessor :after_nodes
|
|
5
|
+
|
|
6
|
+
# Passes +node+ and +original_html+ to the parent initializer, then starts
# with an empty +after_nodes+ list.
def initialize(node, original_html)
  super(node, original_html)
  @after_nodes = []
end
|
|
10
|
+
|
|
11
|
+
# Handles the end of element +name+.
#
# Elements whose opening tag ("<name", case-insensitive) does not occur in
# the original HTML text are ignored. Otherwise the top of the stack is
# recorded in +after_nodes+ when it is the only entry, and the stack is
# popped.
def end_element(name)
  # Escape the name so characters such as "." are matched literally rather
  # than being interpreted as regular-expression syntax.
  return unless @original_html =~ /<#{Regexp.escape(name.to_s)}/i

  @after_nodes << @stack.last if @stack.length == 1
  @stack.pop
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|