nokogiri 1.0.0-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (127) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +120 -0
  3. data/README.ja.txt +86 -0
  4. data/README.txt +87 -0
  5. data/Rakefile +264 -0
  6. data/ext/nokogiri/extconf.rb +59 -0
  7. data/ext/nokogiri/html_document.c +83 -0
  8. data/ext/nokogiri/html_document.h +10 -0
  9. data/ext/nokogiri/html_sax_parser.c +32 -0
  10. data/ext/nokogiri/html_sax_parser.h +11 -0
  11. data/ext/nokogiri/iconv.dll +0 -0
  12. data/ext/nokogiri/libexslt.dll +0 -0
  13. data/ext/nokogiri/libxml2.dll +0 -0
  14. data/ext/nokogiri/libxslt.dll +0 -0
  15. data/ext/nokogiri/native.c +40 -0
  16. data/ext/nokogiri/native.h +51 -0
  17. data/ext/nokogiri/native.so +0 -0
  18. data/ext/nokogiri/xml_cdata.c +52 -0
  19. data/ext/nokogiri/xml_cdata.h +9 -0
  20. data/ext/nokogiri/xml_document.c +159 -0
  21. data/ext/nokogiri/xml_document.h +10 -0
  22. data/ext/nokogiri/xml_dtd.c +117 -0
  23. data/ext/nokogiri/xml_dtd.h +8 -0
  24. data/ext/nokogiri/xml_node.c +709 -0
  25. data/ext/nokogiri/xml_node.h +15 -0
  26. data/ext/nokogiri/xml_node_set.c +124 -0
  27. data/ext/nokogiri/xml_node_set.h +9 -0
  28. data/ext/nokogiri/xml_reader.c +429 -0
  29. data/ext/nokogiri/xml_reader.h +10 -0
  30. data/ext/nokogiri/xml_sax_parser.c +174 -0
  31. data/ext/nokogiri/xml_sax_parser.h +10 -0
  32. data/ext/nokogiri/xml_syntax_error.c +194 -0
  33. data/ext/nokogiri/xml_syntax_error.h +11 -0
  34. data/ext/nokogiri/xml_text.c +29 -0
  35. data/ext/nokogiri/xml_text.h +9 -0
  36. data/ext/nokogiri/xml_xpath.c +46 -0
  37. data/ext/nokogiri/xml_xpath.h +11 -0
  38. data/ext/nokogiri/xml_xpath_context.c +81 -0
  39. data/ext/nokogiri/xml_xpath_context.h +9 -0
  40. data/ext/nokogiri/xslt_stylesheet.c +108 -0
  41. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  42. data/ext/nokogiri/zlib1.dll +0 -0
  43. data/lib/nokogiri.rb +51 -0
  44. data/lib/nokogiri/css.rb +6 -0
  45. data/lib/nokogiri/css/generated_parser.rb +653 -0
  46. data/lib/nokogiri/css/generated_tokenizer.rb +159 -0
  47. data/lib/nokogiri/css/node.rb +95 -0
  48. data/lib/nokogiri/css/parser.rb +24 -0
  49. data/lib/nokogiri/css/parser.y +198 -0
  50. data/lib/nokogiri/css/tokenizer.rb +9 -0
  51. data/lib/nokogiri/css/tokenizer.rex +63 -0
  52. data/lib/nokogiri/css/xpath_visitor.rb +165 -0
  53. data/lib/nokogiri/decorators.rb +1 -0
  54. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  55. data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
  56. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  57. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
  58. data/lib/nokogiri/hpricot.rb +47 -0
  59. data/lib/nokogiri/html.rb +95 -0
  60. data/lib/nokogiri/html/builder.rb +9 -0
  61. data/lib/nokogiri/html/document.rb +9 -0
  62. data/lib/nokogiri/html/sax/parser.rb +21 -0
  63. data/lib/nokogiri/version.rb +3 -0
  64. data/lib/nokogiri/xml.rb +67 -0
  65. data/lib/nokogiri/xml/after_handler.rb +18 -0
  66. data/lib/nokogiri/xml/before_handler.rb +32 -0
  67. data/lib/nokogiri/xml/builder.rb +79 -0
  68. data/lib/nokogiri/xml/cdata.rb +9 -0
  69. data/lib/nokogiri/xml/document.rb +30 -0
  70. data/lib/nokogiri/xml/dtd.rb +6 -0
  71. data/lib/nokogiri/xml/element.rb +6 -0
  72. data/lib/nokogiri/xml/entity_declaration.rb +9 -0
  73. data/lib/nokogiri/xml/node.rb +195 -0
  74. data/lib/nokogiri/xml/node_set.rb +183 -0
  75. data/lib/nokogiri/xml/notation.rb +6 -0
  76. data/lib/nokogiri/xml/reader.rb +14 -0
  77. data/lib/nokogiri/xml/sax.rb +9 -0
  78. data/lib/nokogiri/xml/sax/document.rb +59 -0
  79. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  80. data/lib/nokogiri/xml/syntax_error.rb +21 -0
  81. data/lib/nokogiri/xml/text.rb +6 -0
  82. data/lib/nokogiri/xml/xpath.rb +6 -0
  83. data/lib/nokogiri/xml/xpath_context.rb +14 -0
  84. data/lib/nokogiri/xslt.rb +11 -0
  85. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  86. data/nokogiri.gemspec +34 -0
  87. data/test/css/test_nthiness.rb +159 -0
  88. data/test/css/test_parser.rb +224 -0
  89. data/test/css/test_tokenizer.rb +162 -0
  90. data/test/css/test_xpath_visitor.rb +54 -0
  91. data/test/files/staff.xml +59 -0
  92. data/test/files/staff.xslt +32 -0
  93. data/test/files/tlm.html +850 -0
  94. data/test/helper.rb +70 -0
  95. data/test/hpricot/files/basic.xhtml +17 -0
  96. data/test/hpricot/files/boingboing.html +2266 -0
  97. data/test/hpricot/files/cy0.html +3653 -0
  98. data/test/hpricot/files/immob.html +400 -0
  99. data/test/hpricot/files/pace_application.html +1320 -0
  100. data/test/hpricot/files/tenderlove.html +16 -0
  101. data/test/hpricot/files/uswebgen.html +220 -0
  102. data/test/hpricot/files/utf8.html +1054 -0
  103. data/test/hpricot/files/week9.html +1723 -0
  104. data/test/hpricot/files/why.xml +19 -0
  105. data/test/hpricot/load_files.rb +7 -0
  106. data/test/hpricot/test_alter.rb +67 -0
  107. data/test/hpricot/test_builder.rb +27 -0
  108. data/test/hpricot/test_parser.rb +423 -0
  109. data/test/hpricot/test_paths.rb +15 -0
  110. data/test/hpricot/test_preserved.rb +78 -0
  111. data/test/hpricot/test_xml.rb +30 -0
  112. data/test/html/sax/test_parser.rb +27 -0
  113. data/test/html/test_builder.rb +78 -0
  114. data/test/html/test_document.rb +86 -0
  115. data/test/test_convert_xpath.rb +180 -0
  116. data/test/test_nokogiri.rb +36 -0
  117. data/test/test_reader.rb +222 -0
  118. data/test/test_xslt_transforms.rb +29 -0
  119. data/test/xml/sax/test_parser.rb +93 -0
  120. data/test/xml/test_builder.rb +16 -0
  121. data/test/xml/test_cdata.rb +18 -0
  122. data/test/xml/test_document.rb +171 -0
  123. data/test/xml/test_dtd.rb +43 -0
  124. data/test/xml/test_node.rb +223 -0
  125. data/test/xml/test_node_set.rb +116 -0
  126. data/test/xml/test_text.rb +13 -0
  127. metadata +217 -0
@@ -0,0 +1,9 @@
module Nokogiri
  module HTML
    ###
    # Builder variant that adds HTML serialization on top of XML::Builder.
    # The only thing defined here is #to_html; everything else is inherited.
    class Builder < XML::Builder
      ###
      # Serialize the document being built by delegating to its +to_html+.
      def to_html
        doc.to_html
      end
    end
  end
end
@@ -0,0 +1,9 @@
module Nokogiri
  module HTML
    ###
    # HTML flavour of XML::Document. Adds #to_html, which is a thin wrapper
    # around +serialize+ (not defined in this file; presumably supplied by
    # the native extension -- confirm).
    class Document < XML::Document
      ###
      # Return the serialized form of this document.
      def to_html
        serialize
      end
    end
  end
end
@@ -0,0 +1,21 @@
module Nokogiri
  module HTML
    module SAX
      ###
      # SAX parser for HTML input. Both entry points hand off to
      # +native_parse_memory+ / +native_parse_file+, which are not defined
      # in this file (presumably provided by the native extension).
      class Parser < XML::SAX::Parser
        ###
        # Parse html stored in +data+ using +encoding+
        def parse_memory(data, encoding = 'UTF-8')
          native_parse_memory(data, encoding)
        end

        ###
        # Parse a file with +filename+ using +encoding+.
        #
        # Raises Errno::ENOENT when +filename+ does not exist and
        # Errno::EISDIR when it is a directory.
        def parse_file(filename, encoding = 'UTF-8')
          # File.exist? replaces the deprecated File.exists?, which was
          # removed in Ruby 3.2. The filename is included in the error so
          # the caller can see which path was rejected.
          raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
          raise Errno::EISDIR, filename.to_s if File.directory?(filename)
          native_parse_file(filename, encoding)
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
module Nokogiri
  # Version string of this nokogiri release. Frozen so that callers cannot
  # mutate the shared constant in place.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri/xml/sax'
2
+ require 'nokogiri/xml/before_handler'
3
+ require 'nokogiri/xml/after_handler'
4
+ require 'nokogiri/xml/node'
5
+ require 'nokogiri/xml/dtd'
6
+ require 'nokogiri/xml/text'
7
+ require 'nokogiri/xml/cdata'
8
+ require 'nokogiri/xml/document'
9
+ require 'nokogiri/xml/node_set'
10
+ require 'nokogiri/xml/xpath'
11
+ require 'nokogiri/xml/xpath_context'
12
+ require 'nokogiri/xml/builder'
13
+ require 'nokogiri/xml/reader'
14
+ require 'nokogiri/xml/syntax_error'
15
+ require 'nokogiri/xml/notation'
16
+ require 'nokogiri/xml/element'
17
+ require 'nokogiri/xml/entity_declaration'
18
+
module Nokogiri
  class << self
    ###
    # Shortcut for Nokogiri::XML.parse. All arguments are forwarded as-is.
    #
    # NOTE(review): the default +options+ here is PARSE_RECOVER only (1),
    # which differs from the default used by Nokogiri::XML.parse. Kept
    # unchanged for backward compatibility -- confirm whether intentional.
    def XML(thing, url = nil, encoding = nil, options = XML::PARSE_RECOVER)
      Nokogiri::XML.parse(thing, url, encoding, options)
    end
  end

  module XML
    # Parser options
    PARSE_RECOVER    = 1 << 0  # Recover from errors
    PARSE_NOENT      = 1 << 1  # Substitute entities
    PARSE_DTDLOAD    = 1 << 2  # Load external subsets
    PARSE_DTDATTR    = 1 << 3  # Default DTD attributes
    PARSE_DTDVALID   = 1 << 4  # validate with the DTD
    PARSE_NOERROR    = 1 << 5  # suppress error reports
    PARSE_NOWARNING  = 1 << 6  # suppress warning reports
    PARSE_PEDANTIC   = 1 << 7  # pedantic error reporting
    PARSE_NOBLANKS   = 1 << 8  # remove blank nodes
    PARSE_SAX1       = 1 << 9  # use the SAX1 interface internally
    PARSE_XINCLUDE   = 1 << 10 # Implement XInclude substitution
    PARSE_NONET      = 1 << 11 # Forbid network access
    PARSE_NODICT     = 1 << 12 # Do not reuse the context dictionary
    PARSE_NSCLEAN    = 1 << 13 # remove redundant namespaces declarations
    PARSE_NOCDATA    = 1 << 14 # merge CDATA as text nodes
    PARSE_NOXINCNODE = 1 << 15 # do not generate XINCLUDE START/END nodes

    # Options used by XML.parse when the caller passes none. This is the
    # named form of the literal 2159 that was previously hard-coded.
    DEFAULT_PARSE_OPTIONS =
      PARSE_RECOVER | PARSE_NOENT | PARSE_DTDLOAD | PARSE_DTDATTR |
      PARSE_NOERROR | PARSE_NOWARNING | PARSE_NONET

    class << self
      ###
      # Parse +string_or_io+ into a Document.
      #
      # +string_or_io+ may be a String or any object responding to +read+.
      # When +url+ is not given and the source responds to +path+, that
      # path is used as the url. +options+ is a bitmask of PARSE_* flags.
      #
      # Returns a fresh, empty Document for nil or empty input.
      def parse(string_or_io, url = nil, encoding = nil, options = DEFAULT_PARSE_OPTIONS)
        if string_or_io.respond_to?(:read)
          url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
          string_or_io = string_or_io.read
        end

        # read_memory pukes on empty docs
        return Document.new if string_or_io.nil? || string_or_io.empty?

        Document.read_memory(string_or_io, url, encoding, options)
      end

      ###
      # Forward +value+ to Document.substitute_entities=
      def substitute_entities=(value = true)
        Document.substitute_entities = value
      end

      ###
      # Forward +value+ to Document.load_external_subsets=
      def load_external_subsets=(value = true)
        Document.load_external_subsets = value
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Nokogiri
  module XML
    ###
    # SAX handler that collects finished top-level nodes in +after_nodes+
    # instead of inserting them itself. Node#after reads that list once
    # parsing is done.
    class AfterHandler < BeforeHandler
      attr_accessor :after_nodes

      def initialize(node, original_html)
        super(node, original_html)
        @after_nodes = []
      end

      ###
      # Close the element +name+. Elements whose tag does not occur in the
      # original markup are ignored. When the element being closed is the
      # only one on the stack it is recorded in +after_nodes+.
      def end_element(name)
        if @original_html =~ /<#{name}/i
          @after_nodes.push(@stack.last) if @stack.size == 1
          @stack.pop
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
module Nokogiri
  module XML
    ###
    # SAX handler that rebuilds the parsed markup as Node objects and
    # inserts each finished top-level node before +node+ as a sibling.
    # Elements whose tag does not appear in +original_html+ are skipped.
    class BeforeHandler < Nokogiri::XML::SAX::Document # :nodoc:
      def initialize(node, original_html)
        @original_html = original_html
        @node = node
        @stack = []
      end

      ###
      # Open a new element +name+ with the flat [key, value, ...] list
      # +attrs+ and push it on the stack, parented to the current top.
      def start_element(name, attrs = [])
        return unless in_original?(name)
        node = Node.new(name)
        Hash[*attrs].each do |k, v|
          node[k] = v
        end
        node.parent = @stack.last unless @stack.empty?
        @stack << node
      end

      ###
      # Append +string+ to the content of the element currently open.
      def characters(string)
        node = @stack.last
        # Text can arrive while no tracked element is open (for example
        # whitespace between two top-level elements, or text inside a
        # skipped element). There is nothing to attach it to, so skip it
        # rather than raising NoMethodError on nil.
        return unless node
        node.content += string
      end

      ###
      # Close element +name+. A node that is alone on the stack is complete
      # and gets inserted before the target node.
      def end_element(name)
        return unless in_original?(name)
        @node.add_previous_sibling @stack.last if @stack.length == 1
        @stack.pop
      end

      private

      # True when an opening tag for +name+ occurs in the original markup
      # (case-insensitive). The name is escaped so characters such as "."
      # are matched literally instead of as regex metacharacters.
      def in_original?(name)
        @original_html =~ /<#{Regexp.escape(name.to_s)}/i
      end
    end
  end
end
@@ -0,0 +1,79 @@
module Nokogiri
  module XML
    ###
    # DSL for building a document: inside the block given to Builder.new,
    # every unknown method call creates a node named after the method and
    # inserts it under the current +parent+.
    class Builder
      attr_accessor :doc, :parent

      ###
      # Create the document and evaluate +block+ (if any) in the context of
      # this builder. The Document class is looked up next to the builder's
      # own class, by replacing the last segment of the class name with
      # "Document".
      def initialize(&block)
        namespace = self.class.name.split('::')
        namespace[-1] = 'Document'
        # The evaluated string is derived from this class's name, not from
        # caller-supplied data.
        @doc = eval(namespace.join('::')).new
        @parent = @doc
        # Guard: instance_eval raises ArgumentError when handed no block.
        instance_eval(&block) if block
        @parent = @doc
      end

      ###
      # Insert a text node containing +string+ under the current parent.
      def text(string)
        node = Nokogiri::XML::Text.new(string)
        insert(node)
      end

      ###
      # Insert a CDATA node containing +string+ under the current parent.
      #
      # NOTE(review): this references +CData+ while cdata.rb declares
      # +CDATA+; confirm which constant the native extension defines.
      def cdata(string)
        node = Nokogiri::XML::CData.new(@doc, string)
        insert(node)
      end

      ###
      # Serialize the built document by delegating to its +to_xml+.
      def to_xml
        @doc.to_xml
      end

      ###
      # Create a node named +method+. A Hash first argument sets attributes
      # (keys and values converted with to_s); any other truthy first
      # argument becomes the node content. A block builds child nodes.
      def method_missing(method, *args, &block)
        node = Nokogiri::XML::Node.new(method.to_s) { |n|
          if content = args.first
            if content.is_a?(Hash)
              content.each { |k, v| n[k.to_s] = v.to_s }
            else
              n.content = content
            end
          end
        }
        insert(node, &block)
      end

      private

      # Attach +node+ to the current parent. When a block is given it is
      # evaluated with +node+ as the parent, then the previous parent is
      # restored. Returns a NodeBuilder wrapping +node+.
      def insert(node, &block)
        node.parent = @parent
        if block_given?
          @parent = node
          instance_eval(&block)
          @parent = node.parent
        end
        NodeBuilder.new(node, self)
      end

      ###
      # Wrapper returned for every inserted node. Chained calls on it set
      # the id ("name!"), an attribute ("name="), or add a CSS class (any
      # other name).
      class NodeBuilder # :nodoc:
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        def method_missing(method, *args, &block)
          case method.to_s
          when /^(.*)!$/
            @node['id'] = $1
            @node.content = args.first if args.first
          when /^(.*)=/
            @node[$1] = args.first
          else
            @node['class'] =
              ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
            @node.content = args.first if args.first
          end
          if block_given?
            # Build children under this node, then put the builder's parent
            # back. Without the restore, nodes created after the block
            # would be nested under @node instead of beside it.
            previous_parent = @doc_builder.parent
            @doc_builder.parent = @node
            begin
              return @doc_builder.instance_eval(&block)
            ensure
              @doc_builder.parent = previous_parent
            end
          end
          self
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
module Nokogiri
  module XML
    ###
    # Text subclass representing a CDATA section.
    class CDATA < Text
      ###
      # Node name reported for CDATA sections; always 'cdata-section'.
      def name
        'cdata-section'
      end
    end
  end
end
@@ -0,0 +1,30 @@
module Nokogiri
  module XML
    ###
    # Document node. Holds the per-document decorator registry and exposes
    # serialization and namespace helpers.
    class Document < Node
      ###
      # Registry of decorator modules keyed by lower-cased class name.
      # Unknown keys are initialised to an empty Array on first access.
      def decorators
        @decorators ||= Hash.new { |registry, key| registry[key] = [] }
      end

      ###
      # Node name of a document; always 'document'.
      def name
        'document'
      end

      ###
      # Apply any decorators to +node+
      def decorate(node)
        short_name = node.class.name.split('::').last
        modules = decorators[short_name.downcase]
        modules.each { |decorator| node.extend(decorator) }
      end

      ###
      # Serialized form of the document (delegates to +serialize+).
      def to_xml
        serialize
      end

      ###
      # Namespaces collected from the root node's subtree, or an empty
      # Hash when the document has no root.
      def namespaces
        return {} unless root
        root.collect_namespaces
      end
    end
  end
end
@@ -0,0 +1,6 @@
module Nokogiri
  module XML
    ###
    # Node subclass for DTD nodes. Adds no Ruby-level behavior of its own.
    class DTD < Node; end
  end
end
@@ -0,0 +1,6 @@
module Nokogiri
  module XML
    ###
    # Node subclass for element nodes. Adds no Ruby-level behavior of its
    # own.
    class Element < Node; end
  end
end
@@ -0,0 +1,9 @@
module Nokogiri
  module XML
    ###
    # Node subclass for entity declarations.
    class EntityDeclaration < Node
      ###
      # Always returns nil for entity declarations.
      def attributes
        nil
      end
    end
  end
end
@@ -0,0 +1,195 @@
module Nokogiri
  module XML
    ###
    # Ruby-level behavior of a node: searching (XPath / CSS), attribute
    # access, sibling insertion from markup, and traversal. Primitives such
    # as +child+, +next_sibling+, +unlink+, +key?+, +get+, +path+ and
    # +native_content=+ are not defined here (presumably supplied by the
    # native extension).
    class Node
      # Node type codes compared against +type+ by the predicates below.
      # NOTE(review): presumably mirror libxml2's xmlElementType -- confirm.
      CDATA_SECTION_NODE = 4
      COMMENT_NODE = 8
      DOCUMENT_NODE = 9
      HTML_DOCUMENT_NODE = 13
      DTD_NODE = 14
      ELEMENT_DECL = 15
      ATTRIBUTE_DECL = 16
      ENTITY_DECL = 17
      NAMESPACE_DECL = 18
      XINCLUDE_START = 19
      XINCLUDE_END = 20
      DOCB_DOCUMENT_NODE = 21

      # Not referenced anywhere in this class body.
      # NOTE(review): presumably used by the native extension -- confirm
      # before removing.
      @@owned = {}

      ###
      # Decorate this node with the decorators set up in this node's Document
      def decorate!
        document.decorate(self) if document
      end

      ###
      # Get the list of children for this node as a NodeSet.
      # Children for which +blank?+ is true are left out.
      def children
        list = NodeSet.new
        list.document = document
        document.decorate(list)

        node = child
        while node
          list << node unless node.blank?
          node = node.next
        end
        list
      end

      ###
      # Search this node for +paths+. +paths+ can be XPath or CSS, and an
      # optional hash of namespaces may be appended.
      # Paths starting with "/" or "./" are treated as XPath; everything
      # else is parsed as CSS. See Node#xpath and Node#css.
      def search(*paths)
        ns = paths.last.is_a?(Hash) ? paths.pop : {}
        queries = paths.map { |path|
          if path =~ /^(\.\/|\/)/
            path
          else
            CSS::Parser.parse(path).map { |ast| ast.to_xpath }
          end
        }.flatten.uniq
        xpath(*(queries + [ns]))
      end
      alias :/ :search

      ###
      # Evaluate each XPath expression in +paths+ relative to this node. An
      # optional trailing Hash registers namespaces.
      #
      # Returns the NodeSet of a single expression directly, a combined
      # NodeSet for several expressions, or an empty NodeSet when the
      # document has no root.
      def xpath(*paths)
        ns = paths.last.is_a?(Hash) ? paths.pop : {}

        return NodeSet.new unless document.root

        sets = paths.map { |path|
          ctx = XPathContext.new(self)
          ctx.register_namespaces(ns)
          set = ctx.evaluate(path).node_set
          set.document = document
          document.decorate(set)
          set
        }
        return sets.first if sets.length == 1

        NodeSet.new do |combined|
          document.decorate(combined)
          sets.each do |set|
            set.each do |node|
              combined << node
            end
          end
        end
      end

      ###
      # Search this node's subtree with the CSS selectors in +rules+. As
      # with Node#search and Node#xpath, an optional trailing Hash of
      # namespaces is accepted and passed through to Node#xpath.
      def css(*rules)
        ns = rules.last.is_a?(Hash) ? rules.pop : {}
        queries = rules.map { |rule|
          # Prefix with "." so the expression is relative to this node.
          CSS::Parser.parse(rule).map { |ast| "." + ast.to_xpath }
        }.flatten.uniq
        xpath(*(queries + [ns]))
      end

      ###
      # First result of searching for +path+ (see Node#search).
      def at(path, ns = {})
        search("#{path}", ns).first
      end

      ###
      # Value of attribute +property+, or nil when the attribute is absent.
      def [](property)
        return nil unless key?(property)
        get(property)
      end

      ###
      # The next sibling of this node.
      def next
        next_sibling
      end

      ###
      # Unlink this node from its parent.
      def remove
        unlink
      end

      ####
      # Create nodes from +data+ and insert them before this node
      # (as a sibling).
      def before(data)
        parser = sax_parser_class.new(BeforeHandler.new(self, data))
        parser.parse(data)
      end

      ####
      # Create nodes from +data+ and insert them after this node
      # (as a sibling).
      def after(data)
        handler = AfterHandler.new(self, data)
        parser = sax_parser_class.new(handler)
        parser.parse(data)
        # Insert in reverse so the new siblings end up in document order.
        handler.after_nodes.reverse.each do |sibling|
          self.add_next_sibling sibling
        end
      end

      ###
      # True when this node has an attribute named +property+.
      def has_attribute?(property)
        key? property
      end

      alias :get_attribute :[]

      ###
      # Set attribute +name+ to +value+.
      def set_attribute(name, value)
        self[name] = value
      end

      ###
      # Text content of this node.
      def text
        content
      end
      alias :inner_text :text

      ####
      # Set the content to +string+.
      # If +encode+, encode any special characters first.
      #
      # The second argument can only be supplied through +send+, since
      # assignment syntax passes exactly one value. Previously +encode+
      # was ignored and the string was always encoded.
      def content=(string, encode = true)
        self.native_content = encode ? encode_special_chars(string) : string
      end

      def comment?
        type == COMMENT_NODE
      end

      def cdata?
        type == CDATA_SECTION_NODE
      end

      def xml?
        type == DOCUMENT_NODE
      end

      def html?
        type == HTML_DOCUMENT_NODE
      end

      ###
      # Serialize this node; delegates to +to_xml+.
      def to_html
        to_xml
      end
      alias :to_s :to_html
      alias :inner_html :to_html

      ###
      # CSS selector equivalent of this node's +path+: segments are joined
      # with " > " and positional "[n]" becomes ":nth-of-type(n)".
      def css_path
        path.split(/\//).map { |part|
          part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
        }.compact.join(' > ')
      end

      # recursively get all namespaces from this node and its subtree
      def collect_namespaces
        # TODO: print warning message if a prefix refers to more than one URI in the document?
        ns = {}
        traverse { |j| ns.merge!(j.namespaces) }
        ns
      end

      ####
      # Yields self and all children to +block+ recursively.
      # Children are yielded before the node itself.
      def traverse(&block)
        children.each { |j| j.traverse(&block) }
        block.call(self)
      end

      private

      # SAX parser class that lives beside this node's document class
      # (the last segment of the document class name is replaced with
      # "SAX::Parser"). The evaluated string is derived from a class name,
      # not from caller-supplied data.
      def sax_parser_class
        classes = document.class.name.split('::')
        classes[-1] = 'SAX::Parser'
        eval(classes.join('::'))
      end
    end
  end
end