nokogiri 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (88) hide show
  1. data/History.ja.txt +34 -0
  2. data/History.txt +36 -0
  3. data/Manifest.txt +21 -0
  4. data/README.ja.txt +1 -1
  5. data/README.txt +1 -1
  6. data/Rakefile +27 -89
  7. data/ext/nokogiri/extconf.rb +48 -63
  8. data/ext/nokogiri/html_document.c +90 -29
  9. data/ext/nokogiri/html_sax_parser.c +23 -2
  10. data/ext/nokogiri/native.c +18 -8
  11. data/ext/nokogiri/native.h +22 -0
  12. data/ext/nokogiri/xml_attr.c +83 -0
  13. data/ext/nokogiri/xml_attr.h +9 -0
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +84 -18
  16. data/ext/nokogiri/xml_document_fragment.c +38 -0
  17. data/ext/nokogiri/xml_document_fragment.h +10 -0
  18. data/ext/nokogiri/xml_dtd.c +2 -22
  19. data/ext/nokogiri/xml_entity_reference.c +41 -0
  20. data/ext/nokogiri/xml_entity_reference.h +9 -0
  21. data/ext/nokogiri/xml_io.c +10 -3
  22. data/ext/nokogiri/xml_io.h +1 -0
  23. data/ext/nokogiri/xml_node.c +116 -66
  24. data/ext/nokogiri/xml_node_set.c +5 -1
  25. data/ext/nokogiri/xml_processing_instruction.c +44 -0
  26. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  27. data/ext/nokogiri/xml_reader.c +20 -4
  28. data/ext/nokogiri/xml_sax_parser.c +51 -15
  29. data/ext/nokogiri/xml_sax_push_parser.c +85 -0
  30. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  31. data/ext/nokogiri/xml_syntax_error.c +12 -8
  32. data/ext/nokogiri/xml_syntax_error.h +2 -1
  33. data/ext/nokogiri/xml_xpath_context.c +11 -2
  34. data/ext/nokogiri/xslt_stylesheet.c +1 -6
  35. data/lib/nokogiri.rb +10 -13
  36. data/lib/nokogiri/css.rb +1 -1
  37. data/lib/nokogiri/css/generated_parser.rb +287 -295
  38. data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
  39. data/lib/nokogiri/css/node.rb +1 -3
  40. data/lib/nokogiri/css/parser.rb +21 -12
  41. data/lib/nokogiri/css/parser.y +55 -44
  42. data/lib/nokogiri/css/syntax_error.rb +2 -1
  43. data/lib/nokogiri/css/tokenizer.rex +23 -32
  44. data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
  45. data/lib/nokogiri/html.rb +10 -4
  46. data/lib/nokogiri/html/document.rb +6 -2
  47. data/lib/nokogiri/syntax_error.rb +4 -0
  48. data/lib/nokogiri/version.rb +2 -1
  49. data/lib/nokogiri/xml.rb +3 -1
  50. data/lib/nokogiri/xml/attr.rb +3 -4
  51. data/lib/nokogiri/xml/cdata.rb +1 -1
  52. data/lib/nokogiri/xml/document.rb +4 -7
  53. data/lib/nokogiri/xml/document_fragment.rb +9 -0
  54. data/lib/nokogiri/xml/dtd.rb +3 -0
  55. data/lib/nokogiri/xml/node.rb +144 -40
  56. data/lib/nokogiri/xml/node/save_options.rb +32 -0
  57. data/lib/nokogiri/xml/node_set.rb +11 -20
  58. data/lib/nokogiri/xml/processing_instruction.rb +6 -0
  59. data/lib/nokogiri/xml/reader.rb +5 -0
  60. data/lib/nokogiri/xml/sax.rb +1 -0
  61. data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +3 -1
  63. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  64. data/tasks/test.rb +136 -0
  65. data/test/css/test_parser.rb +4 -0
  66. data/test/css/test_tokenizer.rb +30 -17
  67. data/test/css/test_xpath_visitor.rb +11 -0
  68. data/test/helper.rb +11 -0
  69. data/test/hpricot/test_builder.rb +2 -9
  70. data/test/hpricot/test_parser.rb +4 -4
  71. data/test/html/test_builder.rb +7 -7
  72. data/test/html/test_document.rb +90 -4
  73. data/test/html/test_node.rb +1 -0
  74. data/test/test_css_cache.rb +1 -3
  75. data/test/test_reader.rb +19 -1
  76. data/test/test_xslt_transforms.rb +1 -1
  77. data/test/xml/node/test_save_options.rb +20 -0
  78. data/test/xml/sax/test_parser.rb +17 -0
  79. data/test/xml/sax/test_push_parser.rb +67 -0
  80. data/test/xml/test_attr.rb +16 -0
  81. data/test/xml/test_cdata.rb +1 -1
  82. data/test/xml/test_document.rb +45 -0
  83. data/test/xml/test_document_fragment.rb +18 -0
  84. data/test/xml/test_dtd.rb +2 -4
  85. data/test/xml/test_entity_reference.rb +16 -0
  86. data/test/xml/test_node.rb +149 -80
  87. data/test/xml/test_processing_instruction.rb +24 -0
  88. metadata +28 -2
@@ -38,7 +38,7 @@ module Nokogiri
38
38
  return sub_set
39
39
  end
40
40
 
41
- ctx = CSS::Parser.parse(rule.to_s)
41
+ ctx = CSS.parse(rule.to_s)
42
42
  visitor = CSS::XPathVisitor.new
43
43
  visitor.extend(Hpricot::XPathVisitor)
44
44
  each do |node|
@@ -30,7 +30,7 @@ module Nokogiri
30
30
  def parse string_or_io, url = nil, encoding = nil, options = 2145
31
31
  if string_or_io.respond_to?(:read)
32
32
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
33
- string_or_io = string_or_io.read
33
+ return Document.read_io(string_or_io, url, encoding, options)
34
34
  end
35
35
 
36
36
  Document.read_memory(string_or_io, url, encoding, options)
@@ -40,13 +40,19 @@ module Nokogiri
40
40
  # Parse a fragment from +string+ into a NodeSet.
41
41
  def fragment string
42
42
  doc = parse(string)
43
- finder = lambda { |children, f|
44
- children.each do |child|
45
- return children if string =~ /<#{child.name}/
43
+ fragment = XML::DocumentFragment.new(doc)
44
+ finder = lambda { |c, f|
45
+ c.each do |child|
46
+ fragment.add_child(child) if string =~ /<#{child.name}/
47
+ end
48
+ return fragment if fragment.children.length > 0
49
+
50
+ c.each do |child|
46
51
  finder.call(child.children, f)
47
52
  end
48
53
  }
49
54
  finder.call(doc.children, finder)
55
+ fragment
50
56
  end
51
57
  end
52
58
 
@@ -1,8 +1,12 @@
1
1
  module Nokogiri
2
2
  module HTML
3
3
  class Document < XML::Document
4
- def to_html
5
- serialize
4
+ def serialize encoding = nil, options = XML::Node::SaveOptions::FORMAT |
5
+ XML::Node::SaveOptions::AS_HTML |
6
+ XML::Node::SaveOptions::NO_DECLARATION |
7
+ XML::Node::SaveOptions::NO_EMPTY_TAGS
8
+
9
+ super(encoding, options)
6
10
  end
7
11
  end
8
12
  end
@@ -0,0 +1,4 @@
1
+ module Nokogiri
2
+ class SyntaxError < ::StandardError
3
+ end
4
+ end
@@ -1,3 +1,4 @@
1
1
  module Nokogiri
2
- VERSION = '1.1.1'
2
+ # The version of Nokogiri you are using
3
+ VERSION = '1.2.0'
3
4
  end
@@ -6,14 +6,16 @@ require 'nokogiri/xml/attr'
6
6
  require 'nokogiri/xml/dtd'
7
7
  require 'nokogiri/xml/text'
8
8
  require 'nokogiri/xml/cdata'
9
+ require 'nokogiri/xml/processing_instruction'
9
10
  require 'nokogiri/xml/comment'
10
11
  require 'nokogiri/xml/document'
12
+ require 'nokogiri/xml/document_fragment'
11
13
  require 'nokogiri/xml/node_set'
14
+ require 'nokogiri/xml/syntax_error'
12
15
  require 'nokogiri/xml/xpath'
13
16
  require 'nokogiri/xml/xpath_context'
14
17
  require 'nokogiri/xml/builder'
15
18
  require 'nokogiri/xml/reader'
16
- require 'nokogiri/xml/syntax_error'
17
19
  require 'nokogiri/xml/notation'
18
20
  require 'nokogiri/xml/element'
19
21
  require 'nokogiri/xml/entity_declaration'
@@ -1,10 +1,9 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class Attr < Node
4
- def value
5
- children.first.to_s
6
- end
7
- alias :to_s :value
4
+ alias :value :content
5
+ alias :to_s :content
6
+ alias :content= :value=
8
7
  end
9
8
  end
10
9
  end
@@ -2,7 +2,7 @@ module Nokogiri
2
2
  module XML
3
3
  class CDATA < Text
4
4
  def name
5
- 'cdata-section'
5
+ '#cdata-section'
6
6
  end
7
7
  end
8
8
  end
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class Document < Node
4
+ attr_accessor :errors
5
+
4
6
  def name
5
7
  'document'
6
8
  end
@@ -39,13 +41,8 @@ module Nokogiri
39
41
  @node_cache ||= {}
40
42
  end
41
43
 
42
- def to_xml
43
- serialize
44
- end
45
-
46
- def inner_html
47
- serialize
48
- end
44
+ alias :to_xml :serialize
45
+ alias :inner_html :serialize
49
46
 
50
47
  def namespaces
51
48
  root ? root.collect_namespaces : {}
@@ -0,0 +1,9 @@
1
+ module Nokogiri
2
+ module XML
3
+ class DocumentFragment < Node
4
+ def name
5
+ '#document-fragment'
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,6 +1,9 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class DTD < Node
4
+ def attributes
5
+ nil
6
+ end
4
7
  end
5
8
  end
6
9
  end
@@ -1,3 +1,6 @@
1
+ require 'stringio'
2
+ require 'nokogiri/xml/node/save_options'
3
+
1
4
  module Nokogiri
2
5
  module XML
3
6
  class Node
@@ -23,6 +26,7 @@ module Nokogiri
23
26
  XINCLUDE_END = 20
24
27
  DOCB_DOCUMENT_NODE = 21
25
28
 
29
+ # The Document associated with this Node.
26
30
  attr_accessor :document
27
31
 
28
32
  ###
@@ -76,7 +80,7 @@ module Nokogiri
76
80
  # def regex node_set, regex
77
81
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
78
82
  # end
79
- # })
83
+ # }.new)
80
84
  #
81
85
  def xpath *paths
82
86
  # Pop off our custom function handler if it exists
@@ -84,7 +88,7 @@ module Nokogiri
84
88
  Hash, String, Symbol
85
89
  ].include?(paths.last.class) ? paths.pop : nil
86
90
 
87
- ns = paths.last.is_a?(Hash) ? paths.pop : {}
91
+ ns = paths.last.is_a?(Hash) ? paths.pop : document.root.namespaces
88
92
 
89
93
  return NodeSet.new(document) unless document.root
90
94
 
@@ -134,45 +138,72 @@ module Nokogiri
134
138
  Hash, String, Symbol
135
139
  ].include?(rules.last.class) ? rules.pop : nil
136
140
 
137
- ns = rules.last.is_a?(Hash) ? rules.pop : {}
141
+ ns = rules.last.is_a?(Hash) ? rules.pop : document.root.namespaces
138
142
 
139
143
  rules = rules.map { |rule|
140
- CSS.xpath_for(rule, :prefix => ".//")
144
+ CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
141
145
  }.flatten.uniq + [ns, handler].compact
142
146
 
143
147
  xpath(*rules)
144
148
  end
145
149
 
146
- def at path, ns = {}
150
+ ###
151
+ # Search for the first occurrence of +path+.
152
+ # Returns nil if nothing is found, otherwise a Node.
153
+ def at path, ns = document.root.namespaces
147
154
  search(path, ns).first
148
155
  end
149
156
 
150
- def [](property)
151
- return nil unless key?(property)
152
- get(property)
153
- end
154
-
155
- def next
156
- next_sibling
157
- end
158
-
159
- def previous
160
- previous_sibling
161
- end
162
-
163
- def remove
164
- unlink
165
- end
157
+ ###
158
+ # Get the attribute value for the attribute +name+
159
+ def [](name)
160
+ return nil unless key?(name)
161
+ get(name)
162
+ end
163
+
164
+ alias :next :next_sibling
165
+ alias :previous :previous_sibling
166
+ alias :remove :unlink
167
+ alias :get_attribute :[]
168
+ alias :set_attribute :[]=
169
+ alias :text :content
170
+ alias :inner_text :content
171
+ alias :has_attribute? :key?
172
+ alias :<< :add_child
173
+ alias :name :node_name
174
+ alias :name= :node_name=
175
+ alias :type :node_type
176
+ alias :to_str :text
166
177
 
167
178
  ####
168
179
  # Returns a hash containing the node's attributes. The key is the
169
180
  # attribute name, the value is the string value of the attribute.
170
181
  def attributes
171
182
  Hash[*(attribute_nodes.map { |node|
172
- [node.name, node]
183
+ [node.node_name, node]
173
184
  }.flatten)]
174
185
  end
175
186
 
187
+ ###
188
+ # Get the attribute values for this Node.
189
+ def values
190
+ attribute_nodes.map { |node| node.value }
191
+ end
192
+
193
+ ###
194
+ # Get the attribute names for this Node.
195
+ def keys
196
+ attribute_nodes.map { |node| node.node_name }
197
+ end
198
+
199
+ ###
200
+ # Iterate over each attribute name and value pair for this Node.
201
+ def each &block
202
+ attribute_nodes.each { |node|
203
+ block.call(node.node_name, node.value)
204
+ }
205
+ end
206
+
176
207
  ###
177
208
  # Remove the attribute named +name+
178
209
  def remove_attribute name
@@ -206,20 +237,6 @@ module Nokogiri
206
237
  end
207
238
  end
208
239
 
209
- def has_attribute?(property)
210
- key? property
211
- end
212
-
213
- alias :get_attribute :[]
214
- def set_attribute(name, value)
215
- self[name] = value
216
- end
217
-
218
- def text
219
- content
220
- end
221
- alias :inner_text :text
222
-
223
240
  ####
224
241
  # Set the content to +string+.
225
242
  # If +encode+, encode any special characters first.
@@ -234,10 +251,6 @@ module Nokogiri
234
251
  parent_node
235
252
  end
236
253
 
237
- def << child
238
- add_child child
239
- end
240
-
241
254
  def comment?
242
255
  type == COMMENT_NODE
243
256
  end
@@ -323,11 +336,102 @@ Node.replace requires a Node argument, and cannot accept a Document.
323
336
  replace_with_node new_node
324
337
  end
325
338
 
339
+ ###
340
+ # Test to see if this Node is equal to +other+
326
341
  def == other
327
342
  return false unless other
328
343
  return false unless other.respond_to?(:pointer_id)
329
344
  pointer_id == other.pointer_id
330
345
  end
346
+
347
+ ###
348
+ # Serialize Node using +encoding+ and +save_options+. Save options
349
+ # can also be set using a block. See SaveOptions.
350
+ #
351
+ # These two statements are equivalent:
352
+ #
353
+ # node.serialize('UTF-8', FORMAT | AS_XML)
354
+ #
355
+ # or
356
+ #
357
+ # node.serialize('UTF-8') do |config|
358
+ # config.format.as_xml
359
+ # end
360
+ #
361
+ def serialize encoding = nil, save_options = SaveOptions::FORMAT, &block
362
+ io = StringIO.new
363
+ write_to io, encoding, save_options, &block
364
+ io.rewind
365
+ io.read
366
+ end
367
+
368
+ ###
369
+ # Serialize this Node to HTML using +encoding+
370
+ def to_html encoding = nil
371
+ # FIXME: this is a hack around broken libxml versions
372
+ return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
373
+
374
+ serialize(encoding, SaveOptions::FORMAT |
375
+ SaveOptions::NO_DECLARATION |
376
+ SaveOptions::NO_EMPTY_TAGS |
377
+ SaveOptions::AS_HTML)
378
+ end
379
+
380
+ ###
381
+ # Serialize this Node to XML using +encoding+
382
+ def to_xml encoding = nil
383
+ serialize(encoding, SaveOptions::FORMAT | SaveOptions::AS_XML)
384
+ end
385
+
386
+ ###
387
+ # Serialize this Node to XHTML using +encoding+
388
+ def to_xhtml encoding = nil
389
+ # FIXME: this is a hack around broken libxml versions
390
+ return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
391
+
392
+ serialize(encoding, SaveOptions::FORMAT |
393
+ SaveOptions::NO_DECLARATION |
394
+ SaveOptions::NO_EMPTY_TAGS |
395
+ SaveOptions::AS_XHTML)
396
+ end
397
+
398
+ ###
399
+ # Write Node to +io+ with +encoding+ and +save_options+
400
+ def write_to io, encoding = nil, save_options = SaveOptions::FORMAT
401
+ config = SaveOptions.new(save_options)
402
+ yield config if block_given?
403
+
404
+ native_write_to(io, encoding, config.options)
405
+ end
406
+
407
+ ###
408
+ # Write Node as HTML to +io+ with +encoding+
409
+ def write_html_to io, encoding = nil
410
+ write_to io, encoding, SaveOptions::FORMAT |
411
+ SaveOptions::NO_DECLARATION |
412
+ SaveOptions::NO_EMPTY_TAGS |
413
+ SaveOptions::AS_HTML
414
+ end
415
+
416
+ ###
417
+ # Write Node as XHTML to +io+ with +encoding+
418
+ def write_xhtml_to io, encoding = nil
419
+ write_to io, encoding, SaveOptions::FORMAT |
420
+ SaveOptions::NO_DECLARATION |
421
+ SaveOptions::NO_EMPTY_TAGS |
422
+ SaveOptions::AS_XHTML
423
+ end
424
+
425
+ ###
426
+ # Write Node as XML to +io+ with +encoding+
427
+ def write_xml_to io, encoding = nil
428
+ write_to io, encoding, SaveOptions::FORMAT | SaveOptions::AS_XML
429
+ end
430
+
431
+ def self.new_from_str string
432
+ $stderr.puts("This method is deprecated and will be removed in 1.2.0 or by March 1, 2009. Instead, use Nokogiri::HTML.fragment()")
433
+ Nokogiri::HTML.fragment(string).first
434
+ end
331
435
  end
332
436
  end
333
437
  end
@@ -0,0 +1,32 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Node
4
+ ###
5
+ # Save options for serializing nodes
6
+ class SaveOptions
7
+ FORMAT = 1 # Format serialized xml
8
+ NO_DECLARATION = 2 # Do not include declarations
9
+ NO_EMPTY_TAGS = 4
10
+ NO_XHTML = 8
11
+ AS_XHTML = 16
12
+ AS_XML = 32
13
+ AS_HTML = 64
14
+
15
+ attr_reader :options
16
+ def initialize options = 0; @options = options; end
17
+ constants.each do |constant|
18
+ class_eval %{
19
+ def #{constant.downcase}
20
+ @options |= #{constant}
21
+ self
22
+ end
23
+
24
+ def #{constant.downcase}?
25
+ #{constant} & @options == #{constant}
26
+ end
27
+ }
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end