nokogiri 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (62)
  1. data/CHANGELOG.ja.rdoc +25 -0
  2. data/CHANGELOG.rdoc +23 -0
  3. data/Manifest.txt +5 -0
  4. data/README.ja.rdoc +5 -5
  5. data/README.rdoc +3 -3
  6. data/Rakefile +27 -23
  7. data/ext/nokogiri/extconf.rb +54 -12
  8. data/ext/nokogiri/xml_document.c +4 -1
  9. data/ext/nokogiri/xml_document.h +2 -0
  10. data/ext/nokogiri/xml_dtd.c +29 -0
  11. data/ext/nokogiri/xml_node.c +9 -1
  12. data/ext/nokogiri/xml_node_set.c +5 -1
  13. data/ext/nokogiri/xml_relax_ng.c +50 -3
  14. data/ext/nokogiri/xml_sax_parser.c +84 -77
  15. data/ext/nokogiri/xml_schema.c +52 -3
  16. data/ext/nokogiri/xml_syntax_error.c +7 -0
  17. data/ext/nokogiri/xml_syntax_error.h +1 -0
  18. data/lib/nokogiri.rb +2 -2
  19. data/lib/nokogiri/css/parser.rb +2 -2
  20. data/lib/nokogiri/ffi/io_callbacks.rb +20 -12
  21. data/lib/nokogiri/ffi/libxml.rb +8 -0
  22. data/lib/nokogiri/ffi/xml/document.rb +1 -1
  23. data/lib/nokogiri/ffi/xml/dtd.rb +22 -6
  24. data/lib/nokogiri/ffi/xml/namespace.rb +9 -7
  25. data/lib/nokogiri/ffi/xml/node.rb +4 -0
  26. data/lib/nokogiri/ffi/xml/node_set.rb +4 -1
  27. data/lib/nokogiri/ffi/xml/relax_ng.rb +35 -3
  28. data/lib/nokogiri/ffi/xml/sax/parser.rb +20 -19
  29. data/lib/nokogiri/ffi/xml/schema.rb +41 -4
  30. data/lib/nokogiri/html.rb +2 -2
  31. data/lib/nokogiri/html/document.rb +3 -3
  32. data/lib/nokogiri/version.rb +2 -2
  33. data/lib/nokogiri/xml.rb +3 -3
  34. data/lib/nokogiri/xml/document.rb +14 -4
  35. data/lib/nokogiri/xml/fragment_handler.rb +8 -0
  36. data/lib/nokogiri/xml/node.rb +1 -104
  37. data/lib/nokogiri/xml/node_set.rb +46 -6
  38. data/lib/nokogiri/xml/parse_options.rb +7 -2
  39. data/lib/nokogiri/xml/relax_ng.rb +2 -2
  40. data/lib/nokogiri/xml/sax.rb +1 -0
  41. data/lib/nokogiri/xml/sax/document.rb +4 -4
  42. data/lib/nokogiri/xml/sax/legacy_handlers.rb +65 -0
  43. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  44. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  45. data/lib/nokogiri/xml/schema.rb +1 -5
  46. data/lib/xsd/xmlparser/nokogiri.rb +14 -7
  47. data/tasks/test.rb +1 -62
  48. data/test/files/bar/bar.xsd +4 -0
  49. data/test/files/foo/foo.xsd +4 -0
  50. data/test/files/snuggles.xml +3 -0
  51. data/test/files/valid_bar.xml +2 -0
  52. data/test/helper.rb +9 -8
  53. data/test/html/test_document_fragment.rb +14 -0
  54. data/test/test_reader.rb +10 -10
  55. data/test/xml/sax/test_parser.rb +77 -0
  56. data/test/xml/sax/test_push_parser.rb +11 -7
  57. data/test/xml/test_document.rb +25 -0
  58. data/test/xml/test_dtd.rb +6 -1
  59. data/test/xml/test_node.rb +7 -0
  60. data/test/xml/test_node_set.rb +19 -0
  61. data/test/xml/test_schema.rb +24 -0
  62. metadata +10 -5
@@ -1,12 +1,12 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.3.1'
3
+ VERSION = '1.3.2'
4
4
 
5
5
  # More complete version information about libxml
6
6
  VERSION_INFO = {}
7
7
  VERSION_INFO['warnings'] = []
8
8
  VERSION_INFO['nokogiri'] = VERSION
9
- if defined?(LIBXML_VERSION) && ! defined?(FFI)
9
+ if defined?(LIBXML_VERSION)
10
10
  VERSION_INFO['libxml'] = {}
11
11
  VERSION_INFO['libxml']['binding'] = 'extension'
12
12
  VERSION_INFO['libxml']['compiled'] = LIBXML_VERSION
@@ -24,7 +24,7 @@ module Nokogiri
24
24
  class << self
25
25
  ###
26
26
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
27
- def XML thing, url = nil, encoding = nil, options = 1, &block
27
+ def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block
28
28
  Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
29
29
  end
30
30
  end
@@ -34,7 +34,7 @@ module Nokogiri
34
34
  ###
35
35
  # Parse an XML document using the Nokogiri::XML::Reader API. See
36
36
  # Nokogiri::XML::Reader for mor information
37
- def Reader string_or_io, url = nil, encoding = nil, options = 0
37
+ def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
38
38
 
39
39
  options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
40
40
  # Give the options to the user
@@ -48,7 +48,7 @@ module Nokogiri
48
48
 
49
49
  ###
50
50
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
51
- def parse thing, url = nil, encoding = nil, options = 1, &block
51
+ def parse thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
52
52
  Document.parse(thing, url, encoding, options, &block)
53
53
  end
54
54
 
@@ -31,6 +31,14 @@ module Nokogiri
31
31
  @decorators[key] ||= []
32
32
  end
33
33
 
34
+ ###
35
+ # Validate this Document against it's DTD. Returns a list of errors on
36
+ # the document or +nil+ when there is no DTD.
37
+ def validate
38
+ return nil unless internal_subset
39
+ internal_subset.validate self
40
+ end
41
+
34
42
  ###
35
43
  # Explore a document with shortcut methods.
36
44
  def slop!
@@ -54,6 +62,7 @@ module Nokogiri
54
62
 
55
63
  alias :to_xml :serialize
56
64
  alias :inner_html :serialize
65
+ alias :clone :dup
57
66
 
58
67
  # Get the hash of namespaces on the root Nokogiri::XML::Node
59
68
  def namespaces
@@ -66,7 +75,8 @@ module Nokogiri
66
75
  DocumentFragment.new(self, tags)
67
76
  end
68
77
 
69
- undef_method :swap, :parent, :namespace
78
+ undef_method :swap, :parent, :namespace, :default_namespace=
79
+ undef_method :add_namespace_definition
70
80
 
71
81
  class << self
72
82
  ###
@@ -75,9 +85,9 @@ module Nokogiri
75
85
  # +url+ is resource where this document is located. +encoding+ is the
76
86
  # encoding that should be used when processing the document. +options+
77
87
  # is a number that sets options in the parser, such as
78
- # Nokogiri::XML::PARSE_RECOVER. See the constants in
79
- # Nokogiri::XML.
80
- def parse string_or_io, url = nil, encoding = nil, options = 2145, &block
88
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
89
+ # Nokogiri::XML::ParseOptions.
90
+ def parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
81
91
 
82
92
  options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
83
93
  # Give the options to the user
@@ -46,6 +46,14 @@ module Nokogiri
46
46
  @stack.last << Nokogiri::XML::Text.new(string, @document)
47
47
  end
48
48
 
49
+ def comment string
50
+ @stack.last << Nokogiri::XML::Comment.new(@document, string)
51
+ end
52
+
53
+ def cdata_block string
54
+ @stack.last << Nokogiri::XML::CDATA.new(@document, string)
55
+ end
56
+
49
57
  def end_element name
50
58
  return unless @stack.last.name == name
51
59
  @stack.pop
@@ -462,7 +462,7 @@ module Nokogiri
462
462
 
463
463
  ####
464
464
  # replace this Node with the +new_node+ in the Document.
465
- def replace(new_node)
465
+ def replace new_node
466
466
  if new_node.is_a?(Document) || !new_node.is_a?(XML::Node)
467
467
  raise ArgumentError, <<-EOERR
468
468
  Node.replace requires a Node argument, and cannot accept a Document.
@@ -495,18 +495,6 @@ Node.replace requires a Node argument, and cannot accept a Document.
495
495
  # end
496
496
  #
497
497
  def serialize *args, &block
498
- if args.first && !args.first.is_a?(Hash)
499
- $stderr.puts(<<-eowarn)
500
- #{self.class}#serialize(encoding, save_opts) is deprecated and will be removed in
501
- Nokogiri version 1.4.0 *or* after June 1 2009.
502
- You called serialize from here:
503
-
504
- #{caller.first}
505
-
506
- Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)
507
- eowarn
508
- end
509
-
510
498
  options = args.first.is_a?(Hash) ? args.shift : {
511
499
  :encoding => args[0],
512
500
  :save_with => args[1] || SaveOptions::FORMAT
@@ -526,19 +514,6 @@ Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)
526
514
  # See Node#write_to for a list of +options+. For formatted output,
527
515
  # use Node#to_xhtml instead.
528
516
  def to_html options = {}
529
- if options.is_a?(String)
530
- $stderr.puts(<<-eowarn)
531
- Node#to_html(encoding) is deprecated and will be removed in
532
- Nokogiri version 1.4.0 *or* after June 1 2009.
533
- You called to_html from here:
534
-
535
- #{caller.first}
536
-
537
- Please change to Node#to_html(:encoding => #{options})
538
- eowarn
539
- options = { :encoding => options }
540
- end
541
-
542
517
  # FIXME: this is a hack around broken libxml versions
543
518
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
544
519
 
@@ -559,21 +534,6 @@ Please change to Node#to_html(:encoding => #{options})
559
534
  def to_xml options = {}
560
535
  encoding = nil
561
536
 
562
- # FIXME add a deprecation warning
563
- if options.is_a? String
564
- $stderr.puts(<<-eowarn)
565
- Node#to_xml(encoding) is deprecated and will be removed in
566
- Nokogiri version 1.4.0 *or* after June 1 2009.
567
- You called to_xml from here:
568
-
569
- #{caller.first}
570
-
571
- Please change to Node#to_xml(:encoding => #{options})
572
- eowarn
573
- options = {
574
- :encoding => options
575
- }
576
- end
577
537
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
578
538
 
579
539
  serialize(options)
@@ -586,19 +546,6 @@ Please change to Node#to_xml(:encoding => #{options})
586
546
  #
587
547
  # See Node#write_to for a list of +options+
588
548
  def to_xhtml options = {}
589
- if options.is_a?(String)
590
- options = { :encoding => options }
591
- $stderr.puts(<<-eowarn)
592
- Node#to_xml(encoding) is deprecated and will be removed in
593
- Nokogiri version 1.4.0 *or* after June 1 2009.
594
- You called to_xhtml from here:
595
-
596
- #{caller.first}
597
-
598
- Please change to Node#to_xhtml(:encoding => #{options})
599
- eowarn
600
- end
601
-
602
549
  # FIXME: this is a hack around broken libxml versions
603
550
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
604
551
 
@@ -628,18 +575,6 @@ Please change to Node#to_xhtml(:encoding => #{options})
628
575
  # node.write_to(io, :indent_text => '-', :indent => 2
629
576
  #
630
577
  def write_to io, *options
631
- if options.length > 0 && !options.first.is_a?(Hash)
632
- $stderr.puts(<<-eowarn)
633
- Node#write_to(io, encoding, save_options) is deprecated and will be removed in
634
- Nokogiri version 1.4.0 *or* after June 1 2009.
635
- You called write_to from here:
636
-
637
- #{caller.first}
638
-
639
- Please change to: Node#write_to(io, :encoding => e, :save_options => opts)
640
- eowarn
641
- end
642
-
643
578
  options = options.first.is_a?(Hash) ? options.shift : {}
644
579
  encoding = options[:encoding] || options[0]
645
580
  save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
@@ -658,19 +593,6 @@ Please change to: Node#write_to(io, :encoding => e, :save_options => opts)
658
593
  #
659
594
  # See Node#write_to for a list of +options+
660
595
  def write_html_to io, options = {}
661
- if options.is_a?(String)
662
- $stderr.puts(<<-eowarn)
663
- Node#write_html_to(io, encoding) is deprecated and will be removed in
664
- Nokogiri version 1.4.0 *or* after June 1 2009.
665
- You called write_html_to from here:
666
-
667
- #{caller.first}
668
-
669
- Please change to Node#write_html_to(io, :encoding => #{options})
670
- eowarn
671
- options = { :encoding => options }
672
- end
673
-
674
596
  # FIXME: this is a hack around broken libxml versions
675
597
  return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
676
598
 
@@ -686,19 +608,6 @@ Please change to Node#write_html_to(io, :encoding => #{options})
686
608
  #
687
609
  # See Node#write_to for a list of +options+
688
610
  def write_xhtml_to io, options = {}
689
- if options.is_a?(String)
690
- $stderr.puts(<<-eowarn)
691
- Node#write_xhtml_to(io, encoding) is deprecated and will be removed in
692
- Nokogiri version 1.4.0 *or* after June 1 2009.
693
- You called write_xhtml_to from here:
694
-
695
- #{caller.first}
696
-
697
- Please change to Node#write_xhtml_to(io, :encoding => #{options})
698
- eowarn
699
- options = { :encoding => options }
700
- end
701
-
702
611
  # FIXME: this is a hack around broken libxml versions
703
612
  return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
704
613
 
@@ -716,18 +625,6 @@ Please change to Node#write_xhtml_to(io, :encoding => #{options})
716
625
  #
717
626
  # See Node#write_to for a list of options
718
627
  def write_xml_to io, options = {}
719
- if options.is_a?(String)
720
- $stderr.puts(<<-eowarn)
721
- Node#write_xml_to(io, encoding) is deprecated and will be removed in
722
- Nokogiri version 1.4.0 *or* after June 1 2009.
723
- You called write_xml_to from here:
724
-
725
- #{caller.first}
726
-
727
- Please change to Node#write_xml_to(io, :encoding => #{options})
728
- eowarn
729
- options = { :encoding => options }
730
- end
731
628
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
732
629
  write_to io, options
733
630
  end
@@ -68,19 +68,59 @@ module Nokogiri
68
68
  # For more information see Nokogiri::XML::Node#css and
69
69
  # Nokogiri::XML::Node#xpath
70
70
  def search *paths
71
- ns = paths.last.is_a?(Hash) ? paths.pop : document.root.namespaces
71
+ ns = paths.last.is_a?(Hash) ? paths.pop :
72
+ (document.root ? document.root.namespaces : {})
73
+
72
74
  sub_set = NodeSet.new(document)
73
- document.decorate(sub_set)
74
75
  each do |node|
75
- node.search(*(paths + [ns])).each do |sub_node|
76
- sub_set << sub_node
76
+ paths.each do |path|
77
+ sub_set +=
78
+ send(path =~ /^(\.\/|\/)/ ? :xpath : :css, *(paths + [ns]))
77
79
  end
78
80
  end
81
+ document.decorate(sub_set)
79
82
  sub_set
80
83
  end
81
84
  alias :/ :search
82
- alias :xpath :search
83
- alias :css :search
85
+
86
+ ###
87
+ # Search this NodeSet for css +paths+
88
+ #
89
+ # For more information see Nokogiri::XML::Node#css
90
+ def css *paths
91
+ ns = paths.last.is_a?(Hash) ? paths.pop :
92
+ (document.root ? document.root.namespaces : {})
93
+
94
+ sub_set = NodeSet.new(document)
95
+
96
+ xpaths = paths.map { |rule|
97
+ [
98
+ CSS.xpath_for(rule.to_s, :prefix => ".//", :ns => ns),
99
+ CSS.xpath_for(rule.to_s, :prefix => "self::", :ns => ns)
100
+ ].join(' | ')
101
+ }
102
+ each do |node|
103
+ sub_set += node.xpath(*(xpaths + [ns]))
104
+ end
105
+ document.decorate(sub_set)
106
+ sub_set
107
+ end
108
+
109
+ ###
110
+ # Search this NodeSet for XPath +paths+
111
+ #
112
+ # For more information see Nokogiri::XML::Node#xpath
113
+ def xpath *paths
114
+ ns = paths.last.is_a?(Hash) ? paths.pop :
115
+ (document.root ? document.root.namespaces : {})
116
+
117
+ sub_set = NodeSet.new(document)
118
+ each do |node|
119
+ sub_set += node.xpath(*(paths + [ns]))
120
+ end
121
+ document.decorate(sub_set)
122
+ sub_set
123
+ end
84
124
 
85
125
  ###
86
126
  # If path is a string, search this document for +path+ returning the
@@ -38,13 +38,18 @@ module Nokogiri
38
38
  # do not generate XINCLUDE START/END nodes
39
39
  NOXINCNODE = 1 << 15
40
40
 
41
+ # the default options used for parsing XML documents
42
+ DEFAULT_XML = RECOVER
43
+ # the default options used for parsing HTML documents
44
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
45
+
41
46
  attr_accessor :options
42
- def initialize options = 0
47
+ def initialize options = STRICT
43
48
  @options = options
44
49
  end
45
50
 
46
51
  constants.each do |constant|
47
- next if constant == 'STRICT'
52
+ next if constant.to_sym == :STRICT
48
53
  class_eval %{
49
54
  def #{constant.downcase}
50
55
  @options |= #{constant}
@@ -18,8 +18,8 @@ module Nokogiri
18
18
  # Validate an XML document against a RelaxNG schema. Loop over the errors
19
19
  # that are returned and print them out:
20
20
  #
21
- # schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
22
- # doc = Nokogiri::XML(File.read(ADDRESS_XML_FILE))
21
+ # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
22
+ # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
23
23
  #
24
24
  # schema.validate(doc).each do |error|
25
25
  # puts error.message
@@ -1,3 +1,4 @@
1
1
  require 'nokogiri/xml/sax/document'
2
+ require 'nokogiri/xml/sax/legacy_handlers'
2
3
  require 'nokogiri/xml/sax/parser'
3
4
  require 'nokogiri/xml/sax/push_parser'
@@ -93,11 +93,11 @@ module Nokogiri
93
93
  ###
94
94
  # Called at the beginning of an element
95
95
  # +name+ is the element name
96
- # +attrs+ is a hash of attributes
96
+ # +attrs+ is a list of attributes
97
97
  # +prefix+ is the namespace prefix for the element
98
98
  # +uri+ is the associated namespace URI
99
- # +namespaces+ is a hash of namespace prefix:urls associated with the element
100
- def start_element_ns(name, attrs = {}, prefix = nil, uri = nil, namespaces = {})
99
+ # +ns+ is a hash of namespace prefix:urls associated with the element
100
+ def start_element_namespace name, attrs = {}, prefix = nil, uri = nil, ns = {}
101
101
  end
102
102
 
103
103
  ###
@@ -105,7 +105,7 @@ module Nokogiri
105
105
  # +name+ is the element's name
106
106
  # +prefix+ is the namespace prefix associated with the element
107
107
  # +uri+ is the associated namespace URI
108
- def end_element_ns(name, prefix = nil, uri = nil)
108
+ def end_element_namespace name, prefix = nil, uri = nil
109
109
  end
110
110
 
111
111
  ###
@@ -0,0 +1,65 @@
1
+ module Nokogiri
2
+ module XML
3
+ module SAX
4
+ # :stopdoc:
5
+ module LegacyHandlers
6
+ def start_element_namespace name,
7
+ attrs = [],
8
+ prefix = nil,
9
+ uri = nil,
10
+ ns = []
11
+
12
+ ##
13
+ # Deal with legacy interface
14
+ if @document.respond_to? :start_element_ns
15
+ unless @warned
16
+ warn <<-eowarn
17
+ Nokogiri::XML::SAX::Document#start_element_ns and end_element_ns are deprecated,
18
+ please change to start_element_namespace. start_element_ns will be removed by
19
+ version 1.4.0 or by August 1st, whichever comes first.
20
+ eowarn
21
+ @warned = true
22
+ end
23
+ attr_hash = {}
24
+ attrs.each do |attr|
25
+ attr_hash[attr.localname] = attr.value
26
+ end
27
+ ns_hash = Hash[*ns.flatten]
28
+ @document.start_element_ns name, attr_hash, prefix, uri, ns_hash
29
+ end
30
+
31
+ ###
32
+ # Deal with SAX v1 interface
33
+ name = [prefix, name].compact.join(':')
34
+ attributes = ns.map { |ns_prefix,ns_uri|
35
+ [['xmlns', ns_prefix].compact.join(':'), ns_uri]
36
+ } + attrs.map { |attr|
37
+ [[attr.prefix, attr.localname].compact.join(':'), attr.value]
38
+ }.flatten
39
+ @document.start_element name, attributes
40
+ end
41
+
42
+ def end_element_namespace name, prefix = nil, uri = nil
43
+ ##
44
+ # Deal with legacy interface
45
+ if @document.respond_to? :end_element_ns
46
+ unless @warned
47
+ warn <<-eowarn
48
+ Nokogiri::XML::SAX::Document#start_element_ns and end_element_ns are deprecated,
49
+ please change to start_element_namespace. start_element_ns will be removed by
50
+ version 1.4.0 or by August 1st, whichever comes first.
51
+ eowarn
52
+ @warned = true
53
+ end
54
+ @document.end_element_ns name, prefix, uri
55
+ end
56
+
57
+ ###
58
+ # Deal with SAX v1 interface
59
+ @document.end_element [prefix, name].compact.join(':')
60
+ end
61
+ end
62
+ # :startdoc:
63
+ end
64
+ end
65
+ end