nokogiri 1.0.5-x86-mswin32-60 → 1.0.6-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

data/History.ja.txt ADDED
@@ -0,0 +1,56 @@
1
+ === 1.0.6
2
+
3
+ * 5つの修正
4
+
5
+ * XPath のパーサーが SyntaxError を生じさせ解析停止させる
6
+ * CSS のパーサーが SyntaxError を生じさせ解析停止させる
7
+ * filter() と not() hpricot の互換性を追加
8
+ * CSS が Node#search 経由で検索し、常時対応する事が出来るようになった
9
+ * CSS より XPath 変換がキャッシュに入れられるようになった
10
+
11
+ === 1.0.5
12
+
13
+ * バグフィックス
14
+
15
+ * メーリングリストを作成
16
+ * バグファイルを作成
17
+ * Windows 内で ENV['PATH'] が存在しない場合でも、存在出来るように設定完了
18
+ * Document 内の NodeSet#[] の結果をキャッシュする
19
+
20
+ === 1.0.4
21
+
22
+ * バグフィックス
23
+
24
+ * 弱参照からドキュメント参照へのメモリー管理の変換
25
+ * メモリリークを修正
26
+ * ビルダーブロックが取り囲んでいるコンテキストから
27
+ メソッドの呼び出しをする事が出来る
28
+
29
+ === 1.0.3
30
+
31
+ * 5つのバグ修正
32
+
33
+ * NodeSet が to_ary へ実行
34
+ * XML::Document#parent を除去
35
+ * GCバグ修正済み (Mike は最高!)
36
+ * 1.8.5互換性の為の RARRAY_LEN 除去
37
+ * inner_html 修正済み (Yahuda に感謝)
38
+
39
+ === 1.0.2
40
+
41
+ * 1つのバグ修正
42
+
43
+ * extconf.rb は frex や racc を調べないはず
44
+
45
+ === 1.0.1
46
+
47
+ * 1つのバグ修正
48
+
49
+ * extconf.rb が libdir や prefix を検索しない事を確認済み
50
+ それによって、ports libxml/ruby が正しくリンクする (lucsky に感謝!)
51
+
52
+ === 1.0.0 / 2008-07-13
53
+
54
+ * 1つの偉大な増進
55
+
56
+ * ご誕生である
data/History.txt CHANGED
@@ -1,3 +1,13 @@
1
+ === 1.0.6
2
+
3
+ * 5 Bugfixes
4
+
5
+ * XPath Parser raises a SyntaxError on parse failure
6
+ * CSS Parser raises a SyntaxError on parse failure
7
+ * filter() and not() hpricot compatibility added
8
+ * CSS searches via Node#search are now always relative
9
+ * CSS to XPath conversion is now cached
10
+
1
11
  === 1.0.5
2
12
 
3
13
  * Bugfixes
data/Manifest.txt CHANGED
@@ -1,3 +1,4 @@
1
+ History.ja.txt
1
2
  History.txt
2
3
  Manifest.txt
3
4
  README.ja.txt
@@ -16,6 +17,8 @@ ext/nokogiri/xml_document.c
16
17
  ext/nokogiri/xml_document.h
17
18
  ext/nokogiri/xml_dtd.c
18
19
  ext/nokogiri/xml_dtd.h
20
+ ext/nokogiri/xml_io.c
21
+ ext/nokogiri/xml_io.h
19
22
  ext/nokogiri/xml_node.c
20
23
  ext/nokogiri/xml_node.h
21
24
  ext/nokogiri/xml_node_set.c
@@ -41,6 +44,7 @@ lib/nokogiri/css/generated_tokenizer.rb
41
44
  lib/nokogiri/css/node.rb
42
45
  lib/nokogiri/css/parser.rb
43
46
  lib/nokogiri/css/parser.y
47
+ lib/nokogiri/css/syntax_error.rb
44
48
  lib/nokogiri/css/tokenizer.rb
45
49
  lib/nokogiri/css/tokenizer.rex
46
50
  lib/nokogiri/css/xpath_visitor.rb
@@ -74,6 +78,7 @@ lib/nokogiri/xml/sax/parser.rb
74
78
  lib/nokogiri/xml/syntax_error.rb
75
79
  lib/nokogiri/xml/text.rb
76
80
  lib/nokogiri/xml/xpath.rb
81
+ lib/nokogiri/xml/xpath/syntax_error.rb
77
82
  lib/nokogiri/xml/xpath_context.rb
78
83
  lib/nokogiri/xslt.rb
79
84
  lib/nokogiri/xslt/stylesheet.rb
@@ -107,6 +112,7 @@ test/html/sax/test_parser.rb
107
112
  test/html/test_builder.rb
108
113
  test/html/test_document.rb
109
114
  test/test_convert_xpath.rb
115
+ test/test_css_cache.rb
110
116
  test/test_gc.rb
111
117
  test/test_nokogiri.rb
112
118
  test/test_reader.rb
data/README.ja.txt CHANGED
@@ -18,13 +18,23 @@ Nokogiri はHTMLやXMLやSAXやXSLTやReaderのパーサーです。
18
18
 
19
19
  検索出来たり、正確にCSS3とXPathをサポート出来たりする。
20
20
 
21
- これはスピッドテストです:
21
+ これはスピードテストです:
22
22
 
23
23
  * http://gist.github.com/22176
24
24
 
25
25
  NokogiriはHpricotの代わりに使用出来る。
26
26
  その互換性は簡単に正しいCSSとXPathを使用する事が出来る。
27
27
 
28
+ == SUPPORT:
29
+
30
+ ノコギリのメーリングリストは:
31
+
32
+ * http://rubyforge.org/mailman/listinfo/nokogiri-talk
33
+
34
+ バグファイルは:
35
+
36
+ * http://nokogiri.lighthouseapp.com/projects/19607-nokogiri/overview
37
+
28
38
  == SYNOPSIS:
29
39
 
30
40
  require 'nokogiri'
@@ -55,6 +65,7 @@ NokogiriはHpricotの代わりに使用出来る。
55
65
 
56
66
  * ruby 1.8 or 1.9
57
67
  * libxml
68
+ * libxslt
58
69
 
59
70
  == INSTALL:
60
71
 
data/README.txt CHANGED
@@ -67,6 +67,7 @@ The bug tracker is available here:
67
67
 
68
68
  * ruby 1.8 or 1.9
69
69
  * libxml
70
+ * libxslt
70
71
 
71
72
  == INSTALL:
72
73
 
data/Rakefile CHANGED
@@ -289,10 +289,9 @@ end
289
289
 
290
290
  # Evil evil hack. Do not run tests when gem installs
291
291
  if ENV['RUBYARCHDIR']
292
- class << Rake::Task[:default]
293
- attr_writer :prerequisites
294
- end
295
- Rake::Task[:default].prerequisites = [:build]
292
+ prereqs = Rake::Task[:default].prerequisites
293
+ prereqs.clear
294
+ prereqs << :build
296
295
  end
297
296
 
298
297
  # vim: syntax=Ruby
@@ -11,6 +11,7 @@
11
11
  #include <libxml/HTMLparser.h>
12
12
  #include <libxml/HTMLtree.h>
13
13
 
14
+ #include <xml_io.h>
14
15
  #include <xml_document.h>
15
16
  #include <html_document.h>
16
17
  #include <xml_node.h>
Binary file
@@ -62,6 +62,47 @@ static VALUE root(VALUE self)
62
62
  return Nokogiri_wrap_xml_node(root) ;
63
63
  }
64
64
 
65
+ /*
66
+ * call-seq:
67
+ * read_io(io, url, encoding, options)
68
+ *
69
+ * Create a new document from an IO object
70
+ */
71
+ static VALUE read_io( VALUE klass,
72
+ VALUE io,
73
+ VALUE url,
74
+ VALUE encoding,
75
+ VALUE options )
76
+ {
77
+ const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
78
+ const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
79
+
80
+ xmlInitParser();
81
+
82
+ xmlDocPtr doc = xmlReadIO(
83
+ (xmlInputReadCallback)io_read_callback,
84
+ (xmlInputCloseCallback)io_close_callback,
85
+ (void *)io,
86
+ c_url,
87
+ c_enc,
88
+ NUM2INT(options)
89
+ );
90
+
91
+ if(doc == NULL) {
92
+ xmlFreeDoc(doc);
93
+ rb_raise(rb_eRuntimeError, "Couldn't create a document");
94
+ return Qnil;
95
+ }
96
+
97
+ return Nokogiri_wrap_xml_document(klass, doc);
98
+ }
99
+
100
+ /*
101
+ * call-seq:
102
+ * read_memory(string, url, encoding, options)
103
+ *
104
+ * Create a new document from a String
105
+ */
65
106
  static VALUE read_memory( VALUE klass,
66
107
  VALUE string,
67
108
  VALUE url,
@@ -85,6 +126,12 @@ static VALUE read_memory( VALUE klass,
85
126
  return Nokogiri_wrap_xml_document(klass, doc);
86
127
  }
87
128
 
129
+ /*
130
+ * call-seq:
131
+ * new
132
+ *
133
+ * Create a new document
134
+ */
88
135
  static VALUE new(int argc, VALUE *argv, VALUE klass)
89
136
  {
90
137
  VALUE version;
@@ -97,7 +144,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
97
144
 
98
145
  /*
99
146
  * call-seq:
100
- * substitute_entities_set bool)
147
+ * substitute_entities=(boolean)
101
148
  *
102
149
  * Set the global XML default for substitute entities.
103
150
  */
@@ -109,7 +156,7 @@ static VALUE substitute_entities_set(VALUE klass, VALUE value)
109
156
 
110
157
  /*
111
158
  * call-seq:
112
- * substitute_entities_set bool)
159
+ * load_external_subsets=(boolean)
113
160
  *
114
161
  * Set the global XML default for load external subsets.
115
162
  */
@@ -122,9 +169,19 @@ static VALUE load_external_subsets_set(VALUE klass, VALUE value)
122
169
  VALUE cNokogiriXmlDocument ;
123
170
  void init_xml_document()
124
171
  {
125
- VALUE klass = cNokogiriXmlDocument = rb_const_get(mNokogiriXml, rb_intern("Document"));
172
+ VALUE nokogiri = rb_define_module("Nokogiri");
173
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
174
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
175
+
176
+ /*
177
+ * Nokogiri::XML::Document wraps an xml document.
178
+ */
179
+ VALUE klass = rb_define_class_under(xml, "Document", node);
180
+
181
+ cNokogiriXmlDocument = klass;
126
182
 
127
183
  rb_define_singleton_method(klass, "read_memory", read_memory, 4);
184
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
128
185
  rb_define_singleton_method(klass, "new", new, -1);
129
186
  rb_define_singleton_method(klass, "substitute_entities=", substitute_entities_set, 1);
130
187
  rb_define_singleton_method(klass, "load_external_subsets=", load_external_subsets_set, 1);
@@ -108,6 +108,10 @@ void init_xml_dtd()
108
108
  {
109
109
  VALUE nokogiri = rb_define_module("Nokogiri");
110
110
  VALUE xml = rb_define_module_under(nokogiri, "XML");
111
+
112
+ /*
113
+ * Nokogiri::XML::DTD wraps DTD nodes in an XML document
114
+ */
111
115
  VALUE klass = rb_define_class_under(xml, "DTD", cNokogiriXmlNode);
112
116
 
113
117
  rb_define_method(klass, "notations", notations, 0);
@@ -0,0 +1,17 @@
1
+ #include <xml_io.h>
2
+
3
+ int io_read_callback(void * ctx, char * buffer, int len) {
4
+ VALUE io = (VALUE)ctx;
5
+ VALUE string = rb_funcall(io, rb_intern("read"), 1, INT2NUM(len));
6
+
7
+ if(Qnil == string) return 0;
8
+ VALUE length = rb_funcall(string, rb_intern("length"), 0);
9
+
10
+ memcpy(buffer, StringValuePtr(string), (unsigned int)NUM2INT(length));
11
+
12
+ return NUM2INT(length);
13
+ }
14
+
15
+ int io_close_callback(void * ctx) {
16
+ return 0;
17
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_IO
2
+ #define NOKOGIRI_XML_IO
3
+
4
+ #include <native.h>
5
+
6
+ int io_read_callback(void * ctx, char * buffer, int len);
7
+ int io_close_callback(void * ctx);
8
+
9
+ #endif
@@ -514,7 +514,17 @@ static VALUE new_from_str(VALUE klass, VALUE xml)
514
514
 
515
515
  VALUE Nokogiri_wrap_xml_node(xmlNodePtr node)
516
516
  {
517
- VALUE rb_node = Qnil;
517
+ assert(node);
518
+ assert(node->doc);
519
+ assert(node->doc->_private);
520
+
521
+ VALUE index = INT2NUM((int)node);
522
+ VALUE document = (VALUE)node->doc->_private;
523
+
524
+ VALUE node_cache = rb_funcall(document, rb_intern("node_cache"), 0);
525
+ VALUE rb_node = rb_hash_aref(node_cache, index);
526
+
527
+ if(rb_node != Qnil) return rb_node;
518
528
 
519
529
  switch(node->type)
520
530
  {
@@ -544,11 +554,8 @@ VALUE Nokogiri_wrap_xml_node(xmlNodePtr node)
544
554
  rb_node = Data_Wrap_Struct(cNokogiriXmlNode, 0, 0, node) ;
545
555
  }
546
556
 
547
- assert(node);
548
- assert(node->doc);
549
- assert(node->doc->_private);
550
-
551
- rb_iv_set(rb_node, "@document",(VALUE)node->doc->_private);
557
+ rb_hash_aset(node_cache, index, rb_node);
558
+ rb_iv_set(rb_node, "@document", document);
552
559
  rb_funcall(rb_node, rb_intern("decorate!"), 0);
553
560
  return rb_node ;
554
561
  }
@@ -52,22 +52,7 @@ static VALUE index_at(VALUE self, VALUE number)
52
52
  if(i < 0)
53
53
  i = i + node_set->nodeNr;
54
54
 
55
- VALUE document = rb_funcall(self, rb_intern("document"), 0);
56
- if(Qnil == document)
57
- rb_raise(rb_eRuntimeError, "You forgot to set a document.");
58
-
59
- VALUE index = INT2NUM((int)node_set->nodeTab[i]);
60
-
61
- VALUE node_cache = rb_funcall(document, rb_intern("node_cache"), 0);
62
-
63
- VALUE node = rb_hash_aref(node_cache, index);
64
-
65
- if(Qnil == node) {
66
- node = Nokogiri_wrap_xml_node(node_set->nodeTab[i]);
67
- rb_hash_aset(node_cache, index, node);
68
- }
69
-
70
- return node;
55
+ return Nokogiri_wrap_xml_node(node_set->nodeTab[i]);
71
56
  }
72
57
 
73
58
  static void deallocate(xmlNodeSetPtr node_set)
@@ -39,7 +39,9 @@ static VALUE evaluate(VALUE self, VALUE search_path)
39
39
  xmlChar* query = (xmlChar *)StringValuePtr(search_path);
40
40
  xmlXPathObjectPtr xpath = xmlXPathEvalExpression(query, ctx);
41
41
  if(xpath == NULL) {
42
- rb_raise(rb_eRuntimeError, "Couldn't evaluate expression '%s'", query);
42
+ VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
43
+ VALUE error = rb_const_get(xpath, rb_intern("SyntaxError"));
44
+ rb_raise(error, "Couldn't evaluate expression '%s'", query);
43
45
  }
44
46
 
45
47
  VALUE xpath_object = Nokogiri_wrap_xml_xpath(xpath);
data/lib/nokogiri/css.rb CHANGED
@@ -1,6 +1,20 @@
1
1
  require 'nokogiri/css/node'
2
2
  require 'nokogiri/css/xpath_visitor'
3
- require 'nokogiri/css/generated_tokenizer'
4
3
  require 'nokogiri/css/generated_parser'
4
+ require 'nokogiri/css/generated_tokenizer'
5
5
  require 'nokogiri/css/tokenizer'
6
6
  require 'nokogiri/css/parser'
7
+ require 'nokogiri/css/syntax_error'
8
+
9
+ module Nokogiri
10
+ module CSS
11
+ class << self
12
+ def parse string
13
+ Parser.new.parse string
14
+ end
15
+ def xpath_for string, options={}
16
+ Parser.new.xpath_for string, options
17
+ end
18
+ end
19
+ end
20
+ end
@@ -6,7 +6,7 @@
6
6
 
7
7
  module Nokogiri
8
8
  module CSS
9
- class GeneratedTokenizer
9
+ class GeneratedTokenizer < GeneratedParser
10
10
  require 'strscan'
11
11
 
12
12
  class ScanError < StandardError ; end
@@ -11,9 +11,11 @@ module Nokogiri
11
11
  visitor.send(:"visit_#{type.to_s.downcase}", self)
12
12
  end
13
13
 
14
- def to_xpath prefix = '//', preprocess = true
15
- self.preprocess! if preprocess
16
- prefix + XPathVisitor.new.accept(self)
14
+ def to_xpath prefix = nil, visitor = nil
15
+ prefix ||= '//'
16
+ visitor ||= XPathVisitor.new
17
+ self.preprocess!
18
+ prefix + visitor.accept(self)
17
19
  end
18
20
 
19
21
  def preprocess!
@@ -1,23 +1,48 @@
1
1
  module Nokogiri
2
2
  module CSS
3
- class Parser < GeneratedParser
3
+ class Parser < GeneratedTokenizer
4
4
  class << self
5
5
  def parse string
6
6
  new.parse(string)
7
7
  end
8
- end
8
+ def xpath_for string, options={}
9
+ new.xpath_for(string, options)
10
+ end
9
11
 
10
- def initialize
11
- @tokenizer = Tokenizer.new
12
+ def set_cache setting
13
+ @cache_on = setting ? true : false
14
+ end
15
+ def cache_on?
16
+ @cache ||= {}
17
+ instance_variable_defined?('@cache_on') ? @cache_on : true
18
+ end
19
+ def check_cache string
20
+ return unless cache_on?
21
+ @cache[string]
22
+ end
23
+ def add_cache string, value
24
+ return value unless cache_on?
25
+ @cache[string] = value
26
+ end
27
+ def clear_cache
28
+ @cache = {}
29
+ end
12
30
  end
31
+ alias :parse :scan_str
32
+
33
+ def xpath_for string, options={}
34
+ v = self.class.check_cache(string)
35
+ return v unless v.nil?
13
36
 
14
- def parse string
15
- @tokenizer.scan string
16
- do_parse
37
+ prefix = options[:prefix] || nil
38
+ visitor = options[:visitor] || nil
39
+ args = [prefix, visitor]
40
+ self.class.add_cache(string, parse(string).map {|ast| ast.to_xpath(prefix, visitor)})
17
41
  end
18
42
 
19
- def next_token
20
- @tokenizer.next_token
43
+ def on_error error_token_id, error_value, value_stack
44
+ after = value_stack.compact.last
45
+ raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
21
46
  end
22
47
  end
23
48
  end
@@ -0,0 +1,6 @@
1
+ module Nokogiri
2
+ module CSS
3
+ class SyntaxError < ::SyntaxError
4
+ end
5
+ end
6
+ end
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  module CSS
3
- class GeneratedTokenizer
3
+ class GeneratedTokenizer < GeneratedParser
4
4
 
5
5
  macro
6
6
  nl \n|\r\n|\r|\f
@@ -8,8 +8,7 @@ module Nokogiri
8
8
  convert_to_xpath(path)
9
9
  }.flatten.uniq
10
10
 
11
- namespaces = document.xml? ? document.namespaces.merge(ns) : ns
12
- super(*converted + [namespaces])
11
+ super(*converted + [ns])
13
12
  end
14
13
  def /(path); search(path) end
15
14
 
@@ -32,16 +31,15 @@ module Nokogiri
32
31
  rule = rule.to_s
33
32
  case rule
34
33
  when %r{^//}
35
- [".#{rule}"]
34
+ [".#{Hpricot::XPathVisitor.xpath_namespace_helper(rule)}"]
36
35
  when %r{^/}
37
- [rule]
36
+ [Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
38
37
  when %r{^.//}
39
- [rule]
38
+ [Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
40
39
  else
41
- ctx = CSS::Parser.parse(rule)
42
40
  visitor = CSS::XPathVisitor.new
43
41
  visitor.extend(Hpricot::XPathVisitor)
44
- ctx.map { |ast| './/' + visitor.accept(ast.preprocess!) }
42
+ CSS.xpath_for(rule, :prefix => ".//", :visitor => visitor)
45
43
  end
46
44
  end
47
45
 
@@ -2,12 +2,52 @@ module Nokogiri
2
2
  module Decorators
3
3
  module Hpricot
4
4
  module NodeSet
5
- def filter rule
5
+
6
+ # Select nodes matching the supplied rule.
7
+ # Note that positional rules (like <tt>:nth()</tt>) aren't currently supported.
8
+ #
9
+ # example:
10
+ # node_set.filter('.ohmy') # selects nodes from the set with class "ohmy"
11
+ # node_set.filter('a#link2') # selects nodes from the set with child node <a id='link2'>
12
+ # node_set.filter('a[@id="link2"]') # selects nodes from the set with child node <a id='link2'>
13
+ def filter(rule)
14
+ filter_transformer( lambda {|j| j}, rule ) # identity transformer
15
+ end
16
+
17
+ # The complement to filter, select nodes <em>not</em> matching the supplied rule.
18
+ # Note that positional rules (like <tt>:nth()</tt>) aren't currently supported.
19
+ #
20
+ # See filter for examples.
21
+ #
22
+ # Also note that you can pass a XML::Node object instead of a
23
+ # rule to remove that object from the node set (if it is
24
+ # present):
25
+ # node_set.not(node_to_exclude) # selects all nodes EXCEPT node_to_exclude
26
+ #
27
+ def not(rule)
28
+ filter_transformer( lambda {|j| !j}, rule ) # negation transformer
29
+ end
30
+
31
+ private
32
+ def filter_transformer(transformer, rule)
33
+ sub_set = XML::NodeSet.new(document)
34
+ document.decorate(sub_set)
35
+
36
+ if rule.is_a?(XML::Node)
37
+ each { |node| sub_set << node if transformer.call(node == rule) }
38
+ return sub_set
39
+ end
40
+
6
41
  ctx = CSS::Parser.parse(rule.to_s)
7
42
  visitor = CSS::XPathVisitor.new
8
43
  visitor.extend(Hpricot::XPathVisitor)
9
- search('.//self::' + visitor.accept(ctx.first))
10
- end
44
+ each do |node|
45
+ if transformer.call(node.at(".//self::" + visitor.accept(ctx.first)))
46
+ sub_set << node
47
+ end
48
+ end
49
+ sub_set
50
+ end
11
51
  end
12
52
  end
13
53
  end
@@ -11,6 +11,17 @@ module Nokogiri
11
11
  end
12
12
  super(node).gsub(/child::text\(\)/, 'normalize-space(child::text())')
13
13
  end
14
+
15
+ # take a path like '//t:sam' and convert to xpath "*[name()='t:sam']"
16
+ def self.xpath_namespace_helper rule
17
+ rule.split(/\//).collect do |tag|
18
+ if match = tag.match(/^(\w+:\w+)(.*)/)
19
+ "*[name()='#{match[1]}']#{match[2]}"
20
+ else
21
+ tag
22
+ end
23
+ end.join("/")
24
+ end
14
25
  end
15
26
  end
16
27
  end
@@ -16,6 +16,11 @@ module Nokogiri
16
16
  add_decorators(doc)
17
17
  end
18
18
 
19
+ def HTML(string)
20
+ doc = Nokogiri::HTML.parse(string)
21
+ add_decorators(doc)
22
+ end
23
+
19
24
  def make string
20
25
  doc = XML::Document.new
21
26
  ns = XML::NodeSet.new(doc)
@@ -40,7 +45,7 @@ module Nokogiri
40
45
  builder = Nokogiri::HTML::Builder.new(&block)
41
46
  Nokogiri::Hpricot.add_decorators(builder.doc)
42
47
  else
43
- doc = Nokogiri::HTML.parse(*args)
48
+ doc = Nokogiri.parse(*args)
44
49
  Nokogiri::Hpricot.add_decorators(doc)
45
50
  end
46
51
  end
@@ -1,3 +1,3 @@
1
1
  module Nokogiri
2
- VERSION = '1.0.5'
2
+ VERSION = '1.0.6'
3
3
  end
data/lib/nokogiri/xml.rb CHANGED
@@ -46,7 +46,7 @@ module Nokogiri
46
46
  def parse string_or_io, url = nil, encoding = nil, options = 2159
47
47
  if string_or_io.respond_to?(:read)
48
48
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
49
- string_or_io = string_or_io.read
49
+ return Document.read_io(string_or_io, url, encoding, options)
50
50
  end
51
51
 
52
52
  # read_memory pukes on empty docs
@@ -45,9 +45,7 @@ module Nokogiri
45
45
  def search *paths
46
46
  ns = paths.last.is_a?(Hash) ? paths.pop : {}
47
47
  xpath(*(paths.map { |path|
48
- path =~ /^(\.\/|\/)/ ? path : CSS::Parser.parse(path).map { |ast|
49
- ast.to_xpath
50
- }
48
+ path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(path, :prefix => ".//")
51
49
  }.flatten.uniq) + [ns])
52
50
  end
53
51
  alias :/ :search
@@ -78,9 +76,7 @@ module Nokogiri
78
76
  end
79
77
 
80
78
  def css *rules
81
- xpath(*(rules.map { |rule|
82
- CSS::Parser.parse(rule).map { |ast| "." + ast.to_xpath }
83
- }.flatten.uniq))
79
+ xpath(*(rules.map { |rule| CSS.xpath_for(rule, :prefix => ".//") }.flatten.uniq))
84
80
  end
85
81
 
86
82
  def at path, ns = {}
@@ -192,11 +188,6 @@ module Nokogiri
192
188
  children.each{|j| j.traverse(&block) }
193
189
  block.call(self)
194
190
  end
195
-
196
- def == other
197
- pointer_id == other.pointer_id
198
- end
199
- alias :eql? :==
200
191
  end
201
192
  end
202
193
  end
@@ -1,3 +1,5 @@
1
+ require 'nokogiri/xml/xpath/syntax_error'
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class XPath
@@ -0,0 +1,8 @@
1
+ module Nokogiri
2
+ module XML
3
+ class XPath
4
+ class SyntaxError < ::SyntaxError
5
+ end
6
+ end
7
+ end
8
+ end
@@ -7,6 +7,10 @@ module Nokogiri
7
7
  @parser = Nokogiri::CSS::Parser.new
8
8
  end
9
9
 
10
+ def test_syntax_error_raised
11
+ assert_raises(CSS::SyntaxError) { @parser.parse("a[x=]") }
12
+ end
13
+
10
14
  def test_find_by_type
11
15
  ast = @parser.parse("a:nth-child(2)").first
12
16
  matches = ast.find_by_type(
@@ -69,24 +69,23 @@ class TestParser < Nokogiri::TestCase
69
69
  assert_equal 'link1', (doc/:p/:a).first['id']
70
70
  assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
71
71
 
72
- ### Modified: We're not supporting the filter() function
73
- #assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
74
- #assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
75
- #assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
76
- #assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
77
- #assert_equal 4, (doc/'p').filter('*').length
78
- #assert_equal 4, (doc/'p').filter('* *').length
79
- #eles = (doc/'p').filter('.ohmy')
80
- #assert_equal 1, eles.length
81
- #assert_equal 'ohmy', eles.first.get_attribute('class')
72
+ assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
73
+ assert_equal((doc/'p')[2], (doc/'p').filter('[text()="The third paragraph"]')[0])
74
+ assert_equal((doc/'p')[3], (doc/'p').filter('b')[0])
75
+ assert_equal((doc/'p')[1], (doc/'p').filter('a[@id="link2"]')[0])
76
+ assert_equal((doc/'p')[3], (doc/'p').filter('.last')[0])
77
+ assert_equal 4, (doc/'p').filter('*').length
78
+ assert_equal 3, (doc/'p').filter('* *').length
79
+ eles = (doc/'p').filter('.ohmy')
80
+ assert_equal 1, eles.length
81
+ assert_equal 'ohmy', eles.first.get_attribute('class')
82
82
  assert_equal 3, (doc/'p:not(.ohmy)').length
83
83
 
84
- ### Modified: We're not supporting the not() function
85
- #assert_equal 3, (doc/'p').not('.ohmy').length
86
- #assert_equal 3, (doc/'p').not(eles.first).length
87
- #assert_equal 2, (doc/'p').filter('[@class]').length
84
+ assert_equal 3, (doc/'p').not('.ohmy').length
85
+ assert_equal 3, (doc/'p').not(eles.first).length
86
+ assert_equal 2, (doc/'p').filter('[@class]').length
88
87
  assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
89
- #assert_equal 1, (doc/'p').filter('[@class~="final"]').length
88
+ assert_equal 1, (doc/'p').filter('[@class~="final"]').length
90
89
  assert_equal 2, (doc/'p > a').length
91
90
  assert_equal 1, (doc/'p.ohmy > a').length
92
91
  assert_equal 2, (doc/'p / a').length
@@ -249,17 +248,17 @@ class TestParser < Nokogiri::TestCase
249
248
  def test_class_search
250
249
  # test case sent by Chih-Chao Lam
251
250
  # Modified. libxml corrects this differently than hpricot
252
- doc = Nokogiri.Hpricot("<div class=xyz '>abc</div>")
251
+ doc = Nokogiri::Hpricot.HTML("<div class=xyz '>abc</div>")
253
252
  assert_equal 1, doc.search(".xyz").length
254
253
 
255
- doc = Nokogiri.Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
254
+ doc = Nokogiri::Hpricot.HTML("<div class=xyz>abc</div><div class=abc>xyz</div>")
256
255
  assert_equal 1, doc.search(".xyz").length
257
256
  assert_equal 4, doc.search("*").length
258
257
  end
259
258
 
260
259
  def test_kleene_star
261
260
  # bug noticed by raja bhatia
262
- doc = Nokogiri.Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
261
+ doc = Nokogiri::Hpricot.HTML("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
263
262
  assert_equal 2, doc.search("*[@class*='small']").length
264
263
  assert_equal 2, doc.search("*.small").length
265
264
  assert_equal 2, doc.search(".small").length
@@ -267,11 +266,11 @@ class TestParser < Nokogiri::TestCase
267
266
  end
268
267
 
269
268
  def test_empty_comment
270
- doc = Nokogiri.Hpricot("<p><!----></p>")
269
+ doc = Nokogiri::Hpricot.HTML("<p><!----></p>")
271
270
  doc = doc.search('//body').first
272
271
  assert doc.children[0].children[0].comment?
273
272
 
274
- doc = Nokogiri.Hpricot("<p><!-- --></p>")
273
+ doc = Nokogiri::Hpricot.HTML("<p><!-- --></p>")
275
274
  doc = doc.search('//body').first
276
275
  assert doc.children[0].children[0].comment?
277
276
  end
@@ -332,9 +331,9 @@ class TestParser < Nokogiri::TestCase
332
331
  ####
333
332
  # Modified. Added question. Don't care.
334
333
  def test_procins
335
- doc = Nokogiri.Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
334
+ doc = Nokogiri::Hpricot.HTML("<?php print('hello') ?>\n<?xml blah='blah'?>")
336
335
  assert_equal "php", doc.children[1].target
337
- assert_equal "blah='blah'?", doc.children[2].content
336
+ assert_equal "blah='blah'?", doc.children[2].content #"# quote added so emacs ruby-mode parser doesn't barf
338
337
  end
339
338
 
340
339
  ####
@@ -12,6 +12,38 @@ module Nokogiri
12
12
  assert html.html?
13
13
  end
14
14
 
15
+ def test_relative_css
16
+ html = Nokogiri::HTML(<<-eohtml)
17
+ <html>
18
+ <body>
19
+ <div>
20
+ <p>inside div tag</p>
21
+ </div>
22
+ <p>outside div tag</p>
23
+ </body>
24
+ </html>
25
+ eohtml
26
+ set = html.search('div').search('p')
27
+ assert_equal(1, set.length)
28
+ assert_equal('inside div tag', set.first.inner_text)
29
+ end
30
+
31
+ def test_multi_css
32
+ html = Nokogiri::HTML(<<-eohtml)
33
+ <html>
34
+ <body>
35
+ <div>
36
+ <p>p tag</p>
37
+ <a>a tag</a>
38
+ </div>
39
+ </body>
40
+ </html>
41
+ eohtml
42
+ set = html.css('p, a')
43
+ assert_equal(2, set.length)
44
+ assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
45
+ end
46
+
15
47
  def test_inner_text
16
48
  html = Nokogiri::HTML(<<-eohtml)
17
49
  <html>
@@ -151,6 +151,11 @@ class TestConvertXPath < Nokogiri::TestCase
151
151
  end
152
152
  end
153
153
 
154
+ def test_compat_mode_namespaces
155
+ assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
156
+ assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
157
+ end
158
+
154
159
  ##
155
160
  # 'and' is not supported by hpricot
156
161
  # def test_and
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ require 'rubygems'
4
+ require 'mocha'
5
+
6
+ class TestCssCache < Nokogiri::TestCase
7
+
8
+ def setup
9
+ @css = "a1 > b2 > c3"
10
+ @parse_result = Nokogiri::CSS.parse(@css)
11
+ @to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
12
+ assert Nokogiri::CSS::Parser.cache_on?
13
+ end
14
+
15
+ def teardown
16
+ Nokogiri::CSS::Parser.clear_cache
17
+ Nokogiri::CSS::Parser.set_cache true
18
+ end
19
+
20
+ [ false, true ].each do |cache_setting|
21
+ define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
22
+ times = cache_setting ? 1 : 6
23
+ Nokogiri::CSS::Parser.set_cache cache_setting
24
+
25
+ Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
26
+ Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
27
+
28
+ Nokogiri::CSS.xpath_for(@css)
29
+ Nokogiri::CSS.xpath_for(@css)
30
+ Nokogiri::CSS::Parser.xpath_for(@css)
31
+ Nokogiri::CSS::Parser.xpath_for(@css)
32
+ Nokogiri::CSS::Parser.new.xpath_for(@css)
33
+ Nokogiri::CSS::Parser.new.xpath_for(@css)
34
+ end
35
+
36
+ define_method "test_hpricot_cache_#{cache_setting ? "true" : "false"}" do
37
+ times = cache_setting ? 1 : 2
38
+ Nokogiri::CSS::Parser.set_cache cache_setting
39
+
40
+ nh = Nokogiri.Hpricot("<html></html>")
41
+ Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
42
+ Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
43
+
44
+ nh.convert_to_xpath(@css)
45
+ nh.convert_to_xpath(@css)
46
+ end
47
+ end
48
+
49
+
50
+ end
@@ -30,6 +30,8 @@ module Nokogiri
30
30
  xml = Nokogiri::XML(f)
31
31
  }
32
32
  assert xml.xml?
33
+ set = xml.search('//employee')
34
+ assert set.length > 0
33
35
  end
34
36
 
35
37
  def test_search_on_empty_documents
@@ -38,6 +40,12 @@ module Nokogiri
38
40
  assert_equal 0, ns.length
39
41
  end
40
42
 
43
+ def test_bad_xpath_raises_syntax_error
44
+ assert_raises(XML::XPath::SyntaxError) {
45
+ @xml.xpath('\\')
46
+ }
47
+ end
48
+
41
49
  def test_new_document_collect_namespaces
42
50
  doc = Nokogiri::XML::Document.new
43
51
  assert_equal({}, doc.collect_namespaces)
data/vendor/hoe.rb CHANGED
@@ -6,7 +6,16 @@ require 'rake/gempackagetask'
6
6
  require 'rake/rdoctask'
7
7
  require 'rake/testtask'
8
8
  require 'rbconfig'
9
- require 'rubyforge'
9
+ require 'uri'
10
+
11
+ if ENV['RUBYARCHDIR']
12
+ class RubyForge
13
+ VERSION = 'awesome'
14
+ end
15
+ else
16
+ require 'rubyforge'
17
+ end
18
+
10
19
  require 'yaml'
11
20
 
12
21
  begin
@@ -621,8 +630,6 @@ class Hoe
621
630
 
622
631
  Rake::RDocTask.new(:docs) do |rd|
623
632
  rd.main = "README.txt"
624
- rd.options << '-d' if
625
- `which dot` =~ /\/dot/ unless ENV['NODOT'] unless WINDOZE
626
633
  rd.rdoc_dir = 'doc'
627
634
  files = spec.files.grep(rdoc_pattern)
628
635
  files -= ['Manifest.txt']
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: x86-mswin32-60
6
6
  authors:
7
7
  - Aaron Patterson
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2008-11-11 00:00:00 -08:00
13
+ date: 2008-11-14 00:00:00 -08:00
14
14
  default_executable:
15
15
  dependencies: []
16
16
 
@@ -23,11 +23,13 @@ executables: []
23
23
  extensions: []
24
24
 
25
25
  extra_rdoc_files:
26
+ - History.ja.txt
26
27
  - History.txt
27
28
  - Manifest.txt
28
29
  - README.ja.txt
29
30
  - README.txt
30
31
  files:
32
+ - History.ja.txt
31
33
  - History.txt
32
34
  - Manifest.txt
33
35
  - README.ja.txt
@@ -46,6 +48,8 @@ files:
46
48
  - ext/nokogiri/xml_document.h
47
49
  - ext/nokogiri/xml_dtd.c
48
50
  - ext/nokogiri/xml_dtd.h
51
+ - ext/nokogiri/xml_io.c
52
+ - ext/nokogiri/xml_io.h
49
53
  - ext/nokogiri/xml_node.c
50
54
  - ext/nokogiri/xml_node.h
51
55
  - ext/nokogiri/xml_node_set.c
@@ -71,6 +75,7 @@ files:
71
75
  - lib/nokogiri/css/node.rb
72
76
  - lib/nokogiri/css/parser.rb
73
77
  - lib/nokogiri/css/parser.y
78
+ - lib/nokogiri/css/syntax_error.rb
74
79
  - lib/nokogiri/css/tokenizer.rb
75
80
  - lib/nokogiri/css/tokenizer.rex
76
81
  - lib/nokogiri/css/xpath_visitor.rb
@@ -104,6 +109,7 @@ files:
104
109
  - lib/nokogiri/xml/syntax_error.rb
105
110
  - lib/nokogiri/xml/text.rb
106
111
  - lib/nokogiri/xml/xpath.rb
112
+ - lib/nokogiri/xml/xpath/syntax_error.rb
107
113
  - lib/nokogiri/xml/xpath_context.rb
108
114
  - lib/nokogiri/xslt.rb
109
115
  - lib/nokogiri/xslt/stylesheet.rb
@@ -137,6 +143,7 @@ files:
137
143
  - test/html/test_builder.rb
138
144
  - test/html/test_document.rb
139
145
  - test/test_convert_xpath.rb
146
+ - test/test_css_cache.rb
140
147
  - test/test_gc.rb
141
148
  - test/test_nokogiri.rb
142
149
  - test/test_reader.rb
@@ -180,7 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
187
  requirements: []
181
188
 
182
189
  rubyforge_project: nokogiri
183
- rubygems_version: 1.2.0
190
+ rubygems_version: 1.3.1
184
191
  signing_key:
185
192
  specification_version: 2
186
193
  summary: "Nokogiri (\xE9\x8B\xB8) is an HTML, XML, SAX, and Reader parser."
@@ -199,6 +206,7 @@ test_files:
199
206
  - test/html/test_builder.rb
200
207
  - test/html/test_document.rb
201
208
  - test/test_convert_xpath.rb
209
+ - test/test_css_cache.rb
202
210
  - test/test_gc.rb
203
211
  - test/test_nokogiri.rb
204
212
  - test/test_reader.rb