nokogiri 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +56 -0
- data/History.txt +10 -0
- data/Manifest.txt +6 -0
- data/README.ja.txt +12 -1
- data/README.txt +1 -0
- data/Rakefile +3 -4
- data/ext/nokogiri/native.h +1 -0
- data/ext/nokogiri/xml_document.c +60 -3
- data/ext/nokogiri/xml_dtd.c +4 -0
- data/ext/nokogiri/xml_io.c +17 -0
- data/ext/nokogiri/xml_io.h +9 -0
- data/ext/nokogiri/xml_node.c +13 -6
- data/ext/nokogiri/xml_node_set.c +1 -16
- data/ext/nokogiri/xml_xpath_context.c +3 -1
- data/lib/nokogiri/css.rb +15 -1
- data/lib/nokogiri/css/generated_tokenizer.rb +1 -1
- data/lib/nokogiri/css/node.rb +5 -3
- data/lib/nokogiri/css/parser.rb +34 -9
- data/lib/nokogiri/css/syntax_error.rb +6 -0
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/decorators/hpricot/node.rb +5 -7
- data/lib/nokogiri/decorators/hpricot/node_set.rb +43 -3
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +11 -0
- data/lib/nokogiri/hpricot.rb +6 -1
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xml/node.rb +2 -11
- data/lib/nokogiri/xml/xpath.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/test/css/test_parser.rb +4 -0
- data/test/hpricot/test_parser.rb +21 -22
- data/test/html/test_document.rb +32 -0
- data/test/test_convert_xpath.rb +5 -0
- data/test/test_css_cache.rb +50 -0
- data/test/xml/test_document.rb +8 -0
- data/vendor/hoe.rb +10 -3
- metadata +10 -2
data/History.ja.txt
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
=== 1.0.6
|
2
|
+
|
3
|
+
* 5つの修正
|
4
|
+
|
5
|
+
* XPath のパーサーが SyntaxError を生じさせ解析停止させる
|
6
|
+
* CSS のパーサーが SyntaxError を生じさせ解析停止させる
|
7
|
+
* filter() と not() hpricot の互換性を追加
|
8
|
+
* CSS が Node#search 経由で検索し、常時対応する事が出来るようになった
|
9
|
+
* CSS より XPath 変換がキャッシュに入れられるようになった
|
10
|
+
|
11
|
+
=== 1.0.5
|
12
|
+
|
13
|
+
* バグフィックス
|
14
|
+
|
15
|
+
* メーリンクリストを作成
|
16
|
+
* バグファイルを作成
|
17
|
+
* Windows 内で ENV['PATH'] が存在しない場合でも、存在出来るように設定完了
|
18
|
+
* Document 内の NodeSet#[] の結果をキャッシュする
|
19
|
+
|
20
|
+
=== 1.0.4
|
21
|
+
|
22
|
+
* バグフィックス
|
23
|
+
|
24
|
+
* 弱参照からドキュメント参照へのメモリー管理の変換
|
25
|
+
* メモリリークに接続
|
26
|
+
* ビルダーブロックが取り囲んでいるコンテキストから
|
27
|
+
メソッドの呼び出しをする事が出来る
|
28
|
+
|
29
|
+
=== 1.0.3
|
30
|
+
|
31
|
+
* 5つのバグ修正
|
32
|
+
|
33
|
+
* NodeSet が to_ary へ実行
|
34
|
+
* XML::Document#parent を除去
|
35
|
+
* GCバグ修正済み (Mike は最高!)
|
36
|
+
* 1.8.5互換性の為の RARRAY_LEN 除去
|
37
|
+
* inner_html 修正済み (Yahuda に感謝)
|
38
|
+
|
39
|
+
=== 1.0.2
|
40
|
+
|
41
|
+
* 1つのバグ修正
|
42
|
+
|
43
|
+
* extconf.rb は frex や racc を調べないはず
|
44
|
+
|
45
|
+
=== 1.0.1
|
46
|
+
|
47
|
+
* 1つのバグ修正
|
48
|
+
|
49
|
+
* extconf.rb が libdir や prefix を検索しない事を確認済み
|
50
|
+
それによって、ports libxml/ruby が正しくリンクする (lucsky に感謝!)
|
51
|
+
|
52
|
+
=== 1.0.0 / 2008-07-13
|
53
|
+
|
54
|
+
* 1つの偉大な増進
|
55
|
+
|
56
|
+
* ご誕生である
|
data/History.txt
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
=== 1.0.6
|
2
|
+
|
3
|
+
* 5 Bugfixes
|
4
|
+
|
5
|
+
* XPath Parser raises a SyntaxError on parse failure
|
6
|
+
* CSS Parser raises a SyntaxError on parse failure
|
7
|
+
* filter() and not() hpricot compatibility added
|
8
|
+
* CSS searches via Node#search are now always relative
|
9
|
+
* CSS to XPath conversion is now cached
|
10
|
+
|
1
11
|
=== 1.0.5
|
2
12
|
|
3
13
|
* Bugfixes
|
data/Manifest.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
History.ja.txt
|
1
2
|
History.txt
|
2
3
|
Manifest.txt
|
3
4
|
README.ja.txt
|
@@ -16,6 +17,8 @@ ext/nokogiri/xml_document.c
|
|
16
17
|
ext/nokogiri/xml_document.h
|
17
18
|
ext/nokogiri/xml_dtd.c
|
18
19
|
ext/nokogiri/xml_dtd.h
|
20
|
+
ext/nokogiri/xml_io.c
|
21
|
+
ext/nokogiri/xml_io.h
|
19
22
|
ext/nokogiri/xml_node.c
|
20
23
|
ext/nokogiri/xml_node.h
|
21
24
|
ext/nokogiri/xml_node_set.c
|
@@ -41,6 +44,7 @@ lib/nokogiri/css/generated_tokenizer.rb
|
|
41
44
|
lib/nokogiri/css/node.rb
|
42
45
|
lib/nokogiri/css/parser.rb
|
43
46
|
lib/nokogiri/css/parser.y
|
47
|
+
lib/nokogiri/css/syntax_error.rb
|
44
48
|
lib/nokogiri/css/tokenizer.rb
|
45
49
|
lib/nokogiri/css/tokenizer.rex
|
46
50
|
lib/nokogiri/css/xpath_visitor.rb
|
@@ -74,6 +78,7 @@ lib/nokogiri/xml/sax/parser.rb
|
|
74
78
|
lib/nokogiri/xml/syntax_error.rb
|
75
79
|
lib/nokogiri/xml/text.rb
|
76
80
|
lib/nokogiri/xml/xpath.rb
|
81
|
+
lib/nokogiri/xml/xpath/syntax_error.rb
|
77
82
|
lib/nokogiri/xml/xpath_context.rb
|
78
83
|
lib/nokogiri/xslt.rb
|
79
84
|
lib/nokogiri/xslt/stylesheet.rb
|
@@ -107,6 +112,7 @@ test/html/sax/test_parser.rb
|
|
107
112
|
test/html/test_builder.rb
|
108
113
|
test/html/test_document.rb
|
109
114
|
test/test_convert_xpath.rb
|
115
|
+
test/test_css_cache.rb
|
110
116
|
test/test_gc.rb
|
111
117
|
test/test_nokogiri.rb
|
112
118
|
test/test_reader.rb
|
data/README.ja.txt
CHANGED
@@ -18,13 +18,23 @@ Nokogiri はHTMLやXMLやSAXやXSLTやReaderのパーサーです。
|
|
18
18
|
|
19
19
|
検索出来たり、正確にCSS3とXPathをサポート出来たりする。
|
20
20
|
|
21
|
-
|
21
|
+
これはスピードテストです:
|
22
22
|
|
23
23
|
* http://gist.github.com/22176
|
24
24
|
|
25
25
|
NokogiriはHpricotの代わりに使用出来る。
|
26
26
|
その互換性は簡単に正しいCSSとXPathを使用する事が出来る。
|
27
27
|
|
28
|
+
== SUPPORT:
|
29
|
+
|
30
|
+
ノコギリのメーリングリストは:
|
31
|
+
|
32
|
+
* http://rubyforge.org/mailman/listinfo/nokogiri-talk
|
33
|
+
|
34
|
+
バグファイルは:
|
35
|
+
|
36
|
+
* http://nokogiri.lighthouseapp.com/projects/19607-nokogiri/overview
|
37
|
+
|
28
38
|
== SYNOPSIS:
|
29
39
|
|
30
40
|
require 'nokogiri'
|
@@ -55,6 +65,7 @@ NokogiriはHpricotの代わりに使用出来る。
|
|
55
65
|
|
56
66
|
* ruby 1.8 or 1.9
|
57
67
|
* libxml
|
68
|
+
* libxslt
|
58
69
|
|
59
70
|
== INSTALL:
|
60
71
|
|
data/README.txt
CHANGED
data/Rakefile
CHANGED
@@ -289,10 +289,9 @@ end
|
|
289
289
|
|
290
290
|
# Evil evil hack. Do not run tests when gem installs
|
291
291
|
if ENV['RUBYARCHDIR']
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
Rake::Task[:default].prerequisites = [:build]
|
292
|
+
prereqs = Rake::Task[:default].prerequisites
|
293
|
+
prereqs.clear
|
294
|
+
prereqs << :build
|
296
295
|
end
|
297
296
|
|
298
297
|
# vim: syntax=Ruby
|
data/ext/nokogiri/native.h
CHANGED
data/ext/nokogiri/xml_document.c
CHANGED
@@ -62,6 +62,47 @@ static VALUE root(VALUE self)
|
|
62
62
|
return Nokogiri_wrap_xml_node(root) ;
|
63
63
|
}
|
64
64
|
|
65
|
+
/*
|
66
|
+
* call-seq:
|
67
|
+
* read_io(io, url, encoding, options)
|
68
|
+
*
|
69
|
+
* Create a new document from an IO object
|
70
|
+
*/
|
71
|
+
static VALUE read_io( VALUE klass,
|
72
|
+
VALUE io,
|
73
|
+
VALUE url,
|
74
|
+
VALUE encoding,
|
75
|
+
VALUE options )
|
76
|
+
{
|
77
|
+
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
78
|
+
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
79
|
+
|
80
|
+
xmlInitParser();
|
81
|
+
|
82
|
+
xmlDocPtr doc = xmlReadIO(
|
83
|
+
(xmlInputReadCallback)io_read_callback,
|
84
|
+
(xmlInputCloseCallback)io_close_callback,
|
85
|
+
(void *)io,
|
86
|
+
c_url,
|
87
|
+
c_enc,
|
88
|
+
NUM2INT(options)
|
89
|
+
);
|
90
|
+
|
91
|
+
if(doc == NULL) {
|
92
|
+
xmlFreeDoc(doc);
|
93
|
+
rb_raise(rb_eRuntimeError, "Couldn't create a document");
|
94
|
+
return Qnil;
|
95
|
+
}
|
96
|
+
|
97
|
+
return Nokogiri_wrap_xml_document(klass, doc);
|
98
|
+
}
|
99
|
+
|
100
|
+
/*
|
101
|
+
* call-seq:
|
102
|
+
* read_memory(string, url, encoding, options)
|
103
|
+
*
|
104
|
+
* Create a new document from a String
|
105
|
+
*/
|
65
106
|
static VALUE read_memory( VALUE klass,
|
66
107
|
VALUE string,
|
67
108
|
VALUE url,
|
@@ -85,6 +126,12 @@ static VALUE read_memory( VALUE klass,
|
|
85
126
|
return Nokogiri_wrap_xml_document(klass, doc);
|
86
127
|
}
|
87
128
|
|
129
|
+
/*
|
130
|
+
* call-seq:
|
131
|
+
* new
|
132
|
+
*
|
133
|
+
* Create a new document
|
134
|
+
*/
|
88
135
|
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
89
136
|
{
|
90
137
|
VALUE version;
|
@@ -97,7 +144,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
97
144
|
|
98
145
|
/*
|
99
146
|
* call-seq:
|
100
|
-
*
|
147
|
+
* substitute_entities=(boolean)
|
101
148
|
*
|
102
149
|
* Set the global XML default for substitute entities.
|
103
150
|
*/
|
@@ -109,7 +156,7 @@ static VALUE substitute_entities_set(VALUE klass, VALUE value)
|
|
109
156
|
|
110
157
|
/*
|
111
158
|
* call-seq:
|
112
|
-
*
|
159
|
+
* load_external_subsets=(boolean)
|
113
160
|
*
|
114
161
|
* Set the global XML default for load external subsets.
|
115
162
|
*/
|
@@ -122,9 +169,19 @@ static VALUE load_external_subsets_set(VALUE klass, VALUE value)
|
|
122
169
|
VALUE cNokogiriXmlDocument ;
|
123
170
|
void init_xml_document()
|
124
171
|
{
|
125
|
-
VALUE
|
172
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
173
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
174
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
175
|
+
|
176
|
+
/*
|
177
|
+
* Nokogiri::XML::Document wraps an xml document.
|
178
|
+
*/
|
179
|
+
VALUE klass = rb_define_class_under(xml, "Document", node);
|
180
|
+
|
181
|
+
cNokogiriXmlDocument = klass;
|
126
182
|
|
127
183
|
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
184
|
+
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
128
185
|
rb_define_singleton_method(klass, "new", new, -1);
|
129
186
|
rb_define_singleton_method(klass, "substitute_entities=", substitute_entities_set, 1);
|
130
187
|
rb_define_singleton_method(klass, "load_external_subsets=", load_external_subsets_set, 1);
|
data/ext/nokogiri/xml_dtd.c
CHANGED
@@ -108,6 +108,10 @@ void init_xml_dtd()
|
|
108
108
|
{
|
109
109
|
VALUE nokogiri = rb_define_module("Nokogiri");
|
110
110
|
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
111
|
+
|
112
|
+
/*
|
113
|
+
* Nokogiri::XML::DTD wraps DTD nodes in an XML document
|
114
|
+
*/
|
111
115
|
VALUE klass = rb_define_class_under(xml, "DTD", cNokogiriXmlNode);
|
112
116
|
|
113
117
|
rb_define_method(klass, "notations", notations, 0);
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#include <xml_io.h>
|
2
|
+
|
3
|
+
int io_read_callback(void * ctx, char * buffer, int len) {
|
4
|
+
VALUE io = (VALUE)ctx;
|
5
|
+
VALUE string = rb_funcall(io, rb_intern("read"), 1, INT2NUM(len));
|
6
|
+
|
7
|
+
if(Qnil == string) return 0;
|
8
|
+
VALUE length = rb_funcall(string, rb_intern("length"), 0);
|
9
|
+
|
10
|
+
memcpy(buffer, StringValuePtr(string), (unsigned int)NUM2INT(length));
|
11
|
+
|
12
|
+
return NUM2INT(length);
|
13
|
+
}
|
14
|
+
|
15
|
+
int io_close_callback(void * ctx) {
|
16
|
+
return 0;
|
17
|
+
}
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -514,7 +514,17 @@ static VALUE new_from_str(VALUE klass, VALUE xml)
|
|
514
514
|
|
515
515
|
VALUE Nokogiri_wrap_xml_node(xmlNodePtr node)
|
516
516
|
{
|
517
|
-
|
517
|
+
assert(node);
|
518
|
+
assert(node->doc);
|
519
|
+
assert(node->doc->_private);
|
520
|
+
|
521
|
+
VALUE index = INT2NUM((int)node);
|
522
|
+
VALUE document = (VALUE)node->doc->_private;
|
523
|
+
|
524
|
+
VALUE node_cache = rb_funcall(document, rb_intern("node_cache"), 0);
|
525
|
+
VALUE rb_node = rb_hash_aref(node_cache, index);
|
526
|
+
|
527
|
+
if(rb_node != Qnil) return rb_node;
|
518
528
|
|
519
529
|
switch(node->type)
|
520
530
|
{
|
@@ -544,11 +554,8 @@ VALUE Nokogiri_wrap_xml_node(xmlNodePtr node)
|
|
544
554
|
rb_node = Data_Wrap_Struct(cNokogiriXmlNode, 0, 0, node) ;
|
545
555
|
}
|
546
556
|
|
547
|
-
|
548
|
-
|
549
|
-
assert(node->doc->_private);
|
550
|
-
|
551
|
-
rb_iv_set(rb_node, "@document",(VALUE)node->doc->_private);
|
557
|
+
rb_hash_aset(node_cache, index, rb_node);
|
558
|
+
rb_iv_set(rb_node, "@document", document);
|
552
559
|
rb_funcall(rb_node, rb_intern("decorate!"), 0);
|
553
560
|
return rb_node ;
|
554
561
|
}
|
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -52,22 +52,7 @@ static VALUE index_at(VALUE self, VALUE number)
|
|
52
52
|
if(i < 0)
|
53
53
|
i = i + node_set->nodeNr;
|
54
54
|
|
55
|
-
|
56
|
-
if(Qnil == document)
|
57
|
-
rb_raise(rb_eRuntimeError, "You forgot to set a document.");
|
58
|
-
|
59
|
-
VALUE index = INT2NUM((int)node_set->nodeTab[i]);
|
60
|
-
|
61
|
-
VALUE node_cache = rb_funcall(document, rb_intern("node_cache"), 0);
|
62
|
-
|
63
|
-
VALUE node = rb_hash_aref(node_cache, index);
|
64
|
-
|
65
|
-
if(Qnil == node) {
|
66
|
-
node = Nokogiri_wrap_xml_node(node_set->nodeTab[i]);
|
67
|
-
rb_hash_aset(node_cache, index, node);
|
68
|
-
}
|
69
|
-
|
70
|
-
return node;
|
55
|
+
return Nokogiri_wrap_xml_node(node_set->nodeTab[i]);
|
71
56
|
}
|
72
57
|
|
73
58
|
static void deallocate(xmlNodeSetPtr node_set)
|
@@ -39,7 +39,9 @@ static VALUE evaluate(VALUE self, VALUE search_path)
|
|
39
39
|
xmlChar* query = (xmlChar *)StringValuePtr(search_path);
|
40
40
|
xmlXPathObjectPtr xpath = xmlXPathEvalExpression(query, ctx);
|
41
41
|
if(xpath == NULL) {
|
42
|
-
|
42
|
+
VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
|
43
|
+
VALUE error = rb_const_get(xpath, rb_intern("SyntaxError"));
|
44
|
+
rb_raise(error, "Couldn't evaluate expression '%s'", query);
|
43
45
|
}
|
44
46
|
|
45
47
|
VALUE xpath_object = Nokogiri_wrap_xml_xpath(xpath);
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
require 'nokogiri/css/node'
|
2
2
|
require 'nokogiri/css/xpath_visitor'
|
3
|
-
require 'nokogiri/css/generated_tokenizer'
|
4
3
|
require 'nokogiri/css/generated_parser'
|
4
|
+
require 'nokogiri/css/generated_tokenizer'
|
5
5
|
require 'nokogiri/css/tokenizer'
|
6
6
|
require 'nokogiri/css/parser'
|
7
|
+
require 'nokogiri/css/syntax_error'
|
8
|
+
|
9
|
+
module Nokogiri
|
10
|
+
module CSS
|
11
|
+
class << self
|
12
|
+
def parse string
|
13
|
+
Parser.new.parse string
|
14
|
+
end
|
15
|
+
def xpath_for string, options={}
|
16
|
+
Parser.new.xpath_for string, options
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/nokogiri/css/node.rb
CHANGED
@@ -11,9 +11,11 @@ module Nokogiri
|
|
11
11
|
visitor.send(:"visit_#{type.to_s.downcase}", self)
|
12
12
|
end
|
13
13
|
|
14
|
-
def to_xpath prefix =
|
15
|
-
|
16
|
-
|
14
|
+
def to_xpath prefix = nil, visitor = nil
|
15
|
+
prefix ||= '//'
|
16
|
+
visitor ||= XPathVisitor.new
|
17
|
+
self.preprocess!
|
18
|
+
prefix + visitor.accept(self)
|
17
19
|
end
|
18
20
|
|
19
21
|
def preprocess!
|
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -1,23 +1,48 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module CSS
|
3
|
-
class Parser <
|
3
|
+
class Parser < GeneratedTokenizer
|
4
4
|
class << self
|
5
5
|
def parse string
|
6
6
|
new.parse(string)
|
7
7
|
end
|
8
|
-
|
8
|
+
def xpath_for string, options={}
|
9
|
+
new.xpath_for(string, options)
|
10
|
+
end
|
9
11
|
|
10
|
-
|
11
|
-
|
12
|
+
def set_cache setting
|
13
|
+
@cache_on = setting ? true : false
|
14
|
+
end
|
15
|
+
def cache_on?
|
16
|
+
@cache ||= {}
|
17
|
+
instance_variable_defined?('@cache_on') ? @cache_on : true
|
18
|
+
end
|
19
|
+
def check_cache string
|
20
|
+
return unless cache_on?
|
21
|
+
@cache[string]
|
22
|
+
end
|
23
|
+
def add_cache string, value
|
24
|
+
return value unless cache_on?
|
25
|
+
@cache[string] = value
|
26
|
+
end
|
27
|
+
def clear_cache
|
28
|
+
@cache = {}
|
29
|
+
end
|
12
30
|
end
|
31
|
+
alias :parse :scan_str
|
32
|
+
|
33
|
+
def xpath_for string, options={}
|
34
|
+
v = self.class.check_cache(string)
|
35
|
+
return v unless v.nil?
|
13
36
|
|
14
|
-
|
15
|
-
|
16
|
-
|
37
|
+
prefix = options[:prefix] || nil
|
38
|
+
visitor = options[:visitor] || nil
|
39
|
+
args = [prefix, visitor]
|
40
|
+
self.class.add_cache(string, parse(string).map {|ast| ast.to_xpath(prefix, visitor)})
|
17
41
|
end
|
18
42
|
|
19
|
-
def
|
20
|
-
|
43
|
+
def on_error error_token_id, error_value, value_stack
|
44
|
+
after = value_stack.compact.last
|
45
|
+
raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
|
21
46
|
end
|
22
47
|
end
|
23
48
|
end
|
@@ -8,8 +8,7 @@ module Nokogiri
|
|
8
8
|
convert_to_xpath(path)
|
9
9
|
}.flatten.uniq
|
10
10
|
|
11
|
-
|
12
|
-
super(*converted + [namespaces])
|
11
|
+
super(*converted + [ns])
|
13
12
|
end
|
14
13
|
def /(path); search(path) end
|
15
14
|
|
@@ -32,16 +31,15 @@ module Nokogiri
|
|
32
31
|
rule = rule.to_s
|
33
32
|
case rule
|
34
33
|
when %r{^//}
|
35
|
-
[".#{rule}"]
|
34
|
+
[".#{Hpricot::XPathVisitor.xpath_namespace_helper(rule)}"]
|
36
35
|
when %r{^/}
|
37
|
-
[rule]
|
36
|
+
[Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
|
38
37
|
when %r{^.//}
|
39
|
-
[rule]
|
38
|
+
[Hpricot::XPathVisitor.xpath_namespace_helper(rule)]
|
40
39
|
else
|
41
|
-
ctx = CSS::Parser.parse(rule)
|
42
40
|
visitor = CSS::XPathVisitor.new
|
43
41
|
visitor.extend(Hpricot::XPathVisitor)
|
44
|
-
|
42
|
+
CSS.xpath_for(rule, :prefix => ".//", :visitor => visitor)
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
@@ -2,12 +2,52 @@ module Nokogiri
|
|
2
2
|
module Decorators
|
3
3
|
module Hpricot
|
4
4
|
module NodeSet
|
5
|
-
|
5
|
+
|
6
|
+
# Select nodes matching the supplied rule.
|
7
|
+
# Note that positional rules (like <tt>:nth()</tt>) aren't currently supported.
|
8
|
+
#
|
9
|
+
# example:
|
10
|
+
# node_set.filter('.ohmy') # selects nodes from the set with class "ohmy"
|
11
|
+
# node_set.filter('a#link2') # selects nodes from the set with child node <a id='link2'>
|
12
|
+
# node_set.filter('a[@id="link2"]') # selects nodes from the set with child node <a id='link2'>
|
13
|
+
def filter(rule)
|
14
|
+
filter_transformer( lambda {|j| j}, rule ) # identity transformer
|
15
|
+
end
|
16
|
+
|
17
|
+
# The complement to filter, select nodes <em>not</em> matching the supplied rule.
|
18
|
+
# Note that positional rules (like <tt>:nth()</tt>) aren't currently supported.
|
19
|
+
#
|
20
|
+
# See filter for examples.
|
21
|
+
#
|
22
|
+
# Also note that you can pass a XML::Node object instead of a
|
23
|
+
# rule to remove that object from the node set (if it is
|
24
|
+
# present):
|
25
|
+
# node_set.not(node_to_exclude) # selects all nodes EXCEPT node_to_exclude
|
26
|
+
#
|
27
|
+
def not(rule)
|
28
|
+
filter_transformer( lambda {|j| !j}, rule ) # negation transformer
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
def filter_transformer(transformer, rule)
|
33
|
+
sub_set = XML::NodeSet.new(document)
|
34
|
+
document.decorate(sub_set)
|
35
|
+
|
36
|
+
if rule.is_a?(XML::Node)
|
37
|
+
each { |node| sub_set << node if transformer.call(node == rule) }
|
38
|
+
return sub_set
|
39
|
+
end
|
40
|
+
|
6
41
|
ctx = CSS::Parser.parse(rule.to_s)
|
7
42
|
visitor = CSS::XPathVisitor.new
|
8
43
|
visitor.extend(Hpricot::XPathVisitor)
|
9
|
-
|
10
|
-
|
44
|
+
each do |node|
|
45
|
+
if transformer.call(node.at(".//self::" + visitor.accept(ctx.first)))
|
46
|
+
sub_set << node
|
47
|
+
end
|
48
|
+
end
|
49
|
+
sub_set
|
50
|
+
end
|
11
51
|
end
|
12
52
|
end
|
13
53
|
end
|
@@ -11,6 +11,17 @@ module Nokogiri
|
|
11
11
|
end
|
12
12
|
super(node).gsub(/child::text\(\)/, 'normalize-space(child::text())')
|
13
13
|
end
|
14
|
+
|
15
|
+
# take a path like '//t:sam' and convert to xpath "*[name()='t:sam']"
|
16
|
+
def self.xpath_namespace_helper rule
|
17
|
+
rule.split(/\//).collect do |tag|
|
18
|
+
if match = tag.match(/^(\w+:\w+)(.*)/)
|
19
|
+
"*[name()='#{match[1]}']#{match[2]}"
|
20
|
+
else
|
21
|
+
tag
|
22
|
+
end
|
23
|
+
end.join("/")
|
24
|
+
end
|
14
25
|
end
|
15
26
|
end
|
16
27
|
end
|
data/lib/nokogiri/hpricot.rb
CHANGED
@@ -16,6 +16,11 @@ module Nokogiri
|
|
16
16
|
add_decorators(doc)
|
17
17
|
end
|
18
18
|
|
19
|
+
def HTML(string)
|
20
|
+
doc = Nokogiri::HTML.parse(string)
|
21
|
+
add_decorators(doc)
|
22
|
+
end
|
23
|
+
|
19
24
|
def make string
|
20
25
|
doc = XML::Document.new
|
21
26
|
ns = XML::NodeSet.new(doc)
|
@@ -40,7 +45,7 @@ module Nokogiri
|
|
40
45
|
builder = Nokogiri::HTML::Builder.new(&block)
|
41
46
|
Nokogiri::Hpricot.add_decorators(builder.doc)
|
42
47
|
else
|
43
|
-
doc = Nokogiri
|
48
|
+
doc = Nokogiri.parse(*args)
|
44
49
|
Nokogiri::Hpricot.add_decorators(doc)
|
45
50
|
end
|
46
51
|
end
|
data/lib/nokogiri/version.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -46,7 +46,7 @@ module Nokogiri
|
|
46
46
|
def parse string_or_io, url = nil, encoding = nil, options = 2159
|
47
47
|
if string_or_io.respond_to?(:read)
|
48
48
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
49
|
-
string_or_io
|
49
|
+
return Document.read_io(string_or_io, url, encoding, options)
|
50
50
|
end
|
51
51
|
|
52
52
|
# read_memory pukes on empty docs
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -45,9 +45,7 @@ module Nokogiri
|
|
45
45
|
def search *paths
|
46
46
|
ns = paths.last.is_a?(Hash) ? paths.pop : {}
|
47
47
|
xpath(*(paths.map { |path|
|
48
|
-
path =~ /^(\.\/|\/)/ ? path : CSS
|
49
|
-
ast.to_xpath
|
50
|
-
}
|
48
|
+
path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(path, :prefix => ".//")
|
51
49
|
}.flatten.uniq) + [ns])
|
52
50
|
end
|
53
51
|
alias :/ :search
|
@@ -78,9 +76,7 @@ module Nokogiri
|
|
78
76
|
end
|
79
77
|
|
80
78
|
def css *rules
|
81
|
-
xpath(*(rules.map { |rule|
|
82
|
-
CSS::Parser.parse(rule).map { |ast| "." + ast.to_xpath }
|
83
|
-
}.flatten.uniq))
|
79
|
+
xpath(*(rules.map { |rule| CSS.xpath_for(rule, :prefix => ".//") }.flatten.uniq))
|
84
80
|
end
|
85
81
|
|
86
82
|
def at path, ns = {}
|
@@ -192,11 +188,6 @@ module Nokogiri
|
|
192
188
|
children.each{|j| j.traverse(&block) }
|
193
189
|
block.call(self)
|
194
190
|
end
|
195
|
-
|
196
|
-
def == other
|
197
|
-
pointer_id == other.pointer_id
|
198
|
-
end
|
199
|
-
alias :eql? :==
|
200
191
|
end
|
201
192
|
end
|
202
193
|
end
|
data/lib/nokogiri/xml/xpath.rb
CHANGED
data/test/css/test_parser.rb
CHANGED
@@ -7,6 +7,10 @@ module Nokogiri
|
|
7
7
|
@parser = Nokogiri::CSS::Parser.new
|
8
8
|
end
|
9
9
|
|
10
|
+
def test_syntax_error_raised
|
11
|
+
assert_raises(CSS::SyntaxError) { @parser.parse("a[x=]") }
|
12
|
+
end
|
13
|
+
|
10
14
|
def test_find_by_type
|
11
15
|
ast = @parser.parse("a:nth-child(2)").first
|
12
16
|
matches = ast.find_by_type(
|
data/test/hpricot/test_parser.rb
CHANGED
@@ -69,24 +69,23 @@ class TestParser < Nokogiri::TestCase
|
|
69
69
|
assert_equal 'link1', (doc/:p/:a).first['id']
|
70
70
|
assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
|
71
71
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
72
|
+
assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
|
73
|
+
assert_equal((doc/'p')[2], (doc/'p').filter('[text()="The third paragraph"]')[0])
|
74
|
+
assert_equal((doc/'p')[3], (doc/'p').filter('b')[0])
|
75
|
+
assert_equal((doc/'p')[1], (doc/'p').filter('a[@id="link2"]')[0])
|
76
|
+
assert_equal((doc/'p')[3], (doc/'p').filter('.last')[0])
|
77
|
+
assert_equal 4, (doc/'p').filter('*').length
|
78
|
+
assert_equal 3, (doc/'p').filter('* *').length
|
79
|
+
eles = (doc/'p').filter('.ohmy')
|
80
|
+
assert_equal 1, eles.length
|
81
|
+
assert_equal 'ohmy', eles.first.get_attribute('class')
|
82
82
|
assert_equal 3, (doc/'p:not(.ohmy)').length
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
#assert_equal 2, (doc/'p').filter('[@class]').length
|
84
|
+
assert_equal 3, (doc/'p').not('.ohmy').length
|
85
|
+
assert_equal 3, (doc/'p').not(eles.first).length
|
86
|
+
assert_equal 2, (doc/'p').filter('[@class]').length
|
88
87
|
assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
|
89
|
-
|
88
|
+
assert_equal 1, (doc/'p').filter('[@class~="final"]').length
|
90
89
|
assert_equal 2, (doc/'p > a').length
|
91
90
|
assert_equal 1, (doc/'p.ohmy > a').length
|
92
91
|
assert_equal 2, (doc/'p / a').length
|
@@ -249,17 +248,17 @@ class TestParser < Nokogiri::TestCase
|
|
249
248
|
def test_class_search
|
250
249
|
# test case sent by Chih-Chao Lam
|
251
250
|
# Modified. libxml corrects this differently than hpricot
|
252
|
-
doc = Nokogiri.
|
251
|
+
doc = Nokogiri::Hpricot.HTML("<div class=xyz '>abc</div>")
|
253
252
|
assert_equal 1, doc.search(".xyz").length
|
254
253
|
|
255
|
-
doc = Nokogiri.
|
254
|
+
doc = Nokogiri::Hpricot.HTML("<div class=xyz>abc</div><div class=abc>xyz</div>")
|
256
255
|
assert_equal 1, doc.search(".xyz").length
|
257
256
|
assert_equal 4, doc.search("*").length
|
258
257
|
end
|
259
258
|
|
260
259
|
def test_kleene_star
|
261
260
|
# bug noticed by raja bhatia
|
262
|
-
doc = Nokogiri.
|
261
|
+
doc = Nokogiri::Hpricot.HTML("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
|
263
262
|
assert_equal 2, doc.search("*[@class*='small']").length
|
264
263
|
assert_equal 2, doc.search("*.small").length
|
265
264
|
assert_equal 2, doc.search(".small").length
|
@@ -267,11 +266,11 @@ class TestParser < Nokogiri::TestCase
|
|
267
266
|
end
|
268
267
|
|
269
268
|
def test_empty_comment
|
270
|
-
doc = Nokogiri.
|
269
|
+
doc = Nokogiri::Hpricot.HTML("<p><!----></p>")
|
271
270
|
doc = doc.search('//body').first
|
272
271
|
assert doc.children[0].children[0].comment?
|
273
272
|
|
274
|
-
doc = Nokogiri.
|
273
|
+
doc = Nokogiri::Hpricot.HTML("<p><!-- --></p>")
|
275
274
|
doc = doc.search('//body').first
|
276
275
|
assert doc.children[0].children[0].comment?
|
277
276
|
end
|
@@ -332,9 +331,9 @@ class TestParser < Nokogiri::TestCase
|
|
332
331
|
####
|
333
332
|
# Modified. Added question. Don't care.
|
334
333
|
def test_procins
|
335
|
-
doc = Nokogiri.
|
334
|
+
doc = Nokogiri::Hpricot.HTML("<?php print('hello') ?>\n<?xml blah='blah'?>")
|
336
335
|
assert_equal "php", doc.children[1].target
|
337
|
-
assert_equal "blah='blah'?", doc.children[2].content
|
336
|
+
assert_equal "blah='blah'?", doc.children[2].content #"# quote added so emacs ruby-mode parser doesn't barf
|
338
337
|
end
|
339
338
|
|
340
339
|
####
|
data/test/html/test_document.rb
CHANGED
@@ -12,6 +12,38 @@ module Nokogiri
|
|
12
12
|
assert html.html?
|
13
13
|
end
|
14
14
|
|
15
|
+
def test_relative_css
|
16
|
+
html = Nokogiri::HTML(<<-eohtml)
|
17
|
+
<html>
|
18
|
+
<body>
|
19
|
+
<div>
|
20
|
+
<p>inside div tag</p>
|
21
|
+
</div>
|
22
|
+
<p>outside div tag</p>
|
23
|
+
</body>
|
24
|
+
</html>
|
25
|
+
eohtml
|
26
|
+
set = html.search('div').search('p')
|
27
|
+
assert_equal(1, set.length)
|
28
|
+
assert_equal('inside div tag', set.first.inner_text)
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_multi_css
|
32
|
+
html = Nokogiri::HTML(<<-eohtml)
|
33
|
+
<html>
|
34
|
+
<body>
|
35
|
+
<div>
|
36
|
+
<p>p tag</p>
|
37
|
+
<a>a tag</a>
|
38
|
+
</div>
|
39
|
+
</body>
|
40
|
+
</html>
|
41
|
+
eohtml
|
42
|
+
set = html.css('p, a')
|
43
|
+
assert_equal(2, set.length)
|
44
|
+
assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
|
45
|
+
end
|
46
|
+
|
15
47
|
def test_inner_text
|
16
48
|
html = Nokogiri::HTML(<<-eohtml)
|
17
49
|
<html>
|
data/test/test_convert_xpath.rb
CHANGED
@@ -151,6 +151,11 @@ class TestConvertXPath < Nokogiri::TestCase
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
+
def test_compat_mode_namespaces
|
155
|
+
assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
|
156
|
+
assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
|
157
|
+
end
|
158
|
+
|
154
159
|
##
|
155
160
|
# 'and' is not supported by hpricot
|
156
161
|
# def test_and
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'mocha'
|
5
|
+
|
6
|
+
class TestCssCache < Nokogiri::TestCase
|
7
|
+
|
8
|
+
def setup
|
9
|
+
@css = "a1 > b2 > c3"
|
10
|
+
@parse_result = Nokogiri::CSS.parse(@css)
|
11
|
+
@to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
|
12
|
+
assert Nokogiri::CSS::Parser.cache_on?
|
13
|
+
end
|
14
|
+
|
15
|
+
def teardown
|
16
|
+
Nokogiri::CSS::Parser.clear_cache
|
17
|
+
Nokogiri::CSS::Parser.set_cache true
|
18
|
+
end
|
19
|
+
|
20
|
+
[ false, true ].each do |cache_setting|
|
21
|
+
define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
|
22
|
+
times = cache_setting ? 1 : 6
|
23
|
+
Nokogiri::CSS::Parser.set_cache cache_setting
|
24
|
+
|
25
|
+
Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
|
26
|
+
Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
|
27
|
+
|
28
|
+
Nokogiri::CSS.xpath_for(@css)
|
29
|
+
Nokogiri::CSS.xpath_for(@css)
|
30
|
+
Nokogiri::CSS::Parser.xpath_for(@css)
|
31
|
+
Nokogiri::CSS::Parser.xpath_for(@css)
|
32
|
+
Nokogiri::CSS::Parser.new.xpath_for(@css)
|
33
|
+
Nokogiri::CSS::Parser.new.xpath_for(@css)
|
34
|
+
end
|
35
|
+
|
36
|
+
define_method "test_hpricot_cache_#{cache_setting ? "true" : "false"}" do
|
37
|
+
times = cache_setting ? 1 : 2
|
38
|
+
Nokogiri::CSS::Parser.set_cache cache_setting
|
39
|
+
|
40
|
+
nh = Nokogiri.Hpricot("<html></html>")
|
41
|
+
Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
|
42
|
+
Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
|
43
|
+
|
44
|
+
nh.convert_to_xpath(@css)
|
45
|
+
nh.convert_to_xpath(@css)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
end
|
data/test/xml/test_document.rb
CHANGED
@@ -30,6 +30,8 @@ module Nokogiri
|
|
30
30
|
xml = Nokogiri::XML(f)
|
31
31
|
}
|
32
32
|
assert xml.xml?
|
33
|
+
set = xml.search('//employee')
|
34
|
+
assert set.length > 0
|
33
35
|
end
|
34
36
|
|
35
37
|
def test_search_on_empty_documents
|
@@ -38,6 +40,12 @@ module Nokogiri
|
|
38
40
|
assert_equal 0, ns.length
|
39
41
|
end
|
40
42
|
|
43
|
+
def test_bad_xpath_raises_syntax_error
|
44
|
+
assert_raises(XML::XPath::SyntaxError) {
|
45
|
+
@xml.xpath('\\')
|
46
|
+
}
|
47
|
+
end
|
48
|
+
|
41
49
|
def test_new_document_collect_namespaces
|
42
50
|
doc = Nokogiri::XML::Document.new
|
43
51
|
assert_equal({}, doc.collect_namespaces)
|
data/vendor/hoe.rb
CHANGED
@@ -6,7 +6,16 @@ require 'rake/gempackagetask'
|
|
6
6
|
require 'rake/rdoctask'
|
7
7
|
require 'rake/testtask'
|
8
8
|
require 'rbconfig'
|
9
|
-
require '
|
9
|
+
require 'uri'
|
10
|
+
|
11
|
+
if ENV['RUBYARCHDIR']
|
12
|
+
class RubyForge
|
13
|
+
VERSION = 'awesome'
|
14
|
+
end
|
15
|
+
else
|
16
|
+
require 'rubyforge'
|
17
|
+
end
|
18
|
+
|
10
19
|
require 'yaml'
|
11
20
|
|
12
21
|
begin
|
@@ -621,8 +630,6 @@ class Hoe
|
|
621
630
|
|
622
631
|
Rake::RDocTask.new(:docs) do |rd|
|
623
632
|
rd.main = "README.txt"
|
624
|
-
rd.options << '-d' if
|
625
|
-
`which dot` =~ /\/dot/ unless ENV['NODOT'] unless WINDOZE
|
626
633
|
rd.rdoc_dir = 'doc'
|
627
634
|
files = spec.files.grep(rdoc_pattern)
|
628
635
|
files -= ['Manifest.txt']
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Patterson
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2008-11-
|
13
|
+
date: 2008-11-14 00:00:00 -08:00
|
14
14
|
default_executable:
|
15
15
|
dependencies: []
|
16
16
|
|
@@ -23,11 +23,13 @@ executables: []
|
|
23
23
|
extensions:
|
24
24
|
- Rakefile
|
25
25
|
extra_rdoc_files:
|
26
|
+
- History.ja.txt
|
26
27
|
- History.txt
|
27
28
|
- Manifest.txt
|
28
29
|
- README.ja.txt
|
29
30
|
- README.txt
|
30
31
|
files:
|
32
|
+
- History.ja.txt
|
31
33
|
- History.txt
|
32
34
|
- Manifest.txt
|
33
35
|
- README.ja.txt
|
@@ -46,6 +48,8 @@ files:
|
|
46
48
|
- ext/nokogiri/xml_document.h
|
47
49
|
- ext/nokogiri/xml_dtd.c
|
48
50
|
- ext/nokogiri/xml_dtd.h
|
51
|
+
- ext/nokogiri/xml_io.c
|
52
|
+
- ext/nokogiri/xml_io.h
|
49
53
|
- ext/nokogiri/xml_node.c
|
50
54
|
- ext/nokogiri/xml_node.h
|
51
55
|
- ext/nokogiri/xml_node_set.c
|
@@ -71,6 +75,7 @@ files:
|
|
71
75
|
- lib/nokogiri/css/node.rb
|
72
76
|
- lib/nokogiri/css/parser.rb
|
73
77
|
- lib/nokogiri/css/parser.y
|
78
|
+
- lib/nokogiri/css/syntax_error.rb
|
74
79
|
- lib/nokogiri/css/tokenizer.rb
|
75
80
|
- lib/nokogiri/css/tokenizer.rex
|
76
81
|
- lib/nokogiri/css/xpath_visitor.rb
|
@@ -104,6 +109,7 @@ files:
|
|
104
109
|
- lib/nokogiri/xml/syntax_error.rb
|
105
110
|
- lib/nokogiri/xml/text.rb
|
106
111
|
- lib/nokogiri/xml/xpath.rb
|
112
|
+
- lib/nokogiri/xml/xpath/syntax_error.rb
|
107
113
|
- lib/nokogiri/xml/xpath_context.rb
|
108
114
|
- lib/nokogiri/xslt.rb
|
109
115
|
- lib/nokogiri/xslt/stylesheet.rb
|
@@ -137,6 +143,7 @@ files:
|
|
137
143
|
- test/html/test_builder.rb
|
138
144
|
- test/html/test_document.rb
|
139
145
|
- test/test_convert_xpath.rb
|
146
|
+
- test/test_css_cache.rb
|
140
147
|
- test/test_gc.rb
|
141
148
|
- test/test_nokogiri.rb
|
142
149
|
- test/test_reader.rb
|
@@ -193,6 +200,7 @@ test_files:
|
|
193
200
|
- test/html/test_builder.rb
|
194
201
|
- test/html/test_document.rb
|
195
202
|
- test/test_convert_xpath.rb
|
203
|
+
- test/test_css_cache.rb
|
196
204
|
- test/test_gc.rb
|
197
205
|
- test/test_nokogiri.rb
|
198
206
|
- test/test_reader.rb
|