nokogiri 1.4.7 → 1.5.0.beta.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +8 -83
- data/CHANGELOG.rdoc +6 -80
- data/Manifest.txt +4 -74
- data/README.ja.rdoc +5 -1
- data/README.rdoc +8 -22
- data/Rakefile +79 -60
- data/bin/nokogiri +1 -6
- data/deps.rip +5 -0
- data/ext/nokogiri/extconf.rb +32 -53
- data/ext/nokogiri/nokogiri.c +0 -2
- data/ext/nokogiri/nokogiri.h +0 -9
- data/ext/nokogiri/xml_document.c +0 -14
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +7 -32
- data/ext/nokogiri/xml_node.c +31 -103
- data/ext/nokogiri/xml_node_set.c +8 -8
- data/ext/nokogiri/xml_reader.c +1 -20
- data/ext/nokogiri/xml_sax_parser.c +3 -5
- data/ext/nokogiri/xml_sax_parser_context.c +0 -40
- data/ext/nokogiri/xml_xpath_context.c +2 -35
- data/ext/nokogiri/xslt_stylesheet.c +6 -124
- data/lib/nokogiri.rb +7 -3
- data/lib/nokogiri/css.rb +3 -6
- data/lib/nokogiri/css/generated_parser.rb +669 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
- data/lib/nokogiri/css/parser.rb +70 -665
- data/lib/nokogiri/css/parser.y +1 -6
- data/lib/nokogiri/css/tokenizer.rb +3 -148
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +14 -16
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/html.rb +3 -2
- data/lib/nokogiri/html/document.rb +18 -134
- data/lib/nokogiri/html/document_fragment.rb +21 -26
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/sax/parser.rb +2 -6
- data/lib/nokogiri/version.rb +4 -9
- data/lib/nokogiri/xml/attribute_decl.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +3 -27
- data/lib/nokogiri/xml/document_fragment.rb +2 -9
- data/lib/nokogiri/xml/dtd.rb +1 -12
- data/lib/nokogiri/xml/element_decl.rb +1 -1
- data/lib/nokogiri/xml/entity_decl.rb +1 -1
- data/lib/nokogiri/xml/node.rb +75 -172
- data/lib/nokogiri/xml/node/save_options.rb +0 -10
- data/lib/nokogiri/xml/node_set.rb +3 -28
- data/lib/nokogiri/xml/parse_options.rb +0 -8
- data/lib/nokogiri/xml/reader.rb +6 -44
- data/lib/nokogiri/xml/sax/document.rb +5 -9
- data/lib/nokogiri/xml/schema.rb +1 -7
- data/lib/nokogiri/xslt.rb +5 -9
- data/tasks/cross_compile.rb +12 -27
- data/tasks/test.rb +0 -0
- data/test/css/test_parser.rb +19 -40
- data/test/css/test_tokenizer.rb +0 -8
- data/test/helper.rb +1 -4
- data/test/html/sax/test_parser.rb +21 -47
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +3 -58
- data/test/html/test_document_encoding.rb +0 -53
- data/test/html/test_document_fragment.rb +13 -82
- data/test/html/test_element_description.rb +4 -2
- data/test/html/test_node.rb +0 -9
- data/test/test_memory_leak.rb +2 -57
- data/test/test_nokogiri.rb +14 -20
- data/test/test_reader.rb +7 -47
- data/test/test_xslt_transforms.rb +5 -8
- data/test/xml/sax/test_parser.rb +17 -34
- data/test/xml/sax/test_parser_context.rb +0 -50
- data/test/xml/sax/test_push_parser.rb +1 -18
- data/test/xml/test_attr.rb +4 -31
- data/test/xml/test_attribute_decl.rb +7 -3
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +18 -15
- data/test/xml/test_document_fragment.rb +20 -19
- data/test/xml/test_dtd.rb +13 -18
- data/test/xml/test_element_content.rb +1 -1
- data/test/xml/test_element_decl.rb +1 -1
- data/test/xml/test_entity_decl.rb +12 -10
- data/test/xml/test_namespace.rb +7 -5
- data/test/xml/test_node.rb +15 -54
- data/test/xml/test_node_reparenting.rb +42 -85
- data/test/xml/test_node_set.rb +2 -61
- data/test/xml/test_schema.rb +0 -5
- data/test/xml/test_text.rb +2 -11
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +7 -43
- metadata +131 -155
- data/.gemtest +0 -0
- data/ext/nokogiri/depend +0 -358
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/lib/nokogiri/css/parser_extras.rb +0 -91
- data/lib/nokogiri/ffi/encoding_handler.rb +0 -42
- data/lib/nokogiri/ffi/html/document.rb +0 -28
- data/lib/nokogiri/ffi/html/element_description.rb +0 -81
- data/lib/nokogiri/ffi/html/entity_lookup.rb +0 -16
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +0 -38
- data/lib/nokogiri/ffi/io_callbacks.rb +0 -42
- data/lib/nokogiri/ffi/libxml.rb +0 -420
- data/lib/nokogiri/ffi/structs/common_node.rb +0 -38
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +0 -24
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_attr.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +0 -27
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_document.rb +0 -117
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_element.rb +0 -26
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +0 -17
- data/lib/nokogiri/ffi/structs/xml_entity.rb +0 -32
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_node.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +0 -53
- data/lib/nokogiri/ffi/structs/xml_notation.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_ns.rb +0 -15
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +0 -19
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +0 -14
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +0 -51
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +0 -124
- data/lib/nokogiri/ffi/structs/xml_schema.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +0 -31
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +0 -38
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +0 -35
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +0 -13
- data/lib/nokogiri/ffi/weak_bucket.rb +0 -40
- data/lib/nokogiri/ffi/xml/attr.rb +0 -41
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +0 -27
- data/lib/nokogiri/ffi/xml/cdata.rb +0 -19
- data/lib/nokogiri/ffi/xml/comment.rb +0 -18
- data/lib/nokogiri/ffi/xml/document.rb +0 -174
- data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -21
- data/lib/nokogiri/ffi/xml/dtd.rb +0 -67
- data/lib/nokogiri/ffi/xml/element_content.rb +0 -43
- data/lib/nokogiri/ffi/xml/element_decl.rb +0 -19
- data/lib/nokogiri/ffi/xml/entity_decl.rb +0 -36
- data/lib/nokogiri/ffi/xml/entity_reference.rb +0 -19
- data/lib/nokogiri/ffi/xml/namespace.rb +0 -44
- data/lib/nokogiri/ffi/xml/node.rb +0 -559
- data/lib/nokogiri/ffi/xml/node_set.rb +0 -150
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +0 -20
- data/lib/nokogiri/ffi/xml/reader.rb +0 -236
- data/lib/nokogiri/ffi/xml/relax_ng.rb +0 -85
- data/lib/nokogiri/ffi/xml/sax/parser.rb +0 -143
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +0 -79
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +0 -51
- data/lib/nokogiri/ffi/xml/schema.rb +0 -109
- data/lib/nokogiri/ffi/xml/syntax_error.rb +0 -98
- data/lib/nokogiri/ffi/xml/text.rb +0 -18
- data/lib/nokogiri/ffi/xml/xpath.rb +0 -9
- data/lib/nokogiri/ffi/xml/xpath_context.rb +0 -153
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +0 -77
- data/test/decorators/test_slop.rb +0 -16
- data/test/ffi/test_document.rb +0 -35
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/xslt/test_custom_functions.rb +0 -94
data/CHANGELOG.ja.rdoc
CHANGED
@@ -1,83 +1,8 @@
|
|
1
|
-
=== 1.4.7 / 2011年7月1日
|
2
|
-
|
3
|
-
* バグの修正
|
4
|
-
|
5
|
-
* エンコーディング宣言のないHTMLファイルで部分的に重複したドキュメントが生成される問題を修正した. #478
|
6
|
-
|
7
|
-
=== 1.4.6 / 2011年6月19日
|
8
|
-
|
9
|
-
* ノート
|
10
|
-
|
11
|
-
* このバージョンは、1.4.5と機能的に同じです
|
12
|
-
* Rubyの1.8.6のサポートが復元されている
|
13
|
-
|
14
|
-
=== 1.4.5 / 2011年6月15日
|
15
|
-
|
16
|
-
* 新機能
|
17
|
-
|
18
|
-
* Nokogiri::HTML::Document#title アクセサメソッドでHTML文書のタイトルを読み書きできる
|
19
|
-
|
20
|
-
* バグの修正
|
21
|
-
|
22
|
-
* Node#serialize とその仲間達はSaveOptionオブジェクトを受け入れる
|
23
|
-
* Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer
|
24
|
-
* [JRUBY+FFIのみ] 「弱い参照」はスレッドセーフになった. #355
|
25
|
-
* HTML::SAX::Parserから呼ばれるstart_element()コールバックのattributes引数はHTML::XML::Parserによるエミュレートコールバックと同じく連想配列になった. rel. #356
|
26
|
-
* HTML::SAX::Parserのparse*()メソッドはXML::SAX::Parser同様に渡されたブロックをコールバックするようになった.
|
27
|
-
* HTMLパーサーのエンコーディング判定をlibxml2の仕様を超えて拡張・改善した. (XML宣言のencodingを認識、非ASCII文字出現後のmetaタグも文字化けを生じずに反映)
|
28
|
-
* Document#remove_namespaces! は名前空間付きの属性に対応した. #396
|
29
|
-
|
30
|
-
=== 1.4.4 2010年11月15日
|
31
|
-
|
32
|
-
* 新機能
|
33
|
-
|
34
|
-
* XML::Node#children=ノード内のhtml reparented node(s)を返す事によって親の変更ができる。
|
35
|
-
* XSLT はfunction extensionsをサポート。#336
|
36
|
-
* XPath はパラメーター置換を結合する. #329
|
37
|
-
* XML::Reader node typeを一定化させる. #369
|
38
|
-
* SAX Parser context は行とコラムの両方の情報を提供する
|
39
|
-
|
40
|
-
* バグの修正
|
41
|
-
|
42
|
-
* XML::DTD#attributes は属性が存在しない際、nilの代わりに空のハッシュを返す
|
43
|
-
* XML::DTD#{keys,each} は文字通りに機能するようになった #324
|
44
|
-
* {XML,HTML}::DocumentFragment.{new,parse} 行送りと末尾の空白を除去しなくなった #319
|
45
|
-
* XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} は文字列を見送る際にNodeSetを返す
|
46
|
-
* 不確定タグはフレグメント内で要、不要に関係なく解析される #315
|
47
|
-
* XML::Node#{replace,add_previous_sibling,add_next_sibling} libxmlのtext node merging に関わるedge caseを修正する #308
|
48
|
-
* xpath handler argument が整列している最中に起こるGCでのsegfaultを修正 #345
|
49
|
-
* Slop decoratorが既に確定された定義と共に正常に機能させるための便宜上の処置 #330
|
50
|
-
* 子ノードが複製される際に起こるメモリ漏れの修正 #353
|
51
|
-
* an+b記号の無使用時に発生するoff-by-oneバグとnth-last-{child,of-type} CSSセレクターの修正 #354
|
52
|
-
* 非名前空間属性がSAX::Document#start_elementへパスできるように修正 #356
|
53
|
-
* libxml2 in-contextの解析バグの処置 #362
|
54
|
-
* フレグメント内のノードの中にあるNodeSet#wrapの修正 #331
|
55
|
-
|
56
|
-
=== 1.4.3 2010年7月28日
|
57
|
-
|
58
|
-
* 新しい機能
|
59
|
-
|
60
|
-
* XML::Reader#empty_element? - 子の無いエレメントにtrueを返す #262
|
61
|
-
* Node#remove_namespaces! - 1.4.2では 名前空間のみを取り除いていたが、
|
62
|
-
1.4.3 では名前空間及び、名前空間宣言も取り除く #294
|
63
|
-
|
64
|
-
* バグの修正
|
65
|
-
|
66
|
-
* XML::NodeSet#{include?,delete,push} はXML::Namespaceを受入れる
|
67
|
-
* XML::Document#parse - 1.4.3より文書内の文脈を解析する機能を追加
|
68
|
-
* XML::DocumentFragment#inner_html= 文脈解析を共に実行する #298, #281
|
69
|
-
* lib/nokogiri/css/parser.y はCSSと疑似選別の両方を機能
|
70
|
-
* 演算によって近隣に存在する併合型ノードへの遊離問題の有無に関わらず、一切の
|
71
|
-
弊害なしにテキストノードの繰り返しが実行可能 #283
|
72
|
-
* xmlFirstElementChild et al.による libxml2バージョンでの不適合性を修正 #303
|
73
|
-
* XML::Attr#add_namespace (!)文字通りの機能実現! #252
|
74
|
-
* HTML::DocumentFragment が文字列に存在するエンコードを使用 #305
|
75
|
-
|
76
1
|
=== 1.4.2 2010年5月22日
|
77
2
|
|
78
|
-
*
|
3
|
+
* 新しい機能
|
79
4
|
|
80
|
-
* XML::Node#parse
|
5
|
+
* XML::Node#parse 定義されたコンテキストノードで、XML 又はHTMLのフレグメント
|
81
6
|
を解析する
|
82
7
|
* XML::Node#namespacesが子ノードとその祖先ノード内で定義された全ての名前空間
|
83
8
|
を返すようになった(以前は祖先ノードの名前空間は返されなかった)
|
@@ -105,18 +30,18 @@
|
|
105
30
|
* XML::Node#xpath はNodeSetのオブジェクト以外のオブジェクトを返す GH #208
|
106
31
|
* XSLT::StyleSheet#transformはパラメーターのハッシュを受け入れる GH #223
|
107
32
|
* CSSのnot()の疑似セレクタの修正 GH #205
|
108
|
-
* XML::Builder
|
33
|
+
* XML::Builder はノード達が切り離されても破壊しない(vihaiの協力に感謝)
|
109
34
|
GH #228
|
110
35
|
* SAX parser経由でエンコードを強制することが出来る Eugene Pimenovに感謝!
|
111
36
|
GH #204
|
112
|
-
* XML::DocumentFragment はML::Node#parse
|
113
|
-
* XML Reader
|
37
|
+
* XML::DocumentFragment はXML::Node#parse を使用して子供を限定する
|
38
|
+
* XML Reader内のメモリリークを修正 sdorさん、ありがとう! GH#244
|
114
39
|
|
115
40
|
* ノート
|
116
41
|
|
117
|
-
* 今日4月18日現在、Windows gems は libxml 2.7.
|
118
|
-
1.1.26にDLLs
|
119
|
-
2.7.3 と libxslt 1.1.24
|
42
|
+
* 今日4月18日現在、Windows gems は libxml 2.7.6 とlibxslt
|
43
|
+
1.1.26にDLLsを正規装備しています。このリリース以前にも既にDLLsはlibxml
|
44
|
+
2.7.3 と libxslt 1.1.24に正規装備されています。
|
120
45
|
|
121
46
|
=== 1.4.1 2009年12月10日
|
122
47
|
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,88 +1,14 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
* Bugfixes
|
4
|
-
|
5
|
-
* Fix a bug in advanced encoding detection that leads to partially
|
6
|
-
duplicated document when parsing an HTML file with unknown
|
7
|
-
encoding. Thanks, Timothy Elliott (@ender672)! #478
|
8
|
-
|
9
|
-
=== 1.4.6 / 2011-06-19
|
1
|
+
== 1.5.0 beta1 / 2010/05/22
|
10
2
|
|
11
3
|
* Notes
|
12
4
|
|
13
|
-
*
|
14
|
-
* Ruby 1.8.6 support has been restored.
|
15
|
-
|
16
|
-
=== 1.4.5 / 2011-06-15
|
17
|
-
|
18
|
-
* New Features
|
19
|
-
|
20
|
-
* Nokogiri::HTML::Document#title accessor gets and sets the document title.
|
21
|
-
* extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor)
|
22
|
-
* Raise an exception if a string is passed to Nokogiri::XML::Schema#validate. #406
|
23
|
-
|
24
|
-
* Bugfixes
|
25
|
-
|
26
|
-
* Node#serialize-and-friends now accepts a SaveOption object as the, erm, save object.
|
27
|
-
* Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer
|
28
|
-
* [JRUBY+FFI only] Weak references are now threadsafe. #355
|
29
|
-
* Make direct start_element() callback (currently used for
|
30
|
-
HTML::SAX::Parser) pass attributes in assoc array, just as
|
31
|
-
emulated start_element() callback does. rel. #356
|
32
|
-
* HTML::SAX::Parser should call back a block given to parse*() if any, just as XML::SAX::Parser does.
|
33
|
-
* Add further encoding detection to HTML parser that libxml2 does not do.
|
34
|
-
* Document#remove_namespaces! now handles attributes with namespaces. #396
|
35
|
-
* XSLT::Stylesheet#transform no longer segfaults when handed a non-XML::Document. #452
|
36
|
-
* XML::Reader no longer segfaults when under GC pressure. #439
|
37
|
-
|
38
|
-
=== 1.4.4 / 2010-11-15
|
5
|
+
* JRuby support is provided by a new pure-java backend.
|
39
6
|
|
40
|
-
*
|
41
|
-
|
42
|
-
* XML::Node#children= sets the node's inner html (much like #inner_html=), but returns the reparent node(s).
|
43
|
-
* XSLT supports function extensions. #336
|
44
|
-
* XPath bind parameter substitution. #329
|
45
|
-
* XML::Reader node type constants. #369
|
46
|
-
* SAX Parser context provides line and column information
|
47
|
-
|
48
|
-
* Bugfixes
|
49
|
-
|
50
|
-
* XML::DTD#attributes returns an empty hash instead of nil when there are no attributes.
|
51
|
-
* XML::DTD#{keys,each} now work as expected. #324
|
52
|
-
* {XML,HTML}::DocumentFragment.{new,parse} no longer strip leading and trailing whitespace. #319
|
53
|
-
* XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} return a NodeSet when passed a string.
|
54
|
-
* Unclosed tags parsed more robustly in fragments. #315
|
55
|
-
* XML::Node#{replace,add_previous_sibling,add_next_sibling} edge cases fixed related to libxml's text node merging. #308
|
56
|
-
* Fixed a segfault when GC occurs during xpath handler argument marshalling. #345
|
57
|
-
* Added hack to Slop decorator to work with previously defined methods. #330
|
58
|
-
* Fix a memory leak when duplicating child nodes. #353
|
59
|
-
* Fixed off-by-one bug with nth-last-{child,of-type} CSS selectors when NOT using an+b notation. #354
|
60
|
-
* Fixed passing of non-namespace attributes to SAX::Document#start_element. #356
|
61
|
-
* Workaround for libxml2 in-context parsing bug. #362
|
62
|
-
* Fixed NodeSet#wrap on nodes within a fragment. #331
|
63
|
-
|
64
|
-
=== 1.4.3 / 2010/07/28
|
65
|
-
|
66
|
-
* New Features
|
67
|
-
|
68
|
-
* XML::Reader#empty_element? returns true for empty elements. #262
|
69
|
-
* Node#remove_namespaces! now removes namespace *declarations* as well. #294
|
70
|
-
* NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding
|
71
|
-
methods of Node do.
|
72
|
-
|
73
|
-
* Bugfixes
|
7
|
+
* Deprecations
|
74
8
|
|
75
|
-
*
|
76
|
-
*
|
77
|
-
*
|
78
|
-
* lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed
|
79
|
-
* Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. #283
|
80
|
-
* Fixed libxml2 versionitis issue with xmlFirstElementChild et al. #303
|
81
|
-
* XML::Attr#add_namespace now works as expected. #252
|
82
|
-
* HTML::DocumentFragment uses the string's encoding. #305
|
83
|
-
* Fix the CSS3 selector translation rule for the general sibling combinator
|
84
|
-
(a.k.a. preceding selector) that incorrectly converted "E + F G" to
|
85
|
-
"//F//G[preceding-sibling::E]".
|
9
|
+
* Ruby 1.8.6 is deprecated. Nokogiri will install, but official support has ended.
|
10
|
+
* LibXML 2.6.16 and earlier are deprecated. Nokogiri will refuse to install.
|
11
|
+
* FFI support is removed.
|
86
12
|
|
87
13
|
=== 1.4.2 / 2010/05/22
|
88
14
|
|
data/Manifest.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
.autotest
|
2
|
-
.gemtest
|
3
2
|
CHANGELOG.ja.rdoc
|
4
3
|
CHANGELOG.rdoc
|
5
4
|
Manifest.txt
|
@@ -7,7 +6,7 @@ README.ja.rdoc
|
|
7
6
|
README.rdoc
|
8
7
|
Rakefile
|
9
8
|
bin/nokogiri
|
10
|
-
|
9
|
+
deps.rip
|
11
10
|
ext/nokogiri/extconf.rb
|
12
11
|
ext/nokogiri/html_document.c
|
13
12
|
ext/nokogiri/html_document.h
|
@@ -45,8 +44,6 @@ ext/nokogiri/xml_entity_reference.c
|
|
45
44
|
ext/nokogiri/xml_entity_reference.h
|
46
45
|
ext/nokogiri/xml_io.c
|
47
46
|
ext/nokogiri/xml_io.h
|
48
|
-
ext/nokogiri/xml_libxml2_hacks.c
|
49
|
-
ext/nokogiri/xml_libxml2_hacks.h
|
50
47
|
ext/nokogiri/xml_namespace.c
|
51
48
|
ext/nokogiri/xml_namespace.h
|
52
49
|
ext/nokogiri/xml_node.c
|
@@ -77,84 +74,22 @@ ext/nokogiri/xslt_stylesheet.c
|
|
77
74
|
ext/nokogiri/xslt_stylesheet.h
|
78
75
|
lib/nokogiri.rb
|
79
76
|
lib/nokogiri/css.rb
|
77
|
+
lib/nokogiri/css/generated_parser.rb
|
78
|
+
lib/nokogiri/css/generated_tokenizer.rb
|
80
79
|
lib/nokogiri/css/node.rb
|
81
80
|
lib/nokogiri/css/parser.rb
|
82
81
|
lib/nokogiri/css/parser.y
|
83
|
-
lib/nokogiri/css/parser_extras.rb
|
84
82
|
lib/nokogiri/css/syntax_error.rb
|
85
83
|
lib/nokogiri/css/tokenizer.rb
|
86
84
|
lib/nokogiri/css/tokenizer.rex
|
87
85
|
lib/nokogiri/css/xpath_visitor.rb
|
88
86
|
lib/nokogiri/decorators/slop.rb
|
89
|
-
lib/nokogiri/ffi/encoding_handler.rb
|
90
|
-
lib/nokogiri/ffi/html/document.rb
|
91
|
-
lib/nokogiri/ffi/html/element_description.rb
|
92
|
-
lib/nokogiri/ffi/html/entity_lookup.rb
|
93
|
-
lib/nokogiri/ffi/html/sax/parser_context.rb
|
94
|
-
lib/nokogiri/ffi/io_callbacks.rb
|
95
|
-
lib/nokogiri/ffi/libxml.rb
|
96
|
-
lib/nokogiri/ffi/structs/common_node.rb
|
97
|
-
lib/nokogiri/ffi/structs/html_elem_desc.rb
|
98
|
-
lib/nokogiri/ffi/structs/html_entity_desc.rb
|
99
|
-
lib/nokogiri/ffi/structs/xml_alloc.rb
|
100
|
-
lib/nokogiri/ffi/structs/xml_attr.rb
|
101
|
-
lib/nokogiri/ffi/structs/xml_attribute.rb
|
102
|
-
lib/nokogiri/ffi/structs/xml_buffer.rb
|
103
|
-
lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb
|
104
|
-
lib/nokogiri/ffi/structs/xml_document.rb
|
105
|
-
lib/nokogiri/ffi/structs/xml_dtd.rb
|
106
|
-
lib/nokogiri/ffi/structs/xml_element.rb
|
107
|
-
lib/nokogiri/ffi/structs/xml_element_content.rb
|
108
|
-
lib/nokogiri/ffi/structs/xml_entity.rb
|
109
|
-
lib/nokogiri/ffi/structs/xml_enumeration.rb
|
110
|
-
lib/nokogiri/ffi/structs/xml_node.rb
|
111
|
-
lib/nokogiri/ffi/structs/xml_node_set.rb
|
112
|
-
lib/nokogiri/ffi/structs/xml_notation.rb
|
113
|
-
lib/nokogiri/ffi/structs/xml_ns.rb
|
114
|
-
lib/nokogiri/ffi/structs/xml_parser_context.rb
|
115
|
-
lib/nokogiri/ffi/structs/xml_parser_input.rb
|
116
|
-
lib/nokogiri/ffi/structs/xml_relax_ng.rb
|
117
|
-
lib/nokogiri/ffi/structs/xml_sax_handler.rb
|
118
|
-
lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb
|
119
|
-
lib/nokogiri/ffi/structs/xml_schema.rb
|
120
|
-
lib/nokogiri/ffi/structs/xml_syntax_error.rb
|
121
|
-
lib/nokogiri/ffi/structs/xml_text_reader.rb
|
122
|
-
lib/nokogiri/ffi/structs/xml_xpath_context.rb
|
123
|
-
lib/nokogiri/ffi/structs/xml_xpath_object.rb
|
124
|
-
lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb
|
125
|
-
lib/nokogiri/ffi/structs/xslt_stylesheet.rb
|
126
|
-
lib/nokogiri/ffi/weak_bucket.rb
|
127
|
-
lib/nokogiri/ffi/xml/attr.rb
|
128
|
-
lib/nokogiri/ffi/xml/attribute_decl.rb
|
129
|
-
lib/nokogiri/ffi/xml/cdata.rb
|
130
|
-
lib/nokogiri/ffi/xml/comment.rb
|
131
|
-
lib/nokogiri/ffi/xml/document.rb
|
132
|
-
lib/nokogiri/ffi/xml/document_fragment.rb
|
133
|
-
lib/nokogiri/ffi/xml/dtd.rb
|
134
|
-
lib/nokogiri/ffi/xml/element_content.rb
|
135
|
-
lib/nokogiri/ffi/xml/element_decl.rb
|
136
|
-
lib/nokogiri/ffi/xml/entity_decl.rb
|
137
|
-
lib/nokogiri/ffi/xml/entity_reference.rb
|
138
|
-
lib/nokogiri/ffi/xml/namespace.rb
|
139
|
-
lib/nokogiri/ffi/xml/node.rb
|
140
|
-
lib/nokogiri/ffi/xml/node_set.rb
|
141
|
-
lib/nokogiri/ffi/xml/processing_instruction.rb
|
142
|
-
lib/nokogiri/ffi/xml/reader.rb
|
143
|
-
lib/nokogiri/ffi/xml/relax_ng.rb
|
144
|
-
lib/nokogiri/ffi/xml/sax/parser.rb
|
145
|
-
lib/nokogiri/ffi/xml/sax/parser_context.rb
|
146
|
-
lib/nokogiri/ffi/xml/sax/push_parser.rb
|
147
|
-
lib/nokogiri/ffi/xml/schema.rb
|
148
|
-
lib/nokogiri/ffi/xml/syntax_error.rb
|
149
|
-
lib/nokogiri/ffi/xml/text.rb
|
150
|
-
lib/nokogiri/ffi/xml/xpath.rb
|
151
|
-
lib/nokogiri/ffi/xml/xpath_context.rb
|
152
|
-
lib/nokogiri/ffi/xslt/stylesheet.rb
|
153
87
|
lib/nokogiri/html.rb
|
154
88
|
lib/nokogiri/html/builder.rb
|
155
89
|
lib/nokogiri/html/document.rb
|
156
90
|
lib/nokogiri/html/document_fragment.rb
|
157
91
|
lib/nokogiri/html/element_description.rb
|
92
|
+
lib/nokogiri/html/element_description_defaults.rb
|
158
93
|
lib/nokogiri/html/entity_lookup.rb
|
159
94
|
lib/nokogiri/html/sax/parser.rb
|
160
95
|
lib/nokogiri/html/sax/parser_context.rb
|
@@ -205,15 +140,11 @@ test/css/test_nthiness.rb
|
|
205
140
|
test/css/test_parser.rb
|
206
141
|
test/css/test_tokenizer.rb
|
207
142
|
test/css/test_xpath_visitor.rb
|
208
|
-
test/decorators/test_slop.rb
|
209
|
-
test/ffi/test_document.rb
|
210
143
|
test/files/2ch.html
|
211
144
|
test/files/address_book.rlx
|
212
145
|
test/files/address_book.xml
|
213
146
|
test/files/bar/bar.xsd
|
214
147
|
test/files/dont_hurt_em_why.xml
|
215
|
-
test/files/encoding.html
|
216
|
-
test/files/encoding.xhtml
|
217
148
|
test/files/exslt.xml
|
218
149
|
test/files/exslt.xslt
|
219
150
|
test/files/foo/foo.xsd
|
@@ -280,4 +211,3 @@ test/xml/test_syntax_error.rb
|
|
280
211
|
test/xml/test_text.rb
|
281
212
|
test/xml/test_unparented_node.rb
|
282
213
|
test/xml/test_xpath.rb
|
283
|
-
test/xslt/test_custom_functions.rb
|
data/README.ja.rdoc
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
* http://nokogiri.org/
|
4
4
|
* http://github.com/tenderlove/nokogiri/wikis
|
5
5
|
* http://github.com/tenderlove/nokogiri/tree/master
|
6
|
-
* http://groups.google.com/group/nokogiri-
|
6
|
+
* http://groups.google.com/group/nokogiri-talk
|
7
7
|
* http://github.com/tenderlove/nokogiri/issues
|
8
8
|
|
9
9
|
== DESCRIPTION:
|
@@ -22,6 +22,10 @@ XMLは暴力に似ている - XMLが君の問題を解決しないとしたら
|
|
22
22
|
|
23
23
|
XML/HTMLの高速な解析と探索検索、ならびにCSS3セレクタとXPathをサポートしています。
|
24
24
|
|
25
|
+
スピードテスト:
|
26
|
+
|
27
|
+
* http://gist.github.com/24605
|
28
|
+
|
25
29
|
== SUPPORT:
|
26
30
|
|
27
31
|
日本語でNokogiriの
|
data/README.rdoc
CHANGED
@@ -23,6 +23,10 @@ enough of it.
|
|
23
23
|
Nokogiri parses and searches XML/HTML very quickly, and also has
|
24
24
|
correctly implemented CSS3 selector support as well as XPath support.
|
25
25
|
|
26
|
+
Here is a speed test:
|
27
|
+
|
28
|
+
* http://gist.github.com/24605
|
29
|
+
|
26
30
|
== SUPPORT:
|
27
31
|
|
28
32
|
The Nokogiri {mailing list}[http://groups.google.com/group/nokogiri-talk]
|
@@ -77,27 +81,9 @@ The IRC channel is #nokogiri on freenode.
|
|
77
81
|
|
78
82
|
== ENCODING:
|
79
83
|
|
80
|
-
Strings are always stored as UTF-8 internally. Methods that return
|
81
|
-
|
82
|
-
|
83
|
-
encoded like the source document.
|
84
|
-
|
85
|
-
*WARNING*
|
86
|
-
|
87
|
-
Some documents declare one particular encoding, but use a different
|
88
|
-
one. So, which encoding should the parser choose?
|
89
|
-
|
90
|
-
Remember that data is just a stream of bytes. Only us humans add
|
91
|
-
meaning to that stream. Any particular set of bytes could be valid
|
92
|
-
characters in multiple encodings, so detecting encoding with 100%
|
93
|
-
accuracy is not possible. libxml2 does its best, but it can't be right
|
94
|
-
100% of the time.
|
95
|
-
|
96
|
-
If you want Nokogiri to handle the document encoding properly, your
|
97
|
-
best bet is to explicitly set the encoding. Here is an example of
|
98
|
-
explicitly setting the encoding to EUC-JP on the parser:
|
99
|
-
|
100
|
-
doc = Nokogiri.XML('<foo><bar /><foo>', nil, 'EUC-JP')
|
84
|
+
Strings are always stored as UTF-8 internally. Methods that return text values
|
85
|
+
will always return UTF-8 encoded strings. Methods that return XML (like to_xml,
|
86
|
+
to_html and inner_html) will return a string encoded like the source document.
|
101
87
|
|
102
88
|
== INSTALL:
|
103
89
|
|
@@ -108,7 +94,7 @@ explicitly setting the encoding to EUC-JP on the parser:
|
|
108
94
|
Binary packages are available for:
|
109
95
|
|
110
96
|
* SuSE[http://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/]
|
111
|
-
* Fedora[
|
97
|
+
* Fedora[https://admin.fedoraproject.org/pkgdb/packages/name/rubygem-nokogiri]
|
112
98
|
|
113
99
|
== DEVELOPMENT:
|
114
100
|
|
data/Rakefile
CHANGED
@@ -7,13 +7,16 @@ require 'hoe'
|
|
7
7
|
windows = RUBY_PLATFORM =~ /(mswin|mingw)/i
|
8
8
|
java = RUBY_PLATFORM =~ /java/
|
9
9
|
|
10
|
-
GENERATED_PARSER = "lib/nokogiri/css/
|
11
|
-
GENERATED_TOKENIZER = "lib/nokogiri/css/
|
10
|
+
GENERATED_PARSER = "lib/nokogiri/css/generated_parser.rb"
|
11
|
+
GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"
|
12
12
|
CROSS_DIR = File.join(File.dirname(__FILE__), 'tmp', 'cross')
|
13
13
|
|
14
|
+
EXTERNAL_JAVA_LIBRARIES = %w{isorelax jing nekohtml nekodtd xercesImpl}.map{|x| "lib/#{x}.jar"}
|
15
|
+
JAVA_EXT = "lib/nokogiri/nokogiri.jar"
|
16
|
+
JRUBY_HOME = Config::CONFIG['prefix']
|
17
|
+
|
14
18
|
# Make sure hoe-debugging is installed
|
15
19
|
Hoe.plugin :debugging
|
16
|
-
Hoe.plugin :git
|
17
20
|
|
18
21
|
HOE = Hoe.spec 'nokogiri' do
|
19
22
|
developer('Aaron Patterson', 'aaronp@rubyforge.org')
|
@@ -22,19 +25,18 @@ HOE = Hoe.spec 'nokogiri' do
|
|
22
25
|
self.history_file = ['CHANGELOG', ENV['HLANG'], 'rdoc'].compact.join('.')
|
23
26
|
self.extra_rdoc_files = FileList['*.rdoc','ext/nokogiri/*.c']
|
24
27
|
self.clean_globs = [
|
25
|
-
"ext/nokogiri/*.dll",
|
26
28
|
'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
|
27
29
|
'lib/nokogiri/nokogiri.rb',
|
28
30
|
'lib/nokogiri/1.{8,9}',
|
29
31
|
GENERATED_PARSER,
|
30
32
|
GENERATED_TOKENIZER,
|
31
|
-
|
33
|
+
'cross',
|
32
34
|
]
|
33
35
|
|
34
36
|
%w{ racc rexical rake-compiler }.each do |dep|
|
35
|
-
extra_dev_deps << [dep, '>= 0']
|
37
|
+
self.extra_dev_deps << [dep, '>= 0']
|
36
38
|
end
|
37
|
-
extra_dev_deps << ["minitest", ">= 1.6.0"]
|
39
|
+
self.extra_dev_deps << ["minitest", ">= 1.6.0"]
|
38
40
|
|
39
41
|
self.spec_extras = { :extensions => ["ext/nokogiri/extconf.rb"] }
|
40
42
|
|
@@ -67,7 +69,7 @@ unless java
|
|
67
69
|
|
68
70
|
ext.config_options << ENV['EXTOPTS']
|
69
71
|
ext.cross_compile = true
|
70
|
-
ext.cross_platform
|
72
|
+
ext.cross_platform = 'i386-mingw32'
|
71
73
|
ext.cross_config_options <<
|
72
74
|
"--with-xml2-include=#{File.join(CROSS_DIR, 'include', 'libxml2')}"
|
73
75
|
ext.cross_config_options <<
|
@@ -78,60 +80,57 @@ unless java
|
|
78
80
|
end
|
79
81
|
end
|
80
82
|
|
81
|
-
namespace :
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
f.write(HOE.spec.to_ruby)
|
87
|
-
end
|
83
|
+
namespace :java do
|
84
|
+
desc "Removes all generated during compilation .class files."
|
85
|
+
task :clean_classes do
|
86
|
+
(FileList['ext/java/nokogiri/internals/*.class'] + FileList['ext/java/nokogiri/*.class'] + FileList['ext/java/*.class']).to_a.each do |file|
|
87
|
+
File.delete file
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
|
+
desc "Removes the generated .jar"
|
92
|
+
task :clean_jar do
|
93
|
+
FileList['lib/nokogiri/*.jar'].each{|f| File.delete f }
|
94
|
+
end
|
95
|
+
|
96
|
+
desc "Same as java:clean_classes and java:clean_jar"
|
97
|
+
task :clean_all => ["java:clean_classes", "java:clean_jar"]
|
98
|
+
|
91
99
|
desc "Build a gem targetted for JRuby"
|
92
|
-
task :
|
93
|
-
raise "ERROR: please run this task under jruby" unless java
|
100
|
+
task :gem => ['java:spec', GENERATED_PARSER, GENERATED_TOKENIZER, :build] do
|
94
101
|
system "gem build nokogiri.gemspec"
|
95
102
|
FileUtils.mkdir_p "pkg"
|
96
103
|
FileUtils.mv Dir.glob("nokogiri*-java.gem"), "pkg"
|
97
104
|
end
|
98
105
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
HOE.spec.files += Dir["ext/nokogiri/*.dll"]
|
106
|
-
HOE.spec.extensions = []
|
107
|
-
HOE.spec.add_dependency 'weakling', '>= 0.0.3'
|
108
|
-
f.write(HOE.spec.to_ruby)
|
109
|
-
end
|
106
|
+
task :spec do
|
107
|
+
File.open("#{HOE.name}.gemspec", 'w') do |f|
|
108
|
+
HOE.spec.platform = 'java'
|
109
|
+
HOE.spec.files += [GENERATED_PARSER, GENERATED_TOKENIZER, JAVA_EXT] + EXTERNAL_JAVA_LIBRARIES
|
110
|
+
HOE.spec.extensions = []
|
111
|
+
f.write(HOE.spec.to_ruby)
|
110
112
|
end
|
113
|
+
end
|
111
114
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
115
|
+
desc "Build external library"
|
116
|
+
task :build_external do
|
117
|
+
Dir.chdir('ext/java') do
|
118
|
+
LIB_DIR = '../../lib'
|
119
|
+
CLASSPATH = "#{JRUBY_HOME}/lib/jruby.jar:#{LIB_DIR}/nekohtml.jar:#{LIB_DIR}/nekodtd.jar:#{LIB_DIR}/xercesImpl.jar:#{LIB_DIR}/isorelax.jar:#{LIB_DIR}/jing.jar"
|
120
|
+
sh "javac -g -cp #{CLASSPATH} nokogiri/*.java nokogiri/internals/*.java"
|
121
|
+
sh "jar cf ../../#{JAVA_EXT} nokogiri/*.class nokogiri/internals/*.class"
|
122
|
+
end
|
123
|
+
end
|
116
124
|
|
117
|
-
|
118
|
-
|
119
|
-
Dir.chdir dlldir do
|
120
|
-
unless File.exists? "nokogiri-1.4.3.1-java.gem"
|
121
|
-
run "wget http://rubygems.org/downloads/nokogiri-1.4.3.1-java.gem"
|
122
|
-
end
|
123
|
-
unless File.exists? "data.tar.gz"
|
124
|
-
run "tar -xf nokogiri-1.4.3.1-java.gem"
|
125
|
-
end
|
126
|
-
FileUtils.rm_rf "unpack"
|
127
|
-
FileUtils.mkdir "unpack"
|
128
|
-
Dir.chdir "unpack" do
|
129
|
-
run "tar -zxf ../data.tar.gz"
|
130
|
-
end
|
131
|
-
end
|
125
|
+
task :build => ["java:clean_jar", "java:build_external", "java:clean_classes"]
|
126
|
+
end
|
132
127
|
|
133
|
-
|
134
|
-
|
128
|
+
namespace :gem do
|
129
|
+
namespace :dev do
|
130
|
+
task :spec => [ GENERATED_PARSER, GENERATED_TOKENIZER ] do
|
131
|
+
File.open("#{HOE.name}.gemspec", 'w') do |f|
|
132
|
+
HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
|
133
|
+
f.write(HOE.spec.to_ruby)
|
135
134
|
end
|
136
135
|
end
|
137
136
|
end
|
@@ -140,21 +139,25 @@ namespace :gem do
|
|
140
139
|
end
|
141
140
|
|
142
141
|
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
|
143
|
-
|
144
|
-
|
145
|
-
|
142
|
+
begin
|
143
|
+
racc = `which racc`.strip
|
144
|
+
racc = "#{::Config::CONFIG['bindir']}/racc" if racc.empty?
|
145
|
+
sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
|
146
|
+
rescue
|
147
|
+
abort "need racc, sudo gem install racc"
|
148
|
+
end
|
146
149
|
end
|
147
150
|
|
148
151
|
file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
|
149
|
-
|
152
|
+
begin
|
153
|
+
sh "rex --independent -o #{t.name} #{t.prerequisites.first}"
|
154
|
+
rescue
|
155
|
+
abort "need rexical, sudo gem install rexical"
|
156
|
+
end
|
150
157
|
end
|
151
158
|
|
152
159
|
require 'tasks/test'
|
153
|
-
|
154
|
-
require 'tasks/cross_compile' unless java
|
155
|
-
rescue RuntimeError => e
|
156
|
-
warn "WARNING: Could not perform some cross-compiling: #{e}"
|
157
|
-
end
|
160
|
+
require 'tasks/cross_compile' unless RUBY_PLATFORM =~ /java/
|
158
161
|
|
159
162
|
desc "set environment variables to build and/or test with debug options"
|
160
163
|
task :debug do
|
@@ -166,14 +169,13 @@ end
|
|
166
169
|
# required_ruby_version
|
167
170
|
|
168
171
|
# Only do this on unix, since we can't build on windows
|
169
|
-
unless windows || java
|
172
|
+
unless windows || java
|
170
173
|
[:compile, :check_manifest].each do |task_name|
|
171
174
|
Rake::Task[task_name].prerequisites << GENERATED_PARSER
|
172
175
|
Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER
|
173
176
|
end
|
174
177
|
|
175
178
|
Rake::Task[:test].prerequisites << :compile
|
176
|
-
Rake::Task[:test].prerequisites << :check_extra_deps
|
177
179
|
if Hoe.plugins.include?(:debugging)
|
178
180
|
['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
|
179
181
|
Rake::Task["test:#{task_name}"].prerequisites << :compile
|
@@ -188,4 +190,21 @@ else
|
|
188
190
|
end
|
189
191
|
end
|
190
192
|
|
193
|
+
namespace :install do
|
194
|
+
desc "Install rex and racc for development"
|
195
|
+
task :deps => %w(rexical racc)
|
196
|
+
|
197
|
+
task :racc do |t|
|
198
|
+
sh "sudo gem install racc"
|
199
|
+
end
|
200
|
+
|
201
|
+
task :rexical do
|
202
|
+
sh "sudo gem install rexical"
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
namespace :rip do
|
207
|
+
task :install => [GENERATED_TOKENIZER, GENERATED_PARSER]
|
208
|
+
end
|
209
|
+
|
191
210
|
# vim: syntax=Ruby
|