nokogiri 1.4.7 → 1.5.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (164)
  1. data/CHANGELOG.ja.rdoc +8 -83
  2. data/CHANGELOG.rdoc +6 -80
  3. data/Manifest.txt +4 -74
  4. data/README.ja.rdoc +5 -1
  5. data/README.rdoc +8 -22
  6. data/Rakefile +79 -60
  7. data/bin/nokogiri +1 -6
  8. data/deps.rip +5 -0
  9. data/ext/nokogiri/extconf.rb +32 -53
  10. data/ext/nokogiri/nokogiri.c +0 -2
  11. data/ext/nokogiri/nokogiri.h +0 -9
  12. data/ext/nokogiri/xml_document.c +0 -14
  13. data/ext/nokogiri/xml_dtd.c +2 -2
  14. data/ext/nokogiri/xml_io.c +7 -32
  15. data/ext/nokogiri/xml_node.c +31 -103
  16. data/ext/nokogiri/xml_node_set.c +8 -8
  17. data/ext/nokogiri/xml_reader.c +1 -20
  18. data/ext/nokogiri/xml_sax_parser.c +3 -5
  19. data/ext/nokogiri/xml_sax_parser_context.c +0 -40
  20. data/ext/nokogiri/xml_xpath_context.c +2 -35
  21. data/ext/nokogiri/xslt_stylesheet.c +6 -124
  22. data/lib/nokogiri.rb +7 -3
  23. data/lib/nokogiri/css.rb +3 -6
  24. data/lib/nokogiri/css/generated_parser.rb +669 -0
  25. data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
  26. data/lib/nokogiri/css/parser.rb +70 -665
  27. data/lib/nokogiri/css/parser.y +1 -6
  28. data/lib/nokogiri/css/tokenizer.rb +3 -148
  29. data/lib/nokogiri/css/tokenizer.rex +1 -1
  30. data/lib/nokogiri/css/xpath_visitor.rb +14 -16
  31. data/lib/nokogiri/decorators/slop.rb +3 -5
  32. data/lib/nokogiri/html.rb +3 -2
  33. data/lib/nokogiri/html/document.rb +18 -134
  34. data/lib/nokogiri/html/document_fragment.rb +21 -26
  35. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  36. data/lib/nokogiri/html/sax/parser.rb +2 -6
  37. data/lib/nokogiri/version.rb +4 -9
  38. data/lib/nokogiri/xml/attribute_decl.rb +1 -1
  39. data/lib/nokogiri/xml/builder.rb +1 -1
  40. data/lib/nokogiri/xml/document.rb +3 -27
  41. data/lib/nokogiri/xml/document_fragment.rb +2 -9
  42. data/lib/nokogiri/xml/dtd.rb +1 -12
  43. data/lib/nokogiri/xml/element_decl.rb +1 -1
  44. data/lib/nokogiri/xml/entity_decl.rb +1 -1
  45. data/lib/nokogiri/xml/node.rb +75 -172
  46. data/lib/nokogiri/xml/node/save_options.rb +0 -10
  47. data/lib/nokogiri/xml/node_set.rb +3 -28
  48. data/lib/nokogiri/xml/parse_options.rb +0 -8
  49. data/lib/nokogiri/xml/reader.rb +6 -44
  50. data/lib/nokogiri/xml/sax/document.rb +5 -9
  51. data/lib/nokogiri/xml/schema.rb +1 -7
  52. data/lib/nokogiri/xslt.rb +5 -9
  53. data/tasks/cross_compile.rb +12 -27
  54. data/tasks/test.rb +0 -0
  55. data/test/css/test_parser.rb +19 -40
  56. data/test/css/test_tokenizer.rb +0 -8
  57. data/test/helper.rb +1 -4
  58. data/test/html/sax/test_parser.rb +21 -47
  59. data/test/html/sax/test_parser_context.rb +2 -2
  60. data/test/html/test_document.rb +3 -58
  61. data/test/html/test_document_encoding.rb +0 -53
  62. data/test/html/test_document_fragment.rb +13 -82
  63. data/test/html/test_element_description.rb +4 -2
  64. data/test/html/test_node.rb +0 -9
  65. data/test/test_memory_leak.rb +2 -57
  66. data/test/test_nokogiri.rb +14 -20
  67. data/test/test_reader.rb +7 -47
  68. data/test/test_xslt_transforms.rb +5 -8
  69. data/test/xml/sax/test_parser.rb +17 -34
  70. data/test/xml/sax/test_parser_context.rb +0 -50
  71. data/test/xml/sax/test_push_parser.rb +1 -18
  72. data/test/xml/test_attr.rb +4 -31
  73. data/test/xml/test_attribute_decl.rb +7 -3
  74. data/test/xml/test_builder.rb +5 -5
  75. data/test/xml/test_cdata.rb +3 -3
  76. data/test/xml/test_document.rb +18 -15
  77. data/test/xml/test_document_fragment.rb +20 -19
  78. data/test/xml/test_dtd.rb +13 -18
  79. data/test/xml/test_element_content.rb +1 -1
  80. data/test/xml/test_element_decl.rb +1 -1
  81. data/test/xml/test_entity_decl.rb +12 -10
  82. data/test/xml/test_namespace.rb +7 -5
  83. data/test/xml/test_node.rb +15 -54
  84. data/test/xml/test_node_reparenting.rb +42 -85
  85. data/test/xml/test_node_set.rb +2 -61
  86. data/test/xml/test_schema.rb +0 -5
  87. data/test/xml/test_text.rb +2 -11
  88. data/test/xml/test_unparented_node.rb +1 -1
  89. data/test/xml/test_xpath.rb +7 -43
  90. metadata +131 -155
  91. data/.gemtest +0 -0
  92. data/ext/nokogiri/depend +0 -358
  93. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  94. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  95. data/lib/nokogiri/css/parser_extras.rb +0 -91
  96. data/lib/nokogiri/ffi/encoding_handler.rb +0 -42
  97. data/lib/nokogiri/ffi/html/document.rb +0 -28
  98. data/lib/nokogiri/ffi/html/element_description.rb +0 -81
  99. data/lib/nokogiri/ffi/html/entity_lookup.rb +0 -16
  100. data/lib/nokogiri/ffi/html/sax/parser_context.rb +0 -38
  101. data/lib/nokogiri/ffi/io_callbacks.rb +0 -42
  102. data/lib/nokogiri/ffi/libxml.rb +0 -420
  103. data/lib/nokogiri/ffi/structs/common_node.rb +0 -38
  104. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +0 -24
  105. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +0 -13
  106. data/lib/nokogiri/ffi/structs/xml_alloc.rb +0 -16
  107. data/lib/nokogiri/ffi/structs/xml_attr.rb +0 -20
  108. data/lib/nokogiri/ffi/structs/xml_attribute.rb +0 -27
  109. data/lib/nokogiri/ffi/structs/xml_buffer.rb +0 -16
  110. data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +0 -11
  111. data/lib/nokogiri/ffi/structs/xml_document.rb +0 -117
  112. data/lib/nokogiri/ffi/structs/xml_dtd.rb +0 -28
  113. data/lib/nokogiri/ffi/structs/xml_element.rb +0 -26
  114. data/lib/nokogiri/ffi/structs/xml_element_content.rb +0 -17
  115. data/lib/nokogiri/ffi/structs/xml_entity.rb +0 -32
  116. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +0 -12
  117. data/lib/nokogiri/ffi/structs/xml_node.rb +0 -28
  118. data/lib/nokogiri/ffi/structs/xml_node_set.rb +0 -53
  119. data/lib/nokogiri/ffi/structs/xml_notation.rb +0 -11
  120. data/lib/nokogiri/ffi/structs/xml_ns.rb +0 -15
  121. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +0 -20
  122. data/lib/nokogiri/ffi/structs/xml_parser_input.rb +0 -19
  123. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +0 -14
  124. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +0 -51
  125. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +0 -124
  126. data/lib/nokogiri/ffi/structs/xml_schema.rb +0 -13
  127. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +0 -31
  128. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +0 -12
  129. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +0 -38
  130. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +0 -35
  131. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +0 -20
  132. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +0 -13
  133. data/lib/nokogiri/ffi/weak_bucket.rb +0 -40
  134. data/lib/nokogiri/ffi/xml/attr.rb +0 -41
  135. data/lib/nokogiri/ffi/xml/attribute_decl.rb +0 -27
  136. data/lib/nokogiri/ffi/xml/cdata.rb +0 -19
  137. data/lib/nokogiri/ffi/xml/comment.rb +0 -18
  138. data/lib/nokogiri/ffi/xml/document.rb +0 -174
  139. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -21
  140. data/lib/nokogiri/ffi/xml/dtd.rb +0 -67
  141. data/lib/nokogiri/ffi/xml/element_content.rb +0 -43
  142. data/lib/nokogiri/ffi/xml/element_decl.rb +0 -19
  143. data/lib/nokogiri/ffi/xml/entity_decl.rb +0 -36
  144. data/lib/nokogiri/ffi/xml/entity_reference.rb +0 -19
  145. data/lib/nokogiri/ffi/xml/namespace.rb +0 -44
  146. data/lib/nokogiri/ffi/xml/node.rb +0 -559
  147. data/lib/nokogiri/ffi/xml/node_set.rb +0 -150
  148. data/lib/nokogiri/ffi/xml/processing_instruction.rb +0 -20
  149. data/lib/nokogiri/ffi/xml/reader.rb +0 -236
  150. data/lib/nokogiri/ffi/xml/relax_ng.rb +0 -85
  151. data/lib/nokogiri/ffi/xml/sax/parser.rb +0 -143
  152. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +0 -79
  153. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +0 -51
  154. data/lib/nokogiri/ffi/xml/schema.rb +0 -109
  155. data/lib/nokogiri/ffi/xml/syntax_error.rb +0 -98
  156. data/lib/nokogiri/ffi/xml/text.rb +0 -18
  157. data/lib/nokogiri/ffi/xml/xpath.rb +0 -9
  158. data/lib/nokogiri/ffi/xml/xpath_context.rb +0 -153
  159. data/lib/nokogiri/ffi/xslt/stylesheet.rb +0 -77
  160. data/test/decorators/test_slop.rb +0 -16
  161. data/test/ffi/test_document.rb +0 -35
  162. data/test/files/encoding.html +0 -82
  163. data/test/files/encoding.xhtml +0 -84
  164. data/test/xslt/test_custom_functions.rb +0 -94
data/CHANGELOG.ja.rdoc CHANGED
@@ -1,83 +1,8 @@
1
- === 1.4.7 / 2011年7月1日
2
-
3
- * バグの修正
4
-
5
- * エンコーディング宣言のないHTMLファイルで部分的に重複したドキュメントが生成される問題を修正した. #478
6
-
7
- === 1.4.6 / 2011年6月19日
8
-
9
- * ノート
10
-
11
- * このバージョンは、1.4.5と機能的に同じです
12
- * Rubyの1.8.6のサポートが復元されている
13
-
14
- === 1.4.5 / 2011年6月15日
15
-
16
- * 新機能
17
-
18
- * Nokogiri::HTML::Document#title アクセサメソッドでHTML文書のタイトルを読み書きできる
19
-
20
- * バグの修正
21
-
22
- * Node#serialize とその仲間達はSaveOptionオブジェクトを受け入れる
23
- * Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer
24
- * [JRUBY+FFIのみ] 「弱い参照」はスレッドセーフになった. #355
25
- * HTML::SAX::Parserから呼ばれるstart_element()コールバックのattributes引数はHTML::XML::Parserによるエミュレートコールバックと同じく連想配列になった. rel. #356
26
- * HTML::SAX::Parserのparse*()メソッドはXML::SAX::Parser同様に渡されたブロックをコールバックするようになった.
27
- * HTMLパーサーのエンコーディング判定をlibxml2の仕様を超えて拡張・改善した. (XML宣言のencodingを認識、非ASCII文字出現後のmetaタグも文字化けを生じずに反映)
28
- * Document#remove_namespaces! は名前空間付きの属性に対応した. #396
29
-
30
- === 1.4.4 2010年11月15日
31
-
32
- * 新機能
33
-
34
- * XML::Node#children=ノード内のhtml reparented node(s)を返す事によって親の変更ができる。
35
- * XSLT はfunction extensionsをサポート。#336
36
- * XPath はパラメーター置換を結合する. #329
37
- * XML::Reader node typeを一定化させる. #369
38
- * SAX Parser context は行とコラムの両方の情報を提供する
39
-
40
- * バグの修正
41
-
42
- * XML::DTD#attributes は属性が存在しない際、nilの代わりに空のハッシュを返す
43
- * XML::DTD#{keys,each} は文字通りに機能するようになった #324
44
- * {XML,HTML}::DocumentFragment.{new,parse} 行送りと末尾の空白を除去しなくなった #319
45
- * XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} は文字列を見送る際にNodeSetを返す
46
- * 不確定タグはフレグメント内で要、不要に関係なく解析される #315
47
- * XML::Node#{replace,add_previous_sibling,add_next_sibling} libxmlのtext node merging に関わるedge caseを修正する #308
48
- * xpath handler argument が整列している最中に起こるGCでのsegfaultを修正 #345
49
- * Slop decoratorが既に確定された定義と共に正常に機能させるための便宜上の処置 #330
50
- * 子ノードが複製される際に起こるメモリ漏れの修正 #353
51
- * an+b記号の無使用時に発生するoff-by-oneバグとnth-last-{child,of-type} CSSセレクターの修正 #354
52
- * 非名前空間属性がSAX::Document#start_elementへパスできるように修正 #356
53
- * libxml2 in-contextの解析バグの処置  #362
54
- * フレグメント内のノードの中にあるNodeSet#wrapの修正 #331
55
-
56
- === 1.4.3 2010年7月28日
57
-
58
- * 新しい機能
59
-
60
- * XML::Reader#empty_element? - 子の無いエレメントにtrueを返す  #262
61
- * Node#remove_namespaces! - 1.4.2では 名前空間のみを取り除いていたが、
62
- 1.4.3 では名前空間及び、名前空間宣言も取り除く #294
63
-
64
- * バグの修正
65
-
66
- * XML::NodeSet#{include?,delete,push} はXML::Namespaceを受入れる
67
- * XML::Document#parse - 1.4.3より文書内の文脈を解析する機能を追加
68
- * XML::DocumentFragment#inner_html= 文脈解析を共に実行する #298, #281
69
- * lib/nokogiri/css/parser.y はCSSと疑似選別の両方を機能
70
- * 演算によって近隣に存在する併合型ノードへの遊離問題の有無に関わらず、一切の
71
- 弊害なしにテキストノードの繰り返しが実行可能  #283
72
- * xmlFirstElementChild et al.による libxml2バージョンでの不適合性を修正 #303
73
- * XML::Attr#add_namespace (!)文字通りの機能実現!  #252
74
- * HTML::DocumentFragment が文字列に存在するエンコードを使用 #305
75
-
76
1
  === 1.4.2 2010年5月22日
77
2
 
78
- * 新機能
3
+ * 新しい機能
79
4
 
80
- * XML::Node#parse 定義されたコンテキストノードでXML 又はHTMLのフレグメント
5
+ * XML::Node#parse 定義されたコンテキストノードで、XML 又はHTMLのフレグメント
81
6
  を解析する
82
7
  * XML::Node#namespacesが子ノードとその祖先ノード内で定義された全ての名前空間
83
8
  を返すようになった(以前は祖先ノードの名前空間は返されなかった)
@@ -105,18 +30,18 @@
105
30
  * XML::Node#xpath はNodeSetのオブジェクト以外のオブジェクトを返す GH #208
106
31
  * XSLT::StyleSheet#transformはパラメーターのハッシュを受け入れる GH #223
107
32
  * CSSのnot()の疑似セレクタの修正  GH #205
108
- * XML::Builder はノードらが切り離されても破壊しない(vihaiの協力に感謝)
33
+ * XML::Builder はノード達が切り離されても破壊しない(vihaiの協力に感謝)
109
34
  GH #228
110
35
  * SAX parser経由でエンコードを強制することが出来る  Eugene Pimenovに感謝!
111
36
  GH #204
112
- * XML::DocumentFragment はXML::Node#parse を使用して子を限定する
113
- * XML Reader内のメモリリーク修正  sdorさん、ありがとう! GH#244
37
+ * XML::DocumentFragment はXML::Node#parse を使用して子供を限定する
38
+ * XML Reader内のメモリリークを修正  sdorさん、ありがとう! GH#244
114
39
 
115
40
  * ノート
116
41
 
117
- * 今日4月18日現在、Windows gems は libxml 2.7.7 とlibxslt
118
- 1.1.26にDLLsを標準装備しています。このリリース以前にも既にDLLsはlibxml
119
- 2.7.3 と libxslt 1.1.24に標準装備済み。
42
+ * 今日4月18日現在、Windows gems は libxml 2.7.6 とlibxslt
43
+ 1.1.26にDLLsを正規装備しています。このリリース以前にも既にDLLsはlibxml
44
+ 2.7.3 と libxslt 1.1.24に正規装備されています。
120
45
 
121
46
  === 1.4.1 2009年12月10日
122
47
 
data/CHANGELOG.rdoc CHANGED
@@ -1,88 +1,14 @@
1
- === 1.4.7 / 2011-07-01
2
-
3
- * Bugfixes
4
-
5
- * Fix a bug in advanced encoding detection that leads to partially
6
- duplicated document when parsing an HTML file with unknown
7
- encoding. Thanks, Timothy Elliott (@ender672)! #478
8
-
9
- === 1.4.6 / 2011-06-19
1
+ == 1.5.0 beta1 / 2010/05/22
10
2
 
11
3
  * Notes
12
4
 
13
- * This version is functionally identical to 1.4.5.
14
- * Ruby 1.8.6 support has been restored.
15
-
16
- === 1.4.5 / 2011-06-15
17
-
18
- * New Features
19
-
20
- * Nokogiri::HTML::Document#title accessor gets and sets the document title.
21
- * extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor)
22
- * Raise an exception if a string is passed to Nokogiri::XML::Schema#validate. #406
23
-
24
- * Bugfixes
25
-
26
- * Node#serialize-and-friends now accepts a SaveOption object as the, erm, save object.
27
- * Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer
28
- * [JRUBY+FFI only] Weak references are now threadsafe. #355
29
- * Make direct start_element() callback (currently used for
30
- HTML::SAX::Parser) pass attributes in assoc array, just as
31
- emulated start_element() callback does. rel. #356
32
- * HTML::SAX::Parser should call back a block given to parse*() if any, just as XML::SAX::Parser does.
33
- * Add further encoding detection to HTML parser that libxml2 does not do.
34
- * Document#remove_namespaces! now handles attributes with namespaces. #396
35
- * XSLT::Stylesheet#transform no longer segfaults when handed a non-XML::Document. #452
36
- * XML::Reader no longer segfaults when under GC pressure. #439
37
-
38
- === 1.4.4 / 2010-11-15
5
+ * JRuby support is provided by a new pure-java backend.
39
6
 
40
- * New Features
41
-
42
- * XML::Node#children= sets the node's inner html (much like #inner_html=), but returns the reparent node(s).
43
- * XSLT supports function extensions. #336
44
- * XPath bind parameter substitution. #329
45
- * XML::Reader node type constants. #369
46
- * SAX Parser context provides line and column information
47
-
48
- * Bugfixes
49
-
50
- * XML::DTD#attributes returns an empty hash instead of nil when there are no attributes.
51
- * XML::DTD#{keys,each} now work as expected. #324
52
- * {XML,HTML}::DocumentFragment.{new,parse} no longer strip leading and trailing whitespace. #319
53
- * XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} return a NodeSet when passed a string.
54
- * Unclosed tags parsed more robustly in fragments. #315
55
- * XML::Node#{replace,add_previous_sibling,add_next_sibling} edge cases fixed related to libxml's text node merging. #308
56
- * Fixed a segfault when GC occurs during xpath handler argument marshalling. #345
57
- * Added hack to Slop decorator to work with previously defined methods. #330
58
- * Fix a memory leak when duplicating child nodes. #353
59
- * Fixed off-by-one bug with nth-last-{child,of-type} CSS selectors when NOT using an+b notation. #354
60
- * Fixed passing of non-namespace attributes to SAX::Document#start_element. #356
61
- * Workaround for libxml2 in-context parsing bug. #362
62
- * Fixed NodeSet#wrap on nodes within a fragment. #331
63
-
64
- === 1.4.3 / 2010/07/28
65
-
66
- * New Features
67
-
68
- * XML::Reader#empty_element? returns true for empty elements. #262
69
- * Node#remove_namespaces! now removes namespace *declarations* as well. #294
70
- * NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding
71
- methods of Node do.
72
-
73
- * Bugfixes
7
+ * Deprecations
74
8
 
75
- * XML::NodeSet#{include?,delete,push} accept an XML::Namespace
76
- * XML::Document#parse added for parsing in the context of a document
77
- * XML::DocumentFragment#inner_html= works with contextual parsing! #298, #281
78
- * lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed
79
- * Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. #283
80
- * Fixed libxml2 versionitis issue with xmlFirstElementChild et al. #303
81
- * XML::Attr#add_namespace now works as expected. #252
82
- * HTML::DocumentFragment uses the string's encoding. #305
83
- * Fix the CSS3 selector translation rule for the general sibling combinator
84
- (a.k.a. preceding selector) that incorrectly converted "E + F G" to
85
- "//F//G[preceding-sibling::E]".
9
+ * Ruby 1.8.6 is deprecated. Nokogiri will install, but official support has ended.
10
+ * LibXML 2.6.16 and earlier are deprecated. Nokogiri will refuse to install.
11
+ * FFI support is removed.
86
12
 
87
13
  === 1.4.2 / 2010/05/22
88
14
 
data/Manifest.txt CHANGED
@@ -1,5 +1,4 @@
1
1
  .autotest
2
- .gemtest
3
2
  CHANGELOG.ja.rdoc
4
3
  CHANGELOG.rdoc
5
4
  Manifest.txt
@@ -7,7 +6,7 @@ README.ja.rdoc
7
6
  README.rdoc
8
7
  Rakefile
9
8
  bin/nokogiri
10
- ext/nokogiri/depend
9
+ deps.rip
11
10
  ext/nokogiri/extconf.rb
12
11
  ext/nokogiri/html_document.c
13
12
  ext/nokogiri/html_document.h
@@ -45,8 +44,6 @@ ext/nokogiri/xml_entity_reference.c
45
44
  ext/nokogiri/xml_entity_reference.h
46
45
  ext/nokogiri/xml_io.c
47
46
  ext/nokogiri/xml_io.h
48
- ext/nokogiri/xml_libxml2_hacks.c
49
- ext/nokogiri/xml_libxml2_hacks.h
50
47
  ext/nokogiri/xml_namespace.c
51
48
  ext/nokogiri/xml_namespace.h
52
49
  ext/nokogiri/xml_node.c
@@ -77,84 +74,22 @@ ext/nokogiri/xslt_stylesheet.c
77
74
  ext/nokogiri/xslt_stylesheet.h
78
75
  lib/nokogiri.rb
79
76
  lib/nokogiri/css.rb
77
+ lib/nokogiri/css/generated_parser.rb
78
+ lib/nokogiri/css/generated_tokenizer.rb
80
79
  lib/nokogiri/css/node.rb
81
80
  lib/nokogiri/css/parser.rb
82
81
  lib/nokogiri/css/parser.y
83
- lib/nokogiri/css/parser_extras.rb
84
82
  lib/nokogiri/css/syntax_error.rb
85
83
  lib/nokogiri/css/tokenizer.rb
86
84
  lib/nokogiri/css/tokenizer.rex
87
85
  lib/nokogiri/css/xpath_visitor.rb
88
86
  lib/nokogiri/decorators/slop.rb
89
- lib/nokogiri/ffi/encoding_handler.rb
90
- lib/nokogiri/ffi/html/document.rb
91
- lib/nokogiri/ffi/html/element_description.rb
92
- lib/nokogiri/ffi/html/entity_lookup.rb
93
- lib/nokogiri/ffi/html/sax/parser_context.rb
94
- lib/nokogiri/ffi/io_callbacks.rb
95
- lib/nokogiri/ffi/libxml.rb
96
- lib/nokogiri/ffi/structs/common_node.rb
97
- lib/nokogiri/ffi/structs/html_elem_desc.rb
98
- lib/nokogiri/ffi/structs/html_entity_desc.rb
99
- lib/nokogiri/ffi/structs/xml_alloc.rb
100
- lib/nokogiri/ffi/structs/xml_attr.rb
101
- lib/nokogiri/ffi/structs/xml_attribute.rb
102
- lib/nokogiri/ffi/structs/xml_buffer.rb
103
- lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb
104
- lib/nokogiri/ffi/structs/xml_document.rb
105
- lib/nokogiri/ffi/structs/xml_dtd.rb
106
- lib/nokogiri/ffi/structs/xml_element.rb
107
- lib/nokogiri/ffi/structs/xml_element_content.rb
108
- lib/nokogiri/ffi/structs/xml_entity.rb
109
- lib/nokogiri/ffi/structs/xml_enumeration.rb
110
- lib/nokogiri/ffi/structs/xml_node.rb
111
- lib/nokogiri/ffi/structs/xml_node_set.rb
112
- lib/nokogiri/ffi/structs/xml_notation.rb
113
- lib/nokogiri/ffi/structs/xml_ns.rb
114
- lib/nokogiri/ffi/structs/xml_parser_context.rb
115
- lib/nokogiri/ffi/structs/xml_parser_input.rb
116
- lib/nokogiri/ffi/structs/xml_relax_ng.rb
117
- lib/nokogiri/ffi/structs/xml_sax_handler.rb
118
- lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb
119
- lib/nokogiri/ffi/structs/xml_schema.rb
120
- lib/nokogiri/ffi/structs/xml_syntax_error.rb
121
- lib/nokogiri/ffi/structs/xml_text_reader.rb
122
- lib/nokogiri/ffi/structs/xml_xpath_context.rb
123
- lib/nokogiri/ffi/structs/xml_xpath_object.rb
124
- lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb
125
- lib/nokogiri/ffi/structs/xslt_stylesheet.rb
126
- lib/nokogiri/ffi/weak_bucket.rb
127
- lib/nokogiri/ffi/xml/attr.rb
128
- lib/nokogiri/ffi/xml/attribute_decl.rb
129
- lib/nokogiri/ffi/xml/cdata.rb
130
- lib/nokogiri/ffi/xml/comment.rb
131
- lib/nokogiri/ffi/xml/document.rb
132
- lib/nokogiri/ffi/xml/document_fragment.rb
133
- lib/nokogiri/ffi/xml/dtd.rb
134
- lib/nokogiri/ffi/xml/element_content.rb
135
- lib/nokogiri/ffi/xml/element_decl.rb
136
- lib/nokogiri/ffi/xml/entity_decl.rb
137
- lib/nokogiri/ffi/xml/entity_reference.rb
138
- lib/nokogiri/ffi/xml/namespace.rb
139
- lib/nokogiri/ffi/xml/node.rb
140
- lib/nokogiri/ffi/xml/node_set.rb
141
- lib/nokogiri/ffi/xml/processing_instruction.rb
142
- lib/nokogiri/ffi/xml/reader.rb
143
- lib/nokogiri/ffi/xml/relax_ng.rb
144
- lib/nokogiri/ffi/xml/sax/parser.rb
145
- lib/nokogiri/ffi/xml/sax/parser_context.rb
146
- lib/nokogiri/ffi/xml/sax/push_parser.rb
147
- lib/nokogiri/ffi/xml/schema.rb
148
- lib/nokogiri/ffi/xml/syntax_error.rb
149
- lib/nokogiri/ffi/xml/text.rb
150
- lib/nokogiri/ffi/xml/xpath.rb
151
- lib/nokogiri/ffi/xml/xpath_context.rb
152
- lib/nokogiri/ffi/xslt/stylesheet.rb
153
87
  lib/nokogiri/html.rb
154
88
  lib/nokogiri/html/builder.rb
155
89
  lib/nokogiri/html/document.rb
156
90
  lib/nokogiri/html/document_fragment.rb
157
91
  lib/nokogiri/html/element_description.rb
92
+ lib/nokogiri/html/element_description_defaults.rb
158
93
  lib/nokogiri/html/entity_lookup.rb
159
94
  lib/nokogiri/html/sax/parser.rb
160
95
  lib/nokogiri/html/sax/parser_context.rb
@@ -205,15 +140,11 @@ test/css/test_nthiness.rb
205
140
  test/css/test_parser.rb
206
141
  test/css/test_tokenizer.rb
207
142
  test/css/test_xpath_visitor.rb
208
- test/decorators/test_slop.rb
209
- test/ffi/test_document.rb
210
143
  test/files/2ch.html
211
144
  test/files/address_book.rlx
212
145
  test/files/address_book.xml
213
146
  test/files/bar/bar.xsd
214
147
  test/files/dont_hurt_em_why.xml
215
- test/files/encoding.html
216
- test/files/encoding.xhtml
217
148
  test/files/exslt.xml
218
149
  test/files/exslt.xslt
219
150
  test/files/foo/foo.xsd
@@ -280,4 +211,3 @@ test/xml/test_syntax_error.rb
280
211
  test/xml/test_text.rb
281
212
  test/xml/test_unparented_node.rb
282
213
  test/xml/test_xpath.rb
283
- test/xslt/test_custom_functions.rb
data/README.ja.rdoc CHANGED
@@ -3,7 +3,7 @@
3
3
  * http://nokogiri.org/
4
4
  * http://github.com/tenderlove/nokogiri/wikis
5
5
  * http://github.com/tenderlove/nokogiri/tree/master
6
- * http://groups.google.com/group/nokogiri-list
6
+ * http://groups.google.com/group/nokogiri-talk
7
7
  * http://github.com/tenderlove/nokogiri/issues
8
8
 
9
9
  == DESCRIPTION:
@@ -22,6 +22,10 @@ XMLは暴力に似ている - XMLが君の問題を解決しないとしたら
22
22
 
23
23
  XML/HTMLの高速な解析と探索検索、ならびにCSS3セレクタとXPathをサポートしています。
24
24
 
25
+ スピードテスト:
26
+
27
+ * http://gist.github.com/24605
28
+
25
29
  == SUPPORT:
26
30
 
27
31
  日本語でNokogiriの
data/README.rdoc CHANGED
@@ -23,6 +23,10 @@ enough of it.
23
23
  Nokogiri parses and searches XML/HTML very quickly, and also has
24
24
  correctly implemented CSS3 selector support as well as XPath support.
25
25
 
26
+ Here is a speed test:
27
+
28
+ * http://gist.github.com/24605
29
+
26
30
  == SUPPORT:
27
31
 
28
32
  The Nokogiri {mailing list}[http://groups.google.com/group/nokogiri-talk]
@@ -77,27 +81,9 @@ The IRC channel is #nokogiri on freenode.
77
81
 
78
82
  == ENCODING:
79
83
 
80
- Strings are always stored as UTF-8 internally. Methods that return
81
- text values will always return UTF-8 encoded strings. Methods that
82
- return XML (like to_xml, to_html and inner_html) will return a string
83
- encoded like the source document.
84
-
85
- *WARNING*
86
-
87
- Some documents declare one particular encoding, but use a different
88
- one. So, which encoding should the parser choose?
89
-
90
- Remember that data is just a stream of bytes. Only us humans add
91
- meaning to that stream. Any particular set of bytes could be valid
92
- characters in multiple encodings, so detecting encoding with 100%
93
- accuracy is not possible. libxml2 does its best, but it can't be right
94
- 100% of the time.
95
-
96
- If you want Nokogiri to handle the document encoding properly, your
97
- best bet is to explicitly set the encoding. Here is an example of
98
- explicitly setting the encoding to EUC-JP on the parser:
99
-
100
- doc = Nokogiri.XML('<foo><bar /><foo>', nil, 'EUC-JP')
84
+ Strings are always stored as UTF-8 internally. Methods that return text values
85
+ will always return UTF-8 encoded strings. Methods that return XML (like to_xml,
86
+ to_html and inner_html) will return a string encoded like the source document.
101
87
 
102
88
  == INSTALL:
103
89
 
@@ -108,7 +94,7 @@ explicitly setting the encoding to EUC-JP on the parser:
108
94
  Binary packages are available for:
109
95
 
110
96
  * SuSE[http://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/]
111
- * Fedora[http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756]
97
+ * Fedora[https://admin.fedoraproject.org/pkgdb/packages/name/rubygem-nokogiri]
112
98
 
113
99
  == DEVELOPMENT:
114
100
 
data/Rakefile CHANGED
@@ -7,13 +7,16 @@ require 'hoe'
7
7
  windows = RUBY_PLATFORM =~ /(mswin|mingw)/i
8
8
  java = RUBY_PLATFORM =~ /java/
9
9
 
10
- GENERATED_PARSER = "lib/nokogiri/css/parser.rb"
11
- GENERATED_TOKENIZER = "lib/nokogiri/css/tokenizer.rb"
10
+ GENERATED_PARSER = "lib/nokogiri/css/generated_parser.rb"
11
+ GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"
12
12
  CROSS_DIR = File.join(File.dirname(__FILE__), 'tmp', 'cross')
13
13
 
14
+ EXTERNAL_JAVA_LIBRARIES = %w{isorelax jing nekohtml nekodtd xercesImpl}.map{|x| "lib/#{x}.jar"}
15
+ JAVA_EXT = "lib/nokogiri/nokogiri.jar"
16
+ JRUBY_HOME = Config::CONFIG['prefix']
17
+
14
18
  # Make sure hoe-debugging is installed
15
19
  Hoe.plugin :debugging
16
- Hoe.plugin :git
17
20
 
18
21
  HOE = Hoe.spec 'nokogiri' do
19
22
  developer('Aaron Patterson', 'aaronp@rubyforge.org')
@@ -22,19 +25,18 @@ HOE = Hoe.spec 'nokogiri' do
22
25
  self.history_file = ['CHANGELOG', ENV['HLANG'], 'rdoc'].compact.join('.')
23
26
  self.extra_rdoc_files = FileList['*.rdoc','ext/nokogiri/*.c']
24
27
  self.clean_globs = [
25
- "ext/nokogiri/*.dll",
26
28
  'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
27
29
  'lib/nokogiri/nokogiri.rb',
28
30
  'lib/nokogiri/1.{8,9}',
29
31
  GENERATED_PARSER,
30
32
  GENERATED_TOKENIZER,
31
- CROSS_DIR
33
+ 'cross',
32
34
  ]
33
35
 
34
36
  %w{ racc rexical rake-compiler }.each do |dep|
35
- extra_dev_deps << [dep, '>= 0']
37
+ self.extra_dev_deps << [dep, '>= 0']
36
38
  end
37
- extra_dev_deps << ["minitest", ">= 1.6.0"]
39
+ self.extra_dev_deps << ["minitest", ">= 1.6.0"]
38
40
 
39
41
  self.spec_extras = { :extensions => ["ext/nokogiri/extconf.rb"] }
40
42
 
@@ -67,7 +69,7 @@ unless java
67
69
 
68
70
  ext.config_options << ENV['EXTOPTS']
69
71
  ext.cross_compile = true
70
- ext.cross_platform = ["x86-mingw32", "x86-mswin32-60"]
72
+ ext.cross_platform = 'i386-mingw32'
71
73
  ext.cross_config_options <<
72
74
  "--with-xml2-include=#{File.join(CROSS_DIR, 'include', 'libxml2')}"
73
75
  ext.cross_config_options <<
@@ -78,60 +80,57 @@ unless java
78
80
  end
79
81
  end
80
82
 
81
- namespace :gem do
82
- namespace :dev do
83
- task :spec => [ GENERATED_PARSER, GENERATED_TOKENIZER ] do
84
- File.open("#{HOE.name}.gemspec", 'w') do |f|
85
- HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
86
- f.write(HOE.spec.to_ruby)
87
- end
83
+ namespace :java do
84
+ desc "Removes all generated during compilation .class files."
85
+ task :clean_classes do
86
+ (FileList['ext/java/nokogiri/internals/*.class'] + FileList['ext/java/nokogiri/*.class'] + FileList['ext/java/*.class']).to_a.each do |file|
87
+ File.delete file
88
88
  end
89
89
  end
90
90
 
91
+ desc "Removes the generated .jar"
92
+ task :clean_jar do
93
+ FileList['lib/nokogiri/*.jar'].each{|f| File.delete f }
94
+ end
95
+
96
+ desc "Same as java:clean_classes and java:clean_jar"
97
+ task :clean_all => ["java:clean_classes", "java:clean_jar"]
98
+
91
99
  desc "Build a gem targetted for JRuby"
92
- task :jruby => ['gem:jruby:spec'] do
93
- raise "ERROR: please run this task under jruby" unless java
100
+ task :gem => ['java:spec', GENERATED_PARSER, GENERATED_TOKENIZER, :build] do
94
101
  system "gem build nokogiri.gemspec"
95
102
  FileUtils.mkdir_p "pkg"
96
103
  FileUtils.mv Dir.glob("nokogiri*-java.gem"), "pkg"
97
104
  end
98
105
 
99
- namespace :jruby do
100
- task :spec => [GENERATED_PARSER, GENERATED_TOKENIZER, :"gem:jruby:dlls"] do
101
- File.open("#{HOE.name}.gemspec", 'w') do |f|
102
- HOE.spec.platform = 'java'
103
- HOE.spec.files << GENERATED_PARSER
104
- HOE.spec.files << GENERATED_TOKENIZER
105
- HOE.spec.files += Dir["ext/nokogiri/*.dll"]
106
- HOE.spec.extensions = []
107
- HOE.spec.add_dependency 'weakling', '>= 0.0.3'
108
- f.write(HOE.spec.to_ruby)
109
- end
106
+ task :spec do
107
+ File.open("#{HOE.name}.gemspec", 'w') do |f|
108
+ HOE.spec.platform = 'java'
109
+ HOE.spec.files += [GENERATED_PARSER, GENERATED_TOKENIZER, JAVA_EXT] + EXTERNAL_JAVA_LIBRARIES
110
+ HOE.spec.extensions = []
111
+ f.write(HOE.spec.to_ruby)
110
112
  end
113
+ end
111
114
 
112
- task :dlls do
113
- def run cmd
114
- puts(cmd) || system(cmd) || raise("command failed")
115
- end
115
+ desc "Build external library"
116
+ task :build_external do
117
+ Dir.chdir('ext/java') do
118
+ LIB_DIR = '../../lib'
119
+ CLASSPATH = "#{JRUBY_HOME}/lib/jruby.jar:#{LIB_DIR}/nekohtml.jar:#{LIB_DIR}/nekodtd.jar:#{LIB_DIR}/xercesImpl.jar:#{LIB_DIR}/isorelax.jar:#{LIB_DIR}/jing.jar"
120
+ sh "javac -g -cp #{CLASSPATH} nokogiri/*.java nokogiri/internals/*.java"
121
+ sh "jar cf ../../#{JAVA_EXT} nokogiri/*.class nokogiri/internals/*.class"
122
+ end
123
+ end
116
124
 
117
- dlldir = "tmp/dlls"
118
- FileUtils.mkdir_p dlldir
119
- Dir.chdir dlldir do
120
- unless File.exists? "nokogiri-1.4.3.1-java.gem"
121
- run "wget http://rubygems.org/downloads/nokogiri-1.4.3.1-java.gem"
122
- end
123
- unless File.exists? "data.tar.gz"
124
- run "tar -xf nokogiri-1.4.3.1-java.gem"
125
- end
126
- FileUtils.rm_rf "unpack"
127
- FileUtils.mkdir "unpack"
128
- Dir.chdir "unpack" do
129
- run "tar -zxf ../data.tar.gz"
130
- end
131
- end
125
+ task :build => ["java:clean_jar", "java:build_external", "java:clean_classes"]
126
+ end
132
127
 
133
- Dir["#{dlldir}/unpack/ext/nokogiri/*.dll"].each do |file|
134
- cp file, "ext/nokogiri"
128
+ namespace :gem do
129
+ namespace :dev do
130
+ task :spec => [ GENERATED_PARSER, GENERATED_TOKENIZER ] do
131
+ File.open("#{HOE.name}.gemspec", 'w') do |f|
132
+ HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
133
+ f.write(HOE.spec.to_ruby)
135
134
  end
136
135
  end
137
136
  end
@@ -140,21 +139,25 @@ namespace :gem do
140
139
  end
141
140
 
142
141
  file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
143
- racc = Config::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
144
- racc = "#{::Config::CONFIG['bindir']}/racc" if racc.empty?
145
- sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
142
+ begin
143
+ racc = `which racc`.strip
144
+ racc = "#{::Config::CONFIG['bindir']}/racc" if racc.empty?
145
+ sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
146
+ rescue
147
+ abort "need racc, sudo gem install racc"
148
+ end
146
149
  end
147
150
 
148
151
  file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
149
- sh "rex --independent -o #{t.name} #{t.prerequisites.first}"
152
+ begin
153
+ sh "rex --independent -o #{t.name} #{t.prerequisites.first}"
154
+ rescue
155
+ abort "need rexical, sudo gem install rexical"
156
+ end
150
157
  end
151
158
 
152
159
  require 'tasks/test'
153
- begin
154
- require 'tasks/cross_compile' unless java
155
- rescue RuntimeError => e
156
- warn "WARNING: Could not perform some cross-compiling: #{e}"
157
- end
160
+ require 'tasks/cross_compile' unless RUBY_PLATFORM =~ /java/
158
161
 
159
162
  desc "set environment variables to build and/or test with debug options"
160
163
  task :debug do
@@ -166,14 +169,13 @@ end
166
169
  # required_ruby_version
167
170
 
168
171
  # Only do this on unix, since we can't build on windows
169
- unless windows || java || ENV['NOKOGIRI_FFI']
172
+ unless windows || java
170
173
  [:compile, :check_manifest].each do |task_name|
171
174
  Rake::Task[task_name].prerequisites << GENERATED_PARSER
172
175
  Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER
173
176
  end
174
177
 
175
178
  Rake::Task[:test].prerequisites << :compile
176
- Rake::Task[:test].prerequisites << :check_extra_deps
177
179
  if Hoe.plugins.include?(:debugging)
178
180
  ['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
179
181
  Rake::Task["test:#{task_name}"].prerequisites << :compile
@@ -188,4 +190,21 @@ else
188
190
  end
189
191
  end
190
192
 
193
+ namespace :install do
194
+ desc "Install rex and racc for development"
195
+ task :deps => %w(rexical racc)
196
+
197
+ task :racc do |t|
198
+ sh "sudo gem install racc"
199
+ end
200
+
201
+ task :rexical do
202
+ sh "sudo gem install rexical"
203
+ end
204
+ end
205
+
206
+ namespace :rip do
207
+ task :install => [GENERATED_TOKENIZER, GENERATED_PARSER]
208
+ end
209
+
191
210
  # vim: syntax=Ruby