nokogiri 1.6.6.4-java → 1.6.7-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.cross_rubies +7 -5
- data/.travis.yml +33 -30
- data/CHANGELOG.ja.rdoc +38 -6
- data/CHANGELOG.rdoc +33 -1
- data/Gemfile +3 -2
- data/LICENSE.txt +31 -0
- data/Manifest.txt +4 -24
- data/README.md +170 -0
- data/Rakefile +25 -22
- data/appveyor.yml +22 -0
- data/build_all +6 -90
- data/ext/java/nokogiri/XmlDocument.java +5 -0
- data/ext/java/nokogiri/XmlNode.java +16 -1
- data/ext/java/nokogiri/XmlSaxPushParser.java +6 -2
- data/ext/java/nokogiri/XmlSchema.java +20 -20
- data/ext/java/nokogiri/internals/NokogiriHandler.java +21 -15
- data/ext/java/nokogiri/internals/ParserContext.java +15 -11
- data/ext/nokogiri/extconf.rb +37 -34
- data/ext/nokogiri/xml_node.c +21 -11
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/node.rb +16 -0
- data/{ports/patches → patches}/sort-patches-by-date +0 -0
- data/test/html/test_document.rb +26 -3
- data/test/xml/sax/test_parser.rb +11 -0
- data/test/xml/test_document.rb +11 -0
- data/test/xml/test_schema.rb +15 -2
- data/test/xml/test_xpath.rb +15 -0
- metadata +28 -33
- data/README.ja.rdoc +0 -112
- data/README.rdoc +0 -177
- data/ports/patches/libxml2/0001-Revert-Missing-initialization-for-the-catalog-module.patch +0 -29
- data/ports/patches/libxml2/0002-Fix-missing-entities-after-CVE-2014-3660-fix.patch +0 -31
- data/ports/patches/libxml2/0003-Stop-parsing-on-entities-boundaries-errors.patch +0 -32
- data/ports/patches/libxml2/0004-Cleanup-conditional-section-error-handling.patch +0 -49
- data/ports/patches/libxml2/0005-CVE-2015-1819-Enforce-the-reader-to-run-in-constant-.patch +0 -177
- data/ports/patches/libxml2/0006-Another-variation-of-overflow-in-Conditional-section.patch +0 -32
- data/ports/patches/libxml2/0007-Fix-an-error-in-previous-Conditional-section-patch.patch +0 -28
- data/ports/patches/libxml2/0008-CVE-2015-8035-Fix-XZ-compression-support-loop.patch +0 -31
- data/ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch +0 -222
- data/ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch +0 -53
- data/ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch +0 -60
- data/ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch +0 -42
- data/ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch +0 -164
- data/ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch +0 -587
- data/ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch +0 -80
- data/ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch +0 -185
- data/ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch +0 -126
- data/ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch +0 -25
- data/ports/patches/libxslt/0014-Fix-for-bug-436589.patch +0 -43
- data/ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch +0 -41
- data/ports/patches/libxslt/0016-Fix-for-type-confusion-in-preprocessing-attributes.patch +0 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a74dc6cdae47508d72a91cdb4922ae0c39e09350
|
4
|
+
data.tar.gz: 74a12a5214a566aee45ceb9429ed300dc0c0a982
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad2c4f7b59627b714fdd51b37342d94b2a7b4d4af1ce9cd1cea1a2916c1abd54d1960418e37a71fa4778d14fe9011bc28614d41413b9e50b5ff79032b39d102b
|
7
|
+
data.tar.gz: 354a2313e830f1740e5b36a4c7fdc60c784a90cb7ec5bc696734891fbbe01379cf1f2ec683d5b8e5f52f19c3db944e2ffa4eada05ae8aa633d9cac6dc8ffaf12
|
data/.cross_rubies
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
1.9.3-
|
2
|
-
2.0.0-
|
3
|
-
2.0.0-
|
4
|
-
2.1.
|
5
|
-
2.1.
|
1
|
+
1.9.3-p551:i586-mingw32msvc
|
2
|
+
2.0.0-p645:i686-w64-mingw32
|
3
|
+
2.0.0-p645:x86_64-w64-mingw32
|
4
|
+
2.1.6:i686-w64-mingw32
|
5
|
+
2.1.6:x86_64-w64-mingw32
|
6
|
+
2.2.2:i686-w64-mingw32
|
7
|
+
2.2.2:x86_64-w64-mingw32
|
data/.travis.yml
CHANGED
@@ -1,38 +1,41 @@
|
|
1
1
|
language: ruby
|
2
2
|
|
3
|
+
sudo: false
|
4
|
+
|
3
5
|
rvm:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
- ruby-1.9.2
|
7
|
+
- ruby-1.9.3
|
8
|
+
- ruby-2.0.0
|
9
|
+
- ruby-2.1.7
|
10
|
+
- ruby-2.2.2
|
11
|
+
- jruby-19mode
|
12
|
+
- jruby-1.7.22
|
13
|
+
- jruby-9.0.0.0
|
14
|
+
- rbx-19mode
|
15
|
+
- rbx-2
|
14
16
|
|
15
17
|
os:
|
16
|
-
|
17
|
-
|
18
|
+
- linux
|
19
|
+
- osx
|
18
20
|
|
19
21
|
matrix:
|
20
|
-
allow_failures:
|
21
|
-
- os: osx
|
22
|
-
- rvm: jruby-9.0.0.0.pre1
|
23
|
-
|
24
22
|
exclude:
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
23
|
+
- os: osx
|
24
|
+
rvm: ruby-1.9.2
|
25
|
+
- os: osx
|
26
|
+
rvm: jruby-19mode
|
27
|
+
- os: osx
|
28
|
+
rvm: jruby-1.7.22
|
29
|
+
- os: osx
|
30
|
+
rvm: jruby-9.0.0.0
|
31
|
+
- os: osx
|
32
|
+
rvm: rbx-19mode
|
33
|
+
- os: osx
|
34
|
+
rvm: rbx-2
|
35
|
+
|
36
|
+
notifications:
|
37
|
+
irc:
|
38
|
+
channels:
|
39
|
+
- "chat.freenode.net#nokogiri"
|
40
|
+
use_notice: true
|
41
|
+
skip_join: true
|
data/CHANGELOG.ja.rdoc
CHANGED
@@ -1,13 +1,44 @@
|
|
1
|
-
=== 1.6.
|
1
|
+
=== 1.6.7 / 2015年11月29日
|
2
2
|
|
3
|
-
|
3
|
+
==== 註
|
4
|
+
|
5
|
+
This version supports native builds on Windows using the RubyInstaller
|
6
|
+
DevKit. It also supports Ruby 2.2.x on Windows, as well as making
|
7
|
+
several other improvements to the installation process on various
|
8
|
+
platforms.
|
9
|
+
|
10
|
+
This version also includes the security patches already applied in
|
11
|
+
v1.6.6.3 and v1.6.6.4 to the vendored libxml2 and libxslt source.
|
12
|
+
See #1374 and #1376 for details.
|
13
|
+
|
14
|
+
==== 機能
|
15
|
+
|
16
|
+
* Cross-built gems now have a proper ruby version requirement. (#1266)
|
17
|
+
* Ruby 2.2.x is supported on Windows.
|
18
|
+
* Native build is supported on Windows.
|
19
|
+
* [MRI] libxml2 and libxslt `config.guess` files brought up to date. (#1326) (Thanks, @hernan-erasmo!)
|
20
|
+
* [JRuby] fix error in validating files with jruby (#1355, #1361) (Thanks, @twalpole!)
|
21
|
+
* [MRI, OSX] Patch to handle nonstandard location of `iconv.h`. (#1206, #1210, #1218, #1345) (Thanks, @neonichu!)
|
22
|
+
|
23
|
+
==== バグ修正
|
24
|
+
|
25
|
+
* [JRuby] reset the namespace cache when replacing the document's innerHtml (#1265) (Thanks, @mkristian!)
|
26
|
+
* [JRuby] Document#parse should support IO objects that respond to #read. (#1124) (Thanks, Jake Byman!)
|
27
|
+
* [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. (#1262)
|
28
|
+
* [JRuby] SAX parser cuts text into pieces when square brackets exist. (#1261)
|
29
|
+
* [JRuby] Namespaced attributes aren't removed by remove_attribute. (#1299)
|
30
|
+
|
31
|
+
|
32
|
+
=== 1.6.6.4 / 2015年11月19日
|
33
|
+
|
34
|
+
This version pulls in an upstream patch to the vendored libxml2 to address:
|
4
35
|
|
5
36
|
* unclosed comment uninitialized access issue (#1376)
|
6
37
|
|
7
38
|
This issue does not have a CVE assigned to it at this time.
|
8
39
|
|
9
40
|
|
10
|
-
=== 1.6.6.3 / 2015
|
41
|
+
=== 1.6.6.3 / 2015年11月16日
|
11
42
|
|
12
43
|
This version pulls in several upstream patches to the vendored libxml2 and libxslt to address:
|
13
44
|
|
@@ -24,7 +55,7 @@ See #1374 for details.
|
|
24
55
|
|
25
56
|
=== 1.6.6.2 / 2015年01月23日
|
26
57
|
|
27
|
-
====
|
58
|
+
==== バグ修正
|
28
59
|
|
29
60
|
* Fixed installation issue affecting compiler arguments. (#1230)
|
30
61
|
|
@@ -33,6 +64,7 @@ See #1374 for details.
|
|
33
64
|
|
34
65
|
Note that 1.6.6.0 was not released.
|
35
66
|
|
67
|
+
|
36
68
|
==== 機能
|
37
69
|
|
38
70
|
* Unified Node and NodeSet implementations of #search, #xpath and #css.
|
@@ -183,7 +215,7 @@ libxml2 2.6.21 以上が必要に. (従前は 2.6.17 以上)
|
|
183
215
|
|
184
216
|
=== 1.6.1 / 2013年12月14日
|
185
217
|
|
186
|
-
*
|
218
|
+
* バグ修正
|
187
219
|
|
188
220
|
* (JRuby) 不正なドキュメントをパースするときにメモリ不足に陥るバグを修正
|
189
221
|
* (JRuby) Billion-laughs攻撃対策のリグレッションを修正 #586
|
@@ -223,7 +255,7 @@ libxml2 2.6.21 以上が必要に. (従前は 2.6.17 以上)
|
|
223
255
|
|
224
256
|
=== 1.5.11 / 2013年12月14日
|
225
257
|
|
226
|
-
*
|
258
|
+
* バグ修正
|
227
259
|
|
228
260
|
* (JRuby) 不正なドキュメントをパースするときにメモリ不足に陥るバグを修正
|
229
261
|
* (JRuby) Billion-laughs攻撃対策のリグレッションを修正 #586
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,6 +1,37 @@
|
|
1
|
+
=== 1.6.7 / 2015-11-29
|
2
|
+
|
3
|
+
==== Notes
|
4
|
+
|
5
|
+
This version supports native builds on Windows using the RubyInstaller
|
6
|
+
DevKit. It also supports Ruby 2.2.x on Windows, as well as making
|
7
|
+
several other improvements to the installation process on various
|
8
|
+
platforms.
|
9
|
+
|
10
|
+
This version also includes the security patches already applied in
|
11
|
+
v1.6.6.3 and v1.6.6.4 to the vendored libxml2 and libxslt source.
|
12
|
+
See #1374 and #1376 for details.
|
13
|
+
|
14
|
+
==== Features
|
15
|
+
|
16
|
+
* Cross-built gems now have a proper ruby version requirement. (#1266)
|
17
|
+
* Ruby 2.2.x is supported on Windows.
|
18
|
+
* Native build is supported on Windows.
|
19
|
+
* [MRI] libxml2 and libxslt `config.guess` files brought up to date. (#1326) (Thanks, @hernan-erasmo!)
|
20
|
+
* [JRuby] fix error in validating files with jruby (#1355, #1361) (Thanks, @twalpole!)
|
21
|
+
* [MRI, OSX] Patch to handle nonstandard location of `iconv.h`. (#1206, #1210, #1218, #1345) (Thanks, @neonichu!)
|
22
|
+
|
23
|
+
==== Bug Fixes
|
24
|
+
|
25
|
+
* [JRuby] reset the namespace cache when replacing the document's innerHtml (#1265) (Thanks, @mkristian!)
|
26
|
+
* [JRuby] Document#parse should support IO objects that respond to #read. (#1124) (Thanks, Jake Byman!)
|
27
|
+
* [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. (#1262)
|
28
|
+
* [JRuby] SAX parser cuts text into pieces when square brackets exist. (#1261)
|
29
|
+
* [JRuby] Namespaced attributes aren't removed by remove_attribute. (#1299)
|
30
|
+
|
31
|
+
|
1
32
|
=== 1.6.6.4 / 2015-11-19
|
2
33
|
|
3
|
-
This version pulls in an upstream
|
34
|
+
This version pulls in an upstream patch to the vendored libxml2 to address:
|
4
35
|
|
5
36
|
* unclosed comment uninitialized access issue (#1376)
|
6
37
|
|
@@ -33,6 +64,7 @@ See #1374 for details.
|
|
33
64
|
|
34
65
|
Note that 1.6.6.0 was not released.
|
35
66
|
|
67
|
+
|
36
68
|
==== Features
|
37
69
|
|
38
70
|
* Unified Node and NodeSet implementations of #search, #xpath and #css.
|
data/Gemfile
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
source "https://rubygems.org/"
|
6
6
|
|
7
|
-
gem "
|
7
|
+
gem "mini_portile2", "~>2.0.0.rc2"
|
8
8
|
|
9
9
|
gem "rdoc", "~>4.0", :group => [:development, :test]
|
10
10
|
gem "hoe-bundler", ">=1.1", :group => [:development, :test]
|
@@ -14,8 +14,9 @@ gem "hoe-git", ">=1.4", :group => [:development, :test]
|
|
14
14
|
gem "minitest", "~>2.2.2", :group => [:development, :test]
|
15
15
|
gem "rake", ">=0.9", :group => [:development, :test]
|
16
16
|
gem "rake-compiler", "~>0.9.2", :group => [:development, :test]
|
17
|
+
gem "rake-compiler-dock", "~>0.4.2", :group => [:development, :test]
|
17
18
|
gem "racc", ">=1.4.6", :group => [:development, :test], :platform => :ruby
|
18
19
|
gem "rexical", ">=1.0.5", :group => [:development, :test], :platform => :ruby
|
19
|
-
gem "hoe", "~>3.
|
20
|
+
gem "hoe", "~>3.14", :group => [:development, :test]
|
20
21
|
|
21
22
|
# vim: syntax=ruby
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2008 - 2015:
|
4
|
+
|
5
|
+
* [Aaron Patterson](http://tenderlovemaking.com)
|
6
|
+
* [Mike Dalessio](http://mike.daless.io)
|
7
|
+
* [Charles Nutter](http://blog.headius.com)
|
8
|
+
* [Sergio Arbeo](http://www.serabe.com)
|
9
|
+
* [Patrick Mahoney](http://polycrystal.org)
|
10
|
+
* [Yoko Harada](http://yokolet.blogspot.com)
|
11
|
+
* [Akinori MUSHA](https://akinori.org)
|
12
|
+
* [John Shahid](https://github.com/jvshahid)
|
13
|
+
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
15
|
+
a copy of this software and associated documentation files (the
|
16
|
+
'Software'), to deal in the Software without restriction, including
|
17
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
18
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
19
|
+
permit persons to whom the Software is furnished to do so, subject to
|
20
|
+
the following conditions:
|
21
|
+
|
22
|
+
The above copyright notice and this permission notice shall be
|
23
|
+
included in all copies or substantial portions of the Software.
|
24
|
+
|
25
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
26
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
27
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
28
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
29
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
30
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
31
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
CHANGED
@@ -7,13 +7,14 @@ CHANGELOG.ja.rdoc
|
|
7
7
|
CHANGELOG.rdoc
|
8
8
|
C_CODING_STYLE.rdoc
|
9
9
|
Gemfile
|
10
|
+
LICENSE.txt
|
10
11
|
Manifest.txt
|
11
|
-
README.
|
12
|
-
README.rdoc
|
12
|
+
README.md
|
13
13
|
ROADMAP.md
|
14
14
|
Rakefile
|
15
15
|
STANDARD_RESPONSES.md
|
16
16
|
Y_U_NO_GEMSPEC.md
|
17
|
+
appveyor.yml
|
17
18
|
bin/nokogiri
|
18
19
|
build_all
|
19
20
|
dependencies.yml
|
@@ -242,28 +243,7 @@ lib/nokogiri/xslt.rb
|
|
242
243
|
lib/nokogiri/xslt/stylesheet.rb
|
243
244
|
lib/xercesImpl.jar
|
244
245
|
lib/xsd/xmlparser/nokogiri.rb
|
245
|
-
|
246
|
-
ports/patches/libxml2/0002-Fix-missing-entities-after-CVE-2014-3660-fix.patch
|
247
|
-
ports/patches/libxml2/0003-Stop-parsing-on-entities-boundaries-errors.patch
|
248
|
-
ports/patches/libxml2/0004-Cleanup-conditional-section-error-handling.patch
|
249
|
-
ports/patches/libxml2/0005-CVE-2015-1819-Enforce-the-reader-to-run-in-constant-.patch
|
250
|
-
ports/patches/libxml2/0006-Another-variation-of-overflow-in-Conditional-section.patch
|
251
|
-
ports/patches/libxml2/0007-Fix-an-error-in-previous-Conditional-section-patch.patch
|
252
|
-
ports/patches/libxml2/0008-CVE-2015-8035-Fix-XZ-compression-support-loop.patch
|
253
|
-
ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch
|
254
|
-
ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch
|
255
|
-
ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch
|
256
|
-
ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch
|
257
|
-
ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch
|
258
|
-
ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch
|
259
|
-
ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch
|
260
|
-
ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch
|
261
|
-
ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch
|
262
|
-
ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch
|
263
|
-
ports/patches/libxslt/0014-Fix-for-bug-436589.patch
|
264
|
-
ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch
|
265
|
-
ports/patches/libxslt/0016-Fix-for-type-confusion-in-preprocessing-attributes.patch
|
266
|
-
ports/patches/sort-patches-by-date
|
246
|
+
patches/sort-patches-by-date
|
267
247
|
suppressions/README.txt
|
268
248
|
suppressions/nokogiri_ree-1.8.7.358.supp
|
269
249
|
suppressions/nokogiri_ruby-1.8.7.370.supp
|
data/README.md
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
# Nokogiri
|
2
|
+
|
3
|
+
* http://nokogiri.org
|
4
|
+
* Installation: http://nokogiri.org/tutorials/installing_nokogiri.html
|
5
|
+
* Tutorials: http://nokogiri.org
|
6
|
+
* README: https://github.com/sparklemotion/nokogiri
|
7
|
+
* Mailing List: https://groups.google.com/group/nokogiri-talk
|
8
|
+
* Bug Reports: https://github.com/sparklemotion/nokogiri/issues
|
9
|
+
|
10
|
+
|
11
|
+
## Status
|
12
|
+
|
13
|
+
[![Travis Build Status](https://travis-ci.org/sparklemotion/nokogiri.svg?branch=master)](https://travis-ci.org/sparklemotion/nokogiri)
|
14
|
+
[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/sparklemotion/nokogiri?branch=master&svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri?branch=master)
|
15
|
+
[![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.png)](https://codeclimate.com/github/sparklemotion/nokogiri)
|
16
|
+
[![Version Eye](https://www.versioneye.com/ruby/nokogiri/badge.png)](https://www.versioneye.com/ruby/nokogiri)
|
17
|
+
|
18
|
+
|
19
|
+
## Description
|
20
|
+
|
21
|
+
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
|
22
|
+
Nokogiri's many features is the ability to search documents via XPath
|
23
|
+
or CSS3 selectors.
|
24
|
+
|
25
|
+
XML is like violence - if it doesn’t solve your problems, you are not
|
26
|
+
using enough of it.
|
27
|
+
|
28
|
+
|
29
|
+
## Features
|
30
|
+
|
31
|
+
* XML/HTML DOM parser which handles broken HTML
|
32
|
+
* XML/HTML SAX parser
|
33
|
+
* XML/HTML Push parser
|
34
|
+
* XPath 1.0 support for document searching
|
35
|
+
* CSS3 selector support for document searching
|
36
|
+
* XML/HTML builder
|
37
|
+
* XSLT transformer
|
38
|
+
|
39
|
+
Nokogiri parses and searches XML/HTML using native libraries (either C
|
40
|
+
or Java, depending on your Ruby), which means it's fast and
|
41
|
+
standards-compliant.
|
42
|
+
|
43
|
+
|
44
|
+
## Installation
|
45
|
+
|
46
|
+
If this doesn't work:
|
47
|
+
|
48
|
+
```
|
49
|
+
gem install nokogiri
|
50
|
+
```
|
51
|
+
|
52
|
+
then please start troubleshooting here:
|
53
|
+
|
54
|
+
> http://www.nokogiri.org/tutorials/installing_nokogiri.html
|
55
|
+
|
56
|
+
There are currently 1,237 Stack Overflow questions about Nokogiri
|
57
|
+
installation. The vast majority of them are out of date and therefore
|
58
|
+
incorrect. __Please do not use Stack Overflow.__
|
59
|
+
|
60
|
+
Instead, [tell us](http://nokogiri.org/tutorials/getting_help.html)
|
61
|
+
when the above instructions don't work for you. This allows us to both
|
62
|
+
help you directly and improve the documentation.
|
63
|
+
|
64
|
+
|
65
|
+
### Binary packages
|
66
|
+
|
67
|
+
Binary packages are available for some distributions.
|
68
|
+
|
69
|
+
* Debian: https://packages.debian.org/sid/ruby-nokogiri
|
70
|
+
* SuSE: https://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/
|
71
|
+
* Fedora: http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756
|
72
|
+
|
73
|
+
|
74
|
+
## Support
|
75
|
+
|
76
|
+
There are open-source tutorials (to which we invite contributions!) here: http://nokogiri.org/tutorials
|
77
|
+
|
78
|
+
* The Nokogiri mailing list is active: https://groups.google.com/group/nokogiri-talk
|
79
|
+
* The Nokogiri bug tracker is here: https://github.com/sparklemotion/nokogiri/issues
|
80
|
+
* Before filing a bug report, please read our submission guidelines: http://nokogiri.org/tutorials/getting_help.html
|
81
|
+
* The IRC channel is #nokogiri on freenode.
|
82
|
+
|
83
|
+
|
84
|
+
## Synopsis
|
85
|
+
|
86
|
+
Nokogiri is a large library, but here is example usage for parsing and examining a document:
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
require 'nokogiri'
|
90
|
+
require 'open-uri'
|
91
|
+
|
92
|
+
# Fetch and parse HTML document
|
93
|
+
doc = Nokogiri::HTML(open('http://www.nokogiri.org/tutorials/installing_nokogiri.html'))
|
94
|
+
|
95
|
+
####
|
96
|
+
# Search for nodes by css
|
97
|
+
doc.css('nav ul.menu li a').each do |link|
|
98
|
+
puts link.content
|
99
|
+
end
|
100
|
+
|
101
|
+
####
|
102
|
+
# Search for nodes by xpath
|
103
|
+
doc.xpath('//h2 | //h3').each do |link|
|
104
|
+
puts link.content
|
105
|
+
end
|
106
|
+
|
107
|
+
####
|
108
|
+
# Or mix and match.
|
109
|
+
doc.search('code.sh', '//h2').each do |link|
|
110
|
+
puts link.content
|
111
|
+
end
|
112
|
+
```
|
113
|
+
|
114
|
+
|
115
|
+
## Requirements
|
116
|
+
|
117
|
+
* Ruby 1.9.2 or higher, including any development packages necessary
|
118
|
+
to compile native extensions.
|
119
|
+
|
120
|
+
* In Nokogiri 1.6.0 and later libxml2 and libxslt are bundled with the
|
121
|
+
gem, but if you want to use the system versions:
|
122
|
+
|
123
|
+
* at install time, set the environment variable
|
124
|
+
`NOKOGIRI_USE_SYSTEM_LIBRARIES` or else use the
|
125
|
+
`--use-system-libraries` argument. (See
|
126
|
+
http://nokogiri.org/tutorials/installing_nokogiri.html#using_your_system_libraries
|
127
|
+
for specifics.)
|
128
|
+
|
129
|
+
* libxml2 >=2.6.21 with iconv support
|
130
|
+
(libxml2-dev/-devel is also required)
|
131
|
+
|
132
|
+
* libxslt, built with and supported by the given libxml2
|
133
|
+
(libxslt-dev/-devel is also required)
|
134
|
+
|
135
|
+
|
136
|
+
## Encoding
|
137
|
+
|
138
|
+
Strings are always stored as UTF-8 internally. Methods that return
|
139
|
+
text values will always return UTF-8 encoded strings. Methods that
|
140
|
+
return a string containing markup (like `to_xml`, `to_html` and
|
141
|
+
`inner_html`) will return a string encoded like the source document.
|
142
|
+
|
143
|
+
__WARNING__
|
144
|
+
|
145
|
+
Some documents declare one encoding, but actually use a different
|
146
|
+
one. In these cases, which encoding should the parser choose?
|
147
|
+
|
148
|
+
Data is just a stream of bytes. Humans add meaning to that stream. Any
|
149
|
+
particular set of bytes could be valid characters in multiple
|
150
|
+
encodings, so detecting encoding with 100% accuracy is not
|
151
|
+
possible. `libxml2` does its best, but it can't be right all the time.
|
152
|
+
|
153
|
+
If you want Nokogiri to handle the document encoding properly, your
|
154
|
+
best bet is to explicitly set the encoding. Here is an example of
|
155
|
+
explicitly setting the encoding to EUC-JP on the parser:
|
156
|
+
|
157
|
+
```ruby
|
158
|
+
doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
|
159
|
+
```
|
160
|
+
|
161
|
+
## Development
|
162
|
+
|
163
|
+
```bash
|
164
|
+
bundle install
|
165
|
+
bundle exec rake
|
166
|
+
```
|
167
|
+
|
168
|
+
## License
|
169
|
+
|
170
|
+
MIT. See the `LICENSE.txt` file.
|