nokogiri 1.6.6.4-java → 1.6.7-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.cross_rubies +7 -5
- data/.travis.yml +33 -30
- data/CHANGELOG.ja.rdoc +38 -6
- data/CHANGELOG.rdoc +33 -1
- data/Gemfile +3 -2
- data/LICENSE.txt +31 -0
- data/Manifest.txt +4 -24
- data/README.md +170 -0
- data/Rakefile +25 -22
- data/appveyor.yml +22 -0
- data/build_all +6 -90
- data/ext/java/nokogiri/XmlDocument.java +5 -0
- data/ext/java/nokogiri/XmlNode.java +16 -1
- data/ext/java/nokogiri/XmlSaxPushParser.java +6 -2
- data/ext/java/nokogiri/XmlSchema.java +20 -20
- data/ext/java/nokogiri/internals/NokogiriHandler.java +21 -15
- data/ext/java/nokogiri/internals/ParserContext.java +15 -11
- data/ext/nokogiri/extconf.rb +37 -34
- data/ext/nokogiri/xml_node.c +21 -11
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/node.rb +16 -0
- data/{ports/patches → patches}/sort-patches-by-date +0 -0
- data/test/html/test_document.rb +26 -3
- data/test/xml/sax/test_parser.rb +11 -0
- data/test/xml/test_document.rb +11 -0
- data/test/xml/test_schema.rb +15 -2
- data/test/xml/test_xpath.rb +15 -0
- metadata +28 -33
- data/README.ja.rdoc +0 -112
- data/README.rdoc +0 -177
- data/ports/patches/libxml2/0001-Revert-Missing-initialization-for-the-catalog-module.patch +0 -29
- data/ports/patches/libxml2/0002-Fix-missing-entities-after-CVE-2014-3660-fix.patch +0 -31
- data/ports/patches/libxml2/0003-Stop-parsing-on-entities-boundaries-errors.patch +0 -32
- data/ports/patches/libxml2/0004-Cleanup-conditional-section-error-handling.patch +0 -49
- data/ports/patches/libxml2/0005-CVE-2015-1819-Enforce-the-reader-to-run-in-constant-.patch +0 -177
- data/ports/patches/libxml2/0006-Another-variation-of-overflow-in-Conditional-section.patch +0 -32
- data/ports/patches/libxml2/0007-Fix-an-error-in-previous-Conditional-section-patch.patch +0 -28
- data/ports/patches/libxml2/0008-CVE-2015-8035-Fix-XZ-compression-support-loop.patch +0 -31
- data/ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch +0 -222
- data/ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch +0 -53
- data/ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch +0 -60
- data/ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch +0 -42
- data/ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch +0 -164
- data/ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch +0 -587
- data/ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch +0 -80
- data/ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch +0 -185
- data/ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch +0 -126
- data/ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch +0 -25
- data/ports/patches/libxslt/0014-Fix-for-bug-436589.patch +0 -43
- data/ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch +0 -41
- data/ports/patches/libxslt/0016-Fix-for-type-confusion-in-preprocessing-attributes.patch +0 -29
data/test/xml/sax/test_parser.rb
CHANGED
@@ -376,6 +376,17 @@ module Nokogiri
|
|
376
376
|
|
377
377
|
assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements
|
378
378
|
end
|
379
|
+
|
380
|
+
def test_square_bracket_in_text # issue 1261
|
381
|
+
xml = <<-eoxml
|
382
|
+
<tu tuid="87dea04cf60af103ff09d1dba36ae820" segtype="block">
|
383
|
+
<prop type="x-smartling-string-variant">en:#:home_page:#:stories:#:[6]:#:name</prop>
|
384
|
+
<tuv xml:lang="en-US"><seg>Sandy S.</seg></tuv>
|
385
|
+
</tu>
|
386
|
+
eoxml
|
387
|
+
@parser.parse(xml)
|
388
|
+
assert @parser.document.data.must_include "en:#:home_page:#:stories:#:[6]:#:name"
|
389
|
+
end
|
379
390
|
end
|
380
391
|
end
|
381
392
|
end
|
data/test/xml/test_document.rb
CHANGED
@@ -689,6 +689,17 @@ module Nokogiri
|
|
689
689
|
refute_nil doc
|
690
690
|
end
|
691
691
|
|
692
|
+
def test_parse_works_with_an_object_that_responds_to_read
|
693
|
+
klass = Class.new do
|
694
|
+
def read *args
|
695
|
+
"<div>foo</div>"
|
696
|
+
end
|
697
|
+
end
|
698
|
+
|
699
|
+
doc = Nokogiri::XML.parse klass.new
|
700
|
+
doc.at_css("div").content.must_equal("foo")
|
701
|
+
end
|
702
|
+
|
692
703
|
def test_search_on_empty_documents
|
693
704
|
doc = Nokogiri::XML::Document.new
|
694
705
|
ns = doc.search('//foo')
|
data/test/xml/test_schema.rb
CHANGED
@@ -94,9 +94,22 @@ EOF
|
|
94
94
|
end
|
95
95
|
|
96
96
|
def test_validate_invalid_document
|
97
|
-
|
97
|
+
doc = Nokogiri::XML File.read(PO_XML_FILE)
|
98
|
+
doc.css("city").unlink
|
98
99
|
|
99
|
-
assert errors = @xsd.validate(
|
100
|
+
assert errors = @xsd.validate(doc)
|
101
|
+
assert_equal 2, errors.length
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_validate_invalid_file
|
105
|
+
tempfile = Tempfile.new("xml")
|
106
|
+
|
107
|
+
doc = Nokogiri::XML File.read(PO_XML_FILE)
|
108
|
+
doc.css("city").unlink
|
109
|
+
tempfile.write doc.to_xml
|
110
|
+
tempfile.close
|
111
|
+
|
112
|
+
assert errors = @xsd.validate(tempfile.path)
|
100
113
|
assert_equal 2, errors.length
|
101
114
|
end
|
102
115
|
|
data/test/xml/test_xpath.rb
CHANGED
@@ -425,6 +425,21 @@ END
|
|
425
425
|
assert_equal 1, xml_doc.xpath('//mods:titleInfo',ns_hash).length
|
426
426
|
assert_equal 'finnish', xml_doc.xpath('//mods:titleInfo[1]/@lang',ns_hash).first.value
|
427
427
|
end
|
428
|
+
|
429
|
+
def test_xpath_after_reset_doc_via_innerhtml
|
430
|
+
xml = <<XML
|
431
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
432
|
+
<document xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0">
|
433
|
+
<text:section name="Section1">[TEXT_INSIDE_SECTION]</text:section>
|
434
|
+
</document>
|
435
|
+
XML
|
436
|
+
|
437
|
+
doc = Nokogiri::XML(xml)
|
438
|
+
doc.inner_html = doc.inner_html
|
439
|
+
sections = doc.xpath(".//text:section[@name='Section1']")
|
440
|
+
assert_equal 1, sections.size
|
441
|
+
assert_equal "[TEXT_INSIDE_SECTION]", sections.first.text
|
442
|
+
end
|
428
443
|
end
|
429
444
|
end
|
430
445
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.6.4
|
4
|
+
version: 1.6.7
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Aaron Patterson
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2015-11-
|
15
|
+
date: 2015-11-30 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rdoc
|
@@ -126,6 +126,20 @@ dependencies:
|
|
126
126
|
version: 0.9.2
|
127
127
|
prerelease: false
|
128
128
|
type: :development
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
name: rake-compiler-dock
|
131
|
+
version_requirements: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ~>
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: 0.4.2
|
136
|
+
requirement: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ~>
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: 0.4.2
|
141
|
+
prerelease: false
|
142
|
+
type: :development
|
129
143
|
- !ruby/object:Gem::Dependency
|
130
144
|
name: racc
|
131
145
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -169,11 +183,12 @@ dependencies:
|
|
169
183
|
prerelease: false
|
170
184
|
type: :development
|
171
185
|
description: |-
|
172
|
-
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among Nokogiri's
|
173
|
-
many features is the ability to search documents via XPath or CSS3 selectors.
|
186
|
+
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
|
187
|
+
Nokogiri's many features is the ability to search documents via XPath
|
188
|
+
or CSS3 selectors.
|
174
189
|
|
175
|
-
XML is like violence - if it doesn’t solve your problems, you are not using
|
176
|
-
enough of it.
|
190
|
+
XML is like violence - if it doesn’t solve your problems, you are not
|
191
|
+
using enough of it.
|
177
192
|
email:
|
178
193
|
- aaronp@rubyforge.org
|
179
194
|
- mike.dalessio@gmail.com
|
@@ -187,9 +202,9 @@ extra_rdoc_files:
|
|
187
202
|
- CHANGELOG.ja.rdoc
|
188
203
|
- CHANGELOG.rdoc
|
189
204
|
- C_CODING_STYLE.rdoc
|
205
|
+
- LICENSE.txt
|
190
206
|
- Manifest.txt
|
191
|
-
- README.ja.rdoc
|
192
|
-
- README.rdoc
|
207
|
+
- README.md
|
193
208
|
- ROADMAP.md
|
194
209
|
- STANDARD_RESPONSES.md
|
195
210
|
- Y_U_NO_GEMSPEC.md
|
@@ -238,13 +253,14 @@ files:
|
|
238
253
|
- CHANGELOG.rdoc
|
239
254
|
- C_CODING_STYLE.rdoc
|
240
255
|
- Gemfile
|
256
|
+
- LICENSE.txt
|
241
257
|
- Manifest.txt
|
242
|
-
- README.ja.rdoc
|
243
|
-
- README.rdoc
|
258
|
+
- README.md
|
244
259
|
- ROADMAP.md
|
245
260
|
- Rakefile
|
246
261
|
- STANDARD_RESPONSES.md
|
247
262
|
- Y_U_NO_GEMSPEC.md
|
263
|
+
- appveyor.yml
|
248
264
|
- bin/nokogiri
|
249
265
|
- build_all
|
250
266
|
- dependencies.yml
|
@@ -474,28 +490,7 @@ files:
|
|
474
490
|
- lib/nokogiri/xslt/stylesheet.rb
|
475
491
|
- lib/xercesImpl.jar
|
476
492
|
- lib/xsd/xmlparser/nokogiri.rb
|
477
|
-
- ports/patches/libxml2/0001-Revert-Missing-initialization-for-the-catalog-module.patch
|
478
|
-
- ports/patches/libxml2/0002-Fix-missing-entities-after-CVE-2014-3660-fix.patch
|
479
|
-
- ports/patches/libxml2/0003-Stop-parsing-on-entities-boundaries-errors.patch
|
480
|
-
- ports/patches/libxml2/0004-Cleanup-conditional-section-error-handling.patch
|
481
|
-
- ports/patches/libxml2/0005-CVE-2015-1819-Enforce-the-reader-to-run-in-constant-.patch
|
482
|
-
- ports/patches/libxml2/0006-Another-variation-of-overflow-in-Conditional-section.patch
|
483
|
-
- ports/patches/libxml2/0007-Fix-an-error-in-previous-Conditional-section-patch.patch
|
484
|
-
- ports/patches/libxml2/0008-CVE-2015-8035-Fix-XZ-compression-support-loop.patch
|
485
|
-
- ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch
|
486
|
-
- ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch
|
487
|
-
- ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch
|
488
|
-
- ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch
|
489
|
-
- ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch
|
490
|
-
- ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch
|
491
|
-
- ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch
|
492
|
-
- ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch
|
493
|
-
- ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch
|
494
|
-
- ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch
|
495
|
-
- ports/patches/libxslt/0014-Fix-for-bug-436589.patch
|
496
|
-
- ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch
|
497
|
-
- ports/patches/libxslt/0016-Fix-for-type-confusion-in-preprocessing-attributes.patch
|
498
|
-
- ports/patches/sort-patches-by-date
|
493
|
+
- patches/sort-patches-by-date
|
499
494
|
- suppressions/README.txt
|
500
495
|
- suppressions/nokogiri_ree-1.8.7.358.supp
|
501
496
|
- suppressions/nokogiri_ruby-1.8.7.370.supp
|
@@ -616,7 +611,7 @@ metadata: {}
|
|
616
611
|
post_install_message:
|
617
612
|
rdoc_options:
|
618
613
|
- --main
|
619
|
-
- README.rdoc
|
614
|
+
- README.md
|
620
615
|
require_paths:
|
621
616
|
- lib
|
622
617
|
required_ruby_version: !ruby/object:Gem::Requirement
|
data/README.ja.rdoc
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
= Nokogiri (鋸) {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
|
2
|
-
|
3
|
-
* http://nokogiri.org
|
4
|
-
* https://github.com/sparklemotion/nokogiri
|
5
|
-
* https://groups.google.com/group/nokogiri-talk
|
6
|
-
* https://github.com/sparklemotion/nokogiri/issues
|
7
|
-
|
8
|
-
== DESCRIPTION:
|
9
|
-
|
10
|
-
Nokogiri はHTMLとXMLとSAXとXSLTとReaderのパーサーです。とりわけ重要な特徴は、
|
11
|
-
ドキュメントをXPathやCSS3セレクター経由で探索する機能を持つことです。
|
12
|
-
|
13
|
-
XMLは暴力に似ている - XMLが君の問題を解決しないとしたら、君はXMLを十分に
|
14
|
-
使いこなしていない事になる。
|
15
|
-
|
16
|
-
== FEATURES:
|
17
|
-
|
18
|
-
* XPath 1.0による探索
|
19
|
-
* CSS3 のセレクターによる探索
|
20
|
-
* XML/HTMLのビルダー
|
21
|
-
|
22
|
-
XML/HTMLの高速な解析と探索検索、ならびにCSS3セレクタとXPath 1.0をサポートしています。
|
23
|
-
|
24
|
-
== SUPPORT:
|
25
|
-
|
26
|
-
日本語でNokogiriの
|
27
|
-
{メーリングリスト}[https://groups.google.com/group/nokogiri-list]
|
28
|
-
|
29
|
-
* https://groups.google.com/group/nokogiri-list
|
30
|
-
|
31
|
-
{バグ報告}[https://github.com/sparklemotion/nokogiri/issues]
|
32
|
-
|
33
|
-
* https://github.com/sparklemotion/nokogiri/issues
|
34
|
-
|
35
|
-
IRCのチャンネルはfreenodeの #nokogiri です。
|
36
|
-
|
37
|
-
== SYNOPSIS:
|
38
|
-
|
39
|
-
require 'nokogiri'
|
40
|
-
require 'open-uri'
|
41
|
-
|
42
|
-
# Fetch and parse HTML document
|
43
|
-
doc = Nokogiri::HTML(open('http://www.nokogiri.org/tutorials/installing_nokogiri.html'))
|
44
|
-
|
45
|
-
####
|
46
|
-
# Search for nodes by css
|
47
|
-
doc.css('nav ul.menu li a').each do |link|
|
48
|
-
puts link.content
|
49
|
-
end
|
50
|
-
|
51
|
-
####
|
52
|
-
# Search for nodes by xpath
|
53
|
-
doc.xpath('//h2 | //h3').each do |link|
|
54
|
-
puts link.content
|
55
|
-
end
|
56
|
-
|
57
|
-
####
|
58
|
-
# Or mix and match.
|
59
|
-
doc.search('code.sh', '//h2').each do |link|
|
60
|
-
puts link.content
|
61
|
-
end
|
62
|
-
|
63
|
-
|
64
|
-
== REQUIREMENTS:
|
65
|
-
|
66
|
-
* ruby 1.9.3以上
|
67
|
-
|
68
|
-
* Nokogiri 1.6.0以降ではlibxml2とlibxsltは同梱されているが、
|
69
|
-
もしインストール済みのものを使いたい場合:
|
70
|
-
|
71
|
-
* libxml2 2.6.21以上, iconvサポート付きのもの
|
72
|
-
(libxml2-dev/-develパッケージも必要)
|
73
|
-
|
74
|
-
* libxslt 上記のlibxml2でビルドされ、サポートされているもの
|
75
|
-
(libxslt-dev/-develパッケージも必要)
|
76
|
-
|
77
|
-
== INSTALL:
|
78
|
-
|
79
|
-
* sudo gem install nokogiri
|
80
|
-
|
81
|
-
== LICENSE:
|
82
|
-
|
83
|
-
(The MIT License)
|
84
|
-
|
85
|
-
Copyright (c) 2008 - 2015:
|
86
|
-
|
87
|
-
* {Aaron Patterson}[http://tenderlovemaking.com]
|
88
|
-
* {Mike Dalessio}[http://mike.daless.io]
|
89
|
-
* {Charles Nutter}[http://blog.headius.com]
|
90
|
-
* {Sergio Arbeo}[http://www.serabe.com]
|
91
|
-
* {Patrick Mahoney}[http://polycrystal.org]
|
92
|
-
* {Yoko Harada}[http://yokolet.blogspot.com]
|
93
|
-
* {Akinori MUSHA}[https://akinori.org]
|
94
|
-
|
95
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
96
|
-
a copy of this software and associated documentation files (the
|
97
|
-
'Software'), to deal in the Software without restriction, including
|
98
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
99
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
100
|
-
permit persons to whom the Software is furnished to do so, subject to
|
101
|
-
the following conditions:
|
102
|
-
|
103
|
-
The above copyright notice and this permission notice shall be
|
104
|
-
included in all copies or substantial portions of the Software.
|
105
|
-
|
106
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
107
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
108
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
109
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
110
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
111
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
112
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
DELETED
@@ -1,177 +0,0 @@
|
|
1
|
-
= Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/github/sparklemotion/nokogiri.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri] {<img src="https://www.versioneye.com/ruby/nokogiri/badge.png" alt="Dependency Status" />}[https://www.versioneye.com/ruby/nokogiri]
|
2
|
-
|
3
|
-
* http://nokogiri.org
|
4
|
-
* https://github.com/sparklemotion/nokogiri
|
5
|
-
* https://groups.google.com/group/nokogiri-talk
|
6
|
-
* https://github.com/sparklemotion/nokogiri/issues
|
7
|
-
|
8
|
-
== DESCRIPTION:
|
9
|
-
|
10
|
-
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among Nokogiri's
|
11
|
-
many features is the ability to search documents via XPath or CSS3 selectors.
|
12
|
-
|
13
|
-
XML is like violence - if it doesn’t solve your problems, you are not using
|
14
|
-
enough of it.
|
15
|
-
|
16
|
-
== FEATURES:
|
17
|
-
|
18
|
-
* XPath 1.0 support for document searching
|
19
|
-
* CSS3 selector support for document searching
|
20
|
-
* XML/HTML builder
|
21
|
-
|
22
|
-
Nokogiri parses and searches XML/HTML very quickly, and also has
|
23
|
-
correctly implemented CSS3 selector support as well as XPath 1.0 support.
|
24
|
-
|
25
|
-
== SUPPORT:
|
26
|
-
|
27
|
-
Before filing a bug report, please read our {submission guidelines}[http://nokogiri.org/tutorials/getting_help.html] at:
|
28
|
-
|
29
|
-
* http://nokogiri.org/tutorials/getting_help.html
|
30
|
-
|
31
|
-
The Nokogiri {mailing list}[https://groups.google.com/group/nokogiri-talk]
|
32
|
-
is available here:
|
33
|
-
|
34
|
-
* https://groups.google.com/group/nokogiri-talk
|
35
|
-
|
36
|
-
The {bug tracker}[https://github.com/sparklemotion/nokogiri/issues]
|
37
|
-
is available here:
|
38
|
-
|
39
|
-
* https://github.com/sparklemotion/nokogiri/issues
|
40
|
-
|
41
|
-
The IRC channel is #nokogiri on freenode.
|
42
|
-
|
43
|
-
== SYNOPSIS:
|
44
|
-
|
45
|
-
require 'nokogiri'
|
46
|
-
require 'open-uri'
|
47
|
-
|
48
|
-
# Fetch and parse HTML document
|
49
|
-
doc = Nokogiri::HTML(open('http://www.nokogiri.org/tutorials/installing_nokogiri.html'))
|
50
|
-
|
51
|
-
####
|
52
|
-
# Search for nodes by css
|
53
|
-
doc.css('nav ul.menu li a').each do |link|
|
54
|
-
puts link.content
|
55
|
-
end
|
56
|
-
|
57
|
-
####
|
58
|
-
# Search for nodes by xpath
|
59
|
-
doc.xpath('//h2 | //h3').each do |link|
|
60
|
-
puts link.content
|
61
|
-
end
|
62
|
-
|
63
|
-
####
|
64
|
-
# Or mix and match.
|
65
|
-
doc.search('code.sh', '//h2').each do |link|
|
66
|
-
puts link.content
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
== REQUIREMENTS:
|
71
|
-
|
72
|
-
* ruby 1.9.3 or higher
|
73
|
-
|
74
|
-
* in Nokogiri 1.6.0 and later libxml2 and libxslt are bundled with the
|
75
|
-
gem, but if you want to use them installed on the system:
|
76
|
-
|
77
|
-
* libxml2 >=2.6.21 with iconv support
|
78
|
-
(libxml2-dev/-devel is required too)
|
79
|
-
|
80
|
-
* libxslt, built with and supported by the given libxml2
|
81
|
-
(libxslt-dev/-devel is required too)
|
82
|
-
|
83
|
-
== ENCODING:
|
84
|
-
|
85
|
-
Strings are always stored as UTF-8 internally. Methods that return
|
86
|
-
text values will always return UTF-8 encoded strings. Methods that
|
87
|
-
return XML (like to_xml, to_html and inner_html) will return a string
|
88
|
-
encoded like the source document.
|
89
|
-
|
90
|
-
*WARNING*
|
91
|
-
|
92
|
-
Some documents declare one particular encoding, but use a different
|
93
|
-
one. So, which encoding should the parser choose?
|
94
|
-
|
95
|
-
Remember that data is just a stream of bytes. Only we humans add
|
96
|
-
meaning to that stream. Any particular set of bytes could be valid
|
97
|
-
characters in multiple encodings, so detecting encoding with 100%
|
98
|
-
accuracy is not possible. libxml2 does its best, but it can't be right
|
99
|
-
100% of the time.
|
100
|
-
|
101
|
-
If you want Nokogiri to handle the document encoding properly, your
|
102
|
-
best bet is to explicitly set the encoding. Here is an example of
|
103
|
-
explicitly setting the encoding to EUC-JP on the parser:
|
104
|
-
|
105
|
-
doc = Nokogiri.XML('<foo><bar /><foo>', nil, 'EUC-JP')
|
106
|
-
|
107
|
-
== INSTALL:
|
108
|
-
|
109
|
-
* sudo gem install nokogiri
|
110
|
-
|
111
|
-
=== Binary packages
|
112
|
-
|
113
|
-
Binary packages are available for:
|
114
|
-
|
115
|
-
* SuSE[https://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/]
|
116
|
-
* Fedora[http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756]
|
117
|
-
|
118
|
-
== DEVELOPMENT:
|
119
|
-
|
120
|
-
=== Developing on C Ruby (MRI)
|
121
|
-
|
122
|
-
Developing Nokogiri requires racc and rexical to generate the parser and
|
123
|
-
tokenizer. To start development, make sure you have `libxml2` and `libxslt`
|
124
|
-
installed.
|
125
|
-
|
126
|
-
Then install core gems and bootstrap:
|
127
|
-
|
128
|
-
$ gem install hoe rake-compiler mini_portile
|
129
|
-
$ rake newb
|
130
|
-
|
131
|
-
=== Developing on JRuby
|
132
|
-
|
133
|
-
Currently, development with JRuby depends on CRuby being installed. With
|
134
|
-
CRuby, install racc and rexical:
|
135
|
-
|
136
|
-
$ gem install racc rexical
|
137
|
-
|
138
|
-
Make sure hoe and rake compiler are installed with JRuby:
|
139
|
-
|
140
|
-
$ jgem install hoe rake-compiler
|
141
|
-
|
142
|
-
Then run rake:
|
143
|
-
|
144
|
-
$ jruby -S rake
|
145
|
-
|
146
|
-
== LICENSE:
|
147
|
-
|
148
|
-
(The MIT License)
|
149
|
-
|
150
|
-
Copyright (c) 2008 - 2015:
|
151
|
-
|
152
|
-
* {Aaron Patterson}[http://tenderlovemaking.com]
|
153
|
-
* {Mike Dalessio}[http://mike.daless.io]
|
154
|
-
* {Charles Nutter}[http://blog.headius.com]
|
155
|
-
* {Sergio Arbeo}[http://www.serabe.com]
|
156
|
-
* {Patrick Mahoney}[http://polycrystal.org]
|
157
|
-
* {Yoko Harada}[http://yokolet.blogspot.com]
|
158
|
-
* {Akinori MUSHA}[https://akinori.org]
|
159
|
-
|
160
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
161
|
-
a copy of this software and associated documentation files (the
|
162
|
-
'Software'), to deal in the Software without restriction, including
|
163
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
164
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
165
|
-
permit persons to whom the Software is furnished to do so, subject to
|
166
|
-
the following conditions:
|
167
|
-
|
168
|
-
The above copyright notice and this permission notice shall be
|
169
|
-
included in all copies or substantial portions of the Software.
|
170
|
-
|
171
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
172
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
173
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
174
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
175
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
176
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
177
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|