nokogiri 1.8.2-java → 1.8.3-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/.travis.yml +14 -14
- data/CHANGELOG.md +43 -1
- data/LICENSE.md +2 -1
- data/Manifest.txt +3 -0
- data/README.md +20 -21
- data/Rakefile +3 -9
- data/SECURITY.md +19 -0
- data/build_all +1 -1
- data/dependencies.yml +11 -11
- data/ext/java/nokogiri/HtmlSaxParserContext.java +7 -13
- data/ext/java/nokogiri/HtmlSaxPushParser.java +72 -90
- data/ext/java/nokogiri/NokogiriService.java +0 -19
- data/ext/java/nokogiri/XmlNode.java +2 -23
- data/ext/java/nokogiri/XmlSaxParserContext.java +81 -101
- data/ext/java/nokogiri/XmlSaxPushParser.java +117 -89
- data/ext/java/nokogiri/XmlSyntaxError.java +9 -17
- data/ext/java/nokogiri/internals/NokogiriHandler.java +100 -108
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +11 -14
- data/ext/java/nokogiri/internals/ParserContext.java +34 -19
- data/ext/java/nokogiri/internals/ReaderNode.java +6 -10
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +4 -3
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +6 -3
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +4 -3
- data/ext/nokogiri/extconf.rb +1 -1
- data/ext/nokogiri/html_element_description.c +14 -14
- data/ext/nokogiri/xml_cdata.c +6 -4
- data/ext/nokogiri/xml_document.c +2 -3
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +1 -0
- data/ext/nokogiri/xml_namespace.c +3 -9
- data/ext/nokogiri/xml_namespace.h +2 -0
- data/ext/nokogiri/xml_node.c +23 -15
- data/ext/nokogiri/xml_node_set.c +5 -4
- data/ext/nokogiri/xml_node_set.h +0 -1
- data/ext/nokogiri/xslt_stylesheet.c +2 -2
- data/lib/nokogiri/css/parser.rb +108 -90
- data/lib/nokogiri/css/parser.y +13 -2
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +4 -4
- data/lib/nokogiri/css/xpath_visitor.rb +10 -3
- data/lib/nokogiri/html/document_fragment.rb +11 -1
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/node.rb +58 -0
- data/lib/nokogiri/xml/node_set.rb +32 -18
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
- data/test/css/test_nthiness.rb +21 -21
- data/test/css/test_parser.rb +17 -0
- data/test/html/test_attributes.rb +85 -0
- data/test/html/test_document_fragment.rb +7 -1
- data/test/test_css_cache.rb +5 -3
- data/test/xml/sax/test_parser.rb +9 -1
- data/test/xml/sax/test_push_parser.rb +60 -0
- data/test/xml/test_cdata.rb +1 -1
- data/test/xml/test_document.rb +5 -5
- data/test/xml/test_dtd.rb +4 -4
- data/test/xml/test_node.rb +89 -6
- data/test/xml/test_node_attributes.rb +3 -3
- data/test/xml/test_node_reparenting.rb +18 -0
- data/test/xml/test_node_set.rb +31 -4
- data/test/xml/test_reader.rb +13 -1
- data/test/xml/test_syntax_error.rb +3 -3
- data/test/xml/test_xpath.rb +8 -0
- metadata +25 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f6f9a9fae7034c2bd0c490e0395e5c7a26dbde6c
|
4
|
+
data.tar.gz: f97501387a99a0a335c45569eaa22bdc9b6f41af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f248fb6871692986843da22c22c7f20064e1ed1d92c073fbe5b00f7a2151d5f409f1723dbc2ec356fe80e192bf975bc6f9033d056916b1f0f661085e88a0da0
|
7
|
+
data.tar.gz: 94306c4713312f3c00da66f82adc79dc159eb6b40ffedf11ac5185efacfab1fcf4bac083c1d0fff95947612f4c97a0f8e7d8b6e6d5656d25d24c8913df279b34
|
data/.travis.yml
CHANGED
@@ -18,15 +18,15 @@ matrix:
|
|
18
18
|
- os: osx
|
19
19
|
rvm: 2.2.5
|
20
20
|
- os: linux
|
21
|
-
rvm: 2.3.
|
21
|
+
rvm: 2.3.5
|
22
22
|
- os: osx
|
23
|
-
rvm: 2.3.
|
23
|
+
rvm: 2.3.5
|
24
24
|
- os: linux
|
25
|
-
rvm: 2.4.
|
25
|
+
rvm: 2.4.2
|
26
26
|
env:
|
27
27
|
- RUBYOPT="--enable-frozen-string-literal --debug=frozen-string-literal"
|
28
28
|
- os: osx
|
29
|
-
rvm: 2.4.
|
29
|
+
rvm: 2.4.2
|
30
30
|
env:
|
31
31
|
- RUBYOPT="--enable-frozen-string-literal --debug=frozen-string-literal"
|
32
32
|
- os: linux
|
@@ -40,22 +40,22 @@ matrix:
|
|
40
40
|
- os: linux
|
41
41
|
rvm: jruby-1.7
|
42
42
|
- os: linux
|
43
|
-
rvm: jruby-9.1.
|
43
|
+
rvm: jruby-9.1.15.0
|
44
44
|
- os: linux
|
45
45
|
rvm: rbx-3
|
46
46
|
allow_failures:
|
47
47
|
- rvm: ruby-head
|
48
48
|
fast_finish: true
|
49
49
|
|
50
|
-
notifications:
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
50
|
+
# notifications:
|
51
|
+
# irc:
|
52
|
+
# channels:
|
53
|
+
# - "chat.freenode.net#nokogiri"
|
54
|
+
# on_success: always
|
55
|
+
# on_failure: always
|
56
|
+
# template:
|
57
|
+
# - "%{repository} (%{branch}:%{commit} by %{author}): %{message} (%{build_url})"
|
58
|
+
# skip_join: true
|
59
59
|
|
60
60
|
addons:
|
61
61
|
apt:
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,45 @@
|
|
1
|
+
# 1.8.3 / 2018-06-16
|
2
|
+
|
3
|
+
## Security Notes
|
4
|
+
|
5
|
+
[MRI] Behavior in libxml2 has been reverted which caused CVE-2018-8048 (loofah gem), CVE-2018-3740 (sanitize gem), and CVE-2018-3741 (rails-html-sanitizer gem). The commit in question is here:
|
6
|
+
|
7
|
+
> https://github.com/GNOME/libxml2/commit/960f0e2
|
8
|
+
|
9
|
+
and more information is available about this commit and its impact here:
|
10
|
+
|
11
|
+
> https://github.com/flavorjones/loofah/issues/144
|
12
|
+
|
13
|
+
This release simply reverts the libxml2 commit in question to protect users of Nokogiri's vendored libraries from similar vulnerabilities.
|
14
|
+
|
15
|
+
If you're offended by what happened here, I'd kindly ask that you comment on the upstream bug report here:
|
16
|
+
|
17
|
+
> https://bugzilla.gnome.org/show_bug.cgi?id=769760
|
18
|
+
|
19
|
+
|
20
|
+
## Dependencies
|
21
|
+
|
22
|
+
* [MRI] libxml2 is updated from 2.9.7 to 2.9.8
|
23
|
+
|
24
|
+
|
25
|
+
## Features
|
26
|
+
|
27
|
+
* Node#classes, #add_class, #append_class, and #remove_class are added.
|
28
|
+
* NodeSet#append_class is added.
|
29
|
+
* NodeSet#remove_attribute is a new alias for NodeSet#remove_attr.
|
30
|
+
* NodeSet#each now returns an Enumerator when no block is passed (Thanks, @park53kr!)
|
31
|
+
* [JRuby] General improvements in JRuby implementation (Thanks, @kares!)
|
32
|
+
|
33
|
+
|
34
|
+
## Bug fixes
|
35
|
+
|
36
|
+
* CSS attribute selectors now gracefully handle queries using integers. [#711]
|
37
|
+
* Handle ASCII-8BIT encoding on fragment input [#553]
|
38
|
+
* Handle non-string return values within `Reader` [#898]
|
39
|
+
* [JRuby] Allow Node#replace to insert Comment and CDATA nodes. [#1666]
|
40
|
+
* [JRuby] Stability and speed improvements to `Node`, `Sax::PushParser`, and the JRuby implementation [#1708, #1710, #1501]
|
41
|
+
|
42
|
+
|
1
43
|
# 1.8.2 / 2018-01-29
|
2
44
|
|
3
45
|
## Security Notes
|
@@ -8,7 +50,7 @@
|
|
8
50
|
## Dependencies
|
9
51
|
|
10
52
|
* [MRI] libxml2 is updated from 2.9.5 to 2.9.7
|
11
|
-
* [MRI]
|
53
|
+
* [MRI] libxslt is updated from 1.1.30 to 1.1.32
|
12
54
|
|
13
55
|
|
14
56
|
## Features
|
data/LICENSE.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
(The MIT License)
|
2
2
|
|
3
|
-
Copyright (c) 2008 - 2017:
|
3
|
+
Copyright (c) 2008 - 2018:
|
4
4
|
|
5
5
|
* [Aaron Patterson](http://tenderlovemaking.com)
|
6
6
|
* [Mike Dalessio](http://mike.daless.io)
|
@@ -10,6 +10,7 @@ Copyright (c) 2008 - 2017:
|
|
10
10
|
* [Yoko Harada](http://yokolet.blogspot.com)
|
11
11
|
* [Akinori MUSHA](https://akinori.org)
|
12
12
|
* [John Shahid](https://github.com/jvshahid)
|
13
|
+
* [Lars Kanis](https://github.com/larskanis)
|
13
14
|
|
14
15
|
Permission is hereby granted, free of charge, to any person obtaining
|
15
16
|
a copy of this software and associated documentation files (the
|
data/Manifest.txt
CHANGED
@@ -14,6 +14,7 @@ Manifest.txt
|
|
14
14
|
README.md
|
15
15
|
ROADMAP.md
|
16
16
|
Rakefile
|
17
|
+
SECURITY.md
|
17
18
|
STANDARD_RESPONSES.md
|
18
19
|
Y_U_NO_GEMSPEC.md
|
19
20
|
appveyor.yml
|
@@ -249,6 +250,7 @@ lib/xalan.jar
|
|
249
250
|
lib/xercesImpl.jar
|
250
251
|
lib/xml-apis.jar
|
251
252
|
lib/xsd/xmlparser/nokogiri.rb
|
253
|
+
patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch
|
252
254
|
patches/sort-patches-by-date
|
253
255
|
suppressions/README.txt
|
254
256
|
suppressions/nokogiri_ruby-2.supp
|
@@ -300,6 +302,7 @@ test/html/sax/test_parser.rb
|
|
300
302
|
test/html/sax/test_parser_context.rb
|
301
303
|
test/html/sax/test_parser_text.rb
|
302
304
|
test/html/sax/test_push_parser.rb
|
305
|
+
test/html/test_attributes.rb
|
303
306
|
test/html/test_builder.rb
|
304
307
|
test/html/test_document.rb
|
305
308
|
test/html/test_document_encoding.rb
|
data/README.md
CHANGED
@@ -1,30 +1,24 @@
|
|
1
1
|
# Nokogiri
|
2
2
|
|
3
|
-
* http://nokogiri.org
|
4
|
-
* Installation: http://nokogiri.org/tutorials/installing_nokogiri.html
|
5
|
-
* Tutorials: http://nokogiri.org
|
6
|
-
* README: https://github.com/sparklemotion/nokogiri
|
7
|
-
* Mailing List: https://groups.google.com/group/nokogiri-talk
|
8
|
-
* Bug Reports: https://github.com/sparklemotion/nokogiri/issues
|
9
|
-
|
10
|
-
|
11
|
-
## Status
|
12
|
-
|
13
|
-
|System|Status|
|
14
|
-
|--|--|
|
15
|
-
| Concourse | [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri?groups=master) |
|
16
|
-
| Travis | [![Travis Build Status](https://travis-ci.org/sparklemotion/nokogiri.svg?branch=master)](https://travis-ci.org/sparklemotion/nokogiri) |
|
17
|
-
| Appveyor | [![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/sparklemotion/nokogiri?branch=master&svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri?branch=master) |
|
18
|
-
| Code Climate | [![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri) |
|
19
|
-
| Version Eye | [![Version Eye](https://www.versioneye.com/ruby/nokogiri/badge.png)](https://www.versioneye.com/ruby/nokogiri) |
|
20
|
-
|
21
|
-
|
22
3
|
## Description
|
23
4
|
|
24
5
|
Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
|
25
6
|
Nokogiri's many features is the ability to search documents via XPath
|
26
7
|
or CSS3 selectors.
|
27
8
|
|
9
|
+
* http://nokogiri.org
|
10
|
+
* [Installation Help](http://nokogiri.org/tutorials/installing_nokogiri.html)
|
11
|
+
* [Tutorials](http://nokogiri.org)
|
12
|
+
* [GitHub](https://github.com/sparklemotion/nokogiri)
|
13
|
+
* [Mailing List](https://groups.google.com/group/nokogiri-talk)
|
14
|
+
* [Bug Reports](https://github.com/sparklemotion/nokogiri/issues)
|
15
|
+
* [Chat/Gitter](https://gitter.im/sparklemotion/nokogiri)
|
16
|
+
|
17
|
+
[![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri?groups=master)
|
18
|
+
[![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri)
|
19
|
+
[![Version Eye](https://www.versioneye.com/ruby/nokogiri/badge.png)](https://www.versioneye.com/ruby/nokogiri)
|
20
|
+
[![Join the chat at https://gitter.im/sparklemotion/nokogiri](https://badges.gitter.im/sparklemotion/nokogiri.svg)](https://gitter.im/sparklemotion/nokogiri?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
21
|
+
|
28
22
|
|
29
23
|
## Features
|
30
24
|
|
@@ -81,6 +75,11 @@ There are open-source tutorials (to which we invite contributions!) here: http:/
|
|
81
75
|
* The IRC channel is #nokogiri on freenode.
|
82
76
|
|
83
77
|
|
78
|
+
## Security and Vulnerability Reporting
|
79
|
+
|
80
|
+
See [`SECURITY.md`](SECURITY.md)
|
81
|
+
|
82
|
+
|
84
83
|
## Synopsis
|
85
84
|
|
86
85
|
Nokogiri is a large library, but here is example usage for parsing and examining a document:
|
@@ -158,7 +157,7 @@ best bet is to explicitly set the encoding. Here is an example of
|
|
158
157
|
explicitly setting the encoding to EUC-JP on the parser:
|
159
158
|
|
160
159
|
```ruby
|
161
|
-
doc = Nokogiri.XML('<foo><bar
|
160
|
+
doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
|
162
161
|
```
|
163
162
|
|
164
163
|
## Development
|
@@ -170,4 +169,4 @@ explicitly setting the encoding to EUC-JP on the parser:
|
|
170
169
|
|
171
170
|
## License
|
172
171
|
|
173
|
-
MIT. See
|
172
|
+
MIT. See [`LICENSE.md`](LICENSE.md).
|
data/Rakefile
CHANGED
@@ -107,6 +107,8 @@ HOE = Hoe.spec 'nokogiri' do
|
|
107
107
|
developer 'Yoko Harada', 'yokolet@gmail.com'
|
108
108
|
developer 'Tim Elliott', 'tle@holymonkey.com'
|
109
109
|
developer 'Akinori MUSHA', 'knu@idaemons.org'
|
110
|
+
developer 'John Shahid', 'jvshahid@gmail.com'
|
111
|
+
developer 'Lars Kanis', 'lars@greiz-reinsdorf.de'
|
110
112
|
|
111
113
|
license "MIT"
|
112
114
|
|
@@ -292,15 +294,7 @@ task :java_debug do
|
|
292
294
|
ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if ENV['JAVA_DEBUG']
|
293
295
|
end
|
294
296
|
|
295
|
-
|
296
|
-
task :test_19 => :test
|
297
|
-
task :test_20 do
|
298
|
-
ENV['JRUBY_OPTS'] = "--2.0"
|
299
|
-
Rake::Task["test"].invoke
|
300
|
-
end
|
301
|
-
end
|
302
|
-
|
303
|
-
# Rake::Task[:test].prerequisites << :compile
|
297
|
+
Rake::Task[:test].prerequisites << :compile
|
304
298
|
Rake::Task[:test].prerequisites << :java_debug
|
305
299
|
Rake::Task[:test].prerequisites << :check_extra_deps unless java?
|
306
300
|
|
data/SECURITY.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Security and Vulnerability Reporting
|
2
|
+
|
3
|
+
The Nokogiri Core Contributors take security very seriously and investigate all reported vulnerabilities.
|
4
|
+
|
5
|
+
If you would like to report a vulnerability or have a security concern regarding Nokogiri or how Nokogiri is using any of its underlying platform-specific libraries (such as libxml2 or xerces), please [report it via HackerOne](https://hackerone.com/nokogiri/reports/new).
|
6
|
+
|
7
|
+
Your report will be acknowledged within 24 hours, and you'll receive a more detailed response within 72 hours indicating next steps in handling your report.
|
8
|
+
|
9
|
+
If you have not received a reply to your submission within 48 hours, there are a few steps you can take:
|
10
|
+
|
11
|
+
* Contact the current security coordinator (Mike Dalessio <mike.dalessio@gmail.com>)
|
12
|
+
* Contact the core contributor mailing list (nokogiri-core@googlegroups.com)
|
13
|
+
* Join the [nokogiri-talk group](https://groups.google.com/d/forum/nokogiri-talk)
|
14
|
+
|
15
|
+
Please note, the nokogiri-talk list is a public area. When escalating in that venue, please do not discuss your issue. Simply say that you're trying to get a hold of someone from the core team.
|
16
|
+
|
17
|
+
The information you share with the Nokogiri Core Contributors as part of this process will be kept confidential within the team, unless or until we need to share information upstream with our dependent libraries' core teams, at which point we will notify you.
|
18
|
+
|
19
|
+
If a vulnerability is first reported by you, we will credit you with the discovery in the public disclosure.
|
data/build_all
CHANGED
data/dependencies.yml
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
libxml2:
|
2
|
-
version: "2.9.
|
3
|
-
sha256: "
|
2
|
+
version: "2.9.8"
|
3
|
+
sha256: "0b74e51595654f958148759cfef0993114ddccccbb6f31aee018f3558e8e2732"
|
4
4
|
# manually verified checksum:
|
5
5
|
#
|
6
|
-
# $ gpg --verify libxml2-2.9.
|
7
|
-
# gpg: Signature made
|
6
|
+
# $ gpg --verify libxml2-2.9.8.tar.gz.asc ./ports/archives/libxml2-2.9.8.tar.gz
|
7
|
+
# gpg: Signature made Mon 05 Mar 2018 11:07:45 AM EST using RSA key ID 596BEA5D
|
8
8
|
# gpg: Good signature from "Daniel Veillard (Red Hat work email) <veillard@redhat.com>"
|
9
9
|
# gpg: aka "Daniel Veillard <Daniel.Veillard@w3.org>"
|
10
10
|
# gpg: WARNING: This key is not certified with a trusted signature!
|
@@ -15,13 +15,13 @@ libxml2:
|
|
15
15
|
# using this pgp signature:
|
16
16
|
# -----BEGIN PGP SIGNATURE-----
|
17
17
|
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# =
|
18
|
+
# iQEcBAABAgAGBQJanWtRAAoJEBVYiyZZa+pdV7oIAJWdFahwt+reN/Zt2RPmjjcr
|
19
|
+
# eSsY7UV1RXjScnNjTzJT1h2hJ7SnUjCkqjR6VdtKDUIzpuX+S2U83joafJH6mxUb
|
20
|
+
# yw2nO4RfjYTPxpz5JkvqT7jmgEIaD81BuwcMehqpMpIfiKa2NgO1DSfZxgs8a9E2
|
21
|
+
# +ehc/kZWuI5gmNGrd84EEWUqpYW/Xx7jy02osioJuU5IMPjzZKNR3maXp9oAKeBc
|
22
|
+
# S2QNa1ID/pUk3K3M/5nlwNgAtQ7lxQrqhrSma2dsKt/IpL6VXomxuD4Bh1r2MZhX
|
23
|
+
# uZ456X/xJN8UmPewLZWGBU1MK9wqu3Zx5Qwz64H6UdlYIzXZ2jXj2YWZa6xkxPA=
|
24
|
+
# =69xn
|
25
25
|
# -----END PGP SIGNATURE-----
|
26
26
|
#
|
27
27
|
|
@@ -223,30 +223,24 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
223
223
|
}
|
224
224
|
|
225
225
|
/**
|
226
|
-
* Create a new parser context that will read from a raw input
|
227
|
-
*
|
228
|
-
* thread by HtmlSaxPushParser.
|
226
|
+
* Create a new parser context that will read from a raw input stream.
|
227
|
+
* Meant to be run in a separate thread by HtmlSaxPushParser.
|
229
228
|
*/
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
|
234
|
-
ctx.initialize(context.getRuntime());
|
229
|
+
static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) {
|
230
|
+
HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
|
231
|
+
ctx.initialize(runtime);
|
235
232
|
ctx.setInputSource(stream);
|
236
233
|
return ctx;
|
237
234
|
}
|
238
235
|
|
239
236
|
@Override
|
240
|
-
protected void preParse(
|
241
|
-
IRubyObject handlerRuby,
|
242
|
-
NokogiriHandler handler) {
|
237
|
+
protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
|
243
238
|
// final String path = "Nokogiri::XML::FragmentHandler";
|
244
239
|
// final String docFrag =
|
245
240
|
// "http://cyberneko.org/html/features/balance-tags/document-fragment";
|
246
241
|
// RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
|
247
242
|
// IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document");
|
248
|
-
// RubyModule mod =
|
249
|
-
// context.getRuntime().getClassFromPath(path);
|
243
|
+
// RubyModule mod = runtime.getClassFromPath(path);
|
250
244
|
// try {
|
251
245
|
// if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod))
|
252
246
|
// parser.setFeature(docFrag, true);
|
@@ -32,33 +32,26 @@
|
|
32
32
|
|
33
33
|
package nokogiri;
|
34
34
|
|
35
|
+
import static nokogiri.XmlSaxPushParser.terminateExecution;
|
35
36
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
37
|
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
37
|
-
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
38
38
|
|
39
39
|
import java.io.ByteArrayInputStream;
|
40
|
+
import java.io.InputStream;
|
40
41
|
import java.io.IOException;
|
41
|
-
import java.nio.charset.Charset;
|
42
|
-
import java.nio.charset.IllegalCharsetNameException;
|
43
42
|
import java.util.concurrent.Callable;
|
43
|
+
import java.util.concurrent.ExecutionException;
|
44
44
|
import java.util.concurrent.ExecutorService;
|
45
45
|
import java.util.concurrent.Executors;
|
46
|
-
import java.util.EnumSet;
|
47
46
|
import java.util.concurrent.Future;
|
48
47
|
import java.util.concurrent.FutureTask;
|
49
48
|
import java.util.concurrent.ThreadFactory;
|
50
49
|
|
51
|
-
import nokogiri.internals
|
52
|
-
import nokogiri.internals.NokogiriBlockingQueueInputStream;
|
53
|
-
import nokogiri.internals.NokogiriHelpers;
|
54
|
-
import nokogiri.internals.ParserContext;
|
50
|
+
import nokogiri.internals.*;
|
55
51
|
|
56
52
|
import org.jruby.Ruby;
|
57
53
|
import org.jruby.RubyClass;
|
58
|
-
import org.jruby.RubyException;
|
59
|
-
import org.jruby.RubyFixnum;
|
60
54
|
import org.jruby.RubyObject;
|
61
|
-
import org.jruby.RubyString;
|
62
55
|
import org.jruby.anno.JRubyClass;
|
63
56
|
import org.jruby.anno.JRubyMethod;
|
64
57
|
import org.jruby.exceptions.RaiseException;
|
@@ -74,12 +67,13 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
74
67
|
@JRubyClass(name="Nokogiri::HTML::SAX::PushParser")
|
75
68
|
public class HtmlSaxPushParser extends RubyObject {
|
76
69
|
ParserContext.Options options;
|
77
|
-
IRubyObject optionsRuby;
|
78
70
|
IRubyObject saxParser;
|
71
|
+
|
79
72
|
NokogiriBlockingQueueInputStream stream;
|
80
|
-
|
81
|
-
|
82
|
-
|
73
|
+
|
74
|
+
private ParserTask parserTask = null;
|
75
|
+
private FutureTask<HtmlSaxParserContext> futureTask = null;
|
76
|
+
private ExecutorService executor = null;
|
83
77
|
|
84
78
|
public HtmlSaxPushParser(Ruby ruby, RubyClass rubyClass) {
|
85
79
|
super(ruby, rubyClass);
|
@@ -87,48 +81,46 @@ public class HtmlSaxPushParser extends RubyObject {
|
|
87
81
|
|
88
82
|
@Override
|
89
83
|
public void finalize() {
|
90
|
-
|
84
|
+
try {
|
85
|
+
terminateImpl();
|
86
|
+
}
|
87
|
+
catch (Exception e) { /* ignored */ }
|
91
88
|
}
|
92
89
|
|
93
|
-
/**
|
94
|
-
* Silently skips provided encoding
|
95
|
-
*
|
96
|
-
*/
|
97
90
|
@JRubyMethod
|
98
91
|
public IRubyObject initialize_native(final ThreadContext context,
|
99
92
|
IRubyObject saxParser,
|
100
93
|
IRubyObject fileName,
|
101
94
|
IRubyObject encoding) {
|
102
|
-
|
103
|
-
= invoke(context, context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions"), "new");
|
104
|
-
|
95
|
+
// NOTE: Silently skips provided encoding
|
105
96
|
options = new ParserContext.Options(0);
|
106
97
|
this.saxParser = saxParser;
|
107
98
|
return this;
|
108
99
|
}
|
109
100
|
|
110
|
-
|
111
|
-
|
112
|
-
|
101
|
+
private transient IRubyObject parse_options;
|
102
|
+
|
103
|
+
private IRubyObject parse_options(final ThreadContext context) {
|
104
|
+
if (parse_options == null) {
|
105
|
+
parse_options = invoke(context, context.runtime.getClassFromPath("Nokogiri::XML::ParseOptions"), "new");
|
106
|
+
}
|
107
|
+
return parse_options;
|
108
|
+
}
|
109
|
+
|
113
110
|
@JRubyMethod(name="options")
|
114
111
|
public IRubyObject getOptions(ThreadContext context) {
|
115
|
-
return invoke(context,
|
112
|
+
return invoke(context, parse_options(context), "options");
|
116
113
|
}
|
117
114
|
|
118
|
-
/**
|
119
|
-
* <code>val</code> is an integer.
|
120
|
-
*/
|
121
115
|
@JRubyMethod(name="options=")
|
122
|
-
public IRubyObject setOptions(ThreadContext context, IRubyObject
|
123
|
-
invoke(context,
|
124
|
-
options =
|
125
|
-
new ParserContext.Options(val.convertToInteger().getLongValue());
|
116
|
+
public IRubyObject setOptions(ThreadContext context, IRubyObject opts) {
|
117
|
+
invoke(context, parse_options(context), "options=", opts);
|
118
|
+
options = new ParserContext.Options(opts.convertToInteger().getLongValue());
|
126
119
|
return getOptions(context);
|
127
120
|
}
|
128
121
|
|
129
122
|
@JRubyMethod
|
130
|
-
public IRubyObject native_write(ThreadContext context, IRubyObject chunk,
|
131
|
-
IRubyObject isLast) {
|
123
|
+
public IRubyObject native_write(ThreadContext context, IRubyObject chunk, IRubyObject isLast) {
|
132
124
|
try {
|
133
125
|
initialize_task(context);
|
134
126
|
} catch (IOException e) {
|
@@ -136,7 +128,7 @@ public class HtmlSaxPushParser extends RubyObject {
|
|
136
128
|
}
|
137
129
|
final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk);
|
138
130
|
if (data == null) {
|
139
|
-
terminateTask(context);
|
131
|
+
terminateTask(context.runtime);
|
140
132
|
throw new RaiseException(XmlSyntaxError.createHTMLSyntaxError(context.runtime)); // Nokogiri::HTML::SyntaxError
|
141
133
|
}
|
142
134
|
|
@@ -145,33 +137,37 @@ public class HtmlSaxPushParser extends RubyObject {
|
|
145
137
|
if (isLast.isTrue()) {
|
146
138
|
IRubyObject document = invoke(context, this, "document");
|
147
139
|
invoke(context, document, "end_document");
|
148
|
-
terminateTask(context);
|
140
|
+
terminateTask(context.runtime);
|
149
141
|
} else {
|
150
142
|
try {
|
151
|
-
|
152
|
-
|
153
|
-
}
|
154
|
-
|
155
|
-
|
156
|
-
|
143
|
+
Future<Void> task = stream.addChunk(data);
|
144
|
+
task.get();
|
145
|
+
}
|
146
|
+
catch (ClosedStreamException ex) {
|
147
|
+
// this means the stream is closed, ignore this exception
|
148
|
+
}
|
149
|
+
catch (Exception e) {
|
150
|
+
throw context.runtime.newRuntimeError(e.getMessage());
|
157
151
|
}
|
158
152
|
|
159
153
|
}
|
160
154
|
|
161
155
|
if (!options.recover && parserTask.getErrorCount() > errorCount0) {
|
162
|
-
terminateTask(context);
|
163
|
-
throw
|
156
|
+
terminateTask(context.runtime);
|
157
|
+
throw parserTask.getLastError();
|
164
158
|
}
|
165
159
|
|
166
160
|
return this;
|
167
161
|
}
|
168
162
|
|
163
|
+
@SuppressWarnings("unchecked")
|
169
164
|
private void initialize_task(ThreadContext context) throws IOException {
|
170
165
|
if (futureTask == null || stream == null) {
|
171
166
|
stream = new NokogiriBlockingQueueInputStream();
|
172
167
|
|
173
|
-
|
174
|
-
|
168
|
+
assert saxParser != null : "saxParser null";
|
169
|
+
parserTask = new ParserTask(context, saxParser, stream);
|
170
|
+
futureTask = new FutureTask<HtmlSaxParserContext>((Callable) parserTask);
|
175
171
|
executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
|
176
172
|
@Override
|
177
173
|
public Thread newThread(Runnable r) {
|
@@ -185,56 +181,42 @@ public class HtmlSaxPushParser extends RubyObject {
|
|
185
181
|
}
|
186
182
|
}
|
187
183
|
|
188
|
-
private
|
184
|
+
private void terminateTask(final Ruby runtime) {
|
185
|
+
if (executor == null) return;
|
186
|
+
|
189
187
|
try {
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
} catch (Exception e) {
|
195
|
-
if (context != null)
|
196
|
-
throw context.getRuntime().newRuntimeError(e.getMessage());
|
188
|
+
terminateImpl();
|
189
|
+
}
|
190
|
+
catch (InterruptedException e) {
|
191
|
+
throw runtime.newRuntimeError(e.toString());
|
197
192
|
}
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
193
|
+
catch (Exception e) {
|
194
|
+
throw runtime.newRuntimeError(e.toString());
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
private synchronized void terminateImpl() throws InterruptedException, ExecutionException {
|
199
|
+
terminateExecution(executor, stream, futureTask);
|
200
|
+
|
201
|
+
executor = null; stream = null; futureTask = null;
|
202
|
+
}
|
203
|
+
|
204
|
+
private static HtmlSaxParserContext parse(final Ruby runtime, final InputStream stream) {
|
205
|
+
RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML::SAX::ParserContext");
|
206
|
+
return HtmlSaxParserContext.parse_stream(runtime, klazz, stream);
|
203
207
|
}
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
private
|
208
|
-
|
209
|
-
private final HtmlSaxParserContext parser;
|
210
|
-
|
211
|
-
private ParserTask(ThreadContext context, IRubyObject handler) {
|
212
|
-
RubyClass klazz = getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::SAX::ParserContext");
|
213
|
-
this.context = context;
|
214
|
-
this.handler = handler;
|
215
|
-
this.parser = (HtmlSaxParserContext) HtmlSaxParserContext.parse_stream(context, klazz, stream);
|
208
|
+
|
209
|
+
static class ParserTask extends XmlSaxPushParser.ParserTask /* <HtmlSaxPushParser> */ {
|
210
|
+
|
211
|
+
private ParserTask(ThreadContext context, IRubyObject handler, InputStream stream) {
|
212
|
+
super(context, handler, parse(context.runtime, stream), stream);
|
216
213
|
}
|
217
214
|
|
218
215
|
@Override
|
219
216
|
public HtmlSaxParserContext call() throws Exception {
|
220
|
-
|
221
|
-
parser.parse_with(context, handler);
|
222
|
-
} finally {
|
223
|
-
// we have to close the stream before exiting, otherwise someone
|
224
|
-
// can add a chunk and block on task.get() forever.
|
225
|
-
stream.close();
|
226
|
-
}
|
227
|
-
return parser;
|
228
|
-
}
|
229
|
-
|
230
|
-
private synchronized int getErrorCount() {
|
231
|
-
// check for null because thread may not have started yet
|
232
|
-
if (parser.getNokogiriHandler() == null) return 0;
|
233
|
-
else return parser.getNokogiriHandler().getErrorCount();
|
217
|
+
return (HtmlSaxParserContext) super.call();
|
234
218
|
}
|
235
219
|
|
236
|
-
private synchronized RubyException getLastError() {
|
237
|
-
return (RubyException) parser.getNokogiriHandler().getLastError();
|
238
|
-
}
|
239
220
|
}
|
221
|
+
|
240
222
|
}
|