nokogiri 1.8.2-java → 1.8.3-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (65) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +14 -14
  3. data/CHANGELOG.md +43 -1
  4. data/LICENSE.md +2 -1
  5. data/Manifest.txt +3 -0
  6. data/README.md +20 -21
  7. data/Rakefile +3 -9
  8. data/SECURITY.md +19 -0
  9. data/build_all +1 -1
  10. data/dependencies.yml +11 -11
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +7 -13
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +72 -90
  13. data/ext/java/nokogiri/NokogiriService.java +0 -19
  14. data/ext/java/nokogiri/XmlNode.java +2 -23
  15. data/ext/java/nokogiri/XmlSaxParserContext.java +81 -101
  16. data/ext/java/nokogiri/XmlSaxPushParser.java +117 -89
  17. data/ext/java/nokogiri/XmlSyntaxError.java +9 -17
  18. data/ext/java/nokogiri/internals/NokogiriHandler.java +100 -108
  19. data/ext/java/nokogiri/internals/NokogiriHelpers.java +11 -14
  20. data/ext/java/nokogiri/internals/ParserContext.java +34 -19
  21. data/ext/java/nokogiri/internals/ReaderNode.java +6 -10
  22. data/ext/java/nokogiri/internals/SaveContextVisitor.java +4 -3
  23. data/ext/java/nokogiri/internals/XmlDomParserContext.java +6 -3
  24. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +4 -3
  25. data/ext/nokogiri/extconf.rb +1 -1
  26. data/ext/nokogiri/html_element_description.c +14 -14
  27. data/ext/nokogiri/xml_cdata.c +6 -4
  28. data/ext/nokogiri/xml_document.c +2 -3
  29. data/ext/nokogiri/xml_dtd.c +2 -2
  30. data/ext/nokogiri/xml_io.c +1 -0
  31. data/ext/nokogiri/xml_namespace.c +3 -9
  32. data/ext/nokogiri/xml_namespace.h +2 -0
  33. data/ext/nokogiri/xml_node.c +23 -15
  34. data/ext/nokogiri/xml_node_set.c +5 -4
  35. data/ext/nokogiri/xml_node_set.h +0 -1
  36. data/ext/nokogiri/xslt_stylesheet.c +2 -2
  37. data/lib/nokogiri/css/parser.rb +108 -90
  38. data/lib/nokogiri/css/parser.y +13 -2
  39. data/lib/nokogiri/css/tokenizer.rb +1 -1
  40. data/lib/nokogiri/css/tokenizer.rex +4 -4
  41. data/lib/nokogiri/css/xpath_visitor.rb +10 -3
  42. data/lib/nokogiri/html/document_fragment.rb +11 -1
  43. data/lib/nokogiri/nokogiri.jar +0 -0
  44. data/lib/nokogiri/version.rb +1 -1
  45. data/lib/nokogiri/xml/node.rb +58 -0
  46. data/lib/nokogiri/xml/node_set.rb +32 -18
  47. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
  48. data/test/css/test_nthiness.rb +21 -21
  49. data/test/css/test_parser.rb +17 -0
  50. data/test/html/test_attributes.rb +85 -0
  51. data/test/html/test_document_fragment.rb +7 -1
  52. data/test/test_css_cache.rb +5 -3
  53. data/test/xml/sax/test_parser.rb +9 -1
  54. data/test/xml/sax/test_push_parser.rb +60 -0
  55. data/test/xml/test_cdata.rb +1 -1
  56. data/test/xml/test_document.rb +5 -5
  57. data/test/xml/test_dtd.rb +4 -4
  58. data/test/xml/test_node.rb +89 -6
  59. data/test/xml/test_node_attributes.rb +3 -3
  60. data/test/xml/test_node_reparenting.rb +18 -0
  61. data/test/xml/test_node_set.rb +31 -4
  62. data/test/xml/test_reader.rb +13 -1
  63. data/test/xml/test_syntax_error.rb +3 -3
  64. data/test/xml/test_xpath.rb +8 -0
  65. metadata +25 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 23422f6678062a871c836fd2b844d8b286a682eb4b261285e969c9a7d44fa555
4
- data.tar.gz: 7f5a130b02e476899204ae9f04d7761de8c24ea9d8b9b4f3fb044e09229de67e
2
+ SHA1:
3
+ metadata.gz: f6f9a9fae7034c2bd0c490e0395e5c7a26dbde6c
4
+ data.tar.gz: f97501387a99a0a335c45569eaa22bdc9b6f41af
5
5
  SHA512:
6
- metadata.gz: 067c096637dcee08715b39429bd362c020cc00bce7e7a4f24eedcea4935f7e91e6abdfb545463963e29853acd5d7c56f4df1062d5463e8952b9fe4892b9d42ec
7
- data.tar.gz: 72dcc24e197b347240b9c88d694151eee14f9480505e889d585407e91a20a4c3128a9a0d948a277246149153bb2d219bbd12cbf89a8f9c04e50000d8f38476da
6
+ metadata.gz: 6f248fb6871692986843da22c22c7f20064e1ed1d92c073fbe5b00f7a2151d5f409f1723dbc2ec356fe80e192bf975bc6f9033d056916b1f0f661085e88a0da0
7
+ data.tar.gz: 94306c4713312f3c00da66f82adc79dc159eb6b40ffedf11ac5185efacfab1fcf4bac083c1d0fff95947612f4c97a0f8e7d8b6e6d5656d25d24c8913df279b34
@@ -18,15 +18,15 @@ matrix:
18
18
  - os: osx
19
19
  rvm: 2.2.5
20
20
  - os: linux
21
- rvm: 2.3.1
21
+ rvm: 2.3.5
22
22
  - os: osx
23
- rvm: 2.3.1
23
+ rvm: 2.3.5
24
24
  - os: linux
25
- rvm: 2.4.0
25
+ rvm: 2.4.2
26
26
  env:
27
27
  - RUBYOPT="--enable-frozen-string-literal --debug=frozen-string-literal"
28
28
  - os: osx
29
- rvm: 2.4.0
29
+ rvm: 2.4.2
30
30
  env:
31
31
  - RUBYOPT="--enable-frozen-string-literal --debug=frozen-string-literal"
32
32
  - os: linux
@@ -40,22 +40,22 @@ matrix:
40
40
  - os: linux
41
41
  rvm: jruby-1.7
42
42
  - os: linux
43
- rvm: jruby-9.1.5.0
43
+ rvm: jruby-9.1.15.0
44
44
  - os: linux
45
45
  rvm: rbx-3
46
46
  allow_failures:
47
47
  - rvm: ruby-head
48
48
  fast_finish: true
49
49
 
50
- notifications:
51
- irc:
52
- channels:
53
- - "chat.freenode.net#nokogiri"
54
- on_success: always
55
- on_failure: always
56
- template:
57
- - "%{repository} (%{branch}:%{commit} by %{author}): %{message} (%{build_url})"
58
- skip_join: true
50
+ # notifications:
51
+ # irc:
52
+ # channels:
53
+ # - "chat.freenode.net#nokogiri"
54
+ # on_success: always
55
+ # on_failure: always
56
+ # template:
57
+ # - "%{repository} (%{branch}:%{commit} by %{author}): %{message} (%{build_url})"
58
+ # skip_join: true
59
59
 
60
60
  addons:
61
61
  apt:
@@ -1,3 +1,45 @@
1
+ # 1.8.3 / 2018-06-16
2
+
3
+ ## Security Notes
4
+
5
+ [MRI] A behavior change in libxml2 that caused CVE-2018-8048 (loofah gem), CVE-2018-3740 (sanitize gem), and CVE-2018-3741 (rails-html-sanitizer gem) has been reverted. The commit in question is here:
6
+
7
+ > https://github.com/GNOME/libxml2/commit/960f0e2
8
+
9
+ and more information is available about this commit and its impact here:
10
+
11
+ > https://github.com/flavorjones/loofah/issues/144
12
+
13
+ This release simply reverts the libxml2 commit in question to protect users of Nokogiri's vendored libraries from similar vulnerabilities.
14
+
15
+ If you're offended by what happened here, I'd kindly ask that you comment on the upstream bug report here:
16
+
17
+ > https://bugzilla.gnome.org/show_bug.cgi?id=769760
18
+
19
+
20
+ ## Dependencies
21
+
22
+ * [MRI] libxml2 is updated from 2.9.7 to 2.9.8
23
+
24
+
25
+ ## Features
26
+
27
+ * Node#classes, #add_class, #append_class, and #remove_class are added.
28
+ * NodeSet#append_class is added.
29
+ * NodeSet#remove_attribute is a new alias for NodeSet#remove_attr.
30
+ * NodeSet#each now returns an Enumerator when no block is passed (Thanks, @park53kr!)
31
+ * [JRuby] General improvements in JRuby implementation (Thanks, @kares!)
32
+
33
+
34
+ ## Bug fixes
35
+
36
+ * CSS attribute selectors now gracefully handle queries using integers. [#711]
37
+ * Handle ASCII-8BIT encoding on fragment input [#553]
38
+ * Handle non-string return values within `Reader` [#898]
39
+ * [JRuby] Allow Node#replace to insert Comment and CDATA nodes. [#1666]
40
+ * [JRuby] Stability and speed improvements to `Node`, `Sax::PushParser`, and the JRuby implementation [#1708, #1710, #1501]
41
+
42
+
1
43
  # 1.8.2 / 2018-01-29
2
44
 
3
45
  ## Security Notes
@@ -8,7 +50,7 @@
8
50
  ## Dependencies
9
51
 
10
52
  * [MRI] libxml2 is updated from 2.9.5 to 2.9.7
11
- * [MRI] libxml2 is updated from 1.1.30 to 1.1.32
53
+ * [MRI] libxslt is updated from 1.1.30 to 1.1.32
12
54
 
13
55
 
14
56
  ## Features
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  (The MIT License)
2
2
 
3
- Copyright (c) 2008 - 2017:
3
+ Copyright (c) 2008 - 2018:
4
4
 
5
5
  * [Aaron Patterson](http://tenderlovemaking.com)
6
6
  * [Mike Dalessio](http://mike.daless.io)
@@ -10,6 +10,7 @@ Copyright (c) 2008 - 2017:
10
10
  * [Yoko Harada](http://yokolet.blogspot.com)
11
11
  * [Akinori MUSHA](https://akinori.org)
12
12
  * [John Shahid](https://github.com/jvshahid)
13
+ * [Lars Kanis](https://github.com/larskanis)
13
14
 
14
15
  Permission is hereby granted, free of charge, to any person obtaining
15
16
  a copy of this software and associated documentation files (the
@@ -14,6 +14,7 @@ Manifest.txt
14
14
  README.md
15
15
  ROADMAP.md
16
16
  Rakefile
17
+ SECURITY.md
17
18
  STANDARD_RESPONSES.md
18
19
  Y_U_NO_GEMSPEC.md
19
20
  appveyor.yml
@@ -249,6 +250,7 @@ lib/xalan.jar
249
250
  lib/xercesImpl.jar
250
251
  lib/xml-apis.jar
251
252
  lib/xsd/xmlparser/nokogiri.rb
253
+ patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch
252
254
  patches/sort-patches-by-date
253
255
  suppressions/README.txt
254
256
  suppressions/nokogiri_ruby-2.supp
@@ -300,6 +302,7 @@ test/html/sax/test_parser.rb
300
302
  test/html/sax/test_parser_context.rb
301
303
  test/html/sax/test_parser_text.rb
302
304
  test/html/sax/test_push_parser.rb
305
+ test/html/test_attributes.rb
303
306
  test/html/test_builder.rb
304
307
  test/html/test_document.rb
305
308
  test/html/test_document_encoding.rb
data/README.md CHANGED
@@ -1,30 +1,24 @@
1
1
  # Nokogiri
2
2
 
3
- * http://nokogiri.org
4
- * Installation: http://nokogiri.org/tutorials/installing_nokogiri.html
5
- * Tutorials: http://nokogiri.org
6
- * README: https://github.com/sparklemotion/nokogiri
7
- * Mailing List: https://groups.google.com/group/nokogiri-talk
8
- * Bug Reports: https://github.com/sparklemotion/nokogiri/issues
9
-
10
-
11
- ## Status
12
-
13
- |System|Status|
14
- |--|--|
15
- | Concourse | [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri?groups=master) |
16
- | Travis | [![Travis Build Status](https://travis-ci.org/sparklemotion/nokogiri.svg?branch=master)](https://travis-ci.org/sparklemotion/nokogiri) |
17
- | Appveyor | [![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/sparklemotion/nokogiri?branch=master&svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri?branch=master) |
18
- | Code Climate | [![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri) |
19
- | Version Eye | [![Version Eye](https://www.versioneye.com/ruby/nokogiri/badge.png)](https://www.versioneye.com/ruby/nokogiri) |
20
-
21
-
22
3
  ## Description
23
4
 
24
5
  Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser. Among
25
6
  Nokogiri's many features is the ability to search documents via XPath
26
7
  or CSS3 selectors.
27
8
 
9
+ * http://nokogiri.org
10
+ * [Installation Help](http://nokogiri.org/tutorials/installing_nokogiri.html)
11
+ * [Tutorials](http://nokogiri.org)
12
+ * [GitHub](https://github.com/sparklemotion/nokogiri)
13
+ * [Mailing List](https://groups.google.com/group/nokogiri-talk)
14
+ * [Bug Reports](https://github.com/sparklemotion/nokogiri/issues)
15
+ * [Chat/Gitter](https://gitter.im/sparklemotion/nokogiri)
16
+
17
+ [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/ruby-2.4-system/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri?groups=master)
18
+ [![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri)
19
+ [![Version Eye](https://www.versioneye.com/ruby/nokogiri/badge.png)](https://www.versioneye.com/ruby/nokogiri)
20
+ [![Join the chat at https://gitter.im/sparklemotion/nokogiri](https://badges.gitter.im/sparklemotion/nokogiri.svg)](https://gitter.im/sparklemotion/nokogiri?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
21
+
28
22
 
29
23
  ## Features
30
24
 
@@ -81,6 +75,11 @@ There are open-source tutorials (to which we invite contributions!) here: http:/
81
75
  * The IRC channel is #nokogiri on freenode.
82
76
 
83
77
 
78
+ ## Security and Vulnerability Reporting
79
+
80
+ See [`SECURITY.md`](SECURITY.md)
81
+
82
+
84
83
  ## Synopsis
85
84
 
86
85
  Nokogiri is a large library, but here is example usage for parsing and examining a document:
@@ -158,7 +157,7 @@ best bet is to explicitly set the encoding. Here is an example of
158
157
  explicitly setting the encoding to EUC-JP on the parser:
159
158
 
160
159
  ```ruby
161
- doc = Nokogiri.XML('<foo><bar /><foo>', nil, 'EUC-JP')
160
+ doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
162
161
  ```
163
162
 
164
163
  ## Development
@@ -170,4 +169,4 @@ explicitly setting the encoding to EUC-JP on the parser:
170
169
 
171
170
  ## License
172
171
 
173
- MIT. See the `LICENSE.md` file.
172
+ MIT. See [`LICENSE.md`](LICENSE.md).
data/Rakefile CHANGED
@@ -107,6 +107,8 @@ HOE = Hoe.spec 'nokogiri' do
107
107
  developer 'Yoko Harada', 'yokolet@gmail.com'
108
108
  developer 'Tim Elliott', 'tle@holymonkey.com'
109
109
  developer 'Akinori MUSHA', 'knu@idaemons.org'
110
+ developer 'John Shahid', 'jvshahid@gmail.com'
111
+ developer 'Lars Kanis', 'lars@greiz-reinsdorf.de'
110
112
 
111
113
  license "MIT"
112
114
 
@@ -292,15 +294,7 @@ task :java_debug do
292
294
  ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if ENV['JAVA_DEBUG']
293
295
  end
294
296
 
295
- if java?
296
- task :test_19 => :test
297
- task :test_20 do
298
- ENV['JRUBY_OPTS'] = "--2.0"
299
- Rake::Task["test"].invoke
300
- end
301
- end
302
-
303
- # Rake::Task[:test].prerequisites << :compile
297
+ Rake::Task[:test].prerequisites << :compile
304
298
  Rake::Task[:test].prerequisites << :java_debug
305
299
  Rake::Task[:test].prerequisites << :check_extra_deps unless java?
306
300
 
@@ -0,0 +1,19 @@
1
+ # Security and Vulnerability Reporting
2
+
3
+ The Nokogiri Core Contributors take security very seriously and investigate all reported vulnerabilities.
4
+
5
+ If you would like to report a vulnerability or have a security concern regarding Nokogiri or how Nokogiri is using any of its underlying platform-specific libraries (such as libxml2 or xerces), please [report it via HackerOne](https://hackerone.com/nokogiri/reports/new).
6
+
7
+ Your report will be acknowledged within 24 hours, and you'll receive a more detailed response within 72 hours indicating next steps in handling your report.
8
+
9
+ If you have not received a reply to your submission within 48 hours, there are a few steps you can take:
10
+
11
+ * Contact the current security coordinator (Mike Dalessio <mike.dalessio@gmail.com>)
12
+ * Contact the core contributor mailing list (nokogiri-core@googlegroups.com)
13
+ * Join the [nokogiri-talk group](https://groups.google.com/d/forum/nokogiri-talk)
14
+
15
+ Please note, the nokogiri-talk list is a public area. When escalating in that venue, please do not discuss your issue. Simply say that you're trying to get a hold of someone from the core team.
16
+
17
+ The information you share with the Nokogiri Core Contributors as part of this process will be kept confidential within the team, unless or until we need to share information upstream with our dependent libraries' core teams, at which point we will notify you.
18
+
19
+ If a vulnerability is first reported by you, we will credit you with the discovery in the public disclosure.
data/build_all CHANGED
@@ -40,5 +40,5 @@ bundle exec rake generate
40
40
  rvm jruby
41
41
  gem install bundler --conservative
42
42
  bundle install --quiet --local || bundle install
43
- bundle exec rake gem
43
+ bundle exec ruby -S rake gem
44
44
  cp -v pkg/nokogiri*java.gem gems
@@ -1,10 +1,10 @@
1
1
  libxml2:
2
- version: "2.9.7"
3
- sha256: "f63c5e7d30362ed28b38bfa1ac6313f9a80230720b7fb6c80575eeab3ff5900c"
2
+ version: "2.9.8"
3
+ sha256: "0b74e51595654f958148759cfef0993114ddccccbb6f31aee018f3558e8e2732"
4
4
  # manually verified checksum:
5
5
  #
6
- # $ gpg --verify libxml2-2.9.7.tar.gz.asc libxml2-2.9.7.tar.gz
7
- # gpg: Signature made Thu 02 Nov 2017 04:17:54 PM EDT using RSA key ID 596BEA5D
6
+ # $ gpg --verify libxml2-2.9.8.tar.gz.asc ./ports/archives/libxml2-2.9.8.tar.gz
7
+ # gpg: Signature made Mon 05 Mar 2018 11:07:45 AM EST using RSA key ID 596BEA5D
8
8
  # gpg: Good signature from "Daniel Veillard (Red Hat work email) <veillard@redhat.com>"
9
9
  # gpg: aka "Daniel Veillard <Daniel.Veillard@w3.org>"
10
10
  # gpg: WARNING: This key is not certified with a trusted signature!
@@ -15,13 +15,13 @@ libxml2:
15
15
  # using this pgp signature:
16
16
  # -----BEGIN PGP SIGNATURE-----
17
17
  #
18
- # iQEcBAABAgAGBQJZ+31yAAoJEBVYiyZZa+pd+MMH/RTsPCo8CegAHsSUlNCae2Ay
19
- # HpM6J7sNWs0b2xwaYRtwhewJ5z/rsCcM5nn7MeGiU8dX7W5MKF69uGXUywufRxzO
20
- # GFoQSxO0a1z5mtGu7jC/W6DTYHFBeOJcEfMa3zUZjZ0RXqrENRhX55okse4pJPmG
21
- # dWQphbu9G3qoCuWD7QpyI7Mjhjemmyi4Ai/4vJPO8WtB7VDfdtpuZFVCRLG2cp0X
22
- # 90SZde+0oWJcN8oIG3FaTd7O4OuJPAceqNhTvF90dLvqSqRBd0RqZNnjYNYZ6uRy
23
- # FQSlQBVCxdLY0AI7Yooq79TJD+0vcqiQNwQwH7L+hBWgvPos4DKN1M9LaJc/RTk=
24
- # =5Qzo
18
+ # iQEcBAABAgAGBQJanWtRAAoJEBVYiyZZa+pdV7oIAJWdFahwt+reN/Zt2RPmjjcr
19
+ # eSsY7UV1RXjScnNjTzJT1h2hJ7SnUjCkqjR6VdtKDUIzpuX+S2U83joafJH6mxUb
20
+ # yw2nO4RfjYTPxpz5JkvqT7jmgEIaD81BuwcMehqpMpIfiKa2NgO1DSfZxgs8a9E2
21
+ # +ehc/kZWuI5gmNGrd84EEWUqpYW/Xx7jy02osioJuU5IMPjzZKNR3maXp9oAKeBc
22
+ # S2QNa1ID/pUk3K3M/5nlwNgAtQ7lxQrqhrSma2dsKt/IpL6VXomxuD4Bh1r2MZhX
23
+ # uZ456X/xJN8UmPewLZWGBU1MK9wqu3Zx5Qwz64H6UdlYIzXZ2jXj2YWZa6xkxPA=
24
+ # =69xn
25
25
  # -----END PGP SIGNATURE-----
26
26
  #
27
27
 
@@ -223,30 +223,24 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
223
223
  }
224
224
 
225
225
  /**
226
- * Create a new parser context that will read from a raw input
227
- * stream. Not a JRuby method. Meant to be run in a separate
228
- * thread by HtmlSaxPushParser.
226
+ * Create a new parser context that will read from a raw input stream.
227
+ * Meant to be run in a separate thread by HtmlSaxPushParser.
229
228
  */
230
- public static IRubyObject parse_stream(ThreadContext context,
231
- IRubyObject klazz,
232
- InputStream stream) {
233
- HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
234
- ctx.initialize(context.getRuntime());
229
+ static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) {
230
+ HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
231
+ ctx.initialize(runtime);
235
232
  ctx.setInputSource(stream);
236
233
  return ctx;
237
234
  }
238
235
 
239
236
  @Override
240
- protected void preParse(ThreadContext context,
241
- IRubyObject handlerRuby,
242
- NokogiriHandler handler) {
237
+ protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
243
238
  // final String path = "Nokogiri::XML::FragmentHandler";
244
239
  // final String docFrag =
245
240
  // "http://cyberneko.org/html/features/balance-tags/document-fragment";
246
241
  // RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
247
242
  // IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document");
248
- // RubyModule mod =
249
- // context.getRuntime().getClassFromPath(path);
243
+ // RubyModule mod = runtime.getClassFromPath(path);
250
244
  // try {
251
245
  // if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod))
252
246
  // parser.setFeature(docFrag, true);
@@ -32,33 +32,26 @@
32
32
 
33
33
  package nokogiri;
34
34
 
35
+ import static nokogiri.XmlSaxPushParser.terminateExecution;
35
36
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
37
  import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
37
- import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
38
38
 
39
39
  import java.io.ByteArrayInputStream;
40
+ import java.io.InputStream;
40
41
  import java.io.IOException;
41
- import java.nio.charset.Charset;
42
- import java.nio.charset.IllegalCharsetNameException;
43
42
  import java.util.concurrent.Callable;
43
+ import java.util.concurrent.ExecutionException;
44
44
  import java.util.concurrent.ExecutorService;
45
45
  import java.util.concurrent.Executors;
46
- import java.util.EnumSet;
47
46
  import java.util.concurrent.Future;
48
47
  import java.util.concurrent.FutureTask;
49
48
  import java.util.concurrent.ThreadFactory;
50
49
 
51
- import nokogiri.internals.ClosedStreamException;
52
- import nokogiri.internals.NokogiriBlockingQueueInputStream;
53
- import nokogiri.internals.NokogiriHelpers;
54
- import nokogiri.internals.ParserContext;
50
+ import nokogiri.internals.*;
55
51
 
56
52
  import org.jruby.Ruby;
57
53
  import org.jruby.RubyClass;
58
- import org.jruby.RubyException;
59
- import org.jruby.RubyFixnum;
60
54
  import org.jruby.RubyObject;
61
- import org.jruby.RubyString;
62
55
  import org.jruby.anno.JRubyClass;
63
56
  import org.jruby.anno.JRubyMethod;
64
57
  import org.jruby.exceptions.RaiseException;
@@ -74,12 +67,13 @@ import org.jruby.runtime.builtin.IRubyObject;
74
67
  @JRubyClass(name="Nokogiri::HTML::SAX::PushParser")
75
68
  public class HtmlSaxPushParser extends RubyObject {
76
69
  ParserContext.Options options;
77
- IRubyObject optionsRuby;
78
70
  IRubyObject saxParser;
71
+
79
72
  NokogiriBlockingQueueInputStream stream;
80
- ParserTask parserTask = null;
81
- FutureTask<HtmlSaxParserContext> futureTask = null;
82
- ExecutorService executor = null;
73
+
74
+ private ParserTask parserTask = null;
75
+ private FutureTask<HtmlSaxParserContext> futureTask = null;
76
+ private ExecutorService executor = null;
83
77
 
84
78
  public HtmlSaxPushParser(Ruby ruby, RubyClass rubyClass) {
85
79
  super(ruby, rubyClass);
@@ -87,48 +81,46 @@ public class HtmlSaxPushParser extends RubyObject {
87
81
 
88
82
  @Override
89
83
  public void finalize() {
90
- terminateTask(null);
84
+ try {
85
+ terminateImpl();
86
+ }
87
+ catch (Exception e) { /* ignored */ }
91
88
  }
92
89
 
93
- /**
94
- * Silently skips provided encoding
95
- *
96
- */
97
90
  @JRubyMethod
98
91
  public IRubyObject initialize_native(final ThreadContext context,
99
92
  IRubyObject saxParser,
100
93
  IRubyObject fileName,
101
94
  IRubyObject encoding) {
102
- optionsRuby
103
- = invoke(context, context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions"), "new");
104
-
95
+ // NOTE: Silently skips provided encoding
105
96
  options = new ParserContext.Options(0);
106
97
  this.saxParser = saxParser;
107
98
  return this;
108
99
  }
109
100
 
110
- /**
111
- * Returns an integer.
112
- */
101
+ private transient IRubyObject parse_options;
102
+
103
+ private IRubyObject parse_options(final ThreadContext context) {
104
+ if (parse_options == null) {
105
+ parse_options = invoke(context, context.runtime.getClassFromPath("Nokogiri::XML::ParseOptions"), "new");
106
+ }
107
+ return parse_options;
108
+ }
109
+
113
110
  @JRubyMethod(name="options")
114
111
  public IRubyObject getOptions(ThreadContext context) {
115
- return invoke(context, optionsRuby, "options");
112
+ return invoke(context, parse_options(context), "options");
116
113
  }
117
114
 
118
- /**
119
- * <code>val</code> is an integer.
120
- */
121
115
  @JRubyMethod(name="options=")
122
- public IRubyObject setOptions(ThreadContext context, IRubyObject val) {
123
- invoke(context, optionsRuby, "options=", val);
124
- options =
125
- new ParserContext.Options(val.convertToInteger().getLongValue());
116
+ public IRubyObject setOptions(ThreadContext context, IRubyObject opts) {
117
+ invoke(context, parse_options(context), "options=", opts);
118
+ options = new ParserContext.Options(opts.convertToInteger().getLongValue());
126
119
  return getOptions(context);
127
120
  }
128
121
 
129
122
  @JRubyMethod
130
- public IRubyObject native_write(ThreadContext context, IRubyObject chunk,
131
- IRubyObject isLast) {
123
+ public IRubyObject native_write(ThreadContext context, IRubyObject chunk, IRubyObject isLast) {
132
124
  try {
133
125
  initialize_task(context);
134
126
  } catch (IOException e) {
@@ -136,7 +128,7 @@ public class HtmlSaxPushParser extends RubyObject {
136
128
  }
137
129
  final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk);
138
130
  if (data == null) {
139
- terminateTask(context);
131
+ terminateTask(context.runtime);
140
132
  throw new RaiseException(XmlSyntaxError.createHTMLSyntaxError(context.runtime)); // Nokogiri::HTML::SyntaxError
141
133
  }
142
134
 
@@ -145,33 +137,37 @@ public class HtmlSaxPushParser extends RubyObject {
145
137
  if (isLast.isTrue()) {
146
138
  IRubyObject document = invoke(context, this, "document");
147
139
  invoke(context, document, "end_document");
148
- terminateTask(context);
140
+ terminateTask(context.runtime);
149
141
  } else {
150
142
  try {
151
- Future<Void> task = stream.addChunk(data);
152
- task.get();
153
- } catch (ClosedStreamException ex) {
154
- // this means the stream is closed, ignore this exception
155
- } catch (Exception e) {
156
- throw context.getRuntime().newRuntimeError(e.getMessage());
143
+ Future<Void> task = stream.addChunk(data);
144
+ task.get();
145
+ }
146
+ catch (ClosedStreamException ex) {
147
+ // this means the stream is closed, ignore this exception
148
+ }
149
+ catch (Exception e) {
150
+ throw context.runtime.newRuntimeError(e.getMessage());
157
151
  }
158
152
 
159
153
  }
160
154
 
161
155
  if (!options.recover && parserTask.getErrorCount() > errorCount0) {
162
- terminateTask(context);
163
- throw new RaiseException(parserTask.getLastError(), true);
156
+ terminateTask(context.runtime);
157
+ throw parserTask.getLastError();
164
158
  }
165
159
 
166
160
  return this;
167
161
  }
168
162
 
163
+ @SuppressWarnings("unchecked")
169
164
  private void initialize_task(ThreadContext context) throws IOException {
170
165
  if (futureTask == null || stream == null) {
171
166
  stream = new NokogiriBlockingQueueInputStream();
172
167
 
173
- parserTask = new ParserTask(context, saxParser);
174
- futureTask = new FutureTask<HtmlSaxParserContext>(parserTask);
168
+ assert saxParser != null : "saxParser null";
169
+ parserTask = new ParserTask(context, saxParser, stream);
170
+ futureTask = new FutureTask<HtmlSaxParserContext>((Callable) parserTask);
175
171
  executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
176
172
  @Override
177
173
  public Thread newThread(Runnable r) {
@@ -185,56 +181,42 @@ public class HtmlSaxPushParser extends RubyObject {
185
181
  }
186
182
  }
187
183
 
188
- private synchronized void terminateTask(ThreadContext context) {
184
+ private void terminateTask(final Ruby runtime) {
185
+ if (executor == null) return;
186
+
189
187
  try {
190
- Future<Void> task = stream.addChunk(NokogiriBlockingQueueInputStream.END);
191
- task.get();
192
- } catch (ClosedStreamException ex) {
193
- // ignore this exception, it means the stream was closed
194
- } catch (Exception e) {
195
- if (context != null)
196
- throw context.getRuntime().newRuntimeError(e.getMessage());
188
+ terminateImpl();
189
+ }
190
+ catch (InterruptedException e) {
191
+ throw runtime.newRuntimeError(e.toString());
197
192
  }
198
- futureTask.cancel(true);
199
- executor.shutdown();
200
- executor = null;
201
- stream = null;
202
- futureTask = null;
193
+ catch (Exception e) {
194
+ throw runtime.newRuntimeError(e.toString());
195
+ }
196
+ }
197
+
198
+ private synchronized void terminateImpl() throws InterruptedException, ExecutionException {
199
+ terminateExecution(executor, stream, futureTask);
200
+
201
+ executor = null; stream = null; futureTask = null;
202
+ }
203
+
204
+ private static HtmlSaxParserContext parse(final Ruby runtime, final InputStream stream) {
205
+ RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML::SAX::ParserContext");
206
+ return HtmlSaxParserContext.parse_stream(runtime, klazz, stream);
203
207
  }
204
-
205
-
206
- private class ParserTask implements Callable<HtmlSaxParserContext> {
207
- private final ThreadContext context;
208
- private final IRubyObject handler;
209
- private final HtmlSaxParserContext parser;
210
-
211
- private ParserTask(ThreadContext context, IRubyObject handler) {
212
- RubyClass klazz = getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::SAX::ParserContext");
213
- this.context = context;
214
- this.handler = handler;
215
- this.parser = (HtmlSaxParserContext) HtmlSaxParserContext.parse_stream(context, klazz, stream);
208
+
209
+ static class ParserTask extends XmlSaxPushParser.ParserTask /* <HtmlSaxPushParser> */ {
210
+
211
+ private ParserTask(ThreadContext context, IRubyObject handler, InputStream stream) {
212
+ super(context, handler, parse(context.runtime, stream), stream);
216
213
  }
217
214
 
218
215
  @Override
219
216
  public HtmlSaxParserContext call() throws Exception {
220
- try {
221
- parser.parse_with(context, handler);
222
- } finally {
223
- // we have to close the stream before exiting, otherwise someone
224
- // can add a chunk and block on task.get() forever.
225
- stream.close();
226
- }
227
- return parser;
228
- }
229
-
230
- private synchronized int getErrorCount() {
231
- // check for null because thread may not have started yet
232
- if (parser.getNokogiriHandler() == null) return 0;
233
- else return parser.getNokogiriHandler().getErrorCount();
217
+ return (HtmlSaxParserContext) super.call();
234
218
  }
235
219
 
236
- private synchronized RubyException getLastError() {
237
- return (RubyException) parser.getNokogiriHandler().getLastError();
238
- }
239
220
  }
221
+
240
222
  }