nokogiri 1.6.5-java → 1.6.6.1-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +5 -0
  3. data/.travis.yml +10 -20
  4. data/CHANGELOG.ja.rdoc +28 -1
  5. data/CHANGELOG.rdoc +28 -1
  6. data/Gemfile +1 -1
  7. data/Manifest.txt +5 -1
  8. data/README.ja.rdoc +10 -9
  9. data/README.rdoc +6 -9
  10. data/ROADMAP.md +15 -3
  11. data/Rakefile +1 -3
  12. data/bin/nokogiri +48 -8
  13. data/ext/java/nokogiri/HtmlSaxParserContext.java +1 -1
  14. data/ext/java/nokogiri/HtmlSaxPushParser.java +244 -0
  15. data/ext/java/nokogiri/NokogiriService.java +9 -0
  16. data/ext/java/nokogiri/XmlComment.java +2 -0
  17. data/ext/java/nokogiri/XmlNode.java +57 -30
  18. data/ext/java/nokogiri/XmlSyntaxError.java +11 -9
  19. data/ext/nokogiri/extconf.rb +18 -3
  20. data/ext/nokogiri/xml_comment.c +17 -2
  21. data/ext/nokogiri/xml_node.c +66 -6
  22. data/ext/nokogiri/xml_syntax_error.c +4 -0
  23. data/ext/nokogiri/xml_syntax_error.h +1 -0
  24. data/lib/nokogiri.rb +2 -2
  25. data/lib/nokogiri/decorators/slop.rb +7 -8
  26. data/lib/nokogiri/html/document_fragment.rb +0 -2
  27. data/lib/nokogiri/html/sax/push_parser.rb +22 -2
  28. data/lib/nokogiri/nokogiri.jar +0 -0
  29. data/lib/nokogiri/version.rb +1 -1
  30. data/lib/nokogiri/xml.rb +1 -0
  31. data/lib/nokogiri/xml/document.rb +4 -4
  32. data/lib/nokogiri/xml/document_fragment.rb +39 -2
  33. data/lib/nokogiri/xml/node.rb +11 -181
  34. data/lib/nokogiri/xml/node_set.rb +41 -85
  35. data/lib/nokogiri/xml/searchable.rb +221 -0
  36. data/ports/patches/sort-patches-by-date +25 -0
  37. data/test/css/test_nthiness.rb +1 -1
  38. data/test/html/sax/test_push_parser.rb +87 -0
  39. data/test/html/test_document.rb +20 -5
  40. data/test/html/test_document_fragment.rb +25 -0
  41. data/test/xml/test_attr.rb +5 -2
  42. data/test/xml/test_builder.rb +27 -1
  43. data/test/xml/test_comment.rb +11 -0
  44. data/test/xml/test_document.rb +34 -0
  45. data/test/xml/test_document_fragment.rb +40 -9
  46. data/test/xml/test_namespace.rb +1 -0
  47. data/test/xml/test_node.rb +37 -1
  48. data/test/xml/test_node_set.rb +56 -36
  49. data/test/xml/test_xpath.rb +65 -19
  50. data/test_all +11 -1
  51. metadata +12 -7
  52. data/tasks/nokogiri.org.rb +0 -24
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1148b14986575ffdf88e32369713e071b058ed5b
4
- data.tar.gz: aff3cd8473522f0eece98be2bcde6bc1fdbc35d2
3
+ metadata.gz: c221c2bb5589110b400152cbbbf0785d8da989d0
4
+ data.tar.gz: cc543c794183d4aae095e6dfa31dbaf204f2febb
5
5
  SHA512:
6
- metadata.gz: 60e01772a9bb88140f6fec66ea489ea19d24d515c209ddb98250183c8a7afee3fcdbd17c767ee52dc1eb81b53afc5002f8c7b36cd01329a8ad863b119a139753
7
- data.tar.gz: b8e5157309140b6c767a34adf1f2d700fad690dd373638cb108cc6969873dc1e8b600c5f37a59b1080c3d835d1484b218c2169f2f4c952d0c1e0bc2d9ebeb61c
6
+ metadata.gz: bfbe69e4d43e664d0fe03ff63e309218859c8680f0595d62c6953f978e0ed24ea208d4b289693b64b5e2634aa48375674f1ca7aa66b9f6052d1c956fc0481e4b
7
+ data.tar.gz: b18823e13651c2c60e71861f2995bf1a01224df6a8473c78c7e7d2fe8ae4e0a853aebf209610b18aa028601d38cb13f0926e0c15d9a99fcce9ddc8f27ee354cd
data/.cross_rubies ADDED
@@ -0,0 +1,5 @@
1
+ 1.9.3-p547:i586-mingw32msvc
2
+ 2.0.0-p576:i686-w64-mingw32
3
+ 2.0.0-p576:x86_64-w64-mingw32
4
+ 2.1.3:i686-w64-mingw32
5
+ 2.1.3:x86_64-w64-mingw32
data/.travis.yml CHANGED
@@ -1,12 +1,16 @@
1
1
  language: ruby
2
+
2
3
  rvm:
3
- - 1.9.3
4
- - 2.0.0
5
- - 2.1.3
6
- - ruby-head
4
+ - ruby-1.9.2
5
+ - ruby-1.9.3
6
+ - ruby-2.0
7
+ - ruby-2.1
8
+ - ruby-2.2
7
9
  - jruby-19mode
8
- - jruby-20mode
10
+ - jruby-1.7.18
11
+ - jruby-9.0.0.0.pre1
9
12
  - rbx-19mode
13
+ - rbx-2
10
14
 
11
15
  os:
12
16
  - linux
@@ -14,22 +18,8 @@ os:
14
18
 
15
19
  jdk:
16
20
  - openjdk7
17
- - openjdk6
18
21
 
19
22
  matrix:
20
23
  allow_failures:
21
24
  - os: osx
22
- - rvm: rbx-19mode
23
- # currently broken on Travis CI
24
- - rvm: jruby-20mode
25
- exclude:
26
- - rvm: 1.9.3
27
- jdk: openjdk7
28
- - rvm: 2.0.0
29
- jdk: openjdk7
30
- - rvm: 2.1.3
31
- jdk: openjdk7
32
- - rvm: ruby-head
33
- jdk: openjdk7
34
- - rvm: rbx-19mode
35
- jdk: openjdk7
25
+ - rvm: jruby-9.0.0.0.pre1
data/CHANGELOG.ja.rdoc CHANGED
@@ -1,4 +1,31 @@
1
- === 1.6.5 / 未リリース
1
+ === 1.6.6.1 / 2015年01月22日
2
+
3
+ Note that 1.6.6.0 was not released.
4
+
5
+ ==== 機能
6
+
7
+ * Unified Node and NodeSet implementations of #search, #xpath and #css.
8
+ * Added Node#lang and Node#lang=.
9
+ * bin/nokogiri passes the URI to parse() if an HTTP URL is given.
10
+ * bin/nokogiri now loads ~/.nokogirirc so users can define helper methods, etc.
11
+ * bin/nokogiri can be configured to use Pry instead of IRB by adding a couple of lines to ~/.nokogirirc. (#1198)
12
+ * bin/nokogiri can better handle URLs from STDIN (aiding use of xargs). (#1065)
13
+ * JRuby 9K support.
14
+
15
+
16
+ ==== バグ修正
17
+
18
+ * DocumentFragment#search now matches against root nodes. (#1205)
19
+ * (MRI) More fixes related to handling libxml2 parse errors during DocumentFragment#dup. (#1196)
20
+ * (JRuby) Builder now handles namespace hrefs properly when there is a default ns. (#1039)
21
+ * (JRuby) Clear the XPath cache on attr removal. (#1109)
22
+ * `XML::Comment.new` argument types are now consistent and safe (and documented) across MRI and JRuby. (#1224)
23
+ * (MRI) Restoring support for Ruby 1.9.2 that was broken in v1.6.4.1 and v1.6.5. (#1207)
24
+ * Check if `zlib` is available before building `libxml2`. (#1188)
25
+ * (JRuby) HtmlSaxPushParser now exists. (#1147) (Thanks, Piotr Szmielew!)
26
+
27
+
28
+ === 1.6.5 / 2014年11月26日
2
29
 
3
30
  ==== 機能
4
31
 
data/CHANGELOG.rdoc CHANGED
@@ -1,4 +1,31 @@
1
- === 1.6.5 / unreleased
1
+ === 1.6.6.1 / 2015-01-22
2
+
3
+ Note that 1.6.6.0 was not released.
4
+
5
+ ==== Features
6
+
7
+ * Unified Node and NodeSet implementations of #search, #xpath and #css.
8
+ * Added Node#lang and Node#lang=.
9
+ * bin/nokogiri passes the URI to parse() if an HTTP URL is given.
10
+ * bin/nokogiri now loads ~/.nokogirirc so users can define helper methods, etc.
11
+ * bin/nokogiri can be configured to use Pry instead of IRB by adding a couple of lines to ~/.nokogirirc. (#1198)
12
+ * bin/nokogiri can better handle URLs from STDIN (aiding use of xargs). (#1065)
13
+ * JRuby 9K support.
14
+
15
+
16
+ ==== Bug fixes
17
+
18
+ * DocumentFragment#search now matches against root nodes. (#1205)
19
+ * (MRI) More fixes related to handling libxml2 parse errors during DocumentFragment#dup. (#1196)
20
+ * (JRuby) Builder now handles namespace hrefs properly when there is a default ns. (#1039)
21
+ * (JRuby) Clear the XPath cache on attr removal. (#1109)
22
+ * `XML::Comment.new` argument types are now consistent and safe (and documented) across MRI and JRuby. (#1224)
23
+ * (MRI) Restoring support for Ruby 1.9.2 that was broken in v1.6.4.1 and v1.6.5. (#1207)
24
+ * Check if `zlib` is available before building `libxml2`. (#1188)
25
+ * (JRuby) HtmlSaxPushParser now exists. (#1147) (Thanks, Piotr Szmielew!)
26
+
27
+
28
+ === 1.6.5 / 2014-11-26
2
29
 
3
30
  ==== Features
4
31
 
data/Gemfile CHANGED
@@ -8,7 +8,7 @@ gem "mini_portile", "~>0.6.0"
8
8
 
9
9
  gem "rdoc", "~>4.0", :group => [:development, :test]
10
10
  gem "hoe-bundler", ">=1.1", :group => [:development, :test]
11
- gem "hoe-debugging", ">=1.0.3", :group => [:development, :test]
11
+ gem "hoe-debugging", "~>1.2.0", :group => [:development, :test]
12
12
  gem "hoe-gemspec", ">=1.0", :group => [:development, :test]
13
13
  gem "hoe-git", ">=1.4", :group => [:development, :test]
14
14
  gem "minitest", "~>2.2.2", :group => [:development, :test]
data/Manifest.txt CHANGED
@@ -1,4 +1,5 @@
1
1
  .autotest
2
+ .cross_rubies
2
3
  .editorconfig
3
4
  .gemtest
4
5
  .travis.yml
@@ -21,6 +22,7 @@ ext/java/nokogiri/HtmlDocument.java
21
22
  ext/java/nokogiri/HtmlElementDescription.java
22
23
  ext/java/nokogiri/HtmlEntityLookup.java
23
24
  ext/java/nokogiri/HtmlSaxParserContext.java
25
+ ext/java/nokogiri/HtmlSaxPushParser.java
24
26
  ext/java/nokogiri/NokogiriService.java
25
27
  ext/java/nokogiri/XmlAttr.java
26
28
  ext/java/nokogiri/XmlAttributeDecl.java
@@ -230,6 +232,7 @@ lib/nokogiri/xml/sax/parser.rb
230
232
  lib/nokogiri/xml/sax/parser_context.rb
231
233
  lib/nokogiri/xml/sax/push_parser.rb
232
234
  lib/nokogiri/xml/schema.rb
235
+ lib/nokogiri/xml/searchable.rb
233
236
  lib/nokogiri/xml/syntax_error.rb
234
237
  lib/nokogiri/xml/text.rb
235
238
  lib/nokogiri/xml/xpath.rb
@@ -253,12 +256,12 @@ ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.
253
256
  ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch
254
257
  ports/patches/libxslt/0014-Fix-for-bug-436589.patch
255
258
  ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch
259
+ ports/patches/sort-patches-by-date
256
260
  suppressions/README.txt
257
261
  suppressions/nokogiri_ree-1.8.7.358.supp
258
262
  suppressions/nokogiri_ruby-1.8.7.370.supp
259
263
  suppressions/nokogiri_ruby-1.9.2.320.supp
260
264
  suppressions/nokogiri_ruby-1.9.3.327.supp
261
- tasks/nokogiri.org.rb
262
265
  tasks/test.rb
263
266
  test/css/test_nthiness.rb
264
267
  test/css/test_parser.rb
@@ -303,6 +306,7 @@ test/files/xinclude.xml
303
306
  test/helper.rb
304
307
  test/html/sax/test_parser.rb
305
308
  test/html/sax/test_parser_context.rb
309
+ test/html/sax/test_push_parser.rb
306
310
  test/html/test_builder.rb
307
311
  test/html/test_document.rb
308
312
  test/html/test_document_encoding.rb
data/README.ja.rdoc CHANGED
@@ -38,24 +38,25 @@ IRCのチャンネルはfreenodeの #nokogiri です。
38
38
 
39
39
  require 'nokogiri'
40
40
  require 'open-uri'
41
-
42
- doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
43
-
41
+
42
+ # Fetch and parse HTML document
43
+ doc = Nokogiri::HTML(open('http://www.nokogiri.org/tutorials/installing_nokogiri.html'))
44
+
44
45
  ####
45
46
  # Search for nodes by css
46
- doc.css('h3.r a.l').each do |link|
47
+ doc.css('nav ul.menu li a').each do |link|
47
48
  puts link.content
48
49
  end
49
-
50
+
50
51
  ####
51
52
  # Search for nodes by xpath
52
- doc.xpath('//h3/a[@class="l"]').each do |link|
53
+ doc.xpath('//h2 | //h3').each do |link|
53
54
  puts link.content
54
55
  end
55
-
56
+
56
57
  ####
57
58
  # Or mix and match.
58
- doc.search('h3.r a.l', '//h3/a[@class="l"]').each do |link|
59
+ doc.search('code.sh', '//h2').each do |link|
59
60
  puts link.content
60
61
  end
61
62
 
@@ -81,7 +82,7 @@ IRCのチャンネルはfreenodeの #nokogiri です。
81
82
 
82
83
  (The MIT License)
83
84
 
84
- Copyright (c) 2008 - 2014:
85
+ Copyright (c) 2008 - 2015:
85
86
 
86
87
  * {Aaron Patterson}[http://tenderlovemaking.com]
87
88
  * {Mike Dalessio}[http://mike.daless.io]
data/README.rdoc CHANGED
@@ -45,27 +45,24 @@ The IRC channel is #nokogiri on freenode.
45
45
  require 'nokogiri'
46
46
  require 'open-uri'
47
47
 
48
- # Get a Nokogiri::HTML::Document for the page we’re interested in...
49
-
50
- doc = Nokogiri::HTML(open('http://www.google.com/search?q=sparklemotion'))
51
-
52
- # Do funky things with it using Nokogiri::XML::Node methods...
48
+ # Fetch and parse HTML document
49
+ doc = Nokogiri::HTML(open('http://www.nokogiri.org/tutorials/installing_nokogiri.html'))
53
50
 
54
51
  ####
55
52
  # Search for nodes by css
56
- doc.css('h3.r a').each do |link|
53
+ doc.css('nav ul.menu li a').each do |link|
57
54
  puts link.content
58
55
  end
59
56
 
60
57
  ####
61
58
  # Search for nodes by xpath
62
- doc.xpath('//h3/a').each do |link|
59
+ doc.xpath('//h2 | //h3').each do |link|
63
60
  puts link.content
64
61
  end
65
62
 
66
63
  ####
67
64
  # Or mix and match.
68
- doc.search('h3.r a.l', '//h3/a').each do |link|
65
+ doc.search('code.sh', '//h2').each do |link|
69
66
  puts link.content
70
67
  end
71
68
 
@@ -150,7 +147,7 @@ Then run rake:
150
147
 
151
148
  (The MIT License)
152
149
 
153
- Copyright (c) 2008 - 2014:
150
+ Copyright (c) 2008 - 2015:
154
151
 
155
152
  * {Aaron Patterson}[http://tenderlovemaking.com]
156
153
  * {Mike Dalessio}[http://mike.daless.io]
data/ROADMAP.md CHANGED
@@ -57,6 +57,7 @@
57
57
  - https://github.com/sparklemotion/nokogiri/issues/370
58
58
  - https://github.com/sparklemotion/nokogiri/issues/454
59
59
  - https://github.com/sparklemotion/nokogiri/issues/572
60
+ could we fix this by making DocumentFragment be a subclass of NodeSet?
60
61
 
61
62
 
62
63
  ## Better Syntax for custom XPath function handler
@@ -70,9 +71,6 @@
70
71
  * we should standardize on a hash of options for these and other calls
71
72
  * what should NodeSet#xpath return?
72
73
  * https://github.com/sparklemotion/nokogiri/issues/656
73
- * also, clean up or unify the implementations of #xpath-and-friends in Node and NodeSet
74
- * implementations are very similar, but no shared code :(
75
- * decorate nodes in a consistent manner
76
74
 
77
75
  ## Encoding
78
76
 
@@ -87,3 +85,17 @@ Somebody who knows encoding well should head this up.
87
85
 
88
86
  It's fundamentally broken, in that we can't stop people from crashing
89
87
  their application if they want to use object reference unsafely.
88
+
89
+
90
+ ## Class methods that require Document
91
+
92
+ There are a few methods, like `Nokogiri::XML::Comment.new`, that
93
+ require a Document object.
94
+
95
+ We should probably make Document instance methods to wrap this, since
96
+ it's a non-obvious expectation and thus fails as a convention.
97
+
98
+ So, instead, let's make alternative methods like
99
+ `Nokogiri::XML::Document#new_comment`, and recommend those as the
100
+ proper convention.
101
+
data/Rakefile CHANGED
@@ -101,8 +101,6 @@ CROSS_RUBIES = File.read('.cross_rubies').lines.flat_map { |line|
101
101
 
102
102
  ENV['RUBY_CC_VERSION'] ||= CROSS_RUBIES.map(&:ver).uniq.join(":")
103
103
 
104
- require 'tasks/nokogiri.org'
105
-
106
104
  HOE = Hoe.spec 'nokogiri' do
107
105
  developer 'Aaron Patterson', 'aaronp@rubyforge.org'
108
106
  developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
@@ -142,7 +140,7 @@ HOE = Hoe.spec 'nokogiri' do
142
140
 
143
141
  self.extra_dev_deps += [
144
142
  ["hoe-bundler", ">= 1.1"],
145
- ["hoe-debugging", ">= 1.0.3"],
143
+ ["hoe-debugging", "~> 1.2.0"],
146
144
  ["hoe-gemspec", ">= 1.0"],
147
145
  ["hoe-git", ">= 1.4"],
148
146
  ["minitest", "~> 2.2.2"],
data/bin/nokogiri CHANGED
@@ -1,14 +1,41 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'optparse'
3
3
  require 'open-uri'
4
- require 'irb'
5
4
  require 'uri'
6
5
  require 'rubygems'
7
6
  require 'nokogiri'
7
+ autoload :IRB, 'irb'
8
8
 
9
9
  parse_class = Nokogiri
10
10
  encoding = nil
11
11
 
12
+ # This module provides some tunables with the nokogiri CLI for use in
13
+ # your ~/.nokogirirc.
14
+ module Nokogiri::CLI
15
+ class << self
16
+ # Specify the console engine, defaulted to IRB.
17
+ #
18
+ # call-seq:
19
+ # require 'pry'
20
+ # Nokogiri::CLI.console = Pry
21
+ attr_writer :console
22
+
23
+ def console
24
+ case @console
25
+ when Symbol
26
+ Kernel.const_get(@console)
27
+ else
28
+ @console
29
+ end
30
+ end
31
+
32
+ attr_accessor :rcfile
33
+ end
34
+
35
+ self.rcfile = File.expand_path('~/.nokogirirc')
36
+ self.console = :IRB
37
+ end
38
+
12
39
  opts = OptionParser.new do |opts|
13
40
  opts.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
14
41
  opts.define_head "Usage: nokogiri <uri|path> [options]"
@@ -20,11 +47,15 @@ opts = OptionParser.new do |opts|
20
47
  opts.separator ""
21
48
  opts.separator "Options:"
22
49
 
23
- opts.on("--type [TYPE]", [:xml, :html]) do |v|
50
+ opts.on("--type type", "Parse as type: xml or html (default: auto)", [:xml, :html]) do |v|
24
51
  parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v]
25
52
  end
26
53
 
27
- opts.on("-E", "--encoding encoding", "Read as encoding (default #{encoding})") do |v|
54
+ opts.on("-C file", "Specifies initialization file to load (default #{Nokogiri::CLI.rcfile})") do |v|
55
+ Nokogiri::CLI.rcfile = v
56
+ end
57
+
58
+ opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || 'none'})") do |v|
28
59
  encoding = v
29
60
  end
30
61
 
@@ -48,15 +79,24 @@ opts = OptionParser.new do |opts|
48
79
  end
49
80
  opts.parse!
50
81
 
51
- uri = ARGV.shift
82
+ url = ARGV.shift
52
83
 
53
- if uri.to_s.strip.empty? && $stdin.tty?
84
+ if url.to_s.strip.empty? && $stdin.tty?
54
85
  puts opts
55
86
  exit 1
56
87
  end
57
88
 
58
- if $stdin.tty?
59
- @doc = parse_class.parse(open(uri).read, nil, encoding)
89
+ if File.file?(Nokogiri::CLI.rcfile)
90
+ load Nokogiri::CLI.rcfile
91
+ end
92
+
93
+ if url || $stdin.tty?
94
+ case uri = (URI(url) rescue url)
95
+ when URI::HTTP
96
+ @doc = parse_class.parse(uri.read, url, encoding)
97
+ else
98
+ @doc = parse_class.parse(open(url).read, nil, encoding)
99
+ end
60
100
  else
61
101
  @doc = parse_class.parse($stdin, nil, encoding)
62
102
  end
@@ -72,7 +112,7 @@ else
72
112
  eval @script, binding, '<main>'
73
113
  else
74
114
  puts "Your document is stored in @doc..."
75
- IRB.start
115
+ Nokogiri::CLI.console.start
76
116
  end
77
117
  end
78
118
 
data/ext/java/nokogiri/HtmlSaxParserContext.java CHANGED
@@ -226,7 +226,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext {
226
226
  /**
227
227
  * Create a new parser context that will read from a raw input
228
228
  * stream. Not a JRuby method. Meant to be run in a separate
229
- * thread by XmlSaxPushParser.
229
+ * thread by HtmlSaxPushParser.
230
230
  */
231
231
  public static IRubyObject parse_stream(ThreadContext context,
232
232
  IRubyObject klazz,
data/ext/java/nokogiri/HtmlSaxPushParser.java ADDED
@@ -0,0 +1,244 @@
1
+ /**
2
+ * (The MIT License)
3
+ *
4
+ * Copyright (c) 2008 - 2012:
5
+ *
6
+ * * {Aaron Patterson}[http://tenderlovemaking.com]
7
+ * * {Mike Dalessio}[http://mike.daless.io]
8
+ * * {Charles Nutter}[http://blog.headius.com]
9
+ * * {Sergio Arbeo}[http://www.serabe.com]
10
+ * * {Patrick Mahoney}[http://polycrystal.org]
11
+ * * {Yoko Harada}[http://yokolet.blogspot.com]
12
+ *
13
+ * Permission is hereby granted, free of charge, to any person obtaining
14
+ * a copy of this software and associated documentation files (the
15
+ * 'Software'), to deal in the Software without restriction, including
16
+ * without limitation the rights to use, copy, modify, merge, publish,
17
+ * distribute, sublicense, and/or sell copies of the Software, and to
18
+ * permit persons to whom the Software is furnished to do so, subject to
19
+ * the following conditions:
20
+ *
21
+ * The above copyright notice and this permission notice shall be
22
+ * included in all copies or substantial portions of the Software.
23
+ *
24
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
+ */
32
+
33
+ package nokogiri;
34
+
35
+ import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
+ import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
37
+ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
38
+
39
+ import java.io.ByteArrayInputStream;
40
+ import java.io.IOException;
41
+ import java.nio.charset.Charset;
42
+ import java.nio.charset.IllegalCharsetNameException;
43
+ import java.util.concurrent.Callable;
44
+ import java.util.concurrent.ExecutorService;
45
+ import java.util.concurrent.Executors;
46
+ import java.util.EnumSet;
47
+ import java.util.concurrent.Future;
48
+ import java.util.concurrent.FutureTask;
49
+ import java.util.concurrent.ThreadFactory;
50
+
51
+ import nokogiri.internals.ClosedStreamException;
52
+ import nokogiri.internals.NokogiriBlockingQueueInputStream;
53
+ import nokogiri.internals.ParserContext;
54
+
55
+ import org.jruby.Ruby;
56
+ import org.jruby.RubyClass;
57
+ import org.jruby.RubyException;
58
+ import org.jruby.RubyFixnum;
59
+ import org.jruby.RubyObject;
60
+ import org.jruby.RubyString;
61
+ import org.jruby.anno.JRubyClass;
62
+ import org.jruby.anno.JRubyMethod;
63
+ import org.jruby.exceptions.RaiseException;
64
+ import org.jruby.runtime.ThreadContext;
65
+ import org.jruby.runtime.builtin.IRubyObject;
66
+
67
+ /**
68
+ * Class for Nokogiri::HTML::SAX::PushParser
69
+ *
70
+ * @author
71
+ * @author Piotr Szmielew <p.szmielew@ava.waw.pl> - based on Nokogiri::XML::SAX::PushParser
72
+ */
73
+ @JRubyClass(name="Nokogiri::HTML::SAX::PushParser")
74
+ public class HtmlSaxPushParser extends RubyObject {
75
+ ParserContext.Options options;
76
+ IRubyObject optionsRuby;
77
+ IRubyObject saxParser;
78
+ NokogiriBlockingQueueInputStream stream;
79
+ ParserTask parserTask = null;
80
+ FutureTask<HtmlSaxParserContext> futureTask = null;
81
+ ExecutorService executor = null;
82
+
83
+ public HtmlSaxPushParser(Ruby ruby, RubyClass rubyClass) {
84
+ super(ruby, rubyClass);
85
+ }
86
+
87
+ @Override
88
+ public void finalize() {
89
+ terminateTask(null);
90
+ }
91
+
92
+ /**
93
+ * Silently skips provided encoding
94
+ *
95
+ */
96
+ @JRubyMethod
97
+ public IRubyObject initialize_native(final ThreadContext context,
98
+ IRubyObject saxParser,
99
+ IRubyObject fileName,
100
+ IRubyObject encoding) {
101
+ optionsRuby
102
+ = invoke(context, context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions"), "new");
103
+
104
+ options = new ParserContext.Options(0);
105
+ this.saxParser = saxParser;
106
+ return this;
107
+ }
108
+
109
+ /**
110
+ * Returns an integer.
111
+ */
112
+ @JRubyMethod(name="options")
113
+ public IRubyObject getOptions(ThreadContext context) {
114
+ return invoke(context, optionsRuby, "options");
115
+ }
116
+
117
+ /**
118
+ * <code>val</code> is an integer.
119
+ */
120
+ @JRubyMethod(name="options=")
121
+ public IRubyObject setOptions(ThreadContext context, IRubyObject val) {
122
+ invoke(context, optionsRuby, "options=", val);
123
+ options =
124
+ new ParserContext.Options(val.convertToInteger().getLongValue());
125
+ return getOptions(context);
126
+ }
127
+
128
+ @JRubyMethod
129
+ public IRubyObject native_write(ThreadContext context, IRubyObject chunk,
130
+ IRubyObject isLast) {
131
+ try {
132
+ initialize_task(context);
133
+ } catch (IOException e) {
134
+ throw context.getRuntime().newRuntimeError(e.getMessage());
135
+ }
136
+ byte[] data = null;
137
+ if (chunk instanceof RubyString || chunk.respondsTo("to_str")) {
138
+ data = chunk.convertToString().getBytes();
139
+ } else {
140
+ terminateTask(context);
141
+ XmlSyntaxError xmlSyntaxError =
142
+ (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::SyntaxError"));
143
+ throw new RaiseException(xmlSyntaxError);
144
+ }
145
+
146
+ int errorCount0 = parserTask.getErrorCount();;
147
+
148
+
149
+ if (isLast.isTrue()) {
150
+ IRubyObject document = invoke(context, this, "document");
151
+ invoke(context, document, "end_document");
152
+ terminateTask(context);
153
+ } else {
154
+ try {
155
+ Future<Void> task = stream.addChunk(new ByteArrayInputStream(data));
156
+ task.get();
157
+ } catch (ClosedStreamException ex) {
158
+ // this means the stream is closed, ignore this exception
159
+ } catch (Exception e) {
160
+ throw context.getRuntime().newRuntimeError(e.getMessage());
161
+ }
162
+
163
+ }
164
+
165
+ if (!options.recover && parserTask.getErrorCount() > errorCount0) {
166
+ terminateTask(context);
167
+ throw new RaiseException(parserTask.getLastError(), true);
168
+ }
169
+
170
+ return this;
171
+ }
172
+
173
+ private void initialize_task(ThreadContext context) throws IOException {
174
+ if (futureTask == null || stream == null) {
175
+ stream = new NokogiriBlockingQueueInputStream();
176
+
177
+ parserTask = new ParserTask(context, saxParser);
178
+ futureTask = new FutureTask<HtmlSaxParserContext>(parserTask);
179
+ executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
180
+ @Override
181
+ public Thread newThread(Runnable r) {
182
+ Thread t = new Thread(r);
183
+ t.setName("HtmlSaxPushParser");
184
+ t.setDaemon(true);
185
+ return t;
186
+ }
187
+ });
188
+ executor.submit(futureTask);
189
+ }
190
+ }
191
+
192
+ private synchronized void terminateTask(ThreadContext context) {
193
+ try {
194
+ Future<Void> task = stream.addChunk(NokogiriBlockingQueueInputStream.END);
195
+ task.get();
196
+ } catch (ClosedStreamException ex) {
197
+ // ignore this exception, it means the stream was closed
198
+ } catch (Exception e) {
199
+ if (context != null)
200
+ throw context.getRuntime().newRuntimeError(e.getMessage());
201
+ }
202
+ futureTask.cancel(true);
203
+ executor.shutdown();
204
+ executor = null;
205
+ stream = null;
206
+ futureTask = null;
207
+ }
208
+
209
+
210
+ private class ParserTask implements Callable<HtmlSaxParserContext> {
211
+ private final ThreadContext context;
212
+ private final IRubyObject handler;
213
+ private final HtmlSaxParserContext parser;
214
+
215
+ private ParserTask(ThreadContext context, IRubyObject handler) {
216
+ RubyClass klazz = getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::SAX::ParserContext");
217
+ this.context = context;
218
+ this.handler = handler;
219
+ this.parser = (HtmlSaxParserContext) HtmlSaxParserContext.parse_stream(context, klazz, stream);
220
+ }
221
+
222
+ @Override
223
+ public HtmlSaxParserContext call() throws Exception {
224
+ try {
225
+ parser.parse_with(context, handler);
226
+ } finally {
227
+ // we have to close the stream before exiting, otherwise someone
228
+ // can add a chunk and block on task.get() forever.
229
+ stream.close();
230
+ }
231
+ return parser;
232
+ }
233
+
234
+ private synchronized int getErrorCount() {
235
+ // check for null because thread may not have started yet
236
+ if (parser.getNokogiriHandler() == null) return 0;
237
+ else return parser.getNokogiriHandler().getErrorCount();
238
+ }
239
+
240
+ private synchronized RubyException getLastError() {
241
+ return (RubyException) parser.getNokogiriHandler().getLastError();
242
+ }
243
+ }
244
+ }