nokogiri 1.5.5.rc3-java → 1.5.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (67) hide show
  1. data/CHANGELOG.ja.rdoc +42 -1
  2. data/CHANGELOG.rdoc +41 -1
  3. data/Manifest.txt +8 -1
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +5 -8
  6. data/ROADMAP.md +6 -2
  7. data/Rakefile +29 -7
  8. data/bin/nokogiri +19 -4
  9. data/build_all +56 -17
  10. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  11. data/ext/java/nokogiri/NokogiriService.java +7 -1
  12. data/ext/java/nokogiri/XmlDocument.java +24 -6
  13. data/ext/java/nokogiri/XmlDocumentFragment.java +2 -26
  14. data/ext/java/nokogiri/XmlDtd.java +13 -2
  15. data/ext/java/nokogiri/XmlElement.java +3 -12
  16. data/ext/java/nokogiri/XmlEntityReference.java +32 -8
  17. data/ext/java/nokogiri/XmlNamespace.java +2 -1
  18. data/ext/java/nokogiri/XmlNode.java +83 -31
  19. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  20. data/ext/java/nokogiri/XmlText.java +2 -14
  21. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  22. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  23. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  24. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  25. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  26. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  27. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  28. data/ext/java/nokogiri/internals/NokogiriHandler.java +22 -14
  29. data/ext/java/nokogiri/internals/NokogiriHelpers.java +25 -7
  30. data/ext/java/nokogiri/internals/ParserContext.java +2 -1
  31. data/ext/java/nokogiri/internals/ReaderNode.java +2 -1
  32. data/ext/java/nokogiri/internals/SaveContextVisitor.java +100 -102
  33. data/ext/java/nokogiri/internals/XmlDomParserContext.java +10 -4
  34. data/ext/nokogiri/extconf.rb +1 -0
  35. data/ext/nokogiri/xml_document.c +2 -2
  36. data/ext/nokogiri/xml_node.c +31 -14
  37. data/ext/nokogiri/xml_sax_parser.c +16 -0
  38. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  39. data/lib/nekodtd.jar +0 -0
  40. data/lib/nokogiri/nokogiri.jar +0 -0
  41. data/lib/nokogiri/version.rb +4 -1
  42. data/lib/nokogiri/xml/document.rb +8 -6
  43. data/lib/nokogiri/xml/document_fragment.rb +10 -1
  44. data/lib/nokogiri/xml/node.rb +58 -61
  45. data/lib/nokogiri/xml/sax/document.rb +7 -0
  46. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  47. data/lib/nokogiri/xml/xpath_context.rb +1 -1
  48. data/lib/nokogiri/xslt.rb +1 -1
  49. data/tasks/cross_compile.rb +5 -8
  50. data/test/files/test_document_url/bar.xml +2 -0
  51. data/test/files/test_document_url/document.dtd +4 -0
  52. data/test/files/test_document_url/document.xml +6 -0
  53. data/test/helper.rb +6 -0
  54. data/test/html/test_document.rb +23 -0
  55. data/test/html/test_document_fragment.rb +5 -0
  56. data/test/test_xslt_transforms.rb +30 -0
  57. data/test/xml/sax/test_parser.rb +20 -1
  58. data/test/xml/test_builder.rb +42 -0
  59. data/test/xml/test_document.rb +64 -9
  60. data/test/xml/test_document_fragment.rb +7 -0
  61. data/test/xml/test_entity_reference.rb +12 -0
  62. data/test/xml/test_namespace.rb +20 -0
  63. data/test/xml/test_node.rb +79 -0
  64. data/test/xml/test_node_attributes.rb +29 -0
  65. data/test/xml/test_unparented_node.rb +9 -0
  66. data/test_all +11 -14
  67. metadata +744 -560
@@ -1,7 +1,47 @@
1
- == 1.5.5 / unreleased
1
+ == 1.5.6 / unreleased
2
2
 
3
3
  * Features
4
4
 
5
+ * XML::Document#collect_namespaces メソッドのパフォーマンスを改善した。 #761 (ありがとう、Juergen Mangler!)
6
+ * SAX::Document#processing_instructionに新しいcallbackが追加 (ありがとう、Kitaiti Makoto!)
7
+ * Node#native_content= メソッドでエスケープされていない文字列をセットできるようにした。 #768
8
+ * 名前空間を付けて xpath 式を書く場合に、シンボルキーを使えるようにした。#729 (ありがとう、Ben Langfeld.)
9
+ * XML::Node#[]= メソッド内で受け取った引数を文字列に変換するようにした。#729 (ありがとう、Ben Langfeld.)
10
+ * bin/nokogiri コマンドが $stdin からドキュメントを読んで処理できるようにした。
11
+ * bin/nokogiri -e を指定することでコマンドラインプログラムを実行できるようにした。
12
+ * bin/nokogiri --version will print the Xerces and NekoHTML versions when run with JRuby.
13
+
14
+
15
+ * Bugfixes
16
+ * Nokogiri はこのバージョンからXSLT変換のエラーを検出するようになった。#731 (ありがとう、Justin Fitzsimmons!)
17
+ * Don't throw an Error when trying to replace top-level text node in DocumentFragment. #775
18
+ * SAXパーザに不正なエンコーディングが渡された場合はArgumentErrorを投げるようにした。#756 (ありがとう、Bradley Schaefer!)
19
+ * [JRuby] XML宣言の前にスペースがあると、ドキュメントのパーズに失敗する。(#748の修正でこれもなおっている) #790
20
+ * [JRuby] Nokogiri::XML::Node#content のJRubyの振る舞いがCRubyと同じではない。#794, #797
21
+ * [JRuby] で '#' で始まる文字列を名前とする EntityReference を作ろうとすると INVALID_CHARACTER_ERR という例外が発生する。 #719
22
+ * [JRuby] では Nodeのサブクラスのnamespaceを正しく文字列に変換しない。 #715
23
+ * [JRuby] Node#contentがこのバージョンから改行コードを正しく表示するようになった。#737 (ありがとう、Piotr Szmielew!)
24
+ * [JRuby] recover optionが指定されている場合は宣言の無いネームスペースを無視するようにした。#748
25
+ * [JRuby] ネームスペースを検出するXPathが続けて実行されても例外を投げてはいけない。#764
26
+ * [JRuby] XMLを表示(出力)する際のホワイトスペースの扱いをlibxml2バージョンとさらに同様になるようにした。#771
27
+ * [JRuby] ネームスペース付きの属性を含むXMLドキュメントを文字列でbuilderに追加しようとすると失敗する。#770
28
+ * [JRuby] Nokogiri::XML::Document#wrapを使って生成したドキュメントに << でノードを追加しようとすると
29
+ undefined method `length' for nil:NilClassのエラーが発生する #781
30
+ * [JRuby] 開いているファイルのデスクリプタを閉じようとすると、"bad file descriptor" が発生する。#495
31
+ * [JRuby] JRuby/CRuby incompatibility for attribute decorators. #785
32
+ * [JRuby] Issues parsing valid XML with no internal subset in the DTD. #547, #811
33
+ * [JRuby] Issues parsing valid node content when it contains colons. #728
34
+ * [JRuby] Correctly parse the doc type of html documents. #733
35
+ * [JRuby] Include dtd in the xml output when a builder is used with create_internal_subset. #751
36
+ * [JRuby] builder requires textwrappers for valid utf8 in jruby, not in mri. #784
37
+
38
+
39
+ == 1.5.5 / 2012年6月24日
40
+
41
+ * Features
42
+
43
+ * Much-improved support for JRuby in 1.9 mode! Yay!
44
+
5
45
  * Bugfixes
6
46
 
7
47
  * JRuby Nokogiri の add_previous_sibling が以前は動いていたのに今は動かない(1.5.0 -> 1.5.1)。 #691 (ありがとう, John Shahid!)
@@ -15,6 +55,7 @@
15
55
  * JRuby で Nokogiri::XML::Node を継承したクラスを定義すると、namespace が表示されない。 #695
16
56
  * JRuby で RDF::RDFXML::Writer をインスタンス化しようとすると NAMESPACE_ERR (org.w3c.dom.DOMException) が発生する. #683
17
57
  * JRuby で xpath に namespaces を指定すると例外が発生する. #493
58
+ * JRuby の Entity 解決は C version の Nokogiri と同じ結果にならないといけない。#704, #647, #703
18
59
 
19
60
 
20
61
  == 1.5.4 / 2012年6月12日
@@ -1,7 +1,46 @@
1
- == 1.5.5 / unreleased
1
+ == 1.5.6 / unreleased
2
2
 
3
3
  * Features
4
4
 
5
+ * Improved performance of XML::Document#collect_namespaces. #761 (Thanks, Juergen Mangler!)
6
+ * New callback SAX::Document#processing_instruction (Thanks, Kitaiti Makoto!)
7
+ * Node#native_content= allows setting unescaped node content. #768
8
+ * XPath lookup with namespaces supports symbol keys. #729 (Thanks, Ben Langfeld.)
9
+ * XML::Node#[]= stringifies values. #729 (Thanks, Ben Langfeld.)
10
+ * bin/nokogiri will process a document from $stdin
11
+ * bin/nokogiri -e will execute a program from the command line
12
+ * bin/nokogiri --version will print the Xerces and NekoHTML versions when run with JRuby.
13
+
14
+
15
+ * Bugfixes
16
+ * Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
17
+ * Don't throw an Error when trying to replace top-level text node in DocumentFragment. #775
18
+ * Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
19
+ * [JRuby] space prior to xml preamble causes nokogiri to fail parsing. (fixed along with #748) #790
20
+ * [JRuby] Fixed the bug Nokogiri::XML::Node#content inconsistency between Java and C. #794, #797
21
+ * [JRuby] raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. #719
22
+ * [JRuby] doesn't coerce namespaces out of strings on a direct subclass of Node. #715
23
+ * [JRuby] Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
24
+ * [JRuby] Unknown namespaces are ignored when the recover option is used. #748
25
+ * [JRuby] XPath queries for namespaces should not throw exceptions when called twice in a row. #764
26
+ * [JRuby] More consistent (with libxml2) whitespace formatting when emitting XML. #771
27
+ * [JRuby] namespaced attributes broken when appending raw xml to builder. #770
28
+ * [JRuby] Nokogiri::XML::Document#wrap raises undefined method `length' for nil:NilClass when trying to << to a node. #781
29
+ * [JRuby] Fixed "bad file descriptor" bug when closing open file descriptors. #495
30
+ * [JRuby] JRuby/CRuby incompatibility for attribute decorators. #785
31
+ * [JRuby] Issues parsing valid XML with no internal subset in the DTD. #547, #811
32
+ * [JRuby] Issues parsing valid node content when it contains colons. #728
33
+ * [JRuby] Correctly parse the doc type of html documents. #733
34
+ * [JRuby] Include dtd in the xml output when a builder is used with create_internal_subset. #751
35
+ * [JRuby] builder requires textwrappers for valid utf8 in jruby, not in mri. #784
36
+
37
+
38
+ == 1.5.5 / 2012-06-24
39
+
40
+ * Features
41
+
42
+ * Much-improved support for JRuby in 1.9 mode! Yay!
43
+
5
44
  * Bugfixes
6
45
 
7
46
  * Regression in JRuby Nokogiri add_previous_sibling (1.5.0 -> 1.5.1) #691 (Thanks, John Shahid!)
@@ -15,6 +54,7 @@
15
54
  * JRuby renders nodes without their namespace when subclassing Node. #695
16
55
  * JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating RDF::RDFXML::Writer. #683
17
56
  * JRuby is not able to use namespaces in xpath. #493
57
+ * JRuby's Entity resolving should be consistent with C-Nokogiri #704, #647, #703
18
58
 
19
59
 
20
60
  == 1.5.4 / 2012-06-12
@@ -43,8 +43,13 @@ ext/java/nokogiri/XmlSyntaxError.java
43
43
  ext/java/nokogiri/XmlText.java
44
44
  ext/java/nokogiri/XmlXpathContext.java
45
45
  ext/java/nokogiri/XsltStylesheet.java
46
+ ext/java/nokogiri/internals/ClosedStreamException.java
46
47
  ext/java/nokogiri/internals/HtmlDomParserContext.java
48
+ ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java
47
49
  ext/java/nokogiri/internals/NokogiriDocumentCache.java
50
+ ext/java/nokogiri/internals/NokogiriDomParser.java
51
+ ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java
52
+ ext/java/nokogiri/internals/NokogiriEntityResolver.java
48
53
  ext/java/nokogiri/internals/NokogiriErrorHandler.java
49
54
  ext/java/nokogiri/internals/NokogiriHandler.java
50
55
  ext/java/nokogiri/internals/NokogiriHelpers.java
@@ -62,7 +67,6 @@ ext/java/nokogiri/internals/ReaderNode.java
62
67
  ext/java/nokogiri/internals/SaveContextVisitor.java
63
68
  ext/java/nokogiri/internals/SchemaErrorHandler.java
64
69
  ext/java/nokogiri/internals/XmlDeclHandler.java
65
- ext/java/nokogiri/internals/XmlDomParser.java
66
70
  ext/java/nokogiri/internals/XmlDomParserContext.java
67
71
  ext/java/nokogiri/internals/XmlSaxParser.java
68
72
  ext/java/nokogiri/internals/XsltExtensionFunction.java
@@ -230,6 +234,9 @@ test/files/snuggles.xml
230
234
  test/files/staff.dtd
231
235
  test/files/staff.xml
232
236
  test/files/staff.xslt
237
+ test/files/test_document_url/bar.xml
238
+ test/files/test_document_url/document.dtd
239
+ test/files/test_document_url/document.xml
233
240
  test/files/tlm.html
234
241
  test/files/to_be_xincluded.xml
235
242
  test/files/valid_bar.xml
@@ -1,4 +1,4 @@
1
- = Nokogiri (鋸)
1
+ = Nokogiri (鋸) {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
2
2
 
3
3
  * http://nokogiri.org/
4
4
  * http://github.com/sparklemotion/nokogiri/wikis
@@ -1,4 +1,4 @@
1
- = Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri]
1
+ = Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
2
2
 
3
3
  * http://nokogiri.org
4
4
  * http://github.com/sparklemotion/nokogiri/wikis
@@ -122,13 +122,10 @@ Developing Nokogiri requires racc and rexical to generate the parser and
122
122
  tokenizer. To start development, make sure you have `libxml2` and `libxslt`
123
123
  installed.
124
124
 
125
- Then install hoe and rake-compiler:
125
+ Then install core gems and bootstrap:
126
126
 
127
- $ gem install hoe rake-compiler racc rexical minitest
128
-
129
- Then run rake:
130
-
131
- $ rake
127
+ $ gem install hoe rake-compiler mini_portile
128
+ $ rake newb
132
129
 
133
130
  === Developing on JRuby
134
131
 
@@ -151,7 +148,7 @@ Then run rake:
151
148
 
152
149
  Copyright (c) 2008 - 2012:
153
150
 
154
- * {Aaron Patterson}[http://sparklemotionmaking.com]
151
+ * {Aaron Patterson}[http://tenderlovemaking.com]
155
152
  * {Mike Dalessio}[http://mike.daless.io]
156
153
  * {Charles Nutter}[http://blog.headius.com]
157
154
  * {Sergio Arbeo}[http://www.serabe.com]
data/ROADMAP.md CHANGED
@@ -19,8 +19,9 @@
19
19
  * https://github.com/sparklemotion/nokogiri/issues/679
20
20
  Mixing in Enumerable has some unintended consequences; plus we want to improve the attributes API
21
21
 
22
- * (closed) https://github.com/sparklemotion/nokogiri/issues/666
23
- Some ideas for a better attributes API?
22
+ * Some ideas for a better attributes API?
23
+ * (closed) https://github.com/sparklemotion/nokogiri/issues/666
24
+ * https://github.com/sparklemotion/nokogiri/issues/765
24
25
 
25
26
 
26
27
  ## improve CSS query parsing
@@ -69,6 +70,9 @@
69
70
  * we should standardize on a hash of options for these and other calls
70
71
  * what should NodeSet#xpath return?
71
72
  * https://github.com/sparklemotion/nokogiri/issues/656
73
+ * also, clean up or unify the implementations of #xpath-and-friends in Node and NodeSet
74
+ * implementations are very similar, but no shared code :(
75
+ * decorate nodes in a consistent manner
72
76
 
73
77
  ## Encoding
74
78
 
data/Rakefile CHANGED
@@ -17,6 +17,8 @@ def java?
17
17
  !! (RUBY_PLATFORM =~ /java/)
18
18
  end
19
19
 
20
+ ENV['LANG'] = "en_US.UTF-8" # UBUNTU 10.04, Y U NO DEFAULT TO UTF-8?
21
+
20
22
  require 'tasks/nokogiri.org'
21
23
 
22
24
  HOE = Hoe.spec 'nokogiri' do
@@ -46,14 +48,10 @@ HOE = Hoe.spec 'nokogiri' do
46
48
  ["mini_portile", ">= 0.2.2"],
47
49
  ["minitest", "~> 2.2.2"],
48
50
  ["rake", ">= 0.9"],
49
- ["rake-compiler", "= 0.8.0"]
51
+ ["rake-compiler", "= 0.8.0"],
52
+ ["racc", ">= 1.4.6"],
53
+ ["rexical", ">= 1.0.5"]
50
54
  ]
51
- if ! java?
52
- self.extra_dev_deps += [
53
- ["racc", ">= 1.4.6"],
54
- ["rexical", ">= 1.0.5"]
55
- ]
56
- end
57
55
 
58
56
  if java?
59
57
  self.spec_extras = { :platform => 'java' }
@@ -118,6 +116,20 @@ desc "Generate css/parser.rb and css/tokenizer.rex"
118
116
  task 'generate' => [GENERATED_PARSER, GENERATED_TOKENIZER]
119
117
  task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
120
118
 
119
+ # This is a big hack to make sure that the racc and rexical
120
+ # dependencies in the Gemfile are constrained to ruby platforms
121
+ # (i.e. MRI and Rubinius). There's no way to do that through hoe,
122
+ # and any solution will require changing hoe and hoe-bundler.
123
+ old_gemfile_task = Rake::Task['bundler:gemfile'] rescue nil
124
+ task 'bundler:gemfile' do
125
+ old_gemfile_task.invoke if old_gemfile_task
126
+
127
+ lines = File.open('Gemfile', 'r') { |f| f.readlines }.map do |line|
128
+ line =~ /racc|rexical/ ? "#{line.strip}, :platform => :ruby" : line
129
+ end
130
+ File.open('Gemfile', 'w') { |f| lines.each { |line| f.puts line } }
131
+ end
132
+
121
133
  file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
122
134
  racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
123
135
  racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
@@ -149,9 +161,18 @@ task :java_debug do
149
161
  ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if java? && ENV['JAVA_DEBUG']
150
162
  end
151
163
 
164
+ if java?
165
+ task :test_18 => :test
166
+ task :test_19 do
167
+ ENV['JRUBY_OPTS'] = "--1.9"
168
+ Rake::Task["test"].invoke
169
+ end
170
+ end
171
+
152
172
  Rake::Task[:test].prerequisites << :compile
153
173
  Rake::Task[:test].prerequisites << :java_debug
154
174
  Rake::Task[:test].prerequisites << :check_extra_deps unless java?
175
+
155
176
  if Hoe.plugins.include?(:debugging)
156
177
  ['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
157
178
  Rake::Task["test:#{task_name}"].prerequisites << :compile
@@ -162,6 +183,7 @@ end
162
183
 
163
184
  desc "build a windows gem without all the ceremony."
164
185
  task "gem:windows" => "gem" do
186
+ # TODO: 1.8.7-p358, 1.9.3-p194
165
187
  cross_rubies = ["1.8.7-p330", "1.9.2-p136"]
166
188
  ruby_cc_version = cross_rubies.collect { |_| _.split("-").first }.join(":") # e.g., "1.8.7:1.9.2"
167
189
  rake_compiler_config_path = "#{ENV['HOME']}/.rake-compiler/config.yml"
@@ -16,6 +16,7 @@ opts = OptionParser.new do |opts|
16
16
  opts.separator "Examples:"
17
17
  opts.separator " nokogiri http://www.ruby-lang.org/"
18
18
  opts.separator " nokogiri ./public/index.html"
19
+ opts.separator " curl -s http://nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'"
19
20
  opts.separator ""
20
21
  opts.separator "Options:"
21
22
 
@@ -27,6 +28,10 @@ opts = OptionParser.new do |opts|
27
28
  encoding = v
28
29
  end
29
30
 
31
+ opts.on("-e command", "Specifies script from command-line.") do |v|
32
+ @script = v
33
+ end
34
+
30
35
  opts.on("--rng <uri|path>", "Validate using this rng file.") do |v|
31
36
  @rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)}
32
37
  end
@@ -45,19 +50,29 @@ opts.parse!
45
50
 
46
51
  uri = ARGV.shift
47
52
 
48
- if uri.to_s.strip.empty?
53
+ if uri.to_s.strip.empty? && $stdin.tty?
49
54
  puts opts
50
55
  exit 1
51
56
  end
52
57
 
53
- @doc = parse_class.parse(open(uri).read, nil, encoding)
58
+ if $stdin.tty?
59
+ @doc = parse_class.parse(open(uri).read, nil, encoding)
60
+ else
61
+ @doc = parse_class.parse($stdin, nil, encoding)
62
+ end
63
+
64
+ $_ = @doc
54
65
 
55
66
  if @rng
56
67
  @rng.validate(@doc).each do |error|
57
68
  puts error.message
58
69
  end
59
70
  else
60
- puts "Your document is stored in @doc..."
61
- IRB.start
71
+ if @script
72
+ eval @script, binding, '<main>'
73
+ else
74
+ puts "Your document is stored in @doc..."
75
+ IRB.start
76
+ end
62
77
  end
63
78
 
data/build_all CHANGED
@@ -2,13 +2,39 @@
2
2
  #
3
3
  # script to build gems for all relevant platforms:
4
4
  # - MRI et al (standard gem)
5
- # - windows (x86-mingw32 and x86-msin32-60)
5
+ # - windows (x86-mingw32 and x86-mswin32-60)
6
6
  # - jruby
7
7
  #
8
- # prerequisite is the mingw32 packages.
9
- # on ubuntu, `sudo apt-get install mingw32`
10
- # for others, read up at https://github.com/luislavena/rake-compiler
8
+ # here's what I recommend for building all the gems:
11
9
  #
10
+ # 1. set up a vagrant VM guest running ubuntu lucid 32-bit.
11
+ # 2. install rvm, and install 1.8.7, 1.9.3 and jruby.
12
+ # 3. `sudo apt-get install mingw32`
13
+ #
14
+ # as you build, you may run into these problems:
15
+ #
16
+ # - if you're using Virtualbox shared directories, you'll get a mingw
17
+ # "Protocol error" at linktime. Boo! Either use NFS or a
18
+ # locally-checked-out repository.
19
+ #
20
+ # - on ubuntus 11 and later, you may have issues with building
21
+ # rake-compiler's rubies against openssl v2. Just comment the lines
22
+ # out from ossl_ssl.c and you'll be fine.
23
+ #
24
+ # - you may have issues with Pathname conversion to String in
25
+ # bundler. Add this to the offending bundler file:
26
+ #
27
+ # class Pathname
28
+ # def to_str
29
+ # to_s
30
+ # end
31
+ # end
32
+ #
33
+ # - you may also have to hack rubygems.rb to eliminate a reference to
34
+ # RUBY_ENGINE
35
+ #
36
+
37
+ HOST=
12
38
 
13
39
  # Load RVM into a shell session *as a function*
14
40
  if [[ -s "$HOME/.rvm/scripts/rvm" ]] ; then
@@ -21,38 +47,51 @@ fi
21
47
 
22
48
  function rvm_use {
23
49
  current_ruby=$1
24
- rvm use "${1}@nokogiri" --create
50
+ rvm use "${1}@nokogiri" --create || rvm -v
25
51
  }
26
52
 
27
53
  set -o errexit
28
54
 
29
55
  # initialize
30
56
  rvm_use 1.8.7
57
+ bundle install --quiet --local || bundle install
31
58
  rm -rf tmp pkg
32
- bundle exec rake clean clobber
59
+ bundle exec rake clean
33
60
 
34
61
  # holding pen
35
62
  rm -rf gems
36
63
  mkdir -p gems
37
64
 
38
- # MRI
39
- rvm_use 1.8.7
40
- bundle exec rake gem
41
- cp -v pkg/nokogiri*.gem gems # should only be one at this point in the script
42
-
43
65
  # windows
66
+ platform=$(uname -i)
67
+ if [[ $platform =~ "64" ]] ; then
68
+ echo ""
69
+ echo "ERROR: You need to build the windows gem on a 32-bit machine!"
70
+ echo ""
71
+ exit 1
72
+ fi
44
73
  rvm_use 1.8.7
45
- bundle exec rake-compiler cross-ruby VERSION=1.8.7-p330
46
- bundle exec rake-compiler cross-ruby VERSION=1.9.2-p136
74
+ if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.8.7-p330/lib/ruby/1.8.7/x86_64-linux/rbconfig.rb ]] ; then
75
+ bundle exec rake-compiler cross-ruby VERSION=1.8.7-p330
76
+ fi
77
+ if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.9.2-p136/lib/ruby/1.9.1/x86_64-linux/rbconfig.rb ]] ; then
78
+ bundle exec rake-compiler cross-ruby VERSION=1.9.2-p136
79
+ fi
47
80
  bundle exec rake cross
48
- rake gem:windows # don't use bundler here. it blows up. *shrug*
81
+ bundle exec rake gem:windows
49
82
  cp -v pkg/nokogiri*x86-{mingw32,mswin32}*.gem gems
50
83
 
84
+ # MRI
85
+ rvm_use 1.8.7
86
+ bundle exec rake gem
87
+ cp -v pkg/nokogiri*.gem gems # should only be one at this point in the script
88
+
51
89
  # jruby
52
- rvm_use jruby-1.6.5
90
+ rvm_use jruby
91
+ bundle install --quiet --local || bundle install
53
92
  bundle exec rake clean clobber
54
93
  rvm_use 1.8.7
55
94
  bundle exec rake generate
56
- rvm_use jruby-1.6.5
95
+ rvm_use jruby
57
96
  bundle exec rake gem
58
- cp -v pkg/nokogiri*java.gem gems
97
+ cp -v pkg/nokogiri*java.gem gems
@@ -55,6 +55,10 @@ import org.w3c.dom.NodeList;
55
55
  */
56
56
  @JRubyClass(name="Nokogiri::HTML::Document", parent="Nokogiri::XML::Document")
57
57
  public class HtmlDocument extends XmlDocument {
58
+ private static final String DEFAULT_CONTENT_TYPE = "html";
59
+ private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
60
+ private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";
61
+
58
62
  private String parsed_encoding = null;
59
63
 
60
64
  public HtmlDocument(Ruby ruby, RubyClass klazz) {
@@ -82,6 +86,28 @@ public class HtmlDocument extends XmlDocument {
82
86
  return htmlDocument;
83
87
  }
84
88
 
89
+ public IRubyObject getInternalSubset(ThreadContext context) {
90
+ IRubyObject internalSubset = super.getInternalSubset(context);
91
+
92
+ // html documents are expected to have a default internal subset
93
+ // the default values are the same ones used when the following
94
+ // feature is turned on
95
+ // "http://cyberneko.org/html/features/insert-doctype"
96
+ // the reason we don't turn it on, is because it overrides the document's
97
+ // declared doctype declaration.
98
+
99
+ if (internalSubset.isNil()) {
100
+ internalSubset = XmlDtd.newEmpty(context.getRuntime(),
101
+ getDocument(),
102
+ context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
103
+ context.getRuntime().newString(DEFAULT_PUBLIC_ID),
104
+ context.getRuntime().newString(DEFAULT_SYTEM_ID));
105
+ setInternalSubset(internalSubset);
106
+ }
107
+
108
+ return internalSubset;
109
+ }
110
+
85
111
  public static IRubyObject do_parse(ThreadContext context,
86
112
  IRubyObject klass,
87
113
  IRubyObject[] args) {