nokogiri 1.5.5.rc3-java → 1.5.6-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +42 -1
- data/CHANGELOG.rdoc +41 -1
- data/Manifest.txt +8 -1
- data/README.ja.rdoc +1 -1
- data/README.rdoc +5 -8
- data/ROADMAP.md +6 -2
- data/Rakefile +29 -7
- data/bin/nokogiri +19 -4
- data/build_all +56 -17
- data/ext/java/nokogiri/HtmlDocument.java +26 -0
- data/ext/java/nokogiri/NokogiriService.java +7 -1
- data/ext/java/nokogiri/XmlDocument.java +24 -6
- data/ext/java/nokogiri/XmlDocumentFragment.java +2 -26
- data/ext/java/nokogiri/XmlDtd.java +13 -2
- data/ext/java/nokogiri/XmlElement.java +3 -12
- data/ext/java/nokogiri/XmlEntityReference.java +32 -8
- data/ext/java/nokogiri/XmlNamespace.java +2 -1
- data/ext/java/nokogiri/XmlNode.java +83 -31
- data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
- data/ext/java/nokogiri/XmlText.java +2 -14
- data/ext/java/nokogiri/XsltStylesheet.java +4 -2
- data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
- data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +22 -14
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +25 -7
- data/ext/java/nokogiri/internals/ParserContext.java +2 -1
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -1
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +100 -102
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +10 -4
- data/ext/nokogiri/extconf.rb +1 -0
- data/ext/nokogiri/xml_document.c +2 -2
- data/ext/nokogiri/xml_node.c +31 -14
- data/ext/nokogiri/xml_sax_parser.c +16 -0
- data/ext/nokogiri/xslt_stylesheet.c +19 -2
- data/lib/nekodtd.jar +0 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +4 -1
- data/lib/nokogiri/xml/document.rb +8 -6
- data/lib/nokogiri/xml/document_fragment.rb +10 -1
- data/lib/nokogiri/xml/node.rb +58 -61
- data/lib/nokogiri/xml/sax/document.rb +7 -0
- data/lib/nokogiri/xml/sax/parser.rb +7 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -1
- data/lib/nokogiri/xslt.rb +1 -1
- data/tasks/cross_compile.rb +5 -8
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/helper.rb +6 -0
- data/test/html/test_document.rb +23 -0
- data/test/html/test_document_fragment.rb +5 -0
- data/test/test_xslt_transforms.rb +30 -0
- data/test/xml/sax/test_parser.rb +20 -1
- data/test/xml/test_builder.rb +42 -0
- data/test/xml/test_document.rb +64 -9
- data/test/xml/test_document_fragment.rb +7 -0
- data/test/xml/test_entity_reference.rb +12 -0
- data/test/xml/test_namespace.rb +20 -0
- data/test/xml/test_node.rb +79 -0
- data/test/xml/test_node_attributes.rb +29 -0
- data/test/xml/test_unparented_node.rb +9 -0
- data/test_all +11 -14
- metadata +744 -560
data/CHANGELOG.ja.rdoc
CHANGED
@@ -1,7 +1,47 @@
|
|
1
|
-
== 1.5.
|
1
|
+
== 1.5.6 / unreleased
|
2
2
|
|
3
3
|
* Features
|
4
4
|
|
5
|
+
* XML::Document#collect_namespaces メソッドのパフォーマンスを改善した。 #761 (ありがとう、Juergen Mangler!)
|
6
|
+
* SAX::Document#processing_instructionに新しいcallbackが追加 (ありがとう、Kitaiti Makoto!)
|
7
|
+
* Node#native_content= メソッドでエスケープされていない文字列をセットできるようにした。 #768
|
8
|
+
* 名前空間を付けて xpath 式を書く場合に、シンボルキーを使えるようにした。#729 (ありがとう、Ben Langfeld.)
|
9
|
+
* XML::Node#[]= メソッド内で受け取った引数を文字列に変換するようにした。#729 (ありがとう、Ben Langfeld.)
|
10
|
+
* bin/nokogiri コマンドが $stdin からドキュメントを読んで処理できるようにした。
|
11
|
+
* bin/nokogiri -e を指定することでコマンドラインプログラムを実行できるようにした。
|
12
|
+
* bin/nokogiri --version will print the Xerces and NekoHTML versions when run with JRuby.
|
13
|
+
|
14
|
+
|
15
|
+
* Bugfixes
|
16
|
+
* Nokogiri はこのバージョンからXSLT変換のエラーを検出するようになった。#731 (ありがとう、Justin Fitzsimmons!)
|
17
|
+
* Don't throw an Error when trying to replace top-level text node in DocumentFragment. #775
|
18
|
+
* SAXパーザに不正なエンコーディングが渡された場合はArgumentErrorを投げるようにした。#756 (ありがとう、Bradley Schaefer!)
|
19
|
+
* [JRuby] XML宣言の前にスペースがあると、ドキュメントのパーズに失敗する。(#748の修正でこれもなおっている) #790
|
20
|
+
* [JRuby] Nokogiri::XML::Node#content のJRubyの振る舞いがCRubyと同じではない。#794, #797
|
21
|
+
* [JRuby] で '#' で始まる文字列を名前とする EntityReference を作ろうとすると INVALID_CHARACTER_ERR という例外がはっせいする。 #719
|
22
|
+
* [JRuby] では Nodeのサブクラスのnamespaceを正しく文字列に変換しない。 #715
|
23
|
+
* [JRuby] Node#contentがこのバージョンから改行コードを正しく表示するようになった。#737 (ありがとう、Piotr Szmielew!)
|
24
|
+
* [JRuby] recover optionが指定されている場合は宣言の無いネームスペースを無視するようにした。#748
|
25
|
+
* [JRuby] ネームスペースを検出するXPathが続けて実行されても例外を投げてはいけない。#764
|
26
|
+
* [JRuby] XMLを表示(出力)する際のホワイトスペースの扱いをlibxml2バージョンとさらに同様になるようにした。#771
|
27
|
+
* [JRuby] ネームスペース付きの属性を含むXMLドキュメントを文字列でbuilderに追加しようとすると失敗する。#770
|
28
|
+
* [JRuby] Nokogiri::XML::Document#wrapを使って生成したドキュメントに << でノードを追加しようとすると
|
29
|
+
undefined method `length' for nil:NilClassのエラーが発生する #781
|
30
|
+
* [JRuby] 開いているファイルのデスクリプタを閉じようとすると、"bad file descriptor" が発生する。#495
|
31
|
+
* [JRuby] JRuby/CRuby incompatibility for attribute decorators. #785
|
32
|
+
* [JRuby] Issues parsing valid XML with no internal subset in the DTD. #547, #811
|
33
|
+
* [JRuby] Issues parsing valid node content when it contains colons. #728
|
34
|
+
* [JRuby] Correctly parse the doc type of html documents. #733
|
35
|
+
* [JRuby] Include dtd in the xml output when a builder is used with create_internal_subset. #751
|
36
|
+
* [JRuby] builder requires textwrappers for valid utf8 in jruby, not in mri. #784
|
37
|
+
|
38
|
+
|
39
|
+
== 1.5.5 / 2012年6月24日
|
40
|
+
|
41
|
+
* Features
|
42
|
+
|
43
|
+
* Much-improved support for JRuby in 1.9 mode! Yay!
|
44
|
+
|
5
45
|
* Bugfixes
|
6
46
|
|
7
47
|
* JRuby Nokogiri の add_previous_sibling が以前は動いていたのに今は動かない(1.5.0 -> 1.5.1)。 #691 (ありがとう, John Shahid!)
|
@@ -15,6 +55,7 @@
|
|
15
55
|
* JRuby で Nokogiri::XML::Node を継承したクラスを定義すると、namespace が表示されない。 #695
|
16
56
|
* JRuby で RDF::RDFXML::Writer をインスタンス化しようとすると NAMESPACE_ERR (org.w3c.dom.DOMException) が発生する. #683
|
17
57
|
* JRuby で xpath に namespaces を指定すると例外が発生する. #493
|
58
|
+
* JRuby の Entity 解決は C version の Nokogiri と同じ結果にならないといけない。#704, #647, #703
|
18
59
|
|
19
60
|
|
20
61
|
== 1.5.4 / 2012年6月12日
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,7 +1,46 @@
|
|
1
|
-
== 1.5.
|
1
|
+
== 1.5.6 / unreleased
|
2
2
|
|
3
3
|
* Features
|
4
4
|
|
5
|
+
* Improved performance of XML::Document#collect_namespaces. #761 (Thanks, Juergen Mangler!)
|
6
|
+
* New callback SAX::Document#processing_instruction (Thanks, Kitaiti Makoto!)
|
7
|
+
* Node#native_content= allows setting unescaped node content. #768
|
8
|
+
* XPath lookup with namespaces supports symbol keys. #729 (Thanks, Ben Langfeld.)
|
9
|
+
* XML::Node#[]= stringifies values. #729 (Thanks, Ben Langfeld.)
|
10
|
+
* bin/nokogiri will process a document from $stdin
|
11
|
+
* bin/nokogiri -e will execute a program from the command line
|
12
|
+
* bin/nokogiri --version will print the Xerces and NekoHTML versions when run with JRuby.
|
13
|
+
|
14
|
+
|
15
|
+
* Bugfixes
|
16
|
+
* Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
|
17
|
+
* Don't throw an Error when trying to replace top-level text node in DocumentFragment. #775
|
18
|
+
* Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
|
19
|
+
* [JRuby] space prior to xml preamble causes nokogiri to fail parsing. (fixed along with #748) #790
|
20
|
+
* [JRuby] Fixed the bug Nokogiri::XML::Node#content inconsistency between Java and C. #794, #797
|
21
|
+
* [JRuby] raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. #719
|
22
|
+
* [JRuby] doesn't coerce namespaces out of strings on a direct subclass of Node. #715
|
23
|
+
* [JRuby] Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
|
24
|
+
* [JRuby] Unknown namespaces are ignored when the recover option is used. #748
|
25
|
+
* [JRuby] XPath queries for namespaces should not throw exceptions when called twice in a row. #764
|
26
|
+
* [JRuby] More consistent (with libxml2) whitespace formatting when emitting XML. #771
|
27
|
+
* [JRuby] namespaced attributes broken when appending raw xml to builder. #770
|
28
|
+
* [JRuby] Nokogiri::XML::Document#wrap raises undefined method `length' for nil:NilClass when trying to << to a node. #781
|
29
|
+
* [JRuby] Fixed "bad file descriptor" bug when closing open file descriptors. #495
|
30
|
+
* [JRuby] JRuby/CRuby incompatibility for attribute decorators. #785
|
31
|
+
* [JRuby] Issues parsing valid XML with no internal subset in the DTD. #547, #811
|
32
|
+
* [JRuby] Issues parsing valid node content when it contains colons. #728
|
33
|
+
* [JRuby] Correctly parse the doc type of html documents. #733
|
34
|
+
* [JRuby] Include dtd in the xml output when a builder is used with create_internal_subset. #751
|
35
|
+
* [JRuby] builder requires textwrappers for valid utf8 in jruby, not in mri. #784
|
36
|
+
|
37
|
+
|
38
|
+
== 1.5.5 / 2012-06-24
|
39
|
+
|
40
|
+
* Features
|
41
|
+
|
42
|
+
* Much-improved support for JRuby in 1.9 mode! Yay!
|
43
|
+
|
5
44
|
* Bugfixes
|
6
45
|
|
7
46
|
* Regression in JRuby Nokogiri add_previous_sibling (1.5.0 -> 1.5.1) #691 (Thanks, John Shahid!)
|
@@ -15,6 +54,7 @@
|
|
15
54
|
* JRuby renders nodes without their namespace when subclassing Node. #695
|
16
55
|
* JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating RDF::RDFXML::Writer. #683
|
17
56
|
* JRuby is not able to use namespaces in xpath. #493
|
57
|
+
* JRuby's Entity resolving should be consistent with C-Nokogiri #704, #647, #703
|
18
58
|
|
19
59
|
|
20
60
|
== 1.5.4 / 2012-06-12
|
data/Manifest.txt
CHANGED
@@ -43,8 +43,13 @@ ext/java/nokogiri/XmlSyntaxError.java
|
|
43
43
|
ext/java/nokogiri/XmlText.java
|
44
44
|
ext/java/nokogiri/XmlXpathContext.java
|
45
45
|
ext/java/nokogiri/XsltStylesheet.java
|
46
|
+
ext/java/nokogiri/internals/ClosedStreamException.java
|
46
47
|
ext/java/nokogiri/internals/HtmlDomParserContext.java
|
48
|
+
ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java
|
47
49
|
ext/java/nokogiri/internals/NokogiriDocumentCache.java
|
50
|
+
ext/java/nokogiri/internals/NokogiriDomParser.java
|
51
|
+
ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java
|
52
|
+
ext/java/nokogiri/internals/NokogiriEntityResolver.java
|
48
53
|
ext/java/nokogiri/internals/NokogiriErrorHandler.java
|
49
54
|
ext/java/nokogiri/internals/NokogiriHandler.java
|
50
55
|
ext/java/nokogiri/internals/NokogiriHelpers.java
|
@@ -62,7 +67,6 @@ ext/java/nokogiri/internals/ReaderNode.java
|
|
62
67
|
ext/java/nokogiri/internals/SaveContextVisitor.java
|
63
68
|
ext/java/nokogiri/internals/SchemaErrorHandler.java
|
64
69
|
ext/java/nokogiri/internals/XmlDeclHandler.java
|
65
|
-
ext/java/nokogiri/internals/XmlDomParser.java
|
66
70
|
ext/java/nokogiri/internals/XmlDomParserContext.java
|
67
71
|
ext/java/nokogiri/internals/XmlSaxParser.java
|
68
72
|
ext/java/nokogiri/internals/XsltExtensionFunction.java
|
@@ -230,6 +234,9 @@ test/files/snuggles.xml
|
|
230
234
|
test/files/staff.dtd
|
231
235
|
test/files/staff.xml
|
232
236
|
test/files/staff.xslt
|
237
|
+
test/files/test_document_url/bar.xml
|
238
|
+
test/files/test_document_url/document.dtd
|
239
|
+
test/files/test_document_url/document.xml
|
233
240
|
test/files/tlm.html
|
234
241
|
test/files/to_be_xincluded.xml
|
235
242
|
test/files/valid_bar.xml
|
data/README.ja.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= Nokogiri (鋸)
|
1
|
+
= Nokogiri (鋸) {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
|
2
2
|
|
3
3
|
* http://nokogiri.org/
|
4
4
|
* http://github.com/sparklemotion/nokogiri/wikis
|
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri]
|
1
|
+
= Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
|
2
2
|
|
3
3
|
* http://nokogiri.org
|
4
4
|
* http://github.com/sparklemotion/nokogiri/wikis
|
@@ -122,13 +122,10 @@ Developing Nokogiri requires racc and rexical to generate the parser and
|
|
122
122
|
tokenizer. To start development, make sure you have `libxml2` and `libxslt`
|
123
123
|
installed.
|
124
124
|
|
125
|
-
Then install
|
125
|
+
Then install core gems and bootstrap:
|
126
126
|
|
127
|
-
$ gem install hoe rake-compiler
|
128
|
-
|
129
|
-
Then run rake:
|
130
|
-
|
131
|
-
$ rake
|
127
|
+
$ gem install hoe rake-compiler mini_portile
|
128
|
+
$ rake newb
|
132
129
|
|
133
130
|
=== Developing on JRuby
|
134
131
|
|
@@ -151,7 +148,7 @@ Then run rake:
|
|
151
148
|
|
152
149
|
Copyright (c) 2008 - 2012:
|
153
150
|
|
154
|
-
* {Aaron Patterson}[http://
|
151
|
+
* {Aaron Patterson}[http://tenderlovemaking.com]
|
155
152
|
* {Mike Dalessio}[http://mike.daless.io]
|
156
153
|
* {Charles Nutter}[http://blog.headius.com]
|
157
154
|
* {Sergio Arbeo}[http://www.serabe.com]
|
data/ROADMAP.md
CHANGED
@@ -19,8 +19,9 @@
|
|
19
19
|
* https://github.com/sparklemotion/nokogiri/issues/679
|
20
20
|
Mixing in Enumerable has some unintended consequences; plus we want to improve the attributes API
|
21
21
|
|
22
|
-
*
|
23
|
-
|
22
|
+
* Some ideas for a better attributes API?
|
23
|
+
* (closed) https://github.com/sparklemotion/nokogiri/issues/666
|
24
|
+
* https://github.com/sparklemotion/nokogiri/issues/765
|
24
25
|
|
25
26
|
|
26
27
|
## improve CSS query parsing
|
@@ -69,6 +70,9 @@
|
|
69
70
|
* we should standardize on a hash of options for these and other calls
|
70
71
|
* what should NodeSet#xpath return?
|
71
72
|
* https://github.com/sparklemotion/nokogiri/issues/656
|
73
|
+
* also, clean up or unify the implementations of #xpath-and-friends in Node and NodeSet
|
74
|
+
* implementations are very similar, but no shared code :(
|
75
|
+
* decorate nodes in a consistent manner
|
72
76
|
|
73
77
|
## Encoding
|
74
78
|
|
data/Rakefile
CHANGED
@@ -17,6 +17,8 @@ def java?
|
|
17
17
|
!! (RUBY_PLATFORM =~ /java/)
|
18
18
|
end
|
19
19
|
|
20
|
+
ENV['LANG'] = "en_US.UTF-8" # UBUNTU 10.04, Y U NO DEFAULT TO UTF-8?
|
21
|
+
|
20
22
|
require 'tasks/nokogiri.org'
|
21
23
|
|
22
24
|
HOE = Hoe.spec 'nokogiri' do
|
@@ -46,14 +48,10 @@ HOE = Hoe.spec 'nokogiri' do
|
|
46
48
|
["mini_portile", ">= 0.2.2"],
|
47
49
|
["minitest", "~> 2.2.2"],
|
48
50
|
["rake", ">= 0.9"],
|
49
|
-
["rake-compiler", "= 0.8.0"]
|
51
|
+
["rake-compiler", "= 0.8.0"],
|
52
|
+
["racc", ">= 1.4.6"],
|
53
|
+
["rexical", ">= 1.0.5"]
|
50
54
|
]
|
51
|
-
if ! java?
|
52
|
-
self.extra_dev_deps += [
|
53
|
-
["racc", ">= 1.4.6"],
|
54
|
-
["rexical", ">= 1.0.5"]
|
55
|
-
]
|
56
|
-
end
|
57
55
|
|
58
56
|
if java?
|
59
57
|
self.spec_extras = { :platform => 'java' }
|
@@ -118,6 +116,20 @@ desc "Generate css/parser.rb and css/tokenizer.rex"
|
|
118
116
|
task 'generate' => [GENERATED_PARSER, GENERATED_TOKENIZER]
|
119
117
|
task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
|
120
118
|
|
119
|
+
# This is a big hack to make sure that the racc and rexical
|
120
|
+
# dependencies in the Gemfile are constrained to ruby platforms
|
121
|
+
# (i.e. MRI and Rubinius). There's no way to do that through hoe,
|
122
|
+
# and any solution will require changing hoe and hoe-bundler.
|
123
|
+
old_gemfile_task = Rake::Task['bundler:gemfile'] rescue nil
|
124
|
+
task 'bundler:gemfile' do
|
125
|
+
old_gemfile_task.invoke if old_gemfile_task
|
126
|
+
|
127
|
+
lines = File.open('Gemfile', 'r') { |f| f.readlines }.map do |line|
|
128
|
+
line =~ /racc|rexical/ ? "#{line.strip}, :platform => :ruby" : line
|
129
|
+
end
|
130
|
+
File.open('Gemfile', 'w') { |f| lines.each { |line| f.puts line } }
|
131
|
+
end
|
132
|
+
|
121
133
|
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
|
122
134
|
racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
|
123
135
|
racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
|
@@ -149,9 +161,18 @@ task :java_debug do
|
|
149
161
|
ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if java? && ENV['JAVA_DEBUG']
|
150
162
|
end
|
151
163
|
|
164
|
+
if java?
|
165
|
+
task :test_18 => :test
|
166
|
+
task :test_19 do
|
167
|
+
ENV['JRUBY_OPTS'] = "--1.9"
|
168
|
+
Rake::Task["test"].invoke
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
152
172
|
Rake::Task[:test].prerequisites << :compile
|
153
173
|
Rake::Task[:test].prerequisites << :java_debug
|
154
174
|
Rake::Task[:test].prerequisites << :check_extra_deps unless java?
|
175
|
+
|
155
176
|
if Hoe.plugins.include?(:debugging)
|
156
177
|
['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
|
157
178
|
Rake::Task["test:#{task_name}"].prerequisites << :compile
|
@@ -162,6 +183,7 @@ end
|
|
162
183
|
|
163
184
|
desc "build a windows gem without all the ceremony."
|
164
185
|
task "gem:windows" => "gem" do
|
186
|
+
# TODO: 1.8.7-p358, 1.9.3-p194
|
165
187
|
cross_rubies = ["1.8.7-p330", "1.9.2-p136"]
|
166
188
|
ruby_cc_version = cross_rubies.collect { |_| _.split("-").first }.join(":") # e.g., "1.8.7:1.9.2"
|
167
189
|
rake_compiler_config_path = "#{ENV['HOME']}/.rake-compiler/config.yml"
|
data/bin/nokogiri
CHANGED
@@ -16,6 +16,7 @@ opts = OptionParser.new do |opts|
|
|
16
16
|
opts.separator "Examples:"
|
17
17
|
opts.separator " nokogiri http://www.ruby-lang.org/"
|
18
18
|
opts.separator " nokogiri ./public/index.html"
|
19
|
+
opts.separator " curl -s http://nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'"
|
19
20
|
opts.separator ""
|
20
21
|
opts.separator "Options:"
|
21
22
|
|
@@ -27,6 +28,10 @@ opts = OptionParser.new do |opts|
|
|
27
28
|
encoding = v
|
28
29
|
end
|
29
30
|
|
31
|
+
opts.on("-e command", "Specifies script from command-line.") do |v|
|
32
|
+
@script = v
|
33
|
+
end
|
34
|
+
|
30
35
|
opts.on("--rng <uri|path>", "Validate using this rng file.") do |v|
|
31
36
|
@rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)}
|
32
37
|
end
|
@@ -45,19 +50,29 @@ opts.parse!
|
|
45
50
|
|
46
51
|
uri = ARGV.shift
|
47
52
|
|
48
|
-
if uri.to_s.strip.empty?
|
53
|
+
if uri.to_s.strip.empty? && $stdin.tty?
|
49
54
|
puts opts
|
50
55
|
exit 1
|
51
56
|
end
|
52
57
|
|
53
|
-
|
58
|
+
if $stdin.tty?
|
59
|
+
@doc = parse_class.parse(open(uri).read, nil, encoding)
|
60
|
+
else
|
61
|
+
@doc = parse_class.parse($stdin, nil, encoding)
|
62
|
+
end
|
63
|
+
|
64
|
+
$_ = @doc
|
54
65
|
|
55
66
|
if @rng
|
56
67
|
@rng.validate(@doc).each do |error|
|
57
68
|
puts error.message
|
58
69
|
end
|
59
70
|
else
|
60
|
-
|
61
|
-
|
71
|
+
if @script
|
72
|
+
eval @script, binding, '<main>'
|
73
|
+
else
|
74
|
+
puts "Your document is stored in @doc..."
|
75
|
+
IRB.start
|
76
|
+
end
|
62
77
|
end
|
63
78
|
|
data/build_all
CHANGED
@@ -2,13 +2,39 @@
|
|
2
2
|
#
|
3
3
|
# script to build gems for all relevant platforms:
|
4
4
|
# - MRI et al (standard gem)
|
5
|
-
# - windows (x86-mingw32 and x86-
|
5
|
+
# - windows (x86-mingw32 and x86-mswin32-60)
|
6
6
|
# - jruby
|
7
7
|
#
|
8
|
-
#
|
9
|
-
# on ubuntu, `sudo apt-get install mingw32`
|
10
|
-
# for others, read up at https://github.com/luislavena/rake-compiler
|
8
|
+
# here's what I recommend for building all the gems:
|
11
9
|
#
|
10
|
+
# 1. set up a vagrant VM guest running ubuntu lucid 32-bit.
|
11
|
+
# 2. install rvm, and install 1.8.7, 1.9.3 and jruby.
|
12
|
+
# 3. `sudo apt-get install mingw32`
|
13
|
+
#
|
14
|
+
# as you build, you may run into these problems:
|
15
|
+
#
|
16
|
+
# - if you're using Virtualbox shared directories, you'll get a mingw
|
17
|
+
# "Protocol error" at linktime. Boo! Either use NFS or a
|
18
|
+
# locally-checked-out repository.
|
19
|
+
#
|
20
|
+
# - on ubuntus 11 and later, you may have issues with building
|
21
|
+
# rake-compiler's rubies against openssl v2. Just comment the lines
|
22
|
+
# out from ossl_ssl.c and you'll be fine.
|
23
|
+
#
|
24
|
+
# - you may have issues with Pathname conversion to String in
|
25
|
+
# bundler. Add this to the offending bundler file:
|
26
|
+
#
|
27
|
+
# class Pathname
|
28
|
+
# def to_str
|
29
|
+
# to_s
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# - you may also have to hack rubygems.rb to eliminate a reference to
|
34
|
+
# RUBY_ENGINE
|
35
|
+
#
|
36
|
+
|
37
|
+
HOST=
|
12
38
|
|
13
39
|
# Load RVM into a shell session *as a function*
|
14
40
|
if [[ -s "$HOME/.rvm/scripts/rvm" ]] ; then
|
@@ -21,38 +47,51 @@ fi
|
|
21
47
|
|
22
48
|
function rvm_use {
|
23
49
|
current_ruby=$1
|
24
|
-
rvm use "${1}@nokogiri" --create
|
50
|
+
rvm use "${1}@nokogiri" --create || rvm -v
|
25
51
|
}
|
26
52
|
|
27
53
|
set -o errexit
|
28
54
|
|
29
55
|
# initialize
|
30
56
|
rvm_use 1.8.7
|
57
|
+
bundle install --quiet --local || bundle install
|
31
58
|
rm -rf tmp pkg
|
32
|
-
bundle exec rake clean
|
59
|
+
bundle exec rake clean
|
33
60
|
|
34
61
|
# holding pen
|
35
62
|
rm -rf gems
|
36
63
|
mkdir -p gems
|
37
64
|
|
38
|
-
# MRI
|
39
|
-
rvm_use 1.8.7
|
40
|
-
bundle exec rake gem
|
41
|
-
cp -v pkg/nokogiri*.gem gems # should only be one at this point in the script
|
42
|
-
|
43
65
|
# windows
|
66
|
+
platform=$(uname -i)
|
67
|
+
if [[ $platform =~ "64" ]] ; then
|
68
|
+
echo ""
|
69
|
+
echo "ERROR: You need to build the windows gem on a 32-bit machine!"
|
70
|
+
echo ""
|
71
|
+
exit 1
|
72
|
+
fi
|
44
73
|
rvm_use 1.8.7
|
45
|
-
|
46
|
-
bundle exec rake-compiler cross-ruby VERSION=1.
|
74
|
+
if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.8.7-p330/lib/ruby/1.8.7/x86_64-linux/rbconfig.rb ]] ; then
|
75
|
+
bundle exec rake-compiler cross-ruby VERSION=1.8.7-p330
|
76
|
+
fi
|
77
|
+
if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.9.2-p136/lib/ruby/1.9.1/x86_64-linux/rbconfig.rb ]] ; then
|
78
|
+
bundle exec rake-compiler cross-ruby VERSION=1.9.2-p136
|
79
|
+
fi
|
47
80
|
bundle exec rake cross
|
48
|
-
rake gem:windows
|
81
|
+
bundle exec rake gem:windows
|
49
82
|
cp -v pkg/nokogiri*x86-{mingw32,mswin32}*.gem gems
|
50
83
|
|
84
|
+
# MRI
|
85
|
+
rvm_use 1.8.7
|
86
|
+
bundle exec rake gem
|
87
|
+
cp -v pkg/nokogiri*.gem gems # should only be one at this point in the script
|
88
|
+
|
51
89
|
# jruby
|
52
|
-
rvm_use jruby
|
90
|
+
rvm_use jruby
|
91
|
+
bundle install --quiet --local || bundle install
|
53
92
|
bundle exec rake clean clobber
|
54
93
|
rvm_use 1.8.7
|
55
94
|
bundle exec rake generate
|
56
|
-
rvm_use jruby
|
95
|
+
rvm_use jruby
|
57
96
|
bundle exec rake gem
|
58
|
-
cp -v pkg/nokogiri*java.gem gems
|
97
|
+
cp -v pkg/nokogiri*java.gem gems
|
@@ -55,6 +55,10 @@ import org.w3c.dom.NodeList;
|
|
55
55
|
*/
|
56
56
|
@JRubyClass(name="Nokogiri::HTML::Document", parent="Nokogiri::XML::Document")
|
57
57
|
public class HtmlDocument extends XmlDocument {
|
58
|
+
private static final String DEFAULT_CONTENT_TYPE = "html";
|
59
|
+
private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
|
60
|
+
private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";
|
61
|
+
|
58
62
|
private String parsed_encoding = null;
|
59
63
|
|
60
64
|
public HtmlDocument(Ruby ruby, RubyClass klazz) {
|
@@ -82,6 +86,28 @@ public class HtmlDocument extends XmlDocument {
|
|
82
86
|
return htmlDocument;
|
83
87
|
}
|
84
88
|
|
89
|
+
public IRubyObject getInternalSubset(ThreadContext context) {
|
90
|
+
IRubyObject internalSubset = super.getInternalSubset(context);
|
91
|
+
|
92
|
+
// html documents are expected to have a default internal subset
|
93
|
+
// the default values are the same ones used when the following
|
94
|
+
// feature is turned on
|
95
|
+
// "http://cyberneko.org/html/features/insert-doctype"
|
96
|
+
// the reason we don't turn it on, is because it overrides the document's
|
97
|
+
// declared doctype declaration.
|
98
|
+
|
99
|
+
if (internalSubset.isNil()) {
|
100
|
+
internalSubset = XmlDtd.newEmpty(context.getRuntime(),
|
101
|
+
getDocument(),
|
102
|
+
context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
|
103
|
+
context.getRuntime().newString(DEFAULT_PUBLIC_ID),
|
104
|
+
context.getRuntime().newString(DEFAULT_SYTEM_ID));
|
105
|
+
setInternalSubset(internalSubset);
|
106
|
+
}
|
107
|
+
|
108
|
+
return internalSubset;
|
109
|
+
}
|
110
|
+
|
85
111
|
public static IRubyObject do_parse(ThreadContext context,
|
86
112
|
IRubyObject klass,
|
87
113
|
IRubyObject[] args) {
|