nokogiri 1.4.6-x86-mswin32-60 → 1.4.7-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +6 -0
- data/CHANGELOG.rdoc +8 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/html/document.rb +5 -8
- data/lib/nokogiri/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/html/test_document_encoding.rb +7 -0
- metadata +4 -4
data/CHANGELOG.ja.rdoc
CHANGED
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
=== 1.4.7 / 2011-07-01
|
2
|
+
|
3
|
+
* Bugfixes
|
4
|
+
|
5
|
+
* Fix a bug in advanced encoding detection that leads to partially
|
6
|
+
duplicated document when parsing an HTML file with unknown
|
7
|
+
encoding. Thanks, Timothy Elliott (@ender672)! #478
|
8
|
+
|
1
9
|
=== 1.4.6 / 2011-06-19
|
2
10
|
|
3
11
|
* Notes
|
Binary file
|
Binary file
|
@@ -92,7 +92,7 @@ module Nokogiri
|
|
92
92
|
if string_or_io.respond_to?(:read)
|
93
93
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
94
94
|
if !encoding
|
95
|
-
# Perform
|
95
|
+
# Perform advanced encoding detection that libxml2 does
|
96
96
|
# not do.
|
97
97
|
string_or_io = EncodingReader.new(string_or_io)
|
98
98
|
begin
|
@@ -181,16 +181,13 @@ module Nokogiri
|
|
181
181
|
if !@firstchunk
|
182
182
|
@firstchunk = @io.read(len) or return nil
|
183
183
|
|
184
|
-
# This implementation expects
|
185
|
-
#
|
186
|
-
#
|
187
|
-
# libxml2 does not do.
|
184
|
+
# This implementation expects that the first call from
|
185
|
+
# htmlReadIO() is made with a length long enough (~1KB) to
|
186
|
+
# achieve advanced encoding detection.
|
188
187
|
if encoding = EncodingReader.detect_encoding(@firstchunk)
|
188
|
+
# The first chunk is stored for the next read in retry.
|
189
189
|
raise EncodingFoundException, encoding
|
190
190
|
end
|
191
|
-
|
192
|
-
# This chunk is stored for the next read in retry.
|
193
|
-
return @firstchunk
|
194
191
|
end
|
195
192
|
|
196
193
|
ret = @firstchunk.slice!(0, len)
|
data/lib/nokogiri/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -22,6 +22,7 @@ module Nokogiri
|
|
22
22
|
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
|
23
23
|
ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
|
24
24
|
ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
|
25
|
+
NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
|
25
26
|
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
|
26
27
|
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
|
27
28
|
ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
|
@@ -89,6 +89,13 @@ module Nokogiri
|
|
89
89
|
File.open(file, 'rb')
|
90
90
|
end
|
91
91
|
|
92
|
+
def test_document_html_noencoding
|
93
|
+
from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
|
94
|
+
from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
|
95
|
+
|
96
|
+
assert_equal from_string.to_s.size, from_stream.to_s.size
|
97
|
+
end
|
98
|
+
|
92
99
|
def test_document_xhtml_enc
|
93
100
|
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
|
94
101
|
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 9
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 4
|
9
|
-
- 6
|
10
|
-
version: 1.4.6
|
9
|
+
- 7
|
10
|
+
version: 1.4.7
|
11
11
|
platform: x86-mswin32-60
|
12
12
|
authors:
|
13
13
|
- Aaron Patterson
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-07-01 00:00:00 -04:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|