nokogiri 1.4.6 → 1.4.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +6 -0
- data/CHANGELOG.rdoc +8 -0
- data/lib/nokogiri/css/parser.rb +68 -68
- data/lib/nokogiri/html/document.rb +5 -8
- data/lib/nokogiri/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/html/test_document_encoding.rb +7 -0
- metadata +3 -33
data/CHANGELOG.ja.rdoc
CHANGED
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
=== 1.4.7 / 2011-07-01
|
2
|
+
|
3
|
+
* Bugfixes
|
4
|
+
|
5
|
+
* Fix a bug in advanced encoding detection that leads to partially
|
6
|
+
duplicated document when parsing an HTML file with unknown
|
7
|
+
encoding. Thanks, Timothy Elliott (@ender672)! #478
|
8
|
+
|
1
9
|
=== 1.4.6 / 2011-06-19
|
2
10
|
|
3
11
|
* Notes
|
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -14,90 +14,90 @@ module Nokogiri
|
|
14
14
|
##### State transition tables begin ###
|
15
15
|
|
16
16
|
racc_action_table = [
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
17
|
+
12, 21, 46, 47, 20, 45, 22, 12, 65, 9,
|
18
|
+
78, 20, 76, 22, 12, 42, 9, 85, 20, 13,
|
19
|
+
90, 89, 11, 9, 14, 7, 13, 10, 15, 11,
|
20
|
+
81, 14, 7, 13, 10, 15, 11, 20, 14, 7,
|
21
|
+
12, 10, 15, 77, 20, 11, 80, 79, 20, 9,
|
22
|
+
84, 15, 20, 9, 79, 11, 20, 82, 7, 13,
|
23
|
+
20, 15, 11, 26, 14, 7, 11, 10, 15, 7,
|
24
|
+
11, 10, 15, 7, 11, 26, 15, 7, 11, 12,
|
25
|
+
15, 7, 86, 20, 15, 58, 88, 20, 39, 83,
|
26
|
+
41, 12, 46, 50, 82, 49, 46, 50, 13, 49,
|
27
|
+
53, 11, -23, 14, 7, 11, 91, 15, 7, 38,
|
28
|
+
13, 15, 73, 74, 93, 14, 37, 73, 74, 46,
|
29
|
+
50, 26, 49, 69, 70, 71, 96, 72, 69, 70,
|
30
|
+
71, 68, 72, 30, 31, 33, 68, 46, 50, 97,
|
31
|
+
49, nil, nil, 32, nil, 35, 34 ]
|
32
32
|
|
33
33
|
racc_action_check = [
|
34
|
-
0,
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
34
|
+
0, 1, 12, 12, 0, 12, 51, 36, 38, 0,
|
35
|
+
44, 36, 41, 1, 22, 12, 36, 51, 22, 0,
|
36
|
+
67, 67, 0, 22, 0, 0, 36, 0, 0, 36,
|
37
|
+
46, 36, 36, 22, 36, 36, 22, 62, 22, 22,
|
38
|
+
14, 22, 22, 43, 14, 4, 45, 45, 26, 14,
|
39
|
+
48, 4, 18, 26, 49, 62, 17, 50, 62, 14,
|
40
|
+
3, 62, 14, 24, 14, 14, 26, 14, 14, 26,
|
41
|
+
18, 26, 26, 18, 17, 3, 18, 17, 3, 11,
|
42
|
+
17, 3, 61, 16, 3, 21, 66, 19, 11, 47,
|
43
|
+
11, 15, 13, 13, 47, 13, 82, 82, 11, 82,
|
44
|
+
15, 16, 9, 11, 16, 19, 75, 16, 19, 8,
|
45
|
+
15, 19, 39, 39, 80, 15, 7, 40, 40, 81,
|
46
|
+
81, 5, 81, 39, 39, 39, 83, 39, 40, 40,
|
47
|
+
40, 39, 40, 6, 6, 6, 40, 79, 79, 93,
|
48
|
+
79, nil, nil, 6, nil, 6, 6 ]
|
49
49
|
|
50
50
|
racc_action_pointer = [
|
51
|
-
-2,
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
nil, nil, nil,
|
57
|
-
|
58
|
-
nil, nil, nil,
|
59
|
-
|
60
|
-
nil, nil, nil,
|
51
|
+
-2, 1, nil, 54, 21, 100, 126, 105, 81, 74,
|
52
|
+
nil, 77, -8, 82, 38, 89, 77, 50, 46, 81,
|
53
|
+
nil, 85, 12, nil, 42, nil, 42, nil, nil, nil,
|
54
|
+
nil, nil, nil, nil, nil, nil, 5, nil, -3, 109,
|
55
|
+
114, -13, nil, 20, -13, 35, 18, 82, 27, 42,
|
56
|
+
45, -6, nil, nil, nil, nil, nil, nil, nil, nil,
|
57
|
+
nil, 59, 31, nil, nil, nil, 61, 10, nil, nil,
|
58
|
+
nil, nil, nil, nil, nil, 81, nil, nil, nil, 127,
|
59
|
+
107, 109, 86, 113, nil, nil, nil, nil, nil, nil,
|
60
|
+
nil, nil, nil, 126, nil, nil, nil, nil ]
|
61
61
|
|
62
62
|
racc_action_default = [
|
63
|
-
-24, -
|
64
|
-
-
|
65
|
-
-
|
66
|
-
|
67
|
-
-
|
68
|
-
-
|
69
|
-
-
|
70
|
-
-
|
71
|
-
-
|
72
|
-
-
|
63
|
+
-24, -69, -2, -45, -12, -16, -18, -69, -69, -21,
|
64
|
+
-22, -69, -69, -69, -24, -69, -50, -51, -52, -53,
|
65
|
+
-54, -69, -24, -9, -44, -11, -24, -13, -14, -15,
|
66
|
+
-3, -4, -5, -6, -7, -8, -24, -19, -69, -57,
|
67
|
+
-57, -69, -28, -69, -69, -36, -37, -38, -69, -36,
|
68
|
+
-38, -69, -42, -43, -46, -47, -48, -49, 98, -1,
|
69
|
+
-10, -69, -66, -68, -17, -20, -69, -69, -58, -59,
|
70
|
+
-60, -61, -62, -63, -64, -69, -27, -29, -30, -69,
|
71
|
+
-41, -69, -69, -69, -31, -32, -65, -67, -25, -55,
|
72
|
+
-56, -26, -33, -69, -34, -35, -40, -39 ]
|
73
73
|
|
74
74
|
racc_goto_table = [
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
85, nil, nil, nil, 80, 78, nil, nil, nil, nil,
|
75
|
+
43, 48, 24, 25, 40, 29, 59, 1, 52, 66,
|
76
|
+
75, 23, 27, 28, 62, 54, 55, 56, 57, 36,
|
77
|
+
64, 51, 44, 61, 60, 63, nil, nil, nil, nil,
|
79
78
|
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
80
|
-
nil, nil, nil, nil, nil, nil, nil,
|
81
|
-
|
82
|
-
nil, nil,
|
79
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
80
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
81
|
+
nil, 87, nil, nil, nil, nil, nil, 92, nil, 94,
|
82
|
+
95 ]
|
83
83
|
|
84
84
|
racc_goto_check = [
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
85
|
+
15, 15, 7, 8, 9, 8, 2, 1, 9, 14,
|
86
|
+
14, 6, 10, 11, 5, 7, 7, 7, 7, 3,
|
87
|
+
2, 1, 16, 19, 8, 7, nil, nil, nil, nil,
|
88
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
89
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
89
90
|
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
90
|
-
nil,
|
91
|
-
15
|
92
|
-
nil, nil, 7 ]
|
91
|
+
nil, 7, nil, nil, nil, nil, nil, 15, nil, 15,
|
92
|
+
15 ]
|
93
93
|
|
94
94
|
racc_goto_pointer = [
|
95
|
-
nil,
|
96
|
-
|
95
|
+
nil, 7, -16, 13, nil, -12, 8, -1, 0, -7,
|
96
|
+
8, 9, nil, nil, -30, -12, 10, nil, nil, -3 ]
|
97
97
|
|
98
98
|
racc_goto_default = [
|
99
|
-
nil, nil,
|
100
|
-
|
99
|
+
nil, nil, 2, nil, 6, 3, nil, 5, nil, 4,
|
100
|
+
19, 18, 17, 8, nil, nil, nil, 16, 67, nil ]
|
101
101
|
|
102
102
|
racc_reduce_table = [
|
103
103
|
0, 0, :racc_error,
|
@@ -92,7 +92,7 @@ module Nokogiri
|
|
92
92
|
if string_or_io.respond_to?(:read)
|
93
93
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
94
94
|
if !encoding
|
95
|
-
# Perform
|
95
|
+
# Perform advanced encoding detection that libxml2 does
|
96
96
|
# not do.
|
97
97
|
string_or_io = EncodingReader.new(string_or_io)
|
98
98
|
begin
|
@@ -181,16 +181,13 @@ module Nokogiri
|
|
181
181
|
if !@firstchunk
|
182
182
|
@firstchunk = @io.read(len) or return nil
|
183
183
|
|
184
|
-
# This implementation expects
|
185
|
-
#
|
186
|
-
#
|
187
|
-
# libxml2 does not do.
|
184
|
+
# This implementation expects that the first call from
|
185
|
+
# htmlReadIO() is made with a length long enough (~1KB) to
|
186
|
+
# achieve advanced encoding detection.
|
188
187
|
if encoding = EncodingReader.detect_encoding(@firstchunk)
|
188
|
+
# The first chunk is stored for the next read in retry.
|
189
189
|
raise EncodingFoundException, encoding
|
190
190
|
end
|
191
|
-
|
192
|
-
# This chunk is stored for the next read in retry.
|
193
|
-
return @firstchunk
|
194
191
|
end
|
195
192
|
|
196
193
|
ret = @firstchunk.slice!(0, len)
|
data/lib/nokogiri/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -22,6 +22,7 @@ module Nokogiri
|
|
22
22
|
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
|
23
23
|
ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
|
24
24
|
ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
|
25
|
+
NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
|
25
26
|
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
|
26
27
|
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
|
27
28
|
ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
|
@@ -89,6 +89,13 @@ module Nokogiri
|
|
89
89
|
File.open(file, 'rb')
|
90
90
|
end
|
91
91
|
|
92
|
+
def test_document_html_noencoding
|
93
|
+
from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
|
94
|
+
from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
|
95
|
+
|
96
|
+
assert_equal from_string.to_s.size, from_stream.to_s.size
|
97
|
+
end
|
98
|
+
|
92
99
|
def test_document_xhtml_enc
|
93
100
|
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
|
94
101
|
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 11
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 1
|
8
|
-
- 4
|
9
|
-
- 6
|
10
|
-
version: 1.4.6
|
5
|
+
version: 1.4.7
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Aaron Patterson
|
@@ -16,7 +11,7 @@ autorequire:
|
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
13
|
|
19
|
-
date: 2011-
|
14
|
+
date: 2011-07-01 00:00:00 -04:00
|
20
15
|
default_executable:
|
21
16
|
dependencies:
|
22
17
|
- !ruby/object:Gem::Dependency
|
@@ -27,9 +22,6 @@ dependencies:
|
|
27
22
|
requirements:
|
28
23
|
- - ">="
|
29
24
|
- !ruby/object:Gem::Version
|
30
|
-
hash: 3
|
31
|
-
segments:
|
32
|
-
- 0
|
33
25
|
version: "0"
|
34
26
|
type: :development
|
35
27
|
version_requirements: *id001
|
@@ -41,9 +33,6 @@ dependencies:
|
|
41
33
|
requirements:
|
42
34
|
- - ">="
|
43
35
|
- !ruby/object:Gem::Version
|
44
|
-
hash: 3
|
45
|
-
segments:
|
46
|
-
- 0
|
47
36
|
version: "0"
|
48
37
|
type: :development
|
49
38
|
version_requirements: *id002
|
@@ -55,9 +44,6 @@ dependencies:
|
|
55
44
|
requirements:
|
56
45
|
- - ">="
|
57
46
|
- !ruby/object:Gem::Version
|
58
|
-
hash: 3
|
59
|
-
segments:
|
60
|
-
- 0
|
61
47
|
version: "0"
|
62
48
|
type: :development
|
63
49
|
version_requirements: *id003
|
@@ -69,11 +55,6 @@ dependencies:
|
|
69
55
|
requirements:
|
70
56
|
- - ">="
|
71
57
|
- !ruby/object:Gem::Version
|
72
|
-
hash: 15
|
73
|
-
segments:
|
74
|
-
- 1
|
75
|
-
- 6
|
76
|
-
- 0
|
77
58
|
version: 1.6.0
|
78
59
|
type: :development
|
79
60
|
version_requirements: *id004
|
@@ -85,11 +66,6 @@ dependencies:
|
|
85
66
|
requirements:
|
86
67
|
- - ">="
|
87
68
|
- !ruby/object:Gem::Version
|
88
|
-
hash: 35
|
89
|
-
segments:
|
90
|
-
- 2
|
91
|
-
- 9
|
92
|
-
- 4
|
93
69
|
version: 2.9.4
|
94
70
|
type: :development
|
95
71
|
version_requirements: *id005
|
@@ -442,23 +418,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
442
418
|
requirements:
|
443
419
|
- - ">="
|
444
420
|
- !ruby/object:Gem::Version
|
445
|
-
hash: 3
|
446
|
-
segments:
|
447
|
-
- 0
|
448
421
|
version: "0"
|
449
422
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
450
423
|
none: false
|
451
424
|
requirements:
|
452
425
|
- - ">="
|
453
426
|
- !ruby/object:Gem::Version
|
454
|
-
hash: 3
|
455
|
-
segments:
|
456
|
-
- 0
|
457
427
|
version: "0"
|
458
428
|
requirements: []
|
459
429
|
|
460
430
|
rubyforge_project: nokogiri
|
461
|
-
rubygems_version: 1.
|
431
|
+
rubygems_version: 1.5.2
|
462
432
|
signing_key:
|
463
433
|
specification_version: 3
|
464
434
|
summary: "Nokogiri (\xE9\x8B\xB8) is an HTML, XML, SAX, and Reader parser"
|