nokogiri 1.4.6 → 1.4.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

@@ -1,3 +1,9 @@
1
+ === 1.4.7 / 2011年7月1日
2
+
3
+ * バグの修正
4
+
5
+ * エンコーディング宣言のないHTMLファイルで部分的に重複したドキュメントが生成される問題を修正した. #478
6
+
1
7
  === 1.4.6 / 2011年6月19日
2
8
 
3
9
  * ノート
@@ -1,3 +1,11 @@
1
+ === 1.4.7 / 2011-07-01
2
+
3
+ * Bugfixes
4
+
5
+ * Fix a bug in advanced encoding detection that leads to partially
6
+ duplicated document when parsing an HTML file with unknown
7
+ encoding. Thanks, Timothy Elliott (@ender672)! #478
8
+
1
9
  === 1.4.6 / 2011-06-19
2
10
 
3
11
  * Notes
@@ -14,90 +14,90 @@ module Nokogiri
14
14
  ##### State transition tables begin ###
15
15
 
16
16
  racc_action_table = [
17
- 4, 56, 27, 22, 12, 24, 57, 4, 65, 1,
18
- 41, 12, 75, 57, 4, 23, 1, 82, 12, 19,
19
- 93, 92, 5, 1, 9, 10, 19, 13, 16, 5,
20
- 12, 9, 10, 19, 13, 16, 5, 5, 9, 10,
21
- 4, 13, 16, 16, 12, 41, 64, 4, 5, 1,
22
- 60, 10, 12, 59, 16, 59, 28, 1, 29, 19,
23
- 12, 12, 5, 62, 9, 10, 19, 13, 16, 12,
24
- 5, 9, 83, 10, 12, 13, 16, 84, 5, 5,
25
- 63, 10, 10, 12, 16, 16, 58, 5, 61, 62,
26
- 10, 4, 5, 16, 87, 10, 27, 53, 16, 54,
27
- 49, 5, 88, 41, 10, 68, 70, 16, 27, 53,
28
- 19, 54, 44, 91, 21, 9, 69, 71, 72, 94,
29
- 74, 68, 70, -23, 66, 33, 35, 37, 27, 53,
30
- 96, 54, 69, 71, 72, 32, 74, 34, 36, 97,
31
- 66, 27, 53, nil, 54 ]
17
+ 12, 21, 46, 47, 20, 45, 22, 12, 65, 9,
18
+ 78, 20, 76, 22, 12, 42, 9, 85, 20, 13,
19
+ 90, 89, 11, 9, 14, 7, 13, 10, 15, 11,
20
+ 81, 14, 7, 13, 10, 15, 11, 20, 14, 7,
21
+ 12, 10, 15, 77, 20, 11, 80, 79, 20, 9,
22
+ 84, 15, 20, 9, 79, 11, 20, 82, 7, 13,
23
+ 20, 15, 11, 26, 14, 7, 11, 10, 15, 7,
24
+ 11, 10, 15, 7, 11, 26, 15, 7, 11, 12,
25
+ 15, 7, 86, 20, 15, 58, 88, 20, 39, 83,
26
+ 41, 12, 46, 50, 82, 49, 46, 50, 13, 49,
27
+ 53, 11, -23, 14, 7, 11, 91, 15, 7, 38,
28
+ 13, 15, 73, 74, 93, 14, 37, 73, 74, 46,
29
+ 50, 26, 49, 69, 70, 71, 96, 72, 69, 70,
30
+ 71, 68, 72, 30, 31, 33, 68, 46, 50, 97,
31
+ 49, nil, nil, 32, nil, 35, 34 ]
32
32
 
33
33
  racc_action_check = [
34
- 0, 20, 4, 4, 0, 4, 43, 9, 27, 0,
35
- 39, 9, 29, 20, 31, 4, 9, 43, 31, 0,
36
- 73, 73, 0, 31, 0, 0, 9, 0, 0, 9,
37
- 7, 9, 9, 31, 9, 9, 31, 14, 31, 31,
38
- 57, 31, 31, 14, 57, 7, 26, 5, 7, 57,
39
- 22, 7, 41, 53, 7, 22, 5, 41, 5, 57,
40
- 8, 79, 57, 54, 57, 57, 5, 57, 57, 18,
41
- 41, 5, 55, 41, 17, 41, 41, 56, 8, 79,
42
- 25, 8, 79, 15, 8, 79, 21, 18, 24, 24,
43
- 18, 16, 17, 18, 60, 17, 59, 59, 17, 59,
44
- 16, 15, 61, 11, 15, 30, 30, 15, 19, 19,
45
- 16, 19, 10, 67, 2, 16, 30, 30, 30, 76,
46
- 30, 28, 28, 1, 30, 6, 6, 6, 62, 62,
47
- 81, 62, 28, 28, 28, 6, 28, 6, 6, 88,
48
- 28, 65, 65, nil, 65 ]
34
+ 0, 1, 12, 12, 0, 12, 51, 36, 38, 0,
35
+ 44, 36, 41, 1, 22, 12, 36, 51, 22, 0,
36
+ 67, 67, 0, 22, 0, 0, 36, 0, 0, 36,
37
+ 46, 36, 36, 22, 36, 36, 22, 62, 22, 22,
38
+ 14, 22, 22, 43, 14, 4, 45, 45, 26, 14,
39
+ 48, 4, 18, 26, 49, 62, 17, 50, 62, 14,
40
+ 3, 62, 14, 24, 14, 14, 26, 14, 14, 26,
41
+ 18, 26, 26, 18, 17, 3, 18, 17, 3, 11,
42
+ 17, 3, 61, 16, 3, 21, 66, 19, 11, 47,
43
+ 11, 15, 13, 13, 47, 13, 82, 82, 11, 82,
44
+ 15, 16, 9, 11, 16, 19, 75, 16, 19, 8,
45
+ 15, 19, 39, 39, 80, 15, 7, 40, 40, 81,
46
+ 81, 5, 81, 39, 39, 39, 83, 39, 40, 40,
47
+ 40, 39, 40, 6, 6, 6, 40, 79, 79, 93,
48
+ 79, nil, nil, 6, nil, 6, 6 ]
49
49
 
50
50
  racc_action_pointer = [
51
- -2, 95, 86, nil, -8, 45, 118, 24, 54, 5,
52
- 101, 82, nil, nil, 13, 77, 89, 68, 63, 98,
53
- 1, 75, 43, nil, 77, 57, 23, -4, 118, -13,
54
- 102, 12, nil, nil, nil, nil, nil, nil, nil, -11,
55
- nil, 46, nil, -6, nil, nil, nil, nil, nil, nil,
56
- nil, nil, nil, 41, 51, 49, 77, 38, nil, 86,
57
- 81, 95, 118, nil, nil, 131, nil, 88, nil, nil,
58
- nil, nil, nil, 10, nil, nil, 94, nil, nil, 55,
59
- nil, 107, nil, nil, nil, nil, nil, nil, 126, nil,
60
- nil, nil, nil, nil, nil, nil, nil, nil ]
51
+ -2, 1, nil, 54, 21, 100, 126, 105, 81, 74,
52
+ nil, 77, -8, 82, 38, 89, 77, 50, 46, 81,
53
+ nil, 85, 12, nil, 42, nil, 42, nil, nil, nil,
54
+ nil, nil, nil, nil, nil, nil, 5, nil, -3, 109,
55
+ 114, -13, nil, 20, -13, 35, 18, 82, 27, 42,
56
+ 45, -6, nil, nil, nil, nil, nil, nil, nil, nil,
57
+ nil, 59, 31, nil, nil, nil, 61, 10, nil, nil,
58
+ nil, nil, nil, nil, nil, 81, nil, nil, nil, 127,
59
+ 107, 109, 86, 113, nil, nil, nil, nil, nil, nil,
60
+ nil, nil, nil, 126, nil, nil, nil, nil ]
61
61
 
62
62
  racc_action_default = [
63
- -24, -21, -69, -2, -69, -69, -18, -45, -50, -24,
64
- -69, -16, -54, -22, -12, -53, -69, -52, -51, -69,
65
- -69, -69, -38, -28, -36, -69, -69, -37, -57, -69,
66
- -57, -24, -5, -3, -8, -4, -7, -6, -9, -44,
67
- -11, -24, -46, -69, -19, -15, -13, -14, -49, -43,
68
- -42, -48, -47, -38, -36, -69, -69, -24, -20, -69,
69
- -69, -41, -69, -29, -30, -69, -58, -69, -63, -59,
70
- -64, -60, -61, -69, -62, -27, -69, -17, -10, -66,
71
- -68, -69, -32, -31, 98, -1, -35, -40, -69, -33,
72
- -34, -25, -55, -56, -26, -67, -65, -39 ]
63
+ -24, -69, -2, -45, -12, -16, -18, -69, -69, -21,
64
+ -22, -69, -69, -69, -24, -69, -50, -51, -52, -53,
65
+ -54, -69, -24, -9, -44, -11, -24, -13, -14, -15,
66
+ -3, -4, -5, -6, -7, -8, -24, -19, -69, -57,
67
+ -57, -69, -28, -69, -69, -36, -37, -38, -69, -36,
68
+ -38, -69, -42, -43, -46, -47, -48, -49, 98, -1,
69
+ -10, -69, -66, -68, -17, -20, -69, -69, -58, -59,
70
+ -60, -61, -62, -63, -64, -69, -27, -29, -30, -69,
71
+ -41, -69, -69, -69, -31, -32, -65, -67, -25, -55,
72
+ -56, -26, -33, -69, -34, -35, -40, -39 ]
73
73
 
74
74
  racc_goto_table = [
75
- 39, 42, 25, 40, 77, 30, 20, 45, 48, 79,
76
- 51, 52, 67, 46, 76, 43, 50, 55, 47, 38,
77
- 31, 26, 81, nil, nil, nil, nil, nil, nil, nil,
78
- 85, nil, nil, nil, 80, 78, nil, nil, nil, nil,
75
+ 43, 48, 24, 25, 40, 29, 59, 1, 52, 66,
76
+ 75, 23, 27, 28, 62, 54, 55, 56, 57, 36,
77
+ 64, 51, 44, 61, 60, 63, nil, nil, nil, nil,
79
78
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
80
- nil, nil, nil, nil, nil, nil, nil, 86, nil, nil,
81
- 89, nil, nil, 90, nil, nil, nil, nil, nil, nil,
82
- nil, nil, 95 ]
79
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
80
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
81
+ nil, 87, nil, nil, nil, nil, nil, 92, nil, 94,
82
+ 95 ]
83
83
 
84
84
  racc_goto_check = [
85
- 7, 7, 15, 8, 2, 9, 1, 8, 7, 5,
86
- 7, 7, 14, 10, 14, 1, 9, 15, 11, 6,
87
- 3, 16, 19, nil, nil, nil, nil, nil, nil, nil,
88
- 2, nil, nil, nil, 7, 8, nil, nil, nil, nil,
85
+ 15, 15, 7, 8, 9, 8, 2, 1, 9, 14,
86
+ 14, 6, 10, 11, 5, 7, 7, 7, 7, 3,
87
+ 2, 1, 16, 19, 8, 7, nil, nil, nil, nil,
88
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
89
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
89
90
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
90
- nil, nil, nil, nil, nil, nil, nil, 15, nil, nil,
91
- 15, nil, nil, 15, nil, nil, nil, nil, nil, nil,
92
- nil, nil, 7 ]
91
+ nil, 7, nil, nil, nil, nil, nil, 15, nil, 15,
92
+ 15 ]
93
93
 
94
94
  racc_goto_pointer = [
95
- nil, 6, -27, 14, nil, -32, 12, -7, -4, 0,
96
- -1, 4, nil, nil, -16, -2, 17, nil, nil, -19 ]
95
+ nil, 7, -16, 13, nil, -12, 8, -1, 0, -7,
96
+ 8, 9, nil, nil, -30, -12, 10, nil, nil, -3 ]
97
97
 
98
98
  racc_goto_default = [
99
- nil, nil, 3, nil, 6, 7, nil, 11, nil, 14,
100
- 15, 17, 18, 2, nil, nil, nil, 8, 73, nil ]
99
+ nil, nil, 2, nil, 6, 3, nil, 5, nil, 4,
100
+ 19, 18, 17, 8, nil, nil, nil, 16, 67, nil ]
101
101
 
102
102
  racc_reduce_table = [
103
103
  0, 0, :racc_error,
@@ -92,7 +92,7 @@ module Nokogiri
92
92
  if string_or_io.respond_to?(:read)
93
93
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
94
94
  if !encoding
95
- # Perform further encoding detection that libxml2 does
95
+ # Perform advanced encoding detection that libxml2 does
96
96
  # not do.
97
97
  string_or_io = EncodingReader.new(string_or_io)
98
98
  begin
@@ -181,16 +181,13 @@ module Nokogiri
181
181
  if !@firstchunk
182
182
  @firstchunk = @io.read(len) or return nil
183
183
 
184
- # This implementation expects and assumes that the first
185
- # call from htmlReadIO() is made with a length long enough
186
- # (~1KB) to achieve further encoding detection that
187
- # libxml2 does not do.
184
+ # This implementation expects that the first call from
185
+ # htmlReadIO() is made with a length long enough (~1KB) to
186
+ # achieve advanced encoding detection.
188
187
  if encoding = EncodingReader.detect_encoding(@firstchunk)
188
+ # The first chunk is stored for the next read in retry.
189
189
  raise EncodingFoundException, encoding
190
190
  end
191
-
192
- # This chunk is stored for the next read in retry.
193
- return @firstchunk
194
191
  end
195
192
 
196
193
  ret = @firstchunk.slice!(0, len)
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.4.6'
3
+ VERSION = '1.4.7'
4
4
 
5
5
  # More complete version information about libxml
6
6
  VERSION_INFO = {}
@@ -22,6 +22,7 @@ module Nokogiri
22
22
  SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
23
23
  ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
24
24
  ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
25
+ NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
25
26
  PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
26
27
  PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
27
28
  ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
@@ -89,6 +89,13 @@ module Nokogiri
89
89
  File.open(file, 'rb')
90
90
  end
91
91
 
92
+ def test_document_html_noencoding
93
+ from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
94
+ from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
95
+
96
+ assert_equal from_string.to_s.size, from_stream.to_s.size
97
+ end
98
+
92
99
  def test_document_xhtml_enc
93
100
  [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
94
101
  doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
5
4
  prerelease:
6
- segments:
7
- - 1
8
- - 4
9
- - 6
10
- version: 1.4.6
5
+ version: 1.4.7
11
6
  platform: ruby
12
7
  authors:
13
8
  - Aaron Patterson
@@ -16,7 +11,7 @@ autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
13
 
19
- date: 2011-06-19 00:00:00 -04:00
14
+ date: 2011-07-01 00:00:00 -04:00
20
15
  default_executable:
21
16
  dependencies:
22
17
  - !ruby/object:Gem::Dependency
@@ -27,9 +22,6 @@ dependencies:
27
22
  requirements:
28
23
  - - ">="
29
24
  - !ruby/object:Gem::Version
30
- hash: 3
31
- segments:
32
- - 0
33
25
  version: "0"
34
26
  type: :development
35
27
  version_requirements: *id001
@@ -41,9 +33,6 @@ dependencies:
41
33
  requirements:
42
34
  - - ">="
43
35
  - !ruby/object:Gem::Version
44
- hash: 3
45
- segments:
46
- - 0
47
36
  version: "0"
48
37
  type: :development
49
38
  version_requirements: *id002
@@ -55,9 +44,6 @@ dependencies:
55
44
  requirements:
56
45
  - - ">="
57
46
  - !ruby/object:Gem::Version
58
- hash: 3
59
- segments:
60
- - 0
61
47
  version: "0"
62
48
  type: :development
63
49
  version_requirements: *id003
@@ -69,11 +55,6 @@ dependencies:
69
55
  requirements:
70
56
  - - ">="
71
57
  - !ruby/object:Gem::Version
72
- hash: 15
73
- segments:
74
- - 1
75
- - 6
76
- - 0
77
58
  version: 1.6.0
78
59
  type: :development
79
60
  version_requirements: *id004
@@ -85,11 +66,6 @@ dependencies:
85
66
  requirements:
86
67
  - - ">="
87
68
  - !ruby/object:Gem::Version
88
- hash: 35
89
- segments:
90
- - 2
91
- - 9
92
- - 4
93
69
  version: 2.9.4
94
70
  type: :development
95
71
  version_requirements: *id005
@@ -442,23 +418,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
442
418
  requirements:
443
419
  - - ">="
444
420
  - !ruby/object:Gem::Version
445
- hash: 3
446
- segments:
447
- - 0
448
421
  version: "0"
449
422
  required_rubygems_version: !ruby/object:Gem::Requirement
450
423
  none: false
451
424
  requirements:
452
425
  - - ">="
453
426
  - !ruby/object:Gem::Version
454
- hash: 3
455
- segments:
456
- - 0
457
427
  version: "0"
458
428
  requirements: []
459
429
 
460
430
  rubyforge_project: nokogiri
461
- rubygems_version: 1.6.0
431
+ rubygems_version: 1.5.2
462
432
  signing_key:
463
433
  specification_version: 3
464
434
  summary: "Nokogiri (\xE9\x8B\xB8) is an HTML, XML, SAX, and Reader parser"