actionmailer 2.3.5 → 2.3.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/CHANGELOG +8 -0
  2. data/MIT-LICENSE +1 -1
  3. data/Rakefile +1 -1
  4. data/lib/action_mailer.rb +1 -1
  5. data/lib/action_mailer/base.rb +2 -2
  6. data/lib/action_mailer/quoting.rb +1 -0
  7. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail.rb +1 -0
  8. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/Makefile +18 -0
  9. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/address.rb +4 -38
  10. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/attachments.rb +65 -0
  11. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/base64.rb +0 -0
  12. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/compat.rb +0 -0
  13. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/config.rb +0 -0
  14. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/core_extensions.rb +0 -0
  15. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/encode.rb +9 -0
  16. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/header.rb +4 -2
  17. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/index.rb +0 -0
  18. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/interface.rb +33 -1
  19. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/loader.rb +0 -0
  20. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/mail.rb +1 -1
  21. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/mailbox.rb +3 -2
  22. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/main.rb +0 -0
  23. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/mbox.rb +0 -0
  24. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/net.rb +3 -1
  25. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/obsolete.rb +0 -0
  26. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/parser.rb +1060 -0
  27. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/parser.y +416 -0
  28. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/port.rb +0 -0
  29. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/quoting.rb +47 -1
  30. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/require_arch.rb +0 -0
  31. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/scanner.rb +0 -0
  32. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/scanner_r.rb +2 -1
  33. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/stringio.rb +0 -0
  34. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/utils.rb +50 -25
  35. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/COPYING +504 -0
  36. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/README +12 -0
  37. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  38. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  39. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  40. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  41. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  42. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  43. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  44. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  45. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  46. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  47. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  48. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  49. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  50. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  51. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  52. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  53. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  54. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  55. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  56. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  57. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  58. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  59. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  60. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  61. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  62. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  63. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  64. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  65. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  66. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  67. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  68. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  69. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  70. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +168 -0
  71. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  72. data/lib/action_mailer/vendor/{tmail-1.2.3 → tmail-1.2.7}/tmail/version.rb +1 -1
  73. data/lib/action_mailer/vendor/tmail.rb +2 -2
  74. data/lib/action_mailer/version.rb +1 -1
  75. data/test/abstract_unit.rb +1 -1
  76. data/test/fixtures/helpers/example_helper.rb +1 -1
  77. data/test/mail_service_test.rb +66 -2
  78. data/test/quoting_test.rb +10 -4
  79. metadata +87 -37
  80. data/lib/action_mailer/vendor/tmail-1.2.3/tmail/attachments.rb +0 -46
  81. data/lib/action_mailer/vendor/tmail-1.2.3/tmail/parser.rb +0 -1478
@@ -0,0 +1,147 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2.1 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ # 02110-1301 USA
28
+ ######################### END LICENSE BLOCK #########################
29
+
30
+ module CharDet
31
+ FREQ_CAT_NUM = 4
32
+
33
+ UDF = 0 # undefined
34
+ OTH = 1 # other
35
+ ASC = 2 # ascii capital letter
36
+ ASS = 3 # ascii small letter
37
+ ACV = 4 # accent capital vowel
38
+ ACO = 5 # accent capital other
39
+ ASV = 6 # accent small vowel
40
+ ASO = 7 # accent small other
41
+ CLASS_NUM = 8 # total classes
42
+
43
+ Latin1_CharToClass = [
44
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
45
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
46
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
47
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
48
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
49
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
50
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
51
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
52
+ OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
53
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
54
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
55
+ ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
56
+ OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
57
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
58
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
59
+ ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
60
+ OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
61
+ OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
62
+ UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
63
+ OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
64
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
65
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
66
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
67
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
68
+ ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
69
+ ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
70
+ ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
71
+ ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
72
+ ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
73
+ ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
74
+ ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
75
+ ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
76
+ ]
77
+
78
+ # 0 : illegal
79
+ # 1 : very unlikely
80
+ # 2 : normal
81
+ # 3 : very likely
82
+ Latin1ClassModel = [
83
+ # UDF OTH ASC ASS ACV ACO ASV ASO
84
+ 0, 0, 0, 0, 0, 0, 0, 0, # UDF
85
+ 0, 3, 3, 3, 3, 3, 3, 3, # OTH
86
+ 0, 3, 3, 3, 3, 3, 3, 3, # ASC
87
+ 0, 3, 3, 3, 1, 1, 3, 3, # ASS
88
+ 0, 3, 3, 3, 1, 2, 1, 2, # ACV
89
+ 0, 3, 3, 3, 3, 3, 3, 3, # ACO
90
+ 0, 3, 1, 3, 1, 1, 1, 3, # ASV
91
+ 0, 3, 1, 3, 1, 1, 3, 3, # ASO
92
+ ]
93
+
94
# Prober for single-byte windows-1252 text.
#
# Every byte is mapped to a character class via Latin1_CharToClass, and each
# (previous class, current class) transition is scored with Latin1ClassModel
# (0 = illegal, 1 = very unlikely, 2 = normal, 3 = very likely). The scores
# are tallied and later turned into a confidence value.
class Latin1Prober < CharSetProber
  def initialize
    super
    reset()
  end

  # Forgets the last seen character class and zeroes the per-score counters,
  # then delegates to the superclass reset.
  def reset
    @_mLastCharClass = OTH
    @_mFreqCounter = [0] * FREQ_CAT_NUM
    super
  end

  # Name of the charset this prober reports.
  def get_charset_name
    return "windows-1252"
  end

  # Consumes a chunk of input and updates the transition statistics.
  #
  # aBuf:: String of raw bytes to analyse. It is first passed through
  #        filter_with_english_letters (not defined in this class;
  #        presumably inherited from CharSetProber -- verify).
  #
  # Returns the prober state from get_state(). The state becomes ENotMe as
  # soon as an illegal (score 0) class transition is seen.
  def feed(aBuf)
    aBuf = filter_with_english_letters(aBuf)
    aBuf.each_byte do |b|
      # each_byte yields the byte as an Integer on every Ruby version, so it
      # can index the class table directly. Going through b.chr[0] produces
      # a String on Ruby >= 1.9, which is not a valid Array index.
      charClass = Latin1_CharToClass[b]
      freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
      if freq == 0
        @_mState = ENotMe
        break
      end
      @_mFreqCounter[freq] += 1
      @_mLastCharClass = charClass
    end

    return get_state()
  end

  # Returns a confidence value derived from the transition counters.
  #
  # Yields 0.01 once the prober has ruled itself out, and 0.0 when no
  # transitions have been counted or when the penalty for "very unlikely"
  # transitions outweighs the "very likely" ones.
  def get_confidence
    if get_state() == ENotMe
      return 0.01
    end

    total = @_mFreqCounter.inject { |a, b| a + b }
    if total < 0.01
      confidence = 0.0
    else
      # Compute in floating point: with Integer operands the "very likely"
      # ratio would truncate to 0 unless every transition scored 3.
      confidence = (@_mFreqCounter[3] - (@_mFreqCounter[1] * 20.0)) / total
    end
    if confidence < 0.0
      confidence = 0.0
    end
    # Lower the confidence of latin1 so that other, more accurate detectors
    # can take priority.
    confidence = confidence * 0.5
    return confidence
  end
end
147
+ end
@@ -0,0 +1,89 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ # Proofpoint, Inc.
14
+ #
15
+ # This library is free software; you can redistribute it and/or
16
+ # modify it under the terms of the GNU Lesser General Public
17
+ # License as published by the Free Software Foundation; either
18
+ # version 2.1 of the License, or (at your option) any later version.
19
+ #
20
+ # This library is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
+ # Lesser General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU Lesser General Public
26
+ # License along with this library; if not, write to the Free Software
27
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
28
+ # 02110-1301 USA
29
+ ######################### END LICENSE BLOCK #########################
30
+
31
+ module CharDet
32
# Shared driver for multi-byte charset probers.
#
# This class leaves @_mCodingSM (a coding state machine) and
# @_mDistributionAnalyzer as nil and get_charset_name empty; presumably
# subclasses assign and override them -- verify against the concrete probers.
class MultiByteCharSetProber < CharSetProber
  def initialize
    super
    @_mDistributionAnalyzer = nil
    @_mCodingSM = nil
    # Two-character scratch buffer: slot 0 holds the last character of the
    # previous chunk, slot 1 receives the first character of the current one.
    @_mLastChar = "\x00\x00"
  end

  # Resets the superclass state, the state machine and the distribution
  # analyzer (when present), and clears the carry-over buffer.
  def reset
    super
    if @_mCodingSM
      @_mCodingSM.reset()
    end
    if @_mDistributionAnalyzer
      @_mDistributionAnalyzer.reset()
    end
    @_mLastChar = "\x00\x00"
  end

  # Intentionally empty here (returns nil).
  def get_charset_name
  end

  # Feeds a chunk of input through the state machine one element at a time.
  #
  # aBuf:: String to analyse.
  #
  # Returns the prober state from get_state():
  # * ENotMe   -- the state machine reported EError;
  # * EFoundIt -- the state machine reported EItsMe, or the analyzer has
  #   enough data and confidence exceeds SHORTCUT_THRESHOLD;
  # * otherwise the state is left as it was.
  def feed(aBuf)
    aLen = aBuf.length
    for i in (0...aLen)
      codingState = @_mCodingSM.next_state(aBuf[i..i])
      if codingState == EError
        # $DEBUG is Ruby's predefined debug flag (set by `ruby -d`).
        $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $DEBUG
        @_mState = ENotMe
        break
      elsif codingState == EItsMe
        @_mState = EFoundIt
        break
      elsif codingState == EStart
        # A complete character has just been consumed; hand its trailing two
        # elements to the distribution analyzer.
        charLen = @_mCodingSM.get_current_charlen()
        if i == 0
          # The character may have started in the previous chunk, so pair the
          # remembered last element with the first element of this chunk.
          @_mLastChar[1] = aBuf[0..0]
          @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
        else
          @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
        end
      end
    end
    # Remember the final element for the next call. Skipped for an empty
    # chunk, where the slice would be nil and the assignment would raise.
    if aLen > 0
      @_mLastChar[0] = aBuf[aLen-1..aLen-1]
    end

    if get_state() == EDetecting
      if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
        @_mState = EFoundIt
      end
    end
    return get_state()
  end

  # Confidence as reported by the distribution analyzer.
  def get_confidence
    return @_mDistributionAnalyzer.get_confidence()
  end
end
89
+ end
@@ -0,0 +1,45 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ # Proofpoint, Inc.
14
+ #
15
+ # This library is free software; you can redistribute it and/or
16
+ # modify it under the terms of the GNU Lesser General Public
17
+ # License as published by the Free Software Foundation; either
18
+ # version 2.1 of the License, or (at your option) any later version.
19
+ #
20
+ # This library is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
+ # Lesser General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU Lesser General Public
26
+ # License along with this library; if not, write to the Free Software
27
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
28
+ # 02110-1301 USA
29
+ ######################### END LICENSE BLOCK #########################
30
+
31
+ module CharDet
32
# Group prober that bundles one prober per supported multi-byte charset.
class MBCSGroupProber < CharSetGroupProber
  # Builds the prober list (UTF-8, Shift_JIS, EUC-JP, GB2312, EUC-KR, Big5,
  # EUC-TW, in that order) and then resets the group.
  def initialize
    super
    probers = []
    probers << UTF8Prober.new
    probers << SJISProber.new
    probers << EUCJPProber.new
    probers << GB2312Prober.new
    probers << EUCKRProber.new
    probers << Big5Prober.new
    probers << EUCTWProber.new
    @_mProbers = probers
    reset()
  end
end
45
+ end
@@ -0,0 +1,542 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ # BIG5
31
+
32
+ BIG5_cls = [
33
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
34
+ 1,1,1,1,1,1,0,0, # 08 - 0f
35
+ 1,1,1,1,1,1,1,1, # 10 - 17
36
+ 1,1,1,0,1,1,1,1, # 18 - 1f
37
+ 1,1,1,1,1,1,1,1, # 20 - 27
38
+ 1,1,1,1,1,1,1,1, # 28 - 2f
39
+ 1,1,1,1,1,1,1,1, # 30 - 37
40
+ 1,1,1,1,1,1,1,1, # 38 - 3f
41
+ 2,2,2,2,2,2,2,2, # 40 - 47
42
+ 2,2,2,2,2,2,2,2, # 48 - 4f
43
+ 2,2,2,2,2,2,2,2, # 50 - 57
44
+ 2,2,2,2,2,2,2,2, # 58 - 5f
45
+ 2,2,2,2,2,2,2,2, # 60 - 67
46
+ 2,2,2,2,2,2,2,2, # 68 - 6f
47
+ 2,2,2,2,2,2,2,2, # 70 - 77
48
+ 2,2,2,2,2,2,2,1, # 78 - 7f
49
+ 4,4,4,4,4,4,4,4, # 80 - 87
50
+ 4,4,4,4,4,4,4,4, # 88 - 8f
51
+ 4,4,4,4,4,4,4,4, # 90 - 97
52
+ 4,4,4,4,4,4,4,4, # 98 - 9f
53
+ 4,3,3,3,3,3,3,3, # a0 - a7
54
+ 3,3,3,3,3,3,3,3, # a8 - af
55
+ 3,3,3,3,3,3,3,3, # b0 - b7
56
+ 3,3,3,3,3,3,3,3, # b8 - bf
57
+ 3,3,3,3,3,3,3,3, # c0 - c7
58
+ 3,3,3,3,3,3,3,3, # c8 - cf
59
+ 3,3,3,3,3,3,3,3, # d0 - d7
60
+ 3,3,3,3,3,3,3,3, # d8 - df
61
+ 3,3,3,3,3,3,3,3, # e0 - e7
62
+ 3,3,3,3,3,3,3,3, # e8 - ef
63
+ 3,3,3,3,3,3,3,3, # f0 - f7
64
+ 3,3,3,3,3,3,3,0 # f8 - ff
65
+ ]
66
+
67
+ BIG5_st = [
68
+ EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
69
+ EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
70
+ EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
71
+ ]
72
+
73
+ Big5CharLenTable = [0, 1, 1, 2, 0]
74
+
75
+ Big5SMModel = {'classTable' => BIG5_cls,
76
+ 'classFactor' => 5,
77
+ 'stateTable' => BIG5_st,
78
+ 'charLenTable' => Big5CharLenTable,
79
+ 'name' => 'Big5'
80
+ }
81
+
82
+ # EUC-JP
83
+
84
+ EUCJP_cls = [
85
+ 4,4,4,4,4,4,4,4, # 00 - 07
86
+ 4,4,4,4,4,4,5,5, # 08 - 0f
87
+ 4,4,4,4,4,4,4,4, # 10 - 17
88
+ 4,4,4,5,4,4,4,4, # 18 - 1f
89
+ 4,4,4,4,4,4,4,4, # 20 - 27
90
+ 4,4,4,4,4,4,4,4, # 28 - 2f
91
+ 4,4,4,4,4,4,4,4, # 30 - 37
92
+ 4,4,4,4,4,4,4,4, # 38 - 3f
93
+ 4,4,4,4,4,4,4,4, # 40 - 47
94
+ 4,4,4,4,4,4,4,4, # 48 - 4f
95
+ 4,4,4,4,4,4,4,4, # 50 - 57
96
+ 4,4,4,4,4,4,4,4, # 58 - 5f
97
+ 4,4,4,4,4,4,4,4, # 60 - 67
98
+ 4,4,4,4,4,4,4,4, # 68 - 6f
99
+ 4,4,4,4,4,4,4,4, # 70 - 77
100
+ 4,4,4,4,4,4,4,4, # 78 - 7f
101
+ 5,5,5,5,5,5,5,5, # 80 - 87
102
+ 5,5,5,5,5,5,1,3, # 88 - 8f
103
+ 5,5,5,5,5,5,5,5, # 90 - 97
104
+ 5,5,5,5,5,5,5,5, # 98 - 9f
105
+ 5,2,2,2,2,2,2,2, # a0 - a7
106
+ 2,2,2,2,2,2,2,2, # a8 - af
107
+ 2,2,2,2,2,2,2,2, # b0 - b7
108
+ 2,2,2,2,2,2,2,2, # b8 - bf
109
+ 2,2,2,2,2,2,2,2, # c0 - c7
110
+ 2,2,2,2,2,2,2,2, # c8 - cf
111
+ 2,2,2,2,2,2,2,2, # d0 - d7
112
+ 2,2,2,2,2,2,2,2, # d8 - df
113
+ 0,0,0,0,0,0,0,0, # e0 - e7
114
+ 0,0,0,0,0,0,0,0, # e8 - ef
115
+ 0,0,0,0,0,0,0,0, # f0 - f7
116
+ 0,0,0,0,0,0,0,5 # f8 - ff
117
+ ]
118
+
119
+ EUCJP_st = [
120
+ 3, 4, 3, 5,EStart,EError,EError,EError,#00-07
121
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
122
+ EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
123
+ EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
124
+ 3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
125
+ ]
126
+
127
+ EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
128
+
129
+ EUCJPSMModel = {'classTable' => EUCJP_cls,
130
+ 'classFactor' => 6,
131
+ 'stateTable' => EUCJP_st,
132
+ 'charLenTable' => EUCJPCharLenTable,
133
+ 'name' => 'EUC-JP'
134
+ }
135
+
136
+ # EUC-KR
137
+
138
+ EUCKR_cls = [
139
+ 1,1,1,1,1,1,1,1, # 00 - 07
140
+ 1,1,1,1,1,1,0,0, # 08 - 0f
141
+ 1,1,1,1,1,1,1,1, # 10 - 17
142
+ 1,1,1,0,1,1,1,1, # 18 - 1f
143
+ 1,1,1,1,1,1,1,1, # 20 - 27
144
+ 1,1,1,1,1,1,1,1, # 28 - 2f
145
+ 1,1,1,1,1,1,1,1, # 30 - 37
146
+ 1,1,1,1,1,1,1,1, # 38 - 3f
147
+ 1,1,1,1,1,1,1,1, # 40 - 47
148
+ 1,1,1,1,1,1,1,1, # 48 - 4f
149
+ 1,1,1,1,1,1,1,1, # 50 - 57
150
+ 1,1,1,1,1,1,1,1, # 58 - 5f
151
+ 1,1,1,1,1,1,1,1, # 60 - 67
152
+ 1,1,1,1,1,1,1,1, # 68 - 6f
153
+ 1,1,1,1,1,1,1,1, # 70 - 77
154
+ 1,1,1,1,1,1,1,1, # 78 - 7f
155
+ 0,0,0,0,0,0,0,0, # 80 - 87
156
+ 0,0,0,0,0,0,0,0, # 88 - 8f
157
+ 0,0,0,0,0,0,0,0, # 90 - 97
158
+ 0,0,0,0,0,0,0,0, # 98 - 9f
159
+ 0,2,2,2,2,2,2,2, # a0 - a7
160
+ 2,2,2,2,2,3,3,3, # a8 - af
161
+ 2,2,2,2,2,2,2,2, # b0 - b7
162
+ 2,2,2,2,2,2,2,2, # b8 - bf
163
+ 2,2,2,2,2,2,2,2, # c0 - c7
164
+ 2,3,2,2,2,2,2,2, # c8 - cf
165
+ 2,2,2,2,2,2,2,2, # d0 - d7
166
+ 2,2,2,2,2,2,2,2, # d8 - df
167
+ 2,2,2,2,2,2,2,2, # e0 - e7
168
+ 2,2,2,2,2,2,2,2, # e8 - ef
169
+ 2,2,2,2,2,2,2,2, # f0 - f7
170
+ 2,2,2,2,2,2,2,0 # f8 - ff
171
+ ]
172
+
173
+ EUCKR_st = [
174
+ EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
175
+ EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
176
+ ]
177
+
178
+ EUCKRCharLenTable = [0, 1, 2, 0]
179
+
180
+ EUCKRSMModel = {'classTable' => EUCKR_cls,
181
+ 'classFactor' => 4,
182
+ 'stateTable' => EUCKR_st,
183
+ 'charLenTable' => EUCKRCharLenTable,
184
+ 'name' => 'EUC-KR'
185
+ }
186
+
187
+ # EUC-TW
188
+
189
+ EUCTW_cls = [
190
+ 2,2,2,2,2,2,2,2, # 00 - 07
191
+ 2,2,2,2,2,2,0,0, # 08 - 0f
192
+ 2,2,2,2,2,2,2,2, # 10 - 17
193
+ 2,2,2,0,2,2,2,2, # 18 - 1f
194
+ 2,2,2,2,2,2,2,2, # 20 - 27
195
+ 2,2,2,2,2,2,2,2, # 28 - 2f
196
+ 2,2,2,2,2,2,2,2, # 30 - 37
197
+ 2,2,2,2,2,2,2,2, # 38 - 3f
198
+ 2,2,2,2,2,2,2,2, # 40 - 47
199
+ 2,2,2,2,2,2,2,2, # 48 - 4f
200
+ 2,2,2,2,2,2,2,2, # 50 - 57
201
+ 2,2,2,2,2,2,2,2, # 58 - 5f
202
+ 2,2,2,2,2,2,2,2, # 60 - 67
203
+ 2,2,2,2,2,2,2,2, # 68 - 6f
204
+ 2,2,2,2,2,2,2,2, # 70 - 77
205
+ 2,2,2,2,2,2,2,2, # 78 - 7f
206
+ 0,0,0,0,0,0,0,0, # 80 - 87
207
+ 0,0,0,0,0,0,6,0, # 88 - 8f
208
+ 0,0,0,0,0,0,0,0, # 90 - 97
209
+ 0,0,0,0,0,0,0,0, # 98 - 9f
210
+ 0,3,4,4,4,4,4,4, # a0 - a7
211
+ 5,5,1,1,1,1,1,1, # a8 - af
212
+ 1,1,1,1,1,1,1,1, # b0 - b7
213
+ 1,1,1,1,1,1,1,1, # b8 - bf
214
+ 1,1,3,1,3,3,3,3, # c0 - c7
215
+ 3,3,3,3,3,3,3,3, # c8 - cf
216
+ 3,3,3,3,3,3,3,3, # d0 - d7
217
+ 3,3,3,3,3,3,3,3, # d8 - df
218
+ 3,3,3,3,3,3,3,3, # e0 - e7
219
+ 3,3,3,3,3,3,3,3, # e8 - ef
220
+ 3,3,3,3,3,3,3,3, # f0 - f7
221
+ 3,3,3,3,3,3,3,0 # f8 - ff
222
+ ]
223
+
224
+ EUCTW_st = [
225
+ EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
226
+ EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
227
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EStart,EError,#10-17
228
+ EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
229
+ 5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
230
+ EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
231
+ ]
232
+
233
+ EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
234
+
235
+ EUCTWSMModel = {'classTable' => EUCTW_cls,
236
+ 'classFactor' => 7,
237
+ 'stateTable' => EUCTW_st,
238
+ 'charLenTable' => EUCTWCharLenTable,
239
+ 'name' => 'x-euc-tw'
240
+ }
241
+
242
+ # GB2312
243
+
244
+ GB2312_cls = [
245
+ 1,1,1,1,1,1,1,1, # 00 - 07
246
+ 1,1,1,1,1,1,0,0, # 08 - 0f
247
+ 1,1,1,1,1,1,1,1, # 10 - 17
248
+ 1,1,1,0,1,1,1,1, # 18 - 1f
249
+ 1,1,1,1,1,1,1,1, # 20 - 27
250
+ 1,1,1,1,1,1,1,1, # 28 - 2f
251
+ 3,3,3,3,3,3,3,3, # 30 - 37
252
+ 3,3,1,1,1,1,1,1, # 38 - 3f
253
+ 2,2,2,2,2,2,2,2, # 40 - 47
254
+ 2,2,2,2,2,2,2,2, # 48 - 4f
255
+ 2,2,2,2,2,2,2,2, # 50 - 57
256
+ 2,2,2,2,2,2,2,2, # 58 - 5f
257
+ 2,2,2,2,2,2,2,2, # 60 - 67
258
+ 2,2,2,2,2,2,2,2, # 68 - 6f
259
+ 2,2,2,2,2,2,2,2, # 70 - 77
260
+ 2,2,2,2,2,2,2,4, # 78 - 7f
261
+ 5,6,6,6,6,6,6,6, # 80 - 87
262
+ 6,6,6,6,6,6,6,6, # 88 - 8f
263
+ 6,6,6,6,6,6,6,6, # 90 - 97
264
+ 6,6,6,6,6,6,6,6, # 98 - 9f
265
+ 6,6,6,6,6,6,6,6, # a0 - a7
266
+ 6,6,6,6,6,6,6,6, # a8 - af
267
+ 6,6,6,6,6,6,6,6, # b0 - b7
268
+ 6,6,6,6,6,6,6,6, # b8 - bf
269
+ 6,6,6,6,6,6,6,6, # c0 - c7
270
+ 6,6,6,6,6,6,6,6, # c8 - cf
271
+ 6,6,6,6,6,6,6,6, # d0 - d7
272
+ 6,6,6,6,6,6,6,6, # d8 - df
273
+ 6,6,6,6,6,6,6,6, # e0 - e7
274
+ 6,6,6,6,6,6,6,6, # e8 - ef
275
+ 6,6,6,6,6,6,6,6, # f0 - f7
276
+ 6,6,6,6,6,6,6,0 # f8 - ff
277
+ ]
278
+
279
+ GB2312_st = [
280
+ EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
281
+ EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
282
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
283
+ 4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
284
+ EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
285
+ EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
286
+ ]
287
+
288
+ # To be accurate, the length of class 6 can be either 2 or 4.
289
+ # But it is not necessary to discriminate between the two since
290
+ # it is used for frequency analysis only, and we are validating
291
+ # each code range there as well. So it is safe to set it to be
292
+ # 2 here.
293
+ GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
294
+
295
+ GB2312SMModel = {'classTable' => GB2312_cls,
296
+ 'classFactor' => 7,
297
+ 'stateTable' => GB2312_st,
298
+ 'charLenTable' => GB2312CharLenTable,
299
+ 'name' => 'GB2312'
300
+ }
301
+
302
+ # Shift_JIS
303
+
304
+ SJIS_cls = [
305
+ 1,1,1,1,1,1,1,1, # 00 - 07
306
+ 1,1,1,1,1,1,0,0, # 08 - 0f
307
+ 1,1,1,1,1,1,1,1, # 10 - 17
308
+ 1,1,1,0,1,1,1,1, # 18 - 1f
309
+ 1,1,1,1,1,1,1,1, # 20 - 27
310
+ 1,1,1,1,1,1,1,1, # 28 - 2f
311
+ 1,1,1,1,1,1,1,1, # 30 - 37
312
+ 1,1,1,1,1,1,1,1, # 38 - 3f
313
+ 2,2,2,2,2,2,2,2, # 40 - 47
314
+ 2,2,2,2,2,2,2,2, # 48 - 4f
315
+ 2,2,2,2,2,2,2,2, # 50 - 57
316
+ 2,2,2,2,2,2,2,2, # 58 - 5f
317
+ 2,2,2,2,2,2,2,2, # 60 - 67
318
+ 2,2,2,2,2,2,2,2, # 68 - 6f
319
+ 2,2,2,2,2,2,2,2, # 70 - 77
320
+ 2,2,2,2,2,2,2,1, # 78 - 7f
321
+ 3,3,3,3,3,3,3,3, # 80 - 87
322
+ 3,3,3,3,3,3,3,3, # 88 - 8f
323
+ 3,3,3,3,3,3,3,3, # 90 - 97
324
+ 3,3,3,3,3,3,3,3, # 98 - 9f
325
+ #0xa0 is illegal in sjis encoding, but some pages do
326
+ #contain such a byte. We need to be more forgiving of errors.
327
+ 2,2,2,2,2,2,2,2, # a0 - a7
328
+ 2,2,2,2,2,2,2,2, # a8 - af
329
+ 2,2,2,2,2,2,2,2, # b0 - b7
330
+ 2,2,2,2,2,2,2,2, # b8 - bf
331
+ 2,2,2,2,2,2,2,2, # c0 - c7
332
+ 2,2,2,2,2,2,2,2, # c8 - cf
333
+ 2,2,2,2,2,2,2,2, # d0 - d7
334
+ 2,2,2,2,2,2,2,2, # d8 - df
335
+ 3,3,3,3,3,3,3,3, # e0 - e7
336
+ 3,3,3,3,3,4,4,4, # e8 - ef
337
+ 4,4,4,4,4,4,4,4, # f0 - f7
338
+ 4,4,4,4,4,0,0,0 # f8 - ff
339
+ ]
340
+
341
+ SJIS_st = [
342
+ EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
343
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
344
+ EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
345
+ ]
346
+
347
+ SJISCharLenTable = [0, 1, 1, 2, 0, 0]
348
+
349
+ SJISSMModel = {'classTable' => SJIS_cls,
350
+ 'classFactor' => 6,
351
+ 'stateTable' => SJIS_st,
352
+ 'charLenTable' => SJISCharLenTable,
353
+ 'name' => 'Shift_JIS'
354
+ }
355
+
356
+ # UCS2-BE
357
+
358
+ UCS2BE_cls = [
359
+ 0,0,0,0,0,0,0,0, # 00 - 07
360
+ 0,0,1,0,0,2,0,0, # 08 - 0f
361
+ 0,0,0,0,0,0,0,0, # 10 - 17
362
+ 0,0,0,3,0,0,0,0, # 18 - 1f
363
+ 0,0,0,0,0,0,0,0, # 20 - 27
364
+ 0,3,3,3,3,3,0,0, # 28 - 2f
365
+ 0,0,0,0,0,0,0,0, # 30 - 37
366
+ 0,0,0,0,0,0,0,0, # 38 - 3f
367
+ 0,0,0,0,0,0,0,0, # 40 - 47
368
+ 0,0,0,0,0,0,0,0, # 48 - 4f
369
+ 0,0,0,0,0,0,0,0, # 50 - 57
370
+ 0,0,0,0,0,0,0,0, # 58 - 5f
371
+ 0,0,0,0,0,0,0,0, # 60 - 67
372
+ 0,0,0,0,0,0,0,0, # 68 - 6f
373
+ 0,0,0,0,0,0,0,0, # 70 - 77
374
+ 0,0,0,0,0,0,0,0, # 78 - 7f
375
+ 0,0,0,0,0,0,0,0, # 80 - 87
376
+ 0,0,0,0,0,0,0,0, # 88 - 8f
377
+ 0,0,0,0,0,0,0,0, # 90 - 97
378
+ 0,0,0,0,0,0,0,0, # 98 - 9f
379
+ 0,0,0,0,0,0,0,0, # a0 - a7
380
+ 0,0,0,0,0,0,0,0, # a8 - af
381
+ 0,0,0,0,0,0,0,0, # b0 - b7
382
+ 0,0,0,0,0,0,0,0, # b8 - bf
383
+ 0,0,0,0,0,0,0,0, # c0 - c7
384
+ 0,0,0,0,0,0,0,0, # c8 - cf
385
+ 0,0,0,0,0,0,0,0, # d0 - d7
386
+ 0,0,0,0,0,0,0,0, # d8 - df
387
+ 0,0,0,0,0,0,0,0, # e0 - e7
388
+ 0,0,0,0,0,0,0,0, # e8 - ef
389
+ 0,0,0,0,0,0,0,0, # f0 - f7
390
+ 0,0,0,0,0,0,4,5 # f8 - ff
391
+ ]
392
+
393
+ UCS2BE_st = [
394
+ 5, 7, 7,EError, 4, 3,EError,EError,#00-07
395
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
396
+ EItsMe,EItsMe, 6, 6, 6, 6,EError,EError,#10-17
397
+ 6, 6, 6, 6, 6,EItsMe, 6, 6,#18-1f
398
+ 6, 6, 6, 6, 5, 7, 7,EError,#20-27
399
+ 5, 8, 6, 6,EError, 6, 6, 6,#28-2f
400
+ 6, 6, 6, 6,EError,EError,EStart,EStart#30-37
401
+ ]
402
+
403
+ UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
404
+
405
+ UCS2BESMModel = {'classTable' => UCS2BE_cls,
406
+ 'classFactor' => 6,
407
+ 'stateTable' => UCS2BE_st,
408
+ 'charLenTable' => UCS2BECharLenTable,
409
+ 'name' => 'UTF-16BE'
410
+ }
411
+
412
+ # UCS2-LE
413
+
414
+ UCS2LE_cls = [
415
+ 0,0,0,0,0,0,0,0, # 00 - 07
416
+ 0,0,1,0,0,2,0,0, # 08 - 0f
417
+ 0,0,0,0,0,0,0,0, # 10 - 17
418
+ 0,0,0,3,0,0,0,0, # 18 - 1f
419
+ 0,0,0,0,0,0,0,0, # 20 - 27
420
+ 0,3,3,3,3,3,0,0, # 28 - 2f
421
+ 0,0,0,0,0,0,0,0, # 30 - 37
422
+ 0,0,0,0,0,0,0,0, # 38 - 3f
423
+ 0,0,0,0,0,0,0,0, # 40 - 47
424
+ 0,0,0,0,0,0,0,0, # 48 - 4f
425
+ 0,0,0,0,0,0,0,0, # 50 - 57
426
+ 0,0,0,0,0,0,0,0, # 58 - 5f
427
+ 0,0,0,0,0,0,0,0, # 60 - 67
428
+ 0,0,0,0,0,0,0,0, # 68 - 6f
429
+ 0,0,0,0,0,0,0,0, # 70 - 77
430
+ 0,0,0,0,0,0,0,0, # 78 - 7f
431
+ 0,0,0,0,0,0,0,0, # 80 - 87
432
+ 0,0,0,0,0,0,0,0, # 88 - 8f
433
+ 0,0,0,0,0,0,0,0, # 90 - 97
434
+ 0,0,0,0,0,0,0,0, # 98 - 9f
435
+ 0,0,0,0,0,0,0,0, # a0 - a7
436
+ 0,0,0,0,0,0,0,0, # a8 - af
437
+ 0,0,0,0,0,0,0,0, # b0 - b7
438
+ 0,0,0,0,0,0,0,0, # b8 - bf
439
+ 0,0,0,0,0,0,0,0, # c0 - c7
440
+ 0,0,0,0,0,0,0,0, # c8 - cf
441
+ 0,0,0,0,0,0,0,0, # d0 - d7
442
+ 0,0,0,0,0,0,0,0, # d8 - df
443
+ 0,0,0,0,0,0,0,0, # e0 - e7
444
+ 0,0,0,0,0,0,0,0, # e8 - ef
445
+ 0,0,0,0,0,0,0,0, # f0 - f7
446
+ 0,0,0,0,0,0,4,5 # f8 - ff
447
+ ]
448
+
449
+ UCS2LE_st = [
450
+ 6, 6, 7, 6, 4, 3,EError,EError,#00-07
451
+ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
452
+ EItsMe,EItsMe, 5, 5, 5,EError,EItsMe,EError,#10-17
453
+ 5, 5, 5,EError, 5,EError, 6, 6,#18-1f
454
+ 7, 6, 8, 8, 5, 5, 5,EError,#20-27
455
+ 5, 5, 5,EError,EError,EError, 5, 5,#28-2f
456
+ 5, 5, 5,EError, 5,EError,EStart,EStart#30-37
457
+ ]
458
+
459
+ UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
460
+
461
+ UCS2LESMModel = {'classTable' => UCS2LE_cls,
462
+ 'classFactor' => 6,
463
+ 'stateTable' => UCS2LE_st,
464
+ 'charLenTable' => UCS2LECharLenTable,
465
+ 'name' => 'UTF-16LE'
466
+ }
467
+
468
+ # UTF-8
469
+
470
+ UTF8_cls = [
471
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
472
+ 1,1,1,1,1,1,0,0, # 08 - 0f
473
+ 1,1,1,1,1,1,1,1, # 10 - 17
474
+ 1,1,1,0,1,1,1,1, # 18 - 1f
475
+ 1,1,1,1,1,1,1,1, # 20 - 27
476
+ 1,1,1,1,1,1,1,1, # 28 - 2f
477
+ 1,1,1,1,1,1,1,1, # 30 - 37
478
+ 1,1,1,1,1,1,1,1, # 38 - 3f
479
+ 1,1,1,1,1,1,1,1, # 40 - 47
480
+ 1,1,1,1,1,1,1,1, # 48 - 4f
481
+ 1,1,1,1,1,1,1,1, # 50 - 57
482
+ 1,1,1,1,1,1,1,1, # 58 - 5f
483
+ 1,1,1,1,1,1,1,1, # 60 - 67
484
+ 1,1,1,1,1,1,1,1, # 68 - 6f
485
+ 1,1,1,1,1,1,1,1, # 70 - 77
486
+ 1,1,1,1,1,1,1,1, # 78 - 7f
487
+ 2,2,2,2,3,3,3,3, # 80 - 87
488
+ 4,4,4,4,4,4,4,4, # 88 - 8f
489
+ 4,4,4,4,4,4,4,4, # 90 - 97
490
+ 4,4,4,4,4,4,4,4, # 98 - 9f
491
+ 5,5,5,5,5,5,5,5, # a0 - a7
492
+ 5,5,5,5,5,5,5,5, # a8 - af
493
+ 5,5,5,5,5,5,5,5, # b0 - b7
494
+ 5,5,5,5,5,5,5,5, # b8 - bf
495
+ 0,0,6,6,6,6,6,6, # c0 - c7
496
+ 6,6,6,6,6,6,6,6, # c8 - cf
497
+ 6,6,6,6,6,6,6,6, # d0 - d7
498
+ 6,6,6,6,6,6,6,6, # d8 - df
499
+ 7,8,8,8,8,8,8,8, # e0 - e7
500
+ 8,8,8,8,8,9,8,8, # e8 - ef
501
+ 10,11,11,11,11,11,11,11, # f0 - f7
502
+ 12,13,13,13,14,15,0,0 # f8 - ff
503
+ ]
504
+
505
+ UTF8_st = [
506
+ EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
507
+ 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
508
+ EError,EError,EError,EError,EError,EError,EError,EError,#10-17
509
+ EError,EError,EError,EError,EError,EError,EError,EError,#18-1f
510
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#20-27
511
+ EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#28-2f
512
+ EError,EError, 5, 5, 5, 5,EError,EError,#30-37
513
+ EError,EError,EError,EError,EError,EError,EError,EError,#38-3f
514
+ EError,EError,EError, 5, 5, 5,EError,EError,#40-47
515
+ EError,EError,EError,EError,EError,EError,EError,EError,#48-4f
516
+ EError,EError, 7, 7, 7, 7,EError,EError,#50-57
517
+ EError,EError,EError,EError,EError,EError,EError,EError,#58-5f
518
+ EError,EError,EError,EError, 7, 7,EError,EError,#60-67
519
+ EError,EError,EError,EError,EError,EError,EError,EError,#68-6f
520
+ EError,EError, 9, 9, 9, 9,EError,EError,#70-77
521
+ EError,EError,EError,EError,EError,EError,EError,EError,#78-7f
522
+ EError,EError,EError,EError,EError, 9,EError,EError,#80-87
523
+ EError,EError,EError,EError,EError,EError,EError,EError,#88-8f
524
+ EError,EError, 12, 12, 12, 12,EError,EError,#90-97
525
+ EError,EError,EError,EError,EError,EError,EError,EError,#98-9f
526
+ EError,EError,EError,EError,EError, 12,EError,EError,#a0-a7
527
+ EError,EError,EError,EError,EError,EError,EError,EError,#a8-af
528
+ EError,EError, 12, 12, 12,EError,EError,EError,#b0-b7
529
+ EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
530
+ EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
531
+ EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
532
+ ]
533
+
534
+ UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
535
+
536
+ UTF8SMModel = {'classTable' => UTF8_cls,
537
+ 'classFactor' => 16,
538
+ 'stateTable' => UTF8_st,
539
+ 'charLenTable' => UTF8CharLenTable,
540
+ 'name' => 'UTF-8'
541
+ }
542
+ end