actionmailer 2.3.18 → 3.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of actionmailer might be problematic. Click here for more details.

Files changed (154) hide show
  1. data/CHANGELOG +13 -16
  2. data/README +39 -21
  3. data/lib/action_mailer/adv_attr_accessor.rb +14 -18
  4. data/lib/action_mailer/base.rb +437 -560
  5. data/lib/action_mailer/collector.rb +36 -0
  6. data/lib/action_mailer/delivery_methods.rb +86 -0
  7. data/lib/action_mailer/deprecated_api.rb +139 -0
  8. data/lib/action_mailer/mail_helper.rb +25 -13
  9. data/lib/action_mailer/old_api.rb +248 -0
  10. data/lib/action_mailer/quoting.rb +4 -2
  11. data/lib/action_mailer/railtie.rb +25 -0
  12. data/lib/action_mailer/railties/subscriber.rb +20 -0
  13. data/lib/action_mailer/test_case.rb +4 -6
  14. data/lib/action_mailer/test_helper.rb +0 -1
  15. data/lib/action_mailer/tmail_compat.rb +34 -0
  16. data/lib/action_mailer/version.rb +3 -3
  17. data/lib/action_mailer.rb +25 -30
  18. metadata +41 -161
  19. data/Rakefile +0 -97
  20. data/install.rb +0 -30
  21. data/lib/action_mailer/helpers.rb +0 -113
  22. data/lib/action_mailer/part.rb +0 -107
  23. data/lib/action_mailer/part_container.rb +0 -55
  24. data/lib/action_mailer/utils.rb +0 -7
  25. data/lib/action_mailer/vendor/text-format-0.6.3/text/format.rb +0 -1466
  26. data/lib/action_mailer/vendor/text_format.rb +0 -10
  27. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/Makefile +0 -18
  28. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/address.rb +0 -392
  29. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/attachments.rb +0 -65
  30. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/base64.rb +0 -46
  31. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/compat.rb +0 -41
  32. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/config.rb +0 -67
  33. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/core_extensions.rb +0 -63
  34. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/encode.rb +0 -590
  35. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/header.rb +0 -962
  36. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/index.rb +0 -9
  37. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/interface.rb +0 -1162
  38. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/loader.rb +0 -3
  39. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/mail.rb +0 -578
  40. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/mailbox.rb +0 -496
  41. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/main.rb +0 -6
  42. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/mbox.rb +0 -3
  43. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/net.rb +0 -250
  44. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/obsolete.rb +0 -132
  45. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/parser.rb +0 -1060
  46. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/parser.y +0 -416
  47. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/port.rb +0 -379
  48. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/quoting.rb +0 -164
  49. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/require_arch.rb +0 -58
  50. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/scanner.rb +0 -49
  51. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/scanner_r.rb +0 -262
  52. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/stringio.rb +0 -280
  53. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/utils.rb +0 -362
  54. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/COPYING +0 -504
  55. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/README +0 -12
  56. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +0 -927
  57. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +0 -42
  58. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +0 -238
  59. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +0 -112
  60. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +0 -75
  61. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +0 -64
  62. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +0 -42
  63. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +0 -89
  64. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +0 -244
  65. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +0 -88
  66. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +0 -596
  67. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +0 -42
  68. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +0 -430
  69. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +0 -42
  70. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +0 -474
  71. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +0 -42
  72. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +0 -289
  73. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +0 -570
  74. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +0 -229
  75. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +0 -229
  76. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +0 -330
  77. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +0 -227
  78. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +0 -202
  79. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +0 -226
  80. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +0 -201
  81. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +0 -147
  82. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +0 -89
  83. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +0 -45
  84. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +0 -542
  85. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +0 -124
  86. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +0 -56
  87. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +0 -88
  88. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +0 -168
  89. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +0 -87
  90. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet.rb +0 -67
  91. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/version.rb +0 -39
  92. data/lib/action_mailer/vendor/tmail-1.2.7/tmail.rb +0 -6
  93. data/lib/action_mailer/vendor/tmail.rb +0 -17
  94. data/lib/actionmailer.rb +0 -2
  95. data/test/abstract_unit.rb +0 -62
  96. data/test/asset_host_test.rb +0 -54
  97. data/test/delivery_method_test.rb +0 -51
  98. data/test/fixtures/asset_host_mailer/email_with_asset.html.erb +0 -1
  99. data/test/fixtures/auto_layout_mailer/hello.html.erb +0 -1
  100. data/test/fixtures/auto_layout_mailer/multipart.text.html.erb +0 -1
  101. data/test/fixtures/auto_layout_mailer/multipart.text.plain.erb +0 -1
  102. data/test/fixtures/explicit_layout_mailer/logout.html.erb +0 -1
  103. data/test/fixtures/explicit_layout_mailer/signup.html.erb +0 -1
  104. data/test/fixtures/first_mailer/share.erb +0 -1
  105. data/test/fixtures/helper_mailer/use_example_helper.erb +0 -1
  106. data/test/fixtures/helper_mailer/use_helper.erb +0 -1
  107. data/test/fixtures/helper_mailer/use_helper_method.erb +0 -1
  108. data/test/fixtures/helper_mailer/use_mail_helper.erb +0 -5
  109. data/test/fixtures/helpers/example_helper.rb +0 -5
  110. data/test/fixtures/layouts/auto_layout_mailer.html.erb +0 -1
  111. data/test/fixtures/layouts/auto_layout_mailer.text.erb +0 -1
  112. data/test/fixtures/layouts/spam.html.erb +0 -1
  113. data/test/fixtures/path.with.dots/funky_path_mailer/multipart_with_template_path_with_dots.erb +0 -1
  114. data/test/fixtures/raw_email +0 -14
  115. data/test/fixtures/raw_email10 +0 -20
  116. data/test/fixtures/raw_email12 +0 -32
  117. data/test/fixtures/raw_email13 +0 -29
  118. data/test/fixtures/raw_email2 +0 -114
  119. data/test/fixtures/raw_email3 +0 -70
  120. data/test/fixtures/raw_email4 +0 -59
  121. data/test/fixtures/raw_email5 +0 -19
  122. data/test/fixtures/raw_email6 +0 -20
  123. data/test/fixtures/raw_email7 +0 -66
  124. data/test/fixtures/raw_email8 +0 -47
  125. data/test/fixtures/raw_email9 +0 -28
  126. data/test/fixtures/raw_email_quoted_with_0d0a +0 -14
  127. data/test/fixtures/raw_email_with_invalid_characters_in_content_type +0 -104
  128. data/test/fixtures/raw_email_with_nested_attachment +0 -100
  129. data/test/fixtures/raw_email_with_partially_quoted_subject +0 -14
  130. data/test/fixtures/second_mailer/share.erb +0 -1
  131. data/test/fixtures/templates/signed_up.erb +0 -3
  132. data/test/fixtures/test_mailer/_subtemplate.text.plain.erb +0 -1
  133. data/test/fixtures/test_mailer/body_ivar.erb +0 -2
  134. data/test/fixtures/test_mailer/custom_templating_extension.text.html.haml +0 -6
  135. data/test/fixtures/test_mailer/custom_templating_extension.text.plain.haml +0 -6
  136. data/test/fixtures/test_mailer/implicitly_multipart_example.ignored.erb +0 -1
  137. data/test/fixtures/test_mailer/implicitly_multipart_example.rhtml.bak +0 -1
  138. data/test/fixtures/test_mailer/implicitly_multipart_example.text.html.erb +0 -10
  139. data/test/fixtures/test_mailer/implicitly_multipart_example.text.html.erb~ +0 -10
  140. data/test/fixtures/test_mailer/implicitly_multipart_example.text.plain.erb +0 -2
  141. data/test/fixtures/test_mailer/implicitly_multipart_example.text.yaml.erb +0 -1
  142. data/test/fixtures/test_mailer/included_subtemplate.text.plain.erb +0 -1
  143. data/test/fixtures/test_mailer/rxml_template.builder +0 -2
  144. data/test/fixtures/test_mailer/rxml_template.rxml +0 -2
  145. data/test/fixtures/test_mailer/signed_up.html.erb +0 -3
  146. data/test/fixtures/test_mailer/signed_up_with_url.erb +0 -5
  147. data/test/mail_helper_test.rb +0 -95
  148. data/test/mail_layout_test.rb +0 -123
  149. data/test/mail_render_test.rb +0 -116
  150. data/test/mail_service_test.rb +0 -1145
  151. data/test/quoting_test.rb +0 -105
  152. data/test/test_helper_test.rb +0 -129
  153. data/test/tmail_test.rb +0 -22
  154. data/test/url_test.rb +0 -76
@@ -1,542 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is mozilla.org code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
- module CharDet
30
- # BIG5
31
-
32
- BIG5_cls = [
33
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
34
- 1,1,1,1,1,1,0,0, # 08 - 0f
35
- 1,1,1,1,1,1,1,1, # 10 - 17
36
- 1,1,1,0,1,1,1,1, # 18 - 1f
37
- 1,1,1,1,1,1,1,1, # 20 - 27
38
- 1,1,1,1,1,1,1,1, # 28 - 2f
39
- 1,1,1,1,1,1,1,1, # 30 - 37
40
- 1,1,1,1,1,1,1,1, # 38 - 3f
41
- 2,2,2,2,2,2,2,2, # 40 - 47
42
- 2,2,2,2,2,2,2,2, # 48 - 4f
43
- 2,2,2,2,2,2,2,2, # 50 - 57
44
- 2,2,2,2,2,2,2,2, # 58 - 5f
45
- 2,2,2,2,2,2,2,2, # 60 - 67
46
- 2,2,2,2,2,2,2,2, # 68 - 6f
47
- 2,2,2,2,2,2,2,2, # 70 - 77
48
- 2,2,2,2,2,2,2,1, # 78 - 7f
49
- 4,4,4,4,4,4,4,4, # 80 - 87
50
- 4,4,4,4,4,4,4,4, # 88 - 8f
51
- 4,4,4,4,4,4,4,4, # 90 - 97
52
- 4,4,4,4,4,4,4,4, # 98 - 9f
53
- 4,3,3,3,3,3,3,3, # a0 - a7
54
- 3,3,3,3,3,3,3,3, # a8 - af
55
- 3,3,3,3,3,3,3,3, # b0 - b7
56
- 3,3,3,3,3,3,3,3, # b8 - bf
57
- 3,3,3,3,3,3,3,3, # c0 - c7
58
- 3,3,3,3,3,3,3,3, # c8 - cf
59
- 3,3,3,3,3,3,3,3, # d0 - d7
60
- 3,3,3,3,3,3,3,3, # d8 - df
61
- 3,3,3,3,3,3,3,3, # e0 - e7
62
- 3,3,3,3,3,3,3,3, # e8 - ef
63
- 3,3,3,3,3,3,3,3, # f0 - f7
64
- 3,3,3,3,3,3,3,0 # f8 - ff
65
- ]
66
-
67
- BIG5_st = [
68
- EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
69
- EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
70
- EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
71
- ]
72
-
73
- Big5CharLenTable = [0, 1, 1, 2, 0]
74
-
75
- Big5SMModel = {'classTable' => BIG5_cls,
76
- 'classFactor' => 5,
77
- 'stateTable' => BIG5_st,
78
- 'charLenTable' => Big5CharLenTable,
79
- 'name' => 'Big5'
80
- }
81
-
82
- # EUC-JP
83
-
84
- EUCJP_cls = [
85
- 4,4,4,4,4,4,4,4, # 00 - 07
86
- 4,4,4,4,4,4,5,5, # 08 - 0f
87
- 4,4,4,4,4,4,4,4, # 10 - 17
88
- 4,4,4,5,4,4,4,4, # 18 - 1f
89
- 4,4,4,4,4,4,4,4, # 20 - 27
90
- 4,4,4,4,4,4,4,4, # 28 - 2f
91
- 4,4,4,4,4,4,4,4, # 30 - 37
92
- 4,4,4,4,4,4,4,4, # 38 - 3f
93
- 4,4,4,4,4,4,4,4, # 40 - 47
94
- 4,4,4,4,4,4,4,4, # 48 - 4f
95
- 4,4,4,4,4,4,4,4, # 50 - 57
96
- 4,4,4,4,4,4,4,4, # 58 - 5f
97
- 4,4,4,4,4,4,4,4, # 60 - 67
98
- 4,4,4,4,4,4,4,4, # 68 - 6f
99
- 4,4,4,4,4,4,4,4, # 70 - 77
100
- 4,4,4,4,4,4,4,4, # 78 - 7f
101
- 5,5,5,5,5,5,5,5, # 80 - 87
102
- 5,5,5,5,5,5,1,3, # 88 - 8f
103
- 5,5,5,5,5,5,5,5, # 90 - 97
104
- 5,5,5,5,5,5,5,5, # 98 - 9f
105
- 5,2,2,2,2,2,2,2, # a0 - a7
106
- 2,2,2,2,2,2,2,2, # a8 - af
107
- 2,2,2,2,2,2,2,2, # b0 - b7
108
- 2,2,2,2,2,2,2,2, # b8 - bf
109
- 2,2,2,2,2,2,2,2, # c0 - c7
110
- 2,2,2,2,2,2,2,2, # c8 - cf
111
- 2,2,2,2,2,2,2,2, # d0 - d7
112
- 2,2,2,2,2,2,2,2, # d8 - df
113
- 0,0,0,0,0,0,0,0, # e0 - e7
114
- 0,0,0,0,0,0,0,0, # e8 - ef
115
- 0,0,0,0,0,0,0,0, # f0 - f7
116
- 0,0,0,0,0,0,0,5 # f8 - ff
117
- ]
118
-
119
- EUCJP_st = [
120
- 3, 4, 3, 5,EStart,EError,EError,EError,#00-07
121
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
122
- EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
123
- EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
124
- 3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
125
- ]
126
-
127
- EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
128
-
129
- EUCJPSMModel = {'classTable' => EUCJP_cls,
130
- 'classFactor' => 6,
131
- 'stateTable' => EUCJP_st,
132
- 'charLenTable' => EUCJPCharLenTable,
133
- 'name' => 'EUC-JP'
134
- }
135
-
136
- # EUC-KR
137
-
138
- EUCKR_cls = [
139
- 1,1,1,1,1,1,1,1, # 00 - 07
140
- 1,1,1,1,1,1,0,0, # 08 - 0f
141
- 1,1,1,1,1,1,1,1, # 10 - 17
142
- 1,1,1,0,1,1,1,1, # 18 - 1f
143
- 1,1,1,1,1,1,1,1, # 20 - 27
144
- 1,1,1,1,1,1,1,1, # 28 - 2f
145
- 1,1,1,1,1,1,1,1, # 30 - 37
146
- 1,1,1,1,1,1,1,1, # 38 - 3f
147
- 1,1,1,1,1,1,1,1, # 40 - 47
148
- 1,1,1,1,1,1,1,1, # 48 - 4f
149
- 1,1,1,1,1,1,1,1, # 50 - 57
150
- 1,1,1,1,1,1,1,1, # 58 - 5f
151
- 1,1,1,1,1,1,1,1, # 60 - 67
152
- 1,1,1,1,1,1,1,1, # 68 - 6f
153
- 1,1,1,1,1,1,1,1, # 70 - 77
154
- 1,1,1,1,1,1,1,1, # 78 - 7f
155
- 0,0,0,0,0,0,0,0, # 80 - 87
156
- 0,0,0,0,0,0,0,0, # 88 - 8f
157
- 0,0,0,0,0,0,0,0, # 90 - 97
158
- 0,0,0,0,0,0,0,0, # 98 - 9f
159
- 0,2,2,2,2,2,2,2, # a0 - a7
160
- 2,2,2,2,2,3,3,3, # a8 - af
161
- 2,2,2,2,2,2,2,2, # b0 - b7
162
- 2,2,2,2,2,2,2,2, # b8 - bf
163
- 2,2,2,2,2,2,2,2, # c0 - c7
164
- 2,3,2,2,2,2,2,2, # c8 - cf
165
- 2,2,2,2,2,2,2,2, # d0 - d7
166
- 2,2,2,2,2,2,2,2, # d8 - df
167
- 2,2,2,2,2,2,2,2, # e0 - e7
168
- 2,2,2,2,2,2,2,2, # e8 - ef
169
- 2,2,2,2,2,2,2,2, # f0 - f7
170
- 2,2,2,2,2,2,2,0 # f8 - ff
171
- ]
172
-
173
- EUCKR_st = [
174
- EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
175
- EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
176
- ]
177
-
178
- EUCKRCharLenTable = [0, 1, 2, 0]
179
-
180
- EUCKRSMModel = {'classTable' => EUCKR_cls,
181
- 'classFactor' => 4,
182
- 'stateTable' => EUCKR_st,
183
- 'charLenTable' => EUCKRCharLenTable,
184
- 'name' => 'EUC-KR'
185
- }
186
-
187
- # EUC-TW
188
-
189
- EUCTW_cls = [
190
- 2,2,2,2,2,2,2,2, # 00 - 07
191
- 2,2,2,2,2,2,0,0, # 08 - 0f
192
- 2,2,2,2,2,2,2,2, # 10 - 17
193
- 2,2,2,0,2,2,2,2, # 18 - 1f
194
- 2,2,2,2,2,2,2,2, # 20 - 27
195
- 2,2,2,2,2,2,2,2, # 28 - 2f
196
- 2,2,2,2,2,2,2,2, # 30 - 37
197
- 2,2,2,2,2,2,2,2, # 38 - 3f
198
- 2,2,2,2,2,2,2,2, # 40 - 47
199
- 2,2,2,2,2,2,2,2, # 48 - 4f
200
- 2,2,2,2,2,2,2,2, # 50 - 57
201
- 2,2,2,2,2,2,2,2, # 58 - 5f
202
- 2,2,2,2,2,2,2,2, # 60 - 67
203
- 2,2,2,2,2,2,2,2, # 68 - 6f
204
- 2,2,2,2,2,2,2,2, # 70 - 77
205
- 2,2,2,2,2,2,2,2, # 78 - 7f
206
- 0,0,0,0,0,0,0,0, # 80 - 87
207
- 0,0,0,0,0,0,6,0, # 88 - 8f
208
- 0,0,0,0,0,0,0,0, # 90 - 97
209
- 0,0,0,0,0,0,0,0, # 98 - 9f
210
- 0,3,4,4,4,4,4,4, # a0 - a7
211
- 5,5,1,1,1,1,1,1, # a8 - af
212
- 1,1,1,1,1,1,1,1, # b0 - b7
213
- 1,1,1,1,1,1,1,1, # b8 - bf
214
- 1,1,3,1,3,3,3,3, # c0 - c7
215
- 3,3,3,3,3,3,3,3, # c8 - cf
216
- 3,3,3,3,3,3,3,3, # d0 - d7
217
- 3,3,3,3,3,3,3,3, # d8 - df
218
- 3,3,3,3,3,3,3,3, # e0 - e7
219
- 3,3,3,3,3,3,3,3, # e8 - ef
220
- 3,3,3,3,3,3,3,3, # f0 - f7
221
- 3,3,3,3,3,3,3,0 # f8 - ff
222
- ]
223
-
224
- EUCTW_st = [
225
- EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
226
- EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
227
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EStart,EError,#10-17
228
- EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
229
- 5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
230
- EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
231
- ]
232
-
233
- EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
234
-
235
- EUCTWSMModel = {'classTable' => EUCTW_cls,
236
- 'classFactor' => 7,
237
- 'stateTable' => EUCTW_st,
238
- 'charLenTable' => EUCTWCharLenTable,
239
- 'name' => 'x-euc-tw'
240
- }
241
-
242
- # GB2312
243
-
244
- GB2312_cls = [
245
- 1,1,1,1,1,1,1,1, # 00 - 07
246
- 1,1,1,1,1,1,0,0, # 08 - 0f
247
- 1,1,1,1,1,1,1,1, # 10 - 17
248
- 1,1,1,0,1,1,1,1, # 18 - 1f
249
- 1,1,1,1,1,1,1,1, # 20 - 27
250
- 1,1,1,1,1,1,1,1, # 28 - 2f
251
- 3,3,3,3,3,3,3,3, # 30 - 37
252
- 3,3,1,1,1,1,1,1, # 38 - 3f
253
- 2,2,2,2,2,2,2,2, # 40 - 47
254
- 2,2,2,2,2,2,2,2, # 48 - 4f
255
- 2,2,2,2,2,2,2,2, # 50 - 57
256
- 2,2,2,2,2,2,2,2, # 58 - 5f
257
- 2,2,2,2,2,2,2,2, # 60 - 67
258
- 2,2,2,2,2,2,2,2, # 68 - 6f
259
- 2,2,2,2,2,2,2,2, # 70 - 77
260
- 2,2,2,2,2,2,2,4, # 78 - 7f
261
- 5,6,6,6,6,6,6,6, # 80 - 87
262
- 6,6,6,6,6,6,6,6, # 88 - 8f
263
- 6,6,6,6,6,6,6,6, # 90 - 97
264
- 6,6,6,6,6,6,6,6, # 98 - 9f
265
- 6,6,6,6,6,6,6,6, # a0 - a7
266
- 6,6,6,6,6,6,6,6, # a8 - af
267
- 6,6,6,6,6,6,6,6, # b0 - b7
268
- 6,6,6,6,6,6,6,6, # b8 - bf
269
- 6,6,6,6,6,6,6,6, # c0 - c7
270
- 6,6,6,6,6,6,6,6, # c8 - cf
271
- 6,6,6,6,6,6,6,6, # d0 - d7
272
- 6,6,6,6,6,6,6,6, # d8 - df
273
- 6,6,6,6,6,6,6,6, # e0 - e7
274
- 6,6,6,6,6,6,6,6, # e8 - ef
275
- 6,6,6,6,6,6,6,6, # f0 - f7
276
- 6,6,6,6,6,6,6,0 # f8 - ff
277
- ]
278
-
279
- GB2312_st = [
280
- EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
281
- EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
282
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
283
- 4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
284
- EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
285
- EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
286
- ]
287
-
288
- # To be accurate, the length of class 6 can be either 2 or 4.
289
- # But it is not necessary to discriminate between the two since
290
- # it is used for frequency analysis only, and we are validing
291
- # each code range there as well. So it is safe to set it to be
292
- # 2 here.
293
- GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
294
-
295
- GB2312SMModel = {'classTable' => GB2312_cls,
296
- 'classFactor' => 7,
297
- 'stateTable' => GB2312_st,
298
- 'charLenTable' => GB2312CharLenTable,
299
- 'name' => 'GB2312'
300
- }
301
-
302
- # Shift_JIS
303
-
304
- SJIS_cls = [
305
- 1,1,1,1,1,1,1,1, # 00 - 07
306
- 1,1,1,1,1,1,0,0, # 08 - 0f
307
- 1,1,1,1,1,1,1,1, # 10 - 17
308
- 1,1,1,0,1,1,1,1, # 18 - 1f
309
- 1,1,1,1,1,1,1,1, # 20 - 27
310
- 1,1,1,1,1,1,1,1, # 28 - 2f
311
- 1,1,1,1,1,1,1,1, # 30 - 37
312
- 1,1,1,1,1,1,1,1, # 38 - 3f
313
- 2,2,2,2,2,2,2,2, # 40 - 47
314
- 2,2,2,2,2,2,2,2, # 48 - 4f
315
- 2,2,2,2,2,2,2,2, # 50 - 57
316
- 2,2,2,2,2,2,2,2, # 58 - 5f
317
- 2,2,2,2,2,2,2,2, # 60 - 67
318
- 2,2,2,2,2,2,2,2, # 68 - 6f
319
- 2,2,2,2,2,2,2,2, # 70 - 77
320
- 2,2,2,2,2,2,2,1, # 78 - 7f
321
- 3,3,3,3,3,3,3,3, # 80 - 87
322
- 3,3,3,3,3,3,3,3, # 88 - 8f
323
- 3,3,3,3,3,3,3,3, # 90 - 97
324
- 3,3,3,3,3,3,3,3, # 98 - 9f
325
- #0xa0 is illegal in sjis encoding, but some pages does
326
- #contain such byte. We need to be more error forgiven.
327
- 2,2,2,2,2,2,2,2, # a0 - a7
328
- 2,2,2,2,2,2,2,2, # a8 - af
329
- 2,2,2,2,2,2,2,2, # b0 - b7
330
- 2,2,2,2,2,2,2,2, # b8 - bf
331
- 2,2,2,2,2,2,2,2, # c0 - c7
332
- 2,2,2,2,2,2,2,2, # c8 - cf
333
- 2,2,2,2,2,2,2,2, # d0 - d7
334
- 2,2,2,2,2,2,2,2, # d8 - df
335
- 3,3,3,3,3,3,3,3, # e0 - e7
336
- 3,3,3,3,3,4,4,4, # e8 - ef
337
- 4,4,4,4,4,4,4,4, # f0 - f7
338
- 4,4,4,4,4,0,0,0 # f8 - ff
339
- ]
340
-
341
- SJIS_st = [
342
- EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
343
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
344
- EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
345
- ]
346
-
347
- SJISCharLenTable = [0, 1, 1, 2, 0, 0]
348
-
349
- SJISSMModel = {'classTable' => SJIS_cls,
350
- 'classFactor' => 6,
351
- 'stateTable' => SJIS_st,
352
- 'charLenTable' => SJISCharLenTable,
353
- 'name' => 'Shift_JIS'
354
- }
355
-
356
- # UCS2-BE
357
-
358
- UCS2BE_cls = [
359
- 0,0,0,0,0,0,0,0, # 00 - 07
360
- 0,0,1,0,0,2,0,0, # 08 - 0f
361
- 0,0,0,0,0,0,0,0, # 10 - 17
362
- 0,0,0,3,0,0,0,0, # 18 - 1f
363
- 0,0,0,0,0,0,0,0, # 20 - 27
364
- 0,3,3,3,3,3,0,0, # 28 - 2f
365
- 0,0,0,0,0,0,0,0, # 30 - 37
366
- 0,0,0,0,0,0,0,0, # 38 - 3f
367
- 0,0,0,0,0,0,0,0, # 40 - 47
368
- 0,0,0,0,0,0,0,0, # 48 - 4f
369
- 0,0,0,0,0,0,0,0, # 50 - 57
370
- 0,0,0,0,0,0,0,0, # 58 - 5f
371
- 0,0,0,0,0,0,0,0, # 60 - 67
372
- 0,0,0,0,0,0,0,0, # 68 - 6f
373
- 0,0,0,0,0,0,0,0, # 70 - 77
374
- 0,0,0,0,0,0,0,0, # 78 - 7f
375
- 0,0,0,0,0,0,0,0, # 80 - 87
376
- 0,0,0,0,0,0,0,0, # 88 - 8f
377
- 0,0,0,0,0,0,0,0, # 90 - 97
378
- 0,0,0,0,0,0,0,0, # 98 - 9f
379
- 0,0,0,0,0,0,0,0, # a0 - a7
380
- 0,0,0,0,0,0,0,0, # a8 - af
381
- 0,0,0,0,0,0,0,0, # b0 - b7
382
- 0,0,0,0,0,0,0,0, # b8 - bf
383
- 0,0,0,0,0,0,0,0, # c0 - c7
384
- 0,0,0,0,0,0,0,0, # c8 - cf
385
- 0,0,0,0,0,0,0,0, # d0 - d7
386
- 0,0,0,0,0,0,0,0, # d8 - df
387
- 0,0,0,0,0,0,0,0, # e0 - e7
388
- 0,0,0,0,0,0,0,0, # e8 - ef
389
- 0,0,0,0,0,0,0,0, # f0 - f7
390
- 0,0,0,0,0,0,4,5 # f8 - ff
391
- ]
392
-
393
- UCS2BE_st = [
394
- 5, 7, 7,EError, 4, 3,EError,EError,#00-07
395
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
396
- EItsMe,EItsMe, 6, 6, 6, 6,EError,EError,#10-17
397
- 6, 6, 6, 6, 6,EItsMe, 6, 6,#18-1f
398
- 6, 6, 6, 6, 5, 7, 7,EError,#20-27
399
- 5, 8, 6, 6,EError, 6, 6, 6,#28-2f
400
- 6, 6, 6, 6,EError,EError,EStart,EStart#30-37
401
- ]
402
-
403
- UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
404
-
405
- UCS2BESMModel = {'classTable' => UCS2BE_cls,
406
- 'classFactor' => 6,
407
- 'stateTable' => UCS2BE_st,
408
- 'charLenTable' => UCS2BECharLenTable,
409
- 'name' => 'UTF-16BE'
410
- }
411
-
412
- # UCS2-LE
413
-
414
- UCS2LE_cls = [
415
- 0,0,0,0,0,0,0,0, # 00 - 07
416
- 0,0,1,0,0,2,0,0, # 08 - 0f
417
- 0,0,0,0,0,0,0,0, # 10 - 17
418
- 0,0,0,3,0,0,0,0, # 18 - 1f
419
- 0,0,0,0,0,0,0,0, # 20 - 27
420
- 0,3,3,3,3,3,0,0, # 28 - 2f
421
- 0,0,0,0,0,0,0,0, # 30 - 37
422
- 0,0,0,0,0,0,0,0, # 38 - 3f
423
- 0,0,0,0,0,0,0,0, # 40 - 47
424
- 0,0,0,0,0,0,0,0, # 48 - 4f
425
- 0,0,0,0,0,0,0,0, # 50 - 57
426
- 0,0,0,0,0,0,0,0, # 58 - 5f
427
- 0,0,0,0,0,0,0,0, # 60 - 67
428
- 0,0,0,0,0,0,0,0, # 68 - 6f
429
- 0,0,0,0,0,0,0,0, # 70 - 77
430
- 0,0,0,0,0,0,0,0, # 78 - 7f
431
- 0,0,0,0,0,0,0,0, # 80 - 87
432
- 0,0,0,0,0,0,0,0, # 88 - 8f
433
- 0,0,0,0,0,0,0,0, # 90 - 97
434
- 0,0,0,0,0,0,0,0, # 98 - 9f
435
- 0,0,0,0,0,0,0,0, # a0 - a7
436
- 0,0,0,0,0,0,0,0, # a8 - af
437
- 0,0,0,0,0,0,0,0, # b0 - b7
438
- 0,0,0,0,0,0,0,0, # b8 - bf
439
- 0,0,0,0,0,0,0,0, # c0 - c7
440
- 0,0,0,0,0,0,0,0, # c8 - cf
441
- 0,0,0,0,0,0,0,0, # d0 - d7
442
- 0,0,0,0,0,0,0,0, # d8 - df
443
- 0,0,0,0,0,0,0,0, # e0 - e7
444
- 0,0,0,0,0,0,0,0, # e8 - ef
445
- 0,0,0,0,0,0,0,0, # f0 - f7
446
- 0,0,0,0,0,0,4,5 # f8 - ff
447
- ]
448
-
449
- UCS2LE_st = [
450
- 6, 6, 7, 6, 4, 3,EError,EError,#00-07
451
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
452
- EItsMe,EItsMe, 5, 5, 5,EError,EItsMe,EError,#10-17
453
- 5, 5, 5,EError, 5,EError, 6, 6,#18-1f
454
- 7, 6, 8, 8, 5, 5, 5,EError,#20-27
455
- 5, 5, 5,EError,EError,EError, 5, 5,#28-2f
456
- 5, 5, 5,EError, 5,EError,EStart,EStart#30-37
457
- ]
458
-
459
- UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
460
-
461
- UCS2LESMModel = {'classTable' => UCS2LE_cls,
462
- 'classFactor' => 6,
463
- 'stateTable' => UCS2LE_st,
464
- 'charLenTable' => UCS2LECharLenTable,
465
- 'name' => 'UTF-16LE'
466
- }
467
-
468
- # UTF-8
469
-
470
- UTF8_cls = [
471
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
472
- 1,1,1,1,1,1,0,0, # 08 - 0f
473
- 1,1,1,1,1,1,1,1, # 10 - 17
474
- 1,1,1,0,1,1,1,1, # 18 - 1f
475
- 1,1,1,1,1,1,1,1, # 20 - 27
476
- 1,1,1,1,1,1,1,1, # 28 - 2f
477
- 1,1,1,1,1,1,1,1, # 30 - 37
478
- 1,1,1,1,1,1,1,1, # 38 - 3f
479
- 1,1,1,1,1,1,1,1, # 40 - 47
480
- 1,1,1,1,1,1,1,1, # 48 - 4f
481
- 1,1,1,1,1,1,1,1, # 50 - 57
482
- 1,1,1,1,1,1,1,1, # 58 - 5f
483
- 1,1,1,1,1,1,1,1, # 60 - 67
484
- 1,1,1,1,1,1,1,1, # 68 - 6f
485
- 1,1,1,1,1,1,1,1, # 70 - 77
486
- 1,1,1,1,1,1,1,1, # 78 - 7f
487
- 2,2,2,2,3,3,3,3, # 80 - 87
488
- 4,4,4,4,4,4,4,4, # 88 - 8f
489
- 4,4,4,4,4,4,4,4, # 90 - 97
490
- 4,4,4,4,4,4,4,4, # 98 - 9f
491
- 5,5,5,5,5,5,5,5, # a0 - a7
492
- 5,5,5,5,5,5,5,5, # a8 - af
493
- 5,5,5,5,5,5,5,5, # b0 - b7
494
- 5,5,5,5,5,5,5,5, # b8 - bf
495
- 0,0,6,6,6,6,6,6, # c0 - c7
496
- 6,6,6,6,6,6,6,6, # c8 - cf
497
- 6,6,6,6,6,6,6,6, # d0 - d7
498
- 6,6,6,6,6,6,6,6, # d8 - df
499
- 7,8,8,8,8,8,8,8, # e0 - e7
500
- 8,8,8,8,8,9,8,8, # e8 - ef
501
- 10,11,11,11,11,11,11,11, # f0 - f7
502
- 12,13,13,13,14,15,0,0 # f8 - ff
503
- ]
504
-
505
- UTF8_st = [
506
- EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
507
- 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
508
- EError,EError,EError,EError,EError,EError,EError,EError,#10-17
509
- EError,EError,EError,EError,EError,EError,EError,EError,#18-1f
510
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#20-27
511
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#28-2f
512
- EError,EError, 5, 5, 5, 5,EError,EError,#30-37
513
- EError,EError,EError,EError,EError,EError,EError,EError,#38-3f
514
- EError,EError,EError, 5, 5, 5,EError,EError,#40-47
515
- EError,EError,EError,EError,EError,EError,EError,EError,#48-4f
516
- EError,EError, 7, 7, 7, 7,EError,EError,#50-57
517
- EError,EError,EError,EError,EError,EError,EError,EError,#58-5f
518
- EError,EError,EError,EError, 7, 7,EError,EError,#60-67
519
- EError,EError,EError,EError,EError,EError,EError,EError,#68-6f
520
- EError,EError, 9, 9, 9, 9,EError,EError,#70-77
521
- EError,EError,EError,EError,EError,EError,EError,EError,#78-7f
522
- EError,EError,EError,EError,EError, 9,EError,EError,#80-87
523
- EError,EError,EError,EError,EError,EError,EError,EError,#88-8f
524
- EError,EError, 12, 12, 12, 12,EError,EError,#90-97
525
- EError,EError,EError,EError,EError,EError,EError,EError,#98-9f
526
- EError,EError,EError,EError,EError, 12,EError,EError,#a0-a7
527
- EError,EError,EError,EError,EError,EError,EError,EError,#a8-af
528
- EError,EError, 12, 12, 12,EError,EError,EError,#b0-b7
529
- EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
530
- EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
531
- EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
532
- ]
533
-
534
- UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
535
-
536
- UTF8SMModel = {'classTable' => UTF8_cls,
537
- 'classFactor' => 16,
538
- 'stateTable' => UTF8_st,
539
- 'charLenTable' => UTF8CharLenTable,
540
- 'name' => 'UTF-8'
541
- }
542
- end
@@ -1,124 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
29
-
30
- module CharDet
31
- SAMPLE_SIZE = 64
32
- SB_ENOUGH_REL_THRESHOLD = 1024
33
- POSITIVE_SHORTCUT_THRESHOLD = 0.95
34
- NEGATIVE_SHORTCUT_THRESHOLD = 0.05
35
- SYMBOL_CAT_ORDER = 250
36
- NUMBER_OF_SEQ_CAT = 4
37
- POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
38
- #NEGATIVE_CAT = 0
39
-
40
- class SingleByteCharSetProber < CharSetProber
41
- def initialize(model, reversed=false, nameProber=nil)
42
- super()
43
- @_mModel = model
44
- @_mReversed = reversed # TRUE if we need to reverse every pair in the model lookup
45
- @_mNameProber = nameProber # Optional auxiliary prober for name decision
46
- reset()
47
- end
48
-
49
- def reset
50
- super()
51
- @_mLastOrder = 255 # char order of last character
52
- @_mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
53
- @_mTotalSeqs = 0
54
- @_mTotalChar = 0
55
- @_mFreqChar = 0 # characters that fall in our sampling range
56
- end
57
-
58
- def get_charset_name
59
- if @_mNameProber
60
- return @_mNameProber.get_charset_name()
61
- else
62
- return @_mModel['charsetName']
63
- end
64
- end
65
-
66
- def feed(aBuf)
67
- if not @_mModel['keepEnglishLetter']
68
- aBuf = filter_without_english_letters(aBuf)
69
- end
70
- aLen = aBuf.length
71
- if not aLen
72
- return get_state()
73
- end
74
- aBuf.each_byte do |b|
75
- c = b.chr
76
- order = @_mModel['charToOrderMap'][c[0]]
77
- if order < SYMBOL_CAT_ORDER
78
- @_mTotalChar += 1
79
- end
80
- if order < SAMPLE_SIZE
81
- @_mFreqChar += 1
82
- if @_mLastOrder < SAMPLE_SIZE
83
- @_mTotalSeqs += 1
84
- if not @_mReversed
85
- @_mSeqCounters[@_mModel['precedenceMatrix'][(@_mLastOrder * SAMPLE_SIZE) + order]] += 1
86
- else # reverse the order of the letters in the lookup
87
- @_mSeqCounters[@_mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + @_mLastOrder]] += 1
88
- end
89
- end
90
- end
91
- @_mLastOrder = order
92
- end
93
-
94
- if get_state() == EDetecting
95
- if @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
96
- cf = get_confidence()
97
- if cf > POSITIVE_SHORTCUT_THRESHOLD
98
- $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
99
- @_mState = EFoundIt
100
- elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
101
- $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
102
- @_mState = ENotMe
103
- end
104
- end
105
- end
106
-
107
- return get_state()
108
- end
109
-
110
- def get_confidence
111
- r = 0.01
112
- if @_mTotalSeqs > 0
113
- # print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
114
- r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
115
- # print r, self._mFreqChar, self._mTotalChar
116
- r = r * @_mFreqChar / @_mTotalChar
117
- if r >= 1.0
118
- r = 0.99
119
- end
120
- end
121
- return r
122
- end
123
- end
124
- end