actionmailer-rack-upgrade 2.3.14.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. data/CHANGELOG +387 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +149 -0
  4. data/Rakefile +97 -0
  5. data/install.rb +30 -0
  6. data/lib/action_mailer/adv_attr_accessor.rb +30 -0
  7. data/lib/action_mailer/base.rb +739 -0
  8. data/lib/action_mailer/helpers.rb +113 -0
  9. data/lib/action_mailer/mail_helper.rb +17 -0
  10. data/lib/action_mailer/part.rb +107 -0
  11. data/lib/action_mailer/part_container.rb +55 -0
  12. data/lib/action_mailer/quoting.rb +62 -0
  13. data/lib/action_mailer/test_case.rb +64 -0
  14. data/lib/action_mailer/test_helper.rb +68 -0
  15. data/lib/action_mailer/utils.rb +7 -0
  16. data/lib/action_mailer/vendor/text-format-0.6.3/text/format.rb +1466 -0
  17. data/lib/action_mailer/vendor/text_format.rb +10 -0
  18. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/Makefile +18 -0
  19. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/address.rb +392 -0
  20. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/attachments.rb +65 -0
  21. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/base64.rb +46 -0
  22. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/compat.rb +41 -0
  23. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/config.rb +67 -0
  24. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/core_extensions.rb +63 -0
  25. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/encode.rb +590 -0
  26. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/header.rb +962 -0
  27. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/index.rb +9 -0
  28. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/interface.rb +1162 -0
  29. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/loader.rb +3 -0
  30. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/mail.rb +578 -0
  31. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/mailbox.rb +496 -0
  32. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/main.rb +6 -0
  33. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/mbox.rb +3 -0
  34. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/net.rb +250 -0
  35. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/obsolete.rb +132 -0
  36. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/parser.rb +1060 -0
  37. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/parser.y +416 -0
  38. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/port.rb +379 -0
  39. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/quoting.rb +164 -0
  40. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/require_arch.rb +58 -0
  41. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/scanner.rb +49 -0
  42. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/scanner_r.rb +262 -0
  43. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/stringio.rb +280 -0
  44. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/utils.rb +362 -0
  45. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/COPYING +504 -0
  46. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/README +12 -0
  47. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  48. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  49. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  50. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  51. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  52. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  53. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  54. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  55. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  56. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  57. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  58. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  59. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  60. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  61. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  62. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  63. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  64. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  65. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  66. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  67. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  68. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  69. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  70. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  71. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  72. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  73. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  74. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  75. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  76. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  77. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  78. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  79. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +168 -0
  80. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  81. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  82. data/lib/action_mailer/vendor/tmail-1.2.7/tmail/version.rb +39 -0
  83. data/lib/action_mailer/vendor/tmail-1.2.7/tmail.rb +6 -0
  84. data/lib/action_mailer/vendor/tmail.rb +17 -0
  85. data/lib/action_mailer/version.rb +9 -0
  86. data/lib/action_mailer.rb +62 -0
  87. data/lib/actionmailer.rb +2 -0
  88. data/test/abstract_unit.rb +62 -0
  89. data/test/asset_host_test.rb +54 -0
  90. data/test/delivery_method_test.rb +51 -0
  91. data/test/fixtures/asset_host_mailer/email_with_asset.html.erb +1 -0
  92. data/test/fixtures/auto_layout_mailer/hello.html.erb +1 -0
  93. data/test/fixtures/auto_layout_mailer/multipart.text.html.erb +1 -0
  94. data/test/fixtures/auto_layout_mailer/multipart.text.plain.erb +1 -0
  95. data/test/fixtures/explicit_layout_mailer/logout.html.erb +1 -0
  96. data/test/fixtures/explicit_layout_mailer/signup.html.erb +1 -0
  97. data/test/fixtures/first_mailer/share.erb +1 -0
  98. data/test/fixtures/helper_mailer/use_example_helper.erb +1 -0
  99. data/test/fixtures/helper_mailer/use_helper.erb +1 -0
  100. data/test/fixtures/helper_mailer/use_helper_method.erb +1 -0
  101. data/test/fixtures/helper_mailer/use_mail_helper.erb +5 -0
  102. data/test/fixtures/helpers/example_helper.rb +5 -0
  103. data/test/fixtures/layouts/auto_layout_mailer.html.erb +1 -0
  104. data/test/fixtures/layouts/auto_layout_mailer.text.erb +1 -0
  105. data/test/fixtures/layouts/spam.html.erb +1 -0
  106. data/test/fixtures/path.with.dots/funky_path_mailer/multipart_with_template_path_with_dots.erb +1 -0
  107. data/test/fixtures/raw_email +14 -0
  108. data/test/fixtures/raw_email10 +20 -0
  109. data/test/fixtures/raw_email12 +32 -0
  110. data/test/fixtures/raw_email13 +29 -0
  111. data/test/fixtures/raw_email2 +114 -0
  112. data/test/fixtures/raw_email3 +70 -0
  113. data/test/fixtures/raw_email4 +59 -0
  114. data/test/fixtures/raw_email5 +19 -0
  115. data/test/fixtures/raw_email6 +20 -0
  116. data/test/fixtures/raw_email7 +66 -0
  117. data/test/fixtures/raw_email8 +47 -0
  118. data/test/fixtures/raw_email9 +28 -0
  119. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  120. data/test/fixtures/raw_email_with_invalid_characters_in_content_type +104 -0
  121. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  122. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  123. data/test/fixtures/second_mailer/share.erb +1 -0
  124. data/test/fixtures/templates/signed_up.erb +3 -0
  125. data/test/fixtures/test_mailer/_subtemplate.text.plain.erb +1 -0
  126. data/test/fixtures/test_mailer/body_ivar.erb +2 -0
  127. data/test/fixtures/test_mailer/custom_templating_extension.text.html.haml +6 -0
  128. data/test/fixtures/test_mailer/custom_templating_extension.text.plain.haml +6 -0
  129. data/test/fixtures/test_mailer/implicitly_multipart_example.ignored.erb +1 -0
  130. data/test/fixtures/test_mailer/implicitly_multipart_example.rhtml.bak +1 -0
  131. data/test/fixtures/test_mailer/implicitly_multipart_example.text.html.erb +10 -0
  132. data/test/fixtures/test_mailer/implicitly_multipart_example.text.html.erb~ +10 -0
  133. data/test/fixtures/test_mailer/implicitly_multipart_example.text.plain.erb +2 -0
  134. data/test/fixtures/test_mailer/implicitly_multipart_example.text.yaml.erb +1 -0
  135. data/test/fixtures/test_mailer/included_subtemplate.text.plain.erb +1 -0
  136. data/test/fixtures/test_mailer/rxml_template.builder +2 -0
  137. data/test/fixtures/test_mailer/rxml_template.rxml +2 -0
  138. data/test/fixtures/test_mailer/signed_up.html.erb +3 -0
  139. data/test/fixtures/test_mailer/signed_up_with_url.erb +5 -0
  140. data/test/mail_helper_test.rb +95 -0
  141. data/test/mail_layout_test.rb +123 -0
  142. data/test/mail_render_test.rb +116 -0
  143. data/test/mail_service_test.rb +1145 -0
  144. data/test/quoting_test.rb +105 -0
  145. data/test/test_helper_test.rb +129 -0
  146. data/test/tmail_test.rb +22 -0
  147. data/test/url_test.rb +76 -0
  148. metadata +209 -0
@@ -0,0 +1,289 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Shy Shalom
6
+ # Portions created by the Initial Developer are Copyright (C) 2005
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ # This prober doesn't actually recognize a language or a charset.
30
+ # It is a helper prober for the use of the Hebrew model probers
31
+
32
+ ### General ideas of the Hebrew charset recognition ###
33
+ #
34
+ # Four main charsets exist in Hebrew:
35
+ # "ISO-8859-8" - Visual Hebrew
36
+ # "windows-1255" - Logical Hebrew
37
+ # "ISO-8859-8-I" - Logical Hebrew
38
+ # "x-mac-hebrew" - ?? Logical Hebrew ??
39
+ #
40
+ # Both "ISO" charsets use a completely identical set of code points, whereas
41
+ # "windows-1255" and "x-mac-hebrew" are two different proper supersets of
42
+ # these code points. windows-1255 defines additional characters in the range
43
+ # 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
44
+ # diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
45
+ # x-mac-hebrew defines similar additional code points but with a different
46
+ # mapping.
47
+ #
48
+ # As far as an average Hebrew text with no diacritics is concerned, all four
49
+ # charsets are identical with respect to code points. Meaning that for the
50
+ # main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
51
+ # (including final letters).
52
+ #
53
+ # The dominant difference between these charsets is their directionality.
54
+ # "Visual" directionality means that the text is ordered as if the renderer is
55
+ # not aware of a BIDI rendering algorithm. The renderer sees the text and
56
+ # draws it from left to right. The text itself when ordered naturally is read
57
+ # backwards. A buffer of Visual Hebrew generally looks like so:
58
+ # "[last word of first line spelled backwards] [whole line ordered backwards
59
+ # and spelled backwards] [first word of first line spelled backwards]
60
+ # [end of line] [last word of second line] ... etc' "
61
+ # adding punctuation marks, numbers and English text to visual text is
62
+ # naturally also "visual" and from left to right.
63
+ #
64
+ # "Logical" directionality means the text is ordered "naturally" according to
65
+ # the order it is read. It is the responsibility of the renderer to display
66
+ # the text from right to left. A BIDI algorithm is used to place general
67
+ # punctuation marks, numbers and English text in the text.
68
+ #
69
+ # Texts in x-mac-hebrew are almost impossible to find on the Internet. From
70
+ # what little evidence I could find, it seems that its general directionality
71
+ # is Logical.
72
+ #
73
+ # To sum up all of the above, the Hebrew probing mechanism knows about two
74
+ # charsets:
75
+ # Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
76
+ # backwards while line order is natural. For charset recognition purposes
77
+ # the line order is unimportant (In fact, for this implementation, even
78
+ # word order is unimportant).
79
+ # Logical Hebrew - "windows-1255" - normal, naturally ordered text.
80
+ #
81
+ # "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
82
+ # specifically identified.
83
+ # "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
84
+ # that contains special punctuation marks or diacritics is displayed with
85
+ # some unconverted characters showing as question marks. This problem might
86
+ # be corrected using another model prober for x-mac-hebrew. Due to the fact
87
+ # that x-mac-hebrew texts are so rare, writing another model prober isn't
88
+ # worth the effort and performance hit.
89
+ #
90
+ #### The Prober ####
91
+ #
92
+ # The prober is divided between two SBCharSetProbers and a HebrewProber,
93
+ # all of which are managed, created, fed data, inquired and deleted by the
94
+ # SBCSGroupProber. The two SBCharSetProbers identify that the text is in
95
+ # fact some kind of Hebrew, Logical or Visual. The final decision about which
96
+ # one it is is made by the HebrewProber by combining final-letter scores
97
+ # with the scores of the two SBCharSetProbers to produce a final answer.
98
+ #
99
+ # The SBCSGroupProber is responsible for stripping the original text of HTML
100
+ # tags, English characters, numbers, low-ASCII punctuation characters, spaces
101
+ # and new lines. It reduces any sequence of such characters to a single space.
102
+ # The buffer fed to each prober in the SBCS group prober is pure text in
103
+ # high-ASCII.
104
+ # The two SBCharSetProbers (model probers) share the same language model:
105
+ # Win1255Model.
106
+ # The first SBCharSetProber uses the model normally as any other
107
+ # SBCharSetProber does, to recognize windows-1255, upon which this model was
108
+ # built. The second SBCharSetProber is told to make the pair-of-letter
109
+ # lookup in the language model backwards. This in practice exactly simulates
110
+ # a visual Hebrew model using the windows-1255 logical Hebrew model.
111
+ #
112
+ # The HebrewProber is not using any language model. All it does is look for
113
+ # final-letter evidence suggesting the text is either logical Hebrew or visual
114
+ # Hebrew. Disjointed from the model probers, the results of the HebrewProber
115
+ # alone are meaningless. HebrewProber always returns 0.00 as confidence
116
+ # since it never identifies a charset by itself. Instead, the pointer to the
117
+ # HebrewProber is passed to the model probers as a helper "Name Prober".
118
+ # When the Group prober receives a positive identification from any prober,
119
+ # it asks for the name of the charset identified. If the prober queried is a
120
+ # Hebrew model prober, the model prober forwards the call to the
121
+ # HebrewProber to make the final decision. In the HebrewProber, the
122
+ # decision is made according to the final-letter scores maintained and both
122
+ # model probers' scores. The answer is returned in the form of the name of the
124
+ # charset identified, either "windows-1255" or "ISO-8859-8".
125
+
126
+ # windows-1255 / ISO-8859-8 code points of interest
127
module CharDet
  # Single-byte values of the five Hebrew letters that have a distinct
  # word-final form, paired with the regular (non-final) form of each letter.
  FINAL_KAF = "\xea"
  NORMAL_KAF = "\xeb"
  FINAL_MEM = "\xed"
  NORMAL_MEM = "\xee"
  FINAL_NUN = "\xef"
  NORMAL_NUN = "\xf0"
  FINAL_PE = "\xf3"
  NORMAL_PE = "\xf4"
  FINAL_TSADI = "\xf5"
  NORMAL_TSADI = "\xf6"

  # Minimum Visual vs Logical final letter score difference.
  # If the difference is below this, don't rely solely on the final letter score distance.
  MIN_FINAL_CHAR_DISTANCE = 5

  # Minimum Visual vs Logical model score difference.
  # If the difference is below this, don't rely at all on the model score distance.
  MIN_MODEL_DISTANCE = 0.01

  VISUAL_HEBREW_NAME = "ISO-8859-8"
  LOGICAL_HEBREW_NAME = "windows-1255"

  # Helper prober that decides between logical and visual Hebrew.
  #
  # It accumulates two scores from the position of final / non-final letter
  # forms at word boundaries and, in get_charset_name, combines them with the
  # confidences of the two model probers registered via set_model_probers.
  class HebrewProber < CharSetProber
    # Letters whose presence at the end of a word indicates logical order.
    FINAL_LETTERS = [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI].freeze

    # Regular letter forms that should not normally end a word. NORMAL_TSADI is
    # deliberately left out; see is_non_final.
    NON_FINAL_LETTERS = [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE].freeze

    def initialize
      super()
      @_mLogicalProber = nil
      @_mVisualProber = nil
      reset()
    end

    # Clears the accumulated scores and the two-character look-behind window.
    # The model probers are left untouched: they are owned by the group prober.
    def reset
      @_mFinalCharLogicalScore = 0
      @_mFinalCharVisualScore = 0
      # The two last characters seen in the previous buffer. Both start out as
      # a space in order to simulate a word delimiter at the beginning of the
      # data.
      @_mPrev = ' '
      @_mBeforePrev = ' '
    end

    # Registers the logical and the visual model prober whose state and
    # confidence are consulted by get_state and get_charset_name.
    def set_model_probers(logicalProber, visualProber)
      @_mLogicalProber = logicalProber
      @_mVisualProber = visualProber
    end

    # Returns true when +c+ is one of the five final-form letters.
    def is_final(c)
      return FINAL_LETTERS.include?(c)
    end

    # Returns true when +c+ is a regular-form Kaf, Mem, Nun or Pe.
    def is_non_final(c)
      # The normal Tsadi is not a good Non-Final letter due to words like
      # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
      # apostrophe is converted to a space during filtering, causing the
      # Non-Final tsadi to appear at an end of a word even though this is not
      # the case in the original text.
      # The letters Pe and Kaf rarely display a related behavior of not being a
      # good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for
      # example legally end with a Non-Final Pe or Kaf. However, the benefit of
      # these letters as Non-Final letters outweighs the damage since these
      # words are quite rare.
      return NON_FINAL_LETTERS.include?(c)
    end

    # Final letter analysis for the logical-visual decision.
    #
    # Scans +aBuf+ character by character looking for evidence that the text
    # is either logical Hebrew or visual Hebrew. The following cases are
    # checked:
    # 1) A word longer than 1 letter, ending with a final letter. This is an
    #    indication that the text is laid out "naturally" since the final
    #    letter really appears at the end. +1 for logical score.
    # 2) A word longer than 1 letter, ending with a Non-Final letter. In normal
    #    Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi should not end
    #    with the Non-Final form of that letter. Exceptions to this rule are
    #    mentioned in is_non_final. This is an indication that the text is
    #    laid out backwards. +1 for visual score.
    # 3) A word longer than 1 letter, starting with a final letter. Final
    #    letters should not appear at the beginning of a word. This is an
    #    indication that the text is laid out backwards. +1 for visual score.
    #
    # The visual score and logical score are accumulated throughout the text
    # and are finally checked against each other in get_charset_name.
    # No checking for final letters in the middle of words is done since that
    # case is not an indication for either Logical or Visual text.
    #
    # Returns ENotMe once both model probers have given up, EDetecting
    # otherwise.
    def feed(aBuf)
      if get_state() == ENotMe
        # Both model probers say it's not them. No reason to continue.
        return ENotMe
      end

      # Replace all 7-bit characters with spaces so that the word boundary
      # detection below works properly.
      aBuf = filter_high_bit_only(aBuf)

      # Walk the buffer one byte at a time. Splitting on ' ' would hand us
      # whole words with the delimiters removed, so the "standing on a space"
      # branch could never be taken and no score would ever be recorded.
      aBuf.each_byte do |byte|
        cur = byte.chr
        # On Ruby >= 1.9 Integer#chr yields a binary-tagged string for bytes
        # above 0x7f; retag it like the letter constants so that == and
        # include? compare the raw bytes. Ruby 1.8 has no encodings.
        cur.force_encoding(FINAL_KAF.encoding) if cur.respond_to?(:force_encoding)

        if cur == ' '
          # We stand on a space - a word just ended
          if @_mBeforePrev != ' '
            # next-to-last char was not a space so @_mPrev is not a 1 letter word
            if is_final(@_mPrev)
              # case (1) [-2:not space][-1:final letter][cur:space]
              @_mFinalCharLogicalScore += 1
            elsif is_non_final(@_mPrev)
              # case (2) [-2:not space][-1:Non-Final letter][cur:space]
              @_mFinalCharVisualScore += 1
            end
          end
        elsif (@_mBeforePrev == ' ') and is_final(@_mPrev)
          # Not standing on a space
          # case (3) [-2:space][-1:final letter][cur:not space]
          @_mFinalCharVisualScore += 1
        end

        @_mBeforePrev = @_mPrev
        @_mPrev = cur
      end

      # Forever detecting, till the end or until both model probers return
      # ENotMe (handled above)
      return EDetecting
    end

    # Makes the decision: is it Logical or Visual?
    # Returns LOGICAL_HEBREW_NAME or VISUAL_HEBREW_NAME.
    def get_charset_name
      # If the final letter score distance is dominant enough, rely on it.
      finalsub = @_mFinalCharLogicalScore - @_mFinalCharVisualScore
      return LOGICAL_HEBREW_NAME if finalsub >= MIN_FINAL_CHAR_DISTANCE
      return VISUAL_HEBREW_NAME if finalsub <= -MIN_FINAL_CHAR_DISTANCE

      # It's not dominant enough, try to rely on the model scores instead.
      modelsub = @_mLogicalProber.get_confidence() - @_mVisualProber.get_confidence()
      return LOGICAL_HEBREW_NAME if modelsub > MIN_MODEL_DISTANCE
      return VISUAL_HEBREW_NAME if modelsub < -MIN_MODEL_DISTANCE

      # Still no good, back to final letter distance, maybe it'll save the day.
      return VISUAL_HEBREW_NAME if finalsub < 0

      # (finalsub > 0 - Logical) or (don't know what to do) default to Logical.
      return LOGICAL_HEBREW_NAME
    end

    # Remains active as long as any of the model probers are active.
    def get_state
      if (@_mLogicalProber.get_state() == ENotMe) and (@_mVisualProber.get_state() == ENotMe)
        return ENotMe
      end
      return EDetecting
    end
  end
end