cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,89 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
+ /* ***** BEGIN LICENSE BLOCK *****
3
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
+ *
5
+ * The contents of this file are subject to the Mozilla Public License Version
6
+ * 1.1 (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ * http://www.mozilla.org/MPL/
9
+ *
10
+ * Software distributed under the License is distributed on an "AS IS" basis,
11
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
+ * for the specific language governing rights and limitations under the
13
+ * License.
14
+ *
15
+ * The Original Code is mozilla.org code.
16
+ *
17
+ * The Initial Developer of the Original Code is
18
+ * Netscape Communications Corporation.
19
+ * Portions created by the Initial Developer are Copyright (C) 1998
20
+ * the Initial Developer. All Rights Reserved.
21
+ *
22
+ * Contributor(s):
23
+ *
24
+ * Alternatively, the contents of this file may be used under the terms of
25
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
26
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
+ * in which case the provisions of the GPL or the LGPL are applicable instead
28
+ * of those above. If you wish to allow use of your version of this file only
29
+ * under the terms of either the GPL or the LGPL, and not to allow others to
30
+ * use your version of this file under the terms of the MPL, indicate your
31
+ * decision by deleting the provisions above and replace them with the notice
32
+ * and other provisions required by the GPL or the LGPL. If you do not delete
33
+ * the provisions above, a recipient may use your version of this file under
34
+ * the terms of any one of the MPL, the GPL or the LGPL.
35
+ *
36
+ * ***** END LICENSE BLOCK ***** */
37
+
38
+ #ifndef nsPkgInt_h__
39
+ #define nsPkgInt_h__
40
+ #include "nscore.h"
41
+
42
/*
 * Right-shift applied to an element index to select the word of the packed
 * table that holds it (see GETFROMPCK: data[i >> idxsft]).  The shift grows
 * as the element gets narrower, because more elements fit in one word.
 */
typedef enum {
  eIdxSft4bits  = 3,   /* 4-bit elements  */
  eIdxSft8bits  = 2,   /* 8-bit elements  */
  eIdxSft16bits = 1    /* 16-bit elements */
} nsIdxSft;
47
+
48
/*
 * Mask applied to an element index to obtain the element's position inside
 * its word (see GETFROMPCK: i & sftmsk).
 */
typedef enum {
  eSftMsk4bits  = 7,   /* 4-bit elements  */
  eSftMsk8bits  = 3,   /* 8-bit elements  */
  eSftMsk16bits = 1    /* 16-bit elements */
} nsSftMsk;
53
+
54
/*
 * Left-shift that turns an element's position inside a word into a bit
 * offset (see GETFROMPCK: (i & sftmsk) << bitsft), i.e. log2 of the
 * element width in bits.
 */
typedef enum {
  eBitSft4bits  = 2,   /* position * 4  */
  eBitSft8bits  = 3,   /* position * 8  */
  eBitSft16bits = 4    /* position * 16 */
} nsBitSft;
59
+
60
/*
 * Mask that isolates a single element once its word has been shifted down
 * (the final "& unitmsk" step of GETFROMPCK).
 */
typedef enum {
  eUnitMsk4bits  = 0x0000000FL,   /* low 4 bits  */
  eUnitMsk8bits  = 0x000000FFL,   /* low 8 bits  */
  eUnitMsk16bits = 0x0000FFFFL    /* low 16 bits */
} nsUnitMsk;
65
+
66
+ typedef struct nsPkgInt {
67
+ nsIdxSft idxsft;
68
+ nsSftMsk sftmsk;
69
+ nsBitSft bitsft;
70
+ nsUnitMsk unitmsk;
71
+ const PRUint32* const data;
72
+ } nsPkgInt;
73
+
74
+
75
/*
 * Packing helpers: combine small integers into one PRUint32, first argument
 * in the least-significant position.
 *
 * Each operand is converted to PRUint32 *before* it is shifted.  Shifting
 * the raw (usually int) argument, as was done previously, overflows a signed
 * int for e.g. PCK16BITS(x, 0x8000) and is undefined behaviour; doing the
 * arithmetic in the unsigned type yields the same value for every input
 * that was previously well defined.
 *
 * Arguments are not range-masked: callers must pass values that fit the
 * unit width (16, 8 or 4 bits respectively).
 */

/* Pack two 16-bit values: a -> bits 0..15, b -> bits 16..31. */
#define PCK16BITS(a,b) ((PRUint32)(((PRUint32)(b) << 16) | (PRUint32)(a)))

/* Pack four 8-bit values: a is the lowest byte, d the highest. */
#define PCK8BITS(a,b,c,d) PCK16BITS( ((PRUint32)(((PRUint32)(b) << 8) | (PRUint32)(a))),  \
                                     ((PRUint32)(((PRUint32)(d) << 8) | (PRUint32)(c))))

/* Pack eight 4-bit values: a is the lowest nibble, h the highest. */
#define PCK4BITS(a,b,c,d,e,f,g,h) PCK8BITS(  ((PRUint32)(((PRUint32)(b) << 4) | (PRUint32)(a))), \
                                             ((PRUint32)(((PRUint32)(d) << 4) | (PRUint32)(c))), \
                                             ((PRUint32)(((PRUint32)(f) << 4) | (PRUint32)(e))), \
                                             ((PRUint32)(((PRUint32)(h) << 4) | (PRUint32)(g))) )

/*
 * Fetch element i from packed table c (an nsPkgInt, or anything exposing the
 * same members): pick the word with idxsft, shift the wanted element down
 * using sftmsk/bitsft, then isolate it with unitmsk.
 * Note: both i and c are evaluated more than once; avoid side effects.
 */
#define GETFROMPCK(i, c) \
 (((((c).data)[(i)>>(c).idxsft])>>(((i)&(c).sftmsk)<<(c).bitsft))&(c).unitmsk)
87
+
88
+ #endif /* nsPkgInt_h__ */
89
+
@@ -0,0 +1,343 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
+ /* ***** BEGIN LICENSE BLOCK *****
3
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
+ *
5
+ * The contents of this file are subject to the Mozilla Public License Version
6
+ * 1.1 (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ * http://www.mozilla.org/MPL/
9
+ *
10
+ * Software distributed under the License is distributed on an "AS IS" basis,
11
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
+ * for the specific language governing rights and limitations under the
13
+ * License.
14
+ *
15
+ * The Original Code is Mozilla Universal charset detector code.
16
+ *
17
+ * The Initial Developer of the Original Code is
18
+ * Netscape Communications Corporation.
19
+ * Portions created by the Initial Developer are Copyright (C) 2001
20
+ * the Initial Developer. All Rights Reserved.
21
+ *
22
+ * Contributor(s):
23
+ * Shy Shalom <shooshX@gmail.com>
24
+ *
25
+ * Alternatively, the contents of this file may be used under the terms of
26
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
27
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
+ * in which case the provisions of the GPL or the LGPL are applicable instead
29
+ * of those above. If you wish to allow use of your version of this file only
30
+ * under the terms of either the GPL or the LGPL, and not to allow others to
31
+ * use your version of this file under the terms of the MPL, indicate your
32
+ * decision by deleting the provisions above and replace them with the notice
33
+ * and other provisions required by the GPL or the LGPL. If you do not delete
34
+ * the provisions above, a recipient may use your version of this file under
35
+ * the terms of any one of the MPL, the GPL or the LGPL.
36
+ *
37
+ * ***** END LICENSE BLOCK ***** */
38
+
39
+ #include <stdio.h>
40
+ #include "prmem.h"
41
+
42
+ #include "nsSBCharSetProber.h"
43
+ #include "nsSBCSGroupProber.h"
44
+
45
+ #include "nsHebrewProber.h"
46
+
47
+ nsSBCSGroupProber::nsSBCSGroupProber()
48
+ {
49
+ mProbers[0] = new nsSingleByteCharSetProber(&Win1251RussianModel);
50
+ mProbers[1] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
51
+ mProbers[2] = new nsSingleByteCharSetProber(&Latin5RussianModel);
52
+ mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
53
+ mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
54
+ mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
55
+
56
+ mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
57
+ mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
58
+
59
+ mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
60
+ mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
61
+
62
+ nsHebrewProber *hebprober = new nsHebrewProber();
63
+ // Notice: Any change in these indexes - 10,11,12 must be reflected
64
+ // in the code below as well.
65
+ mProbers[10] = hebprober;
66
+ mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
67
+ mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
68
+ // Tell the Hebrew prober about the logical and visual probers
69
+ if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
70
+ {
71
+ hebprober->SetModelProbers(mProbers[11], mProbers[12]);
72
+ }
73
+ else // One or more is null. avoid any Hebrew probing, null them all
74
+ {
75
+ for (PRUint32 i = 10; i <= 12; ++i)
76
+ {
77
+ delete mProbers[i];
78
+ mProbers[i] = 0;
79
+ }
80
+ }
81
+
82
+ mProbers[13] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
83
+ mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
84
+
85
+ mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
86
+ mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
87
+ mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
88
+
89
+ mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
90
+ mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
91
+ mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
92
+
93
+ mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
94
+ mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
95
+
96
+ mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
97
+ mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
98
+
99
+ mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
100
+
101
+ mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
102
+ mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
103
+
104
+ mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
105
+ mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
106
+
107
+ mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
108
+ mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
109
+
110
+ mProbers[32] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
111
+ mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
112
+ mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
113
+
114
+ mProbers[35] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
115
+ mProbers[36] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
116
+ mProbers[37] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
117
+
118
+ mProbers[38] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
119
+ mProbers[39] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
120
+ mProbers[40] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
121
+
122
+ mProbers[41] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
123
+ mProbers[42] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
124
+ mProbers[43] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
125
+ mProbers[44] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
126
+
127
+ mProbers[45] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
128
+
129
+ mProbers[46] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
130
+ mProbers[47] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
131
+ mProbers[48] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCzechModel);
132
+ mProbers[49] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
133
+
134
+ mProbers[50] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
135
+ mProbers[51] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
136
+ mProbers[52] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSlovakModel);
137
+ mProbers[53] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
138
+
139
+ mProbers[54] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
140
+ mProbers[55] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
141
+ mProbers[56] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
142
+ mProbers[57] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
143
+ mProbers[58] = new nsSingleByteCharSetProber(&Mac_CentraleuropePolishModel);
144
+ mProbers[59] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
145
+
146
+ mProbers[60] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
147
+ mProbers[61] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
148
+ mProbers[62] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
149
+ mProbers[63] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
150
+ mProbers[64] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
151
+ mProbers[65] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
152
+
153
+ mProbers[66] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
154
+ mProbers[67] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
155
+ mProbers[68] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
156
+ mProbers[69] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
157
+ mProbers[70] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
158
+
159
+ mProbers[71] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
160
+ mProbers[72] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
161
+ mProbers[73] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
162
+ mProbers[74] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
163
+ mProbers[75] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCroatianModel);
164
+ mProbers[76] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
165
+
166
+ mProbers[77] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
167
+ mProbers[78] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
168
+ mProbers[79] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
169
+ mProbers[80] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
170
+ mProbers[81] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
171
+
172
+ mProbers[82] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
173
+ mProbers[83] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
174
+ mProbers[84] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
175
+ mProbers[85] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
176
+
177
+ mProbers[86] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
178
+ mProbers[87] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
179
+ mProbers[88] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
180
+ mProbers[89] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
181
+
182
+ mProbers[90] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
183
+ mProbers[91] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
184
+ mProbers[92] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
185
+ mProbers[93] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSloveneModel);
186
+ mProbers[94] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
187
+
188
+ mProbers[95] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
189
+ mProbers[96] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
190
+ mProbers[97] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
191
+ mProbers[98] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
192
+ mProbers[99] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
193
+
194
+ Reset();
195
+ }
196
+
197
+ nsSBCSGroupProber::~nsSBCSGroupProber()
198
+ {
199
+ for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
200
+ {
201
+ delete mProbers[i];
202
+ }
203
+ }
204
+
205
+
206
+ const char* nsSBCSGroupProber::GetCharSetName()
207
+ {
208
+ //if we have no answer yet
209
+ if (mBestGuess == -1)
210
+ {
211
+ GetConfidence();
212
+ //no charset seems positive
213
+ if (mBestGuess == -1)
214
+ //we will use default.
215
+ mBestGuess = 0;
216
+ }
217
+ return mProbers[mBestGuess]->GetCharSetName();
218
+ }
219
+
220
+ const char* nsSBCSGroupProber::GetLanguage()
221
+ {
222
+ if (mBestGuess == -1)
223
+ {
224
+ GetConfidence();
225
+ if (mBestGuess == -1)
226
+ mBestGuess = 0;
227
+ }
228
+ return mProbers[mBestGuess]->GetLanguage();
229
+ }
230
+
231
+ void nsSBCSGroupProber::Reset(void)
232
+ {
233
+ mActiveNum = 0;
234
+ for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
235
+ {
236
+ if (mProbers[i]) // not null
237
+ {
238
+ mProbers[i]->Reset();
239
+ mIsActive[i] = PR_TRUE;
240
+ ++mActiveNum;
241
+ }
242
+ else
243
+ mIsActive[i] = PR_FALSE;
244
+ }
245
+ mBestGuess = -1;
246
+ mState = eDetecting;
247
+ }
248
+
249
+
250
+ nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
251
+ {
252
+ nsProbingState st;
253
+ PRUint32 i;
254
+ char *newBuf1 = 0;
255
+ PRUint32 newLen1 = 0;
256
+
257
+ //apply filter to original buffer, and we got new buffer back
258
+ //depend on what script it is, we will feed them the new buffer
259
+ //we got after applying proper filter
260
+ //this is done without any consideration to KeepEnglishLetters
261
+ //of each prober since as of now, there are no probers here which
262
+ //recognize languages with English characters.
263
+ if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
264
+ goto done;
265
+
266
+ if (newLen1 == 0)
267
+ goto done; // Nothing to see here, move on.
268
+
269
+ for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
270
+ {
271
+ if (!mIsActive[i])
272
+ continue;
273
+ st = mProbers[i]->HandleData(newBuf1, newLen1);
274
+ if (st == eFoundIt)
275
+ {
276
+ mBestGuess = i;
277
+ mState = eFoundIt;
278
+ break;
279
+ }
280
+ else if (st == eNotMe)
281
+ {
282
+ mIsActive[i] = PR_FALSE;
283
+ mActiveNum--;
284
+ if (mActiveNum <= 0)
285
+ {
286
+ mState = eNotMe;
287
+ break;
288
+ }
289
+ }
290
+ }
291
+
292
+ done:
293
+ PR_FREEIF(newBuf1);
294
+
295
+ return mState;
296
+ }
297
+
298
+ float nsSBCSGroupProber::GetConfidence(void)
299
+ {
300
+ PRUint32 i;
301
+ float bestConf = 0.0, cf;
302
+
303
+ switch (mState)
304
+ {
305
+ case eFoundIt:
306
+ return (float)0.99; //sure yes
307
+ case eNotMe:
308
+ return (float)0.01; //sure no
309
+ default:
310
+ for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
311
+ {
312
+ if (!mIsActive[i])
313
+ continue;
314
+ cf = mProbers[i]->GetConfidence();
315
+ if (bestConf < cf)
316
+ {
317
+ bestConf = cf;
318
+ mBestGuess = i;
319
+ }
320
+ }
321
+ }
322
+ return bestConf;
323
+ }
324
+
325
#ifdef DEBUG_chardet
// Debug helper: prints the status of every child prober to stdout, followed
// by the group's best match (if any) and its confidence.
void nsSBCSGroupProber::DumpStatus()
{
  // While the group is still detecting, this also refreshes mBestGuess.
  const float cf = GetConfidence();

  printf(" SBCS Group Prober --------begin status \r\n");
  for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    // Reset() tolerates empty slots, so do not dereference them here either.
    if (!mProbers[i])
      continue;

    if (!mIsActive[i])
      printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
    else
      mProbers[i]->DumpStatus();
  }

  // mBestGuess remains -1 when the group is in eNotMe, or when no active
  // prober reported a confidence above zero; indexing mProbers with it would
  // read outside the array.
  if (mBestGuess >= 0 && mProbers[mBestGuess])
  {
    printf(" SBCS Group found best match [%s] confidence %f.\r\n",
           mProbers[mBestGuess]->GetCharSetName(), cf);
  }
  else
  {
    printf(" SBCS Group found no best match, confidence %f.\r\n", cf);
  }
}
#endif
@@ -0,0 +1,71 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
+ /* ***** BEGIN LICENSE BLOCK *****
3
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
+ *
5
+ * The contents of this file are subject to the Mozilla Public License Version
6
+ * 1.1 (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ * http://www.mozilla.org/MPL/
9
+ *
10
+ * Software distributed under the License is distributed on an "AS IS" basis,
11
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
+ * for the specific language governing rights and limitations under the
13
+ * License.
14
+ *
15
+ * The Original Code is Mozilla Universal charset detector code.
16
+ *
17
+ * The Initial Developer of the Original Code is
18
+ * Netscape Communications Corporation.
19
+ * Portions created by the Initial Developer are Copyright (C) 2001
20
+ * the Initial Developer. All Rights Reserved.
21
+ *
22
+ * Contributor(s):
23
+ * Shy Shalom <shooshX@gmail.com>
24
+ *
25
+ * Alternatively, the contents of this file may be used under the terms of
26
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
27
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
+ * in which case the provisions of the GPL or the LGPL are applicable instead
29
+ * of those above. If you wish to allow use of your version of this file only
30
+ * under the terms of either the GPL or the LGPL, and not to allow others to
31
+ * use your version of this file under the terms of the MPL, indicate your
32
+ * decision by deleting the provisions above and replace them with the notice
33
+ * and other provisions required by the GPL or the LGPL. If you do not delete
34
+ * the provisions above, a recipient may use your version of this file under
35
+ * the terms of any one of the MPL, the GPL or the LGPL.
36
+ *
37
+ * ***** END LICENSE BLOCK ***** */
38
+
39
+ #ifndef nsSBCSGroupProber_h__
40
+ #define nsSBCSGroupProber_h__
41
+
42
+
43
+ #define NUM_OF_SBCS_PROBERS 100
44
+
45
+ class nsCharSetProber;
46
+ class nsSBCSGroupProber: public nsCharSetProber {
47
+ public:
48
+ nsSBCSGroupProber();
49
+ virtual ~nsSBCSGroupProber();
50
+ nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
51
+ const char* GetCharSetName();
52
+ const char* GetLanguage();
53
+ nsProbingState GetState(void) {return mState;}
54
+ void Reset(void);
55
+ float GetConfidence(void);
56
+ void SetOpion() {}
57
+
58
+ #ifdef DEBUG_chardet
59
+ void DumpStatus();
60
+ #endif
61
+
62
+ protected:
63
+ nsProbingState mState;
64
+ nsCharSetProber* mProbers[NUM_OF_SBCS_PROBERS];
65
+ PRBool mIsActive[NUM_OF_SBCS_PROBERS];
66
+ PRInt32 mBestGuess;
67
+ PRUint32 mActiveNum;
68
+ };
69
+
70
+ #endif /* nsSBCSGroupProber_h__ */
71
+