cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,160 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
+ /* ***** BEGIN LICENSE BLOCK *****
3
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
+ *
5
+ * The contents of this file are subject to the Mozilla Public License Version
6
+ * 1.1 (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ * http://www.mozilla.org/MPL/
9
+ *
10
+ * Software distributed under the License is distributed on an "AS IS" basis,
11
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
+ * for the specific language governing rights and limitations under the
13
+ * License.
14
+ *
15
+ * The Original Code is Mozilla Universal charset detector code.
16
+ *
17
+ * The Initial Developer of the Original Code is
18
+ * Netscape Communications Corporation.
19
+ * Portions created by the Initial Developer are Copyright (C) 2001
20
+ * the Initial Developer. All Rights Reserved.
21
+ *
22
+ * Contributor(s):
23
+ * Shy Shalom <shooshX@gmail.com>
24
+ *
25
+ * Alternatively, the contents of this file may be used under the terms of
26
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
27
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
+ * in which case the provisions of the GPL or the LGPL are applicable instead
29
+ * of those above. If you wish to allow use of your version of this file only
30
+ * under the terms of either the GPL or the LGPL, and not to allow others to
31
+ * use your version of this file under the terms of the MPL, indicate your
32
+ * decision by deleting the provisions above and replace them with the notice
33
+ * and other provisions required by the GPL or the LGPL. If you do not delete
34
+ * the provisions above, a recipient may use your version of this file under
35
+ * the terms of any one of the MPL, the GPL or the LGPL.
36
+ *
37
+ * ***** END LICENSE BLOCK ***** */
38
+ #include <stdio.h>
39
+ #include "nsSBCharSetProber.h"
40
+
41
+ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
42
+ {
43
+ unsigned char order; // order value of the current byte, read from mModel->charToOrderMap
44
+ /* Classify each of the aLen bytes in aBuf through the model, update the character and two-character sequence counters, and return the resulting mState. */
45
+ for (PRUint32 i = 0; i < aLen; i++)
46
+ {
47
+ order = mModel->charToOrderMap[(unsigned char)aBuf[i]]; // the cast keeps the table index in 0..255 even where char is signed
48
+ /* Orders below SYMBOL_CAT_ORDER count toward mTotalChar; ILL and CTR are handled by the branches that follow. */
49
+ if (order < SYMBOL_CAT_ORDER)
50
+ {
51
+ mTotalChar++;
52
+ }
53
+ else if (order == ILL)
54
+ {
55
+ /* When encountering an illegal codepoint, no need
56
+ * to continue analyzing data. */
57
+ mState = eNotMe;
58
+ break; // mLastOrder is left untouched; the shortcut check after the loop is skipped because mState is no longer eDetecting
59
+ }
60
+ else if (order == CTR)
61
+ {
62
+ mCtrlChar++;
63
+ }
64
+ if (order < mModel->freqCharCount) // the byte is one of the model's frequent characters
65
+ {
66
+ mFreqChar++;
67
+ /* A sequence is counted only when the previous byte was a frequent character as well. */
68
+ if (mLastOrder < mModel->freqCharCount)
69
+ {
70
+ mTotalSeqs++;
71
+ if (!mReversed)
72
+ ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*mModel->freqCharCount+order]]);
73
+ else // reverse the order of the letters in the lookup
74
+ ++(mSeqCounters[mModel->precedenceMatrix[order*mModel->freqCharCount+mLastOrder]]);
75
+ }
76
+ }
77
+ mLastOrder = order; // member variable, so the pairing carries over to the next HandleData call
78
+ }
79
+ /* Once more than SB_ENOUGH_REL_THRESHOLD sequences have been seen, a very high or very low confidence settles the state early. */
80
+ if (mState == eDetecting)
81
+ if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
82
+ {
83
+ float cf = GetConfidence();
84
+ if (cf > POSITIVE_SHORTCUT_THRESHOLD)
85
+ mState = eFoundIt;
86
+ else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
87
+ mState = eNotMe;
88
+ }
89
+
90
+ return mState;
91
+ }
92
+
93
+ void nsSingleByteCharSetProber::Reset(void)
93
+ { // Puts the prober back into the detecting state and clears every counter.
94
+ mState = eDetecting;
95
+ mLastOrder = 255; // same value as ILL; presumably chosen so the first byte cannot pair with a predecessor (HandleData requires mLastOrder < freqCharCount) - assumes freqCharCount <= 255, TODO confirm
96
+ for (PRUint32 i = 0; i < NUMBER_OF_SEQ_CAT; i++)
97
+ mSeqCounters[i] = 0;
98
+ mTotalSeqs = 0;
99
+ mTotalChar = 0;
100
+ mCtrlChar = 0;
101
+ mFreqChar = 0;
102
+ }
104
+
105
+ //#define NEGATIVE_APPROACH 1
106
+
107
+ float nsSingleByteCharSetProber::GetConfidence(void)
108
+ { // Returns a confidence score built from the counters gathered by HandleData; 0.01 when no sequence has been counted yet.
109
+ #ifdef NEGATIVE_APPROACH
110
+ if (mTotalSeqs > 0)
111
+ if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
112
+ return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
113
+ return (float)0.01;
114
+ #else //POSITIVE_APPROACH
115
+ float r;
116
+ /* Start from the share of positive sequences, normalized by the ratio the model declares as typical. */
117
+ if (mTotalSeqs > 0) {
118
+ r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
119
+ /* Multiply by a ratio of positive sequences per characters.
120
+ * This would help in particular to distinguish close winners.
121
+ * Indeed if you add a letter, you'd expect the positive sequence count
122
+ * to increase as well. If it doesn't, it may mean that this new codepoint
123
+ * may not have been a letter, but instead a symbol (or some other
124
+ * character). This could make the difference between very closely related
125
+ * charsets used for the same language.
126
+ */
127
+ r = r * (mSeqCounters[POSITIVE_CAT] + (float) mSeqCounters[PROBABLE_CAT] / 4) / mTotalChar;
128
+ /* The more control characters (proportionally to the size of the text), the
129
+ * less confident we become in the current charset. Control characters are not part of mTotalChar, so mCtrlChar may exceed it; the guard avoids the unsigned wrap-around of the subtraction and yields zero confidence instead.
130
+ */
131
+ r = (mCtrlChar < mTotalChar) ? r * (mTotalChar - mCtrlChar) / mTotalChar : (float)0.0;
132
+ r = r*mFreqChar/mTotalChar;
133
+ if (r >= (float)1.00)
134
+ r = (float)0.99;
135
+ return r;
136
+ }
137
+ return (float)0.01;
138
+ #endif
139
+ }
140
+
141
+ const char* nsSingleByteCharSetProber::GetCharSetName()
142
+ { // Charset name comes from the model unless an auxiliary name prober was supplied.
143
+ if (!mNameProber)
144
+ return mModel->charsetName;
145
+ return mNameProber->GetCharSetName(); // delegate the name decision to the auxiliary prober
146
+ }
147
+
148
+ const char* nsSingleByteCharSetProber::GetLanguage()
149
+ { // Language name comes from the model unless an auxiliary name prober was supplied.
150
+ if (!mNameProber)
151
+ return mModel->langName;
152
+ return mNameProber->GetLanguage(); // delegate the language decision to the auxiliary prober
153
+ }
154
+
155
+ #ifdef DEBUG_chardet
156
+ void nsSingleByteCharSetProber::DumpStatus()
157
+ { // Debug-only helper: prints the current confidence and charset name to stdout.
158
+ printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
159
+ }
160
+ #endif
@@ -0,0 +1,258 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
+ /* ***** BEGIN LICENSE BLOCK *****
3
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
+ *
5
+ * The contents of this file are subject to the Mozilla Public License Version
6
+ * 1.1 (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ * http://www.mozilla.org/MPL/
9
+ *
10
+ * Software distributed under the License is distributed on an "AS IS" basis,
11
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
+ * for the specific language governing rights and limitations under the
13
+ * License.
14
+ *
15
+ * The Original Code is Mozilla Universal charset detector code.
16
+ *
17
+ * The Initial Developer of the Original Code is
18
+ * Netscape Communications Corporation.
19
+ * Portions created by the Initial Developer are Copyright (C) 2001
20
+ * the Initial Developer. All Rights Reserved.
21
+ *
22
+ * Contributor(s):
23
+ * Shy Shalom <shooshX@gmail.com>
24
+ *
25
+ * Alternatively, the contents of this file may be used under the terms of
26
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
27
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
+ * in which case the provisions of the GPL or the LGPL are applicable instead
29
+ * of those above. If you wish to allow use of your version of this file only
30
+ * under the terms of either the GPL or the LGPL, and not to allow others to
31
+ * use your version of this file under the terms of the MPL, indicate your
32
+ * decision by deleting the provisions above and replace them with the notice
33
+ * and other provisions required by the GPL or the LGPL. If you do not delete
34
+ * the provisions above, a recipient may use your version of this file under
35
+ * the terms of any one of the MPL, the GPL or the LGPL.
36
+ *
37
+ * ***** END LICENSE BLOCK ***** */
38
+ #ifndef nsSingleByteCharSetProber_h__
39
+ #define nsSingleByteCharSetProber_h__
40
+
41
+ #include "nsCharSetProber.h"
42
+
43
+ /** Codepoints **/
44
+
45
+ /* Illegal codepoints. */
46
+ #define ILL 255
47
+ /* Control character. */
48
+ #define CTR 254
49
+ /* Symbols and punctuation that do not belong to words. */
50
+ #define SYM 253
51
+ /* Return/Line feeds. */
52
+ #define RET 252
53
+ /* Numbers 0-9. */
54
+ #define NUM 251
55
+ /* Detection thresholds: sequences required before HandleData may shortcut, the confidence bounds that trigger the shortcut, and the first order value that is not counted as a character. */
56
+ #define SB_ENOUGH_REL_THRESHOLD 1024
57
+ #define POSITIVE_SHORTCUT_THRESHOLD (float)0.95
58
+ #define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05
59
+ #define SYMBOL_CAT_ORDER 250
60
+ /* Categories a two-character sequence can fall into; they index the prober's mSeqCounters array. */
61
+ #define NUMBER_OF_SEQ_CAT 4
62
+ #define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
63
+ #define PROBABLE_CAT (NUMBER_OF_SEQ_CAT-2)
64
+ #define NEUTRAL_CAT (NUMBER_OF_SEQ_CAT-3)
65
+ #define NEGATIVE_CAT 0
66
+
67
+ typedef struct
68
+ { // Language/charset model consumed by nsSingleByteCharSetProber.
69
+ /* [256] table mapping codepoints to character orders. */
70
+ const unsigned char* const charToOrderMap;
71
+ /* freqCharCount x freqCharCount table of 2-char sequence's frequencies; the prober indexes it as first*freqCharCount+second and uses each entry as a sequence category. */
72
+ const PRUint8* const precedenceMatrix;
73
+ /* The count of frequent characters. */
74
+ int freqCharCount;
75
+ float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
76
+ PRBool keepEnglishLetter; // says if this script contains English characters (not implemented)
77
+ const char* const charsetName; // returned by the prober's GetCharSetName() when it has no name prober
78
+ const char* const langName; // returned by the prober's GetLanguage() when it has no name prober
79
+ } SequenceModel;
80
+
81
+
82
+ class nsSingleByteCharSetProber : public nsCharSetProber{ // Prober that scores a byte stream against a single SequenceModel.
83
+ public:
84
+ nsSingleByteCharSetProber(const SequenceModel *model) // forward pair lookup, no auxiliary name prober
85
+ :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
86
+ nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber) // reversed: swap each pair in the model lookup; nameProber: answers GetCharSetName()/GetLanguage() when non-null
87
+ :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
88
+
89
+ virtual const char* GetCharSetName();
90
+ virtual const char* GetLanguage();
91
+ virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
92
+ virtual nsProbingState GetState(void) {return mState;}
93
+ virtual void Reset(void);
94
+ virtual float GetConfidence(void);
95
+ virtual void SetOpion() {} // no-op here. NOTE(review): the name looks like a typo of "SetOption"; presumably it matches a base-class virtual - confirm before renaming
96
+
97
+ // This feature is not implemented yet. Every current language model
98
+ // contains this parameter as PR_FALSE. No one is looking at this
99
+ // parameter or calling this method.
100
+ // Moreover, the nsSBCSGroupProber which calls the HandleData of this
101
+ // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
102
+ // of the English letters.
103
+ PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented)
104
+
105
+ #ifdef DEBUG_chardet
106
+ virtual void DumpStatus();
107
+ #endif
108
+
109
+ protected:
110
+ nsProbingState mState; // current detection state; returned by GetState() and HandleData()
111
+ const SequenceModel* const mModel; // model passed to the constructor; this class never frees it
112
+ const PRBool mReversed; // PR_TRUE if we need to reverse every pair in the model lookup
113
+
114
+ // character order of the last character seen
115
+ unsigned char mLastOrder;
116
+
117
+ PRUint32 mTotalSeqs; // number of pairs of consecutive frequent characters seen
118
+ PRUint32 mSeqCounters[NUMBER_OF_SEQ_CAT]; // pairs seen per sequence category (NEGATIVE_CAT .. POSITIVE_CAT)
119
+
120
+ PRUint32 mTotalChar; // bytes whose order is below SYMBOL_CAT_ORDER
121
+ PRUint32 mCtrlChar; // bytes whose order is CTR
122
+ // characters that fall in our sampling range
123
+ PRUint32 mFreqChar;
124
+
125
+ // Optional auxiliary prober for name decision. Created and destroyed by the GroupProber.
126
+ nsCharSetProber* mNameProber;
127
+
128
+ };
129
+
130
+ extern const SequenceModel Windows_1256ArabicModel;
131
+ extern const SequenceModel Iso_8859_6ArabicModel;
132
+
133
+ extern const SequenceModel Koi8rRussianModel;
134
+ extern const SequenceModel Win1251RussianModel;
135
+ extern const SequenceModel Latin5RussianModel;
136
+ extern const SequenceModel MacCyrillicRussianModel;
137
+ extern const SequenceModel Ibm866RussianModel;
138
+ extern const SequenceModel Ibm855RussianModel;
139
+
140
+ extern const SequenceModel Iso_8859_7GreekModel;
141
+ extern const SequenceModel Windows_1253GreekModel;
142
+
143
+ extern const SequenceModel Latin5BulgarianModel;
144
+ extern const SequenceModel Win1251BulgarianModel;
145
+
146
+ extern const SequenceModel Iso_8859_2HungarianModel;
147
+ extern const SequenceModel Windows_1250HungarianModel;
148
+
149
+ extern const SequenceModel Win1255Model;
150
+
151
+ extern const SequenceModel Tis_620ThaiModel;
152
+ extern const SequenceModel Iso_8859_11ThaiModel;
153
+
154
+ extern const SequenceModel Iso_8859_15FrenchModel;
155
+ extern const SequenceModel Iso_8859_1FrenchModel;
156
+ extern const SequenceModel Windows_1252FrenchModel;
157
+
158
+ extern const SequenceModel Iso_8859_15SpanishModel;
159
+ extern const SequenceModel Iso_8859_1SpanishModel;
160
+ extern const SequenceModel Windows_1252SpanishModel;
161
+
162
+ extern const SequenceModel Iso_8859_1GermanModel;
163
+ extern const SequenceModel Windows_1252GermanModel;
164
+
165
+ extern const SequenceModel Iso_8859_3EsperantoModel;
166
+
167
+ extern const SequenceModel Iso_8859_3TurkishModel;
168
+ extern const SequenceModel Iso_8859_9TurkishModel;
169
+
170
+ extern const SequenceModel VisciiVietnameseModel;
171
+ extern const SequenceModel Windows_1258VietnameseModel;
172
+
173
+ extern const SequenceModel Iso_8859_15DanishModel;
174
+ extern const SequenceModel Iso_8859_1DanishModel;
175
+ extern const SequenceModel Windows_1252DanishModel;
176
+
177
+ extern const SequenceModel Iso_8859_13LithuanianModel;
178
+ extern const SequenceModel Iso_8859_10LithuanianModel;
179
+ extern const SequenceModel Iso_8859_4LithuanianModel;
180
+
181
+ extern const SequenceModel Iso_8859_13LatvianModel;
182
+ extern const SequenceModel Iso_8859_10LatvianModel;
183
+ extern const SequenceModel Iso_8859_4LatvianModel;
184
+
185
+ extern const SequenceModel Iso_8859_1PortugueseModel;
186
+ extern const SequenceModel Iso_8859_9PortugueseModel;
187
+ extern const SequenceModel Iso_8859_15PortugueseModel;
188
+ extern const SequenceModel Windows_1252PortugueseModel;
189
+
190
+ extern const SequenceModel Iso_8859_3MalteseModel;
191
+
192
+ extern const SequenceModel Windows_1250CzechModel;
193
+ extern const SequenceModel Iso_8859_2CzechModel;
194
+ extern const SequenceModel Ibm852CzechModel;
195
+ extern const SequenceModel Mac_CentraleuropeCzechModel;
196
+
197
+ extern const SequenceModel Windows_1250SlovakModel;
198
+ extern const SequenceModel Iso_8859_2SlovakModel;
199
+ extern const SequenceModel Ibm852SlovakModel;
200
+ extern const SequenceModel Mac_CentraleuropeSlovakModel;
201
+
202
+ extern const SequenceModel Windows_1250PolishModel;
203
+ extern const SequenceModel Iso_8859_2PolishModel;
204
+ extern const SequenceModel Iso_8859_13PolishModel;
205
+ extern const SequenceModel Iso_8859_16PolishModel;
206
+ extern const SequenceModel Ibm852PolishModel;
207
+ extern const SequenceModel Mac_CentraleuropePolishModel;
208
+
209
+ extern const SequenceModel Iso_8859_1FinnishModel;
210
+ extern const SequenceModel Iso_8859_4FinnishModel;
211
+ extern const SequenceModel Iso_8859_9FinnishModel;
212
+ extern const SequenceModel Iso_8859_13FinnishModel;
213
+ extern const SequenceModel Iso_8859_15FinnishModel;
214
+ extern const SequenceModel Windows_1252FinnishModel;
215
+
216
+ extern const SequenceModel Iso_8859_1ItalianModel;
217
+ extern const SequenceModel Iso_8859_3ItalianModel;
218
+ extern const SequenceModel Iso_8859_9ItalianModel;
219
+ extern const SequenceModel Iso_8859_15ItalianModel;
220
+ extern const SequenceModel Windows_1252ItalianModel;
221
+
222
+ extern const SequenceModel Windows_1250CroatianModel;
223
+ extern const SequenceModel Iso_8859_2CroatianModel;
224
+ extern const SequenceModel Iso_8859_13CroatianModel;
225
+ extern const SequenceModel Iso_8859_16CroatianModel;
226
+ extern const SequenceModel Ibm852CroatianModel;
227
+ extern const SequenceModel Mac_CentraleuropeCroatianModel;
228
+
229
+ extern const SequenceModel Windows_1252EstonianModel;
230
+ extern const SequenceModel Windows_1257EstonianModel;
231
+ extern const SequenceModel Iso_8859_4EstonianModel;
232
+ extern const SequenceModel Iso_8859_13EstonianModel;
233
+ extern const SequenceModel Iso_8859_15EstonianModel;
234
+
235
+ extern const SequenceModel Iso_8859_15IrishModel;
236
+ extern const SequenceModel Iso_8859_9IrishModel;
237
+ extern const SequenceModel Iso_8859_1IrishModel;
238
+ extern const SequenceModel Windows_1252IrishModel;
239
+
240
+ extern const SequenceModel Windows_1250RomanianModel;
241
+ extern const SequenceModel Iso_8859_2RomanianModel;
242
+ extern const SequenceModel Iso_8859_16RomanianModel;
243
+ extern const SequenceModel Ibm852RomanianModel;
244
+
245
+ extern const SequenceModel Windows_1250SloveneModel;
246
+ extern const SequenceModel Iso_8859_2SloveneModel;
247
+ extern const SequenceModel Iso_8859_16SloveneModel;
248
+ extern const SequenceModel Ibm852SloveneModel;
249
+ extern const SequenceModel Mac_CentraleuropeSloveneModel;
250
+
251
+ extern const SequenceModel Iso_8859_1SwedishModel;
252
+ extern const SequenceModel Iso_8859_4SwedishModel;
253
+ extern const SequenceModel Iso_8859_9SwedishModel;
254
+ extern const SequenceModel Iso_8859_15SwedishModel;
255
+ extern const SequenceModel Windows_1252SwedishModel;
256
+
257
+ #endif /* nsSingleByteCharSetProber_h__ */
258
+
@@ -0,0 +1,98 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
+ /* ***** BEGIN LICENSE BLOCK *****
3
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
+ *
5
+ * The contents of this file are subject to the Mozilla Public License Version
6
+ * 1.1 (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ * http://www.mozilla.org/MPL/
9
+ *
10
+ * Software distributed under the License is distributed on an "AS IS" basis,
11
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
+ * for the specific language governing rights and limitations under the
13
+ * License.
14
+ *
15
+ * The Original Code is mozilla.org code.
16
+ *
17
+ * The Initial Developer of the Original Code is
18
+ * Netscape Communications Corporation.
19
+ * Portions created by the Initial Developer are Copyright (C) 1998
20
+ * the Initial Developer. All Rights Reserved.
21
+ *
22
+ * Contributor(s):
23
+ *
24
+ * Alternatively, the contents of this file may be used under the terms of
25
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
26
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
+ * in which case the provisions of the GPL or the LGPL are applicable instead
28
+ * of those above. If you wish to allow use of your version of this file only
29
+ * under the terms of either the GPL or the LGPL, and not to allow others to
30
+ * use your version of this file under the terms of the MPL, indicate your
31
+ * decision by deleting the provisions above and replace them with the notice
32
+ * and other provisions required by the GPL or the LGPL. If you do not delete
33
+ * the provisions above, a recipient may use your version of this file under
34
+ * the terms of any one of the MPL, the GPL or the LGPL.
35
+ *
36
+ * ***** END LICENSE BLOCK ***** */
37
+
38
+ // for S-JIS encoding, observe these characteristics:
39
+ // 1, kana characters (or hankaku?) often have a high frequency of appearance
40
+ // 2, kana characters often appear in groups
41
+ // 3, certain combinations of kana are never used in the Japanese language
42
+
43
+ #include "nsSJISProber.h"
44
+
45
+ void nsSJISProber::Reset(void) // Puts the prober back into its detecting state.
46
+ {
47
+ mCodingSM->Reset(); // restart the coding state machine
48
+ mState = eDetecting; // drop any earlier eFoundIt verdict
49
+ mContextAnalyser.Reset(mIsPreferredLanguage); // both analysers are reset with the same mIsPreferredLanguage value
50
+ mDistributionAnalyser.Reset(mIsPreferredLanguage);
51
+ }
52
+
53
+ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
54
+ { // Runs aLen bytes of aBuf through the coding state machine and both analysers; returns the updated mState.
55
+ PRUint32 codingState;
56
+ // mLastChar[0] holds the final byte saved by the previous call, so a character split across two buffers can be rebuilt.
57
+ for (PRUint32 i = 0; i < aLen; i++)
58
+ {
59
+ codingState = mCodingSM->NextState(aBuf[i]);
60
+ if (codingState == eItsMe) // state machine reports eItsMe: accept and stop scanning
61
+ {
62
+ mState = eFoundIt;
63
+ break;
64
+ }
65
+ if (codingState == eStart) // presumably a complete character was just consumed -- TODO confirm against the state machine
66
+ {
67
+ PRUint32 charLen = mCodingSM->GetCurrentCharLen();
68
+ if (i == 0) // the character may have begun in the previous buffer
69
+ {
70
+ mLastChar[1] = aBuf[0]; // pair the saved byte with the first byte of this buffer
71
+ mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
72
+ mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
73
+ }
74
+ else // read the character directly from aBuf
75
+ {
76
+ mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
77
+ mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
78
+ }
79
+ }
80
+ }
81
+ // Save the last byte for the next call. An empty buffer has none: aLen-1 would wrap around and index far out of bounds.
82
+ if (aLen > 0) mLastChar[0] = aBuf[aLen-1];
83
+ // Shortcut: once the context analyser has enough data and confidence exceeds SHORTCUT_THRESHOLD, declare a match.
84
+ if (mState == eDetecting)
85
+ if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
86
+ mState = eFoundIt;
87
+
88
+ return mState;
89
+ }
90
+
91
+ float nsSJISProber::GetConfidence(void)
92
+ {
93
+ const float contextScore = mContextAnalyser.GetConfidence(); // context analyser is queried first
94
+ const float distributionScore = mDistributionAnalyser.GetConfidence();
95
+ // Report whichever analyser is more confident; the distribution score is returned when the two are equal.
96
+ if (contextScore > distributionScore) return contextScore;
97
+ return distributionScore;
98
+ }
98
+