cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,148 @@
1
+ = Logs of language model for Slovene (sl) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-28 22:00:35.243966
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ XCOM: Enemy Unknown (revision 4704271)
11
+ 1UP.com (revision 4547348)
12
+ 2K Games (revision 4110089)
13
+ Android (operacijski sistem) (revision 4619359)
14
+ Animator videoigre (revision 4702643)
15
+ App Store (revision 3903089)
16
+ Artefakt (revision 4484504)
17
+ Athlon (revision 4524746)
18
+ Avstralazija (revision 4623530)
19
+ Avtopsija (revision 4541344)
20
+ Bralno-pisalni pomnilnik (revision 4256388)
21
+ Civilization (serija) (revision 4645770)
22
+ Deus Ex: Human Revolution (revision 4694860)
23
+ Digitalna distribucija (revision 4696215)
24
+ DirectX (revision 4477913)
25
+ Dishonored (revision 4619444)
26
+ Edge (magazine) (revision 4690049)
27
+ Electronic Entertainment Expo (revision 4538691)
28
+ Enoigralska videoigra (revision 4610359)
29
+ Eurogamer (revision 4694860)
30
+ Evropa (revision 4687833)
31
+ Fantasy Flight Games (revision 4649361)
32
+ Firaxis Games (revision 4110089)
33
+ GameRankings (revision 3934020)
34
+ GameSpot (revision 4238015)
35
+ GameSpy (revision 4538691)
36
+ GameTrailers (revision 4704271)
37
+ Game Informer (revision 4704271)
38
+ GamesTM (revision 4704271)
39
+ Grafična kartica (revision 4257980)
40
+ Granata (revision 3859332)
41
+ Holograf (revision 4477482)
42
+ IGN (revision 4576233)
43
+ IOS (revision 4597264)
44
+ Igra igranja vlog (revision 4642276)
45
+ Igra na deski (revision 4649363)
46
+ Igralna konzola (revision 4649866)
47
+ Igralni pogon (revision 4622773)
48
+ Intel (revision 4626025)
49
+ International Standard Book Number (revision 4015087)
50
+ Izdelovalec videoigre (revision 3851747)
51
+ Joker (revija) (revision 3867772)
52
+ Kotaku (revision 4613535)
53
+ Kristal (revision 4156234)
54
+ Linux (revision 4524740)
55
+ Lovec prestreznik (revision 4102792)
56
+ MTV (revision 4621758)
57
+ Mac OS X (revision 4601645)
58
+ Machinima (revision 4601716)
59
+ Major (revision 4245802)
60
+ Mednarodna različica (revision 4116054)
61
+ Metacritic (revision 3934020)
62
+ Michael McCann (skladatelj) (revision 4694860)
63
+ MicroProse (revision 4382810)
64
+ Microsoft Windows (revision 4691357)
65
+ Nezemeljsko življenje (revision 4620576)
66
+ NowGamer (revision 4704271)
67
+ OS X (revision 4601645)
68
+ Ognjena ekipa (revision 4694450)
69
+ Operacijski sistem (revision 4698515)
70
+ Ostrostrelec (revision 4529694)
71
+ Pilot (revision 4069093)
72
+ PlayStation 3 (revision 4382944)
73
+ PlayStation Network (revision 4382944)
74
+ PlayStation Vita (revision 3944025)
75
+ Pogon igre (revision 4622773)
76
+ Procesor (revision 4702518)
77
+ Producent videoiger (revision 4599904)
78
+ Razvijalec videoiger (revision 4093281)
79
+ Računalniška miška (revision 4385579)
80
+ Računalniška platforma (revision 4673669)
81
+ Severna Amerika (revision 4643798)
82
+ Sid Meier (revision 4061487)
83
+ Stealth (revision 4618630)
84
+ Steam (revision 4696215)
85
+ Strateška videoigra (revision 4236795)
86
+ Tablični računalnik (revision 4409985)
87
+ Take-Two Interactive (revision 4110089)
88
+ Telepatija (revision 4481192)
89
+ The Bureau: XCOM Declassified (revision 4704271)
90
+ The Guardian (revision 3929479)
91
+ Trdi disk (revision 4644623)
92
+ UFO: Enemy Unknown (revision 4704271)
93
+ Unreal Engine (revision 4622773)
94
+ Unreal Engine 3 (revision 4622773)
95
+ Uporabniški vmesnik (revision 4552473)
96
+ Valve Corporation (revision 4110105)
97
+ Večigralska videoigra (revision 4618639)
98
+ VideoGamer.com (revision 4704271)
99
+ Vohunski satelit (revision 4215166)
100
+ Vojaška taktika (revision 3970259)
101
+ Vojaški čini (revision 4363026)
102
+
103
+ == End of Parsed pages ==
104
+
105
+ - Wikipedia parsing ended at: 2016-09-28 22:06:46.133919
106
+
107
+ 41 characters appeared 411226 times.
108
+
109
+ First 29 characters:
110
+ [ 0] Char a: 10.090315301075321 %
111
+ [ 1] Char e: 9.90477255815537 %
112
+ [ 2] Char i: 9.666703953543793 %
113
+ [ 3] Char o: 9.177921629468953 %
114
+ [ 4] Char n: 7.28309980400072 %
115
+ [ 5] Char r: 5.808241696779873 %
116
+ [ 6] Char s: 4.575586174025961 %
117
+ [ 7] Char t: 4.4963110309173056 %
118
+ [ 8] Char j: 4.343840126840229 %
119
+ [ 9] Char l: 4.2672399118732764 %
120
+ [10] Char v: 3.802775116359374 %
121
+ [11] Char p: 3.5216644861949393 %
122
+ [12] Char k: 3.5136397017698293 %
123
+ [13] Char d: 3.0387183689747244 %
124
+ [14] Char m: 2.9487435132992563 %
125
+ [15] Char z: 2.350775485985808 %
126
+ [16] Char u: 1.9719083910064055 %
127
+ [17] Char g: 1.9342162217369525 %
128
+ [18] Char b: 1.5392995579073308 %
129
+ [19] Char c: 1.2924766430138173 %
130
+ [20] Char h: 1.1864522184881305 %
131
+ [21] Char č: 1.137087635509428 %
132
+ [22] Char š: 0.6932927392723223 %
133
+ [23] Char ž: 0.45303555709026183 %
134
+ [24] Char f: 0.40707542811009034 %
135
+ [25] Char x: 0.19381070263067024 %
136
+ [26] Char y: 0.19040624863213904 %
137
+ [27] Char w: 0.18919037220409216 %
138
+ [28] Char q: 0.011186063138031156 %
139
+
140
+ The first 29 characters have an accumulated ratio of 0.9998978663800442.
141
+
142
+ 727 sequences found.
143
+
144
+ First 512 (typical positive ratio): 0.9983524317161332
145
+ Next 512 (512-1024): 2.4317528560937295e-06
146
+ Rest: -3.859759734048396e-17
147
+
148
+ - Processing end: 2016-09-28 22:06:46.601266
@@ -0,0 +1,109 @@
1
+ = Logs of language model for Spanish (es) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-12 18:37:37.085123
5
+ - Maximum depth: 2
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ Wikipedia:Portada (revision 84894710)
11
+ 11 de diciembre (revision 87735970)
12
+ 12 de diciembre (revision 87742023)
13
+ 13 de diciembre (revision 87697780)
14
+ 1474 (revision 66715698)
15
+ 1915 (revision 86935345)
16
+ 2000 (revision 87686385)
17
+ 2015 (revision 87743360)
18
+ Actuación (revision 87459085)
19
+ Akiyuki Nosaka (revision 87726149)
20
+ Alberto Podestá (revision 87729965)
21
+ Alejandro Magno (revision 87717064)
22
+ Argentina (revision 87742018)
23
+ Arnold Peralta (revision 87733100)
24
+ Atentados del 11 de diciembre de 2007 (revision 87720544)
25
+ Cantante (revision 86761085)
26
+ Canto (revision 87664585)
27
+ Carlo Furno (revision 87726011)
28
+ Ciencia ficción (revision 87662615)
29
+ Copa Mundial de Clubes de la FIFA 2015 (revision 87734956)
30
+ Corona de Castilla (revision 87209578)
31
+ Crisis migratoria en Europa (revision 87609406)
32
+ Dictadura de Primo de Rivera (revision 87371131)
33
+ Dionisio Miguel Recio (revision 87724426)
34
+ Disneyland (revision 87665192)
35
+ Dolph Schayes (revision 87730770)
36
+ Día Internacional de las Montañas (revision 87739490)
37
+ El discurso del rey (revision 87570241)
38
+ Elecciones regionales de Francia de 2015 (revision 87744011)
39
+ Estados Unidos (revision 87510736)
40
+ Fiction House (revision 87732511)
41
+ Filoxeno de Eretria (revision 83958621)
42
+ Frank Sinatra (revision 87742871)
43
+ Fundación Wikimedia (revision 87703852)
44
+ Geoffrey Marcy (revision 87706505)
45
+ Gheorghe Gruia (revision 87737327)
46
+ Grupo de Acción Republicana (revision 87739104)
47
+ Guerra contra el Estado Islámico (revision 87648946)
48
+ Here We Go Again (canción) (revision 87680365)
49
+ Isaac Asimov (revision 87591711)
50
+ Isabel I de Castilla (revision 87743713)
51
+ John "Hot Rod" Williams (revision 87730438)
52
+ José Subirà-Puig (revision 87740413)
53
+ Julio Terrazas Sandoval (revision 87736542)
54
+ Libertad Lamarque (revision 87508996)
55
+ Mosaico de Issos (revision 87731652)
56
+ Museo Arqueológico Nacional de Nápoles (revision 87302262)
57
+ Philip K. Dick (revision 87725371)
58
+ Planet Comics (revision 86698920)
59
+
60
+ == End of Parsed pages ==
61
+
62
+ - Wikipedia parsing ended at: 2015-12-12 18:39:02.288858
63
+
64
+ 52 characters appeared 991829 times.
65
+
66
+ First 33 characters:
67
+ [ 0] Char e: 12.571925200815867 %
68
+ [ 1] Char a: 11.81988024145291 %
69
+ [ 2] Char o: 8.07941691561751 %
70
+ [ 3] Char n: 7.234513207417812 %
71
+ [ 4] Char s: 7.042242160695039 %
72
+ [ 5] Char i: 7.040528155559072 %
73
+ [ 6] Char r: 6.8208330266608455 %
74
+ [ 7] Char l: 5.722559029832763 %
75
+ [ 8] Char d: 5.275707808503281 %
76
+ [ 9] Char t: 4.668647518876742 %
77
+ [10] Char c: 4.466999855821921 %
78
+ [11] Char u: 3.673717949364255 %
79
+ [12] Char m: 2.710547886782903 %
80
+ [13] Char p: 2.4541528832086983 %
81
+ [14] Char b: 1.3867309788280036 %
82
+ [15] Char g: 1.2748165258325779 %
83
+ [16] Char f: 0.925058654263991 %
84
+ [17] Char y: 0.9045914164639268 %
85
+ [18] Char v: 0.8877538365988492 %
86
+ [19] Char ó: 0.8641610600214351 %
87
+ [20] Char h: 0.7369213846338432 %
88
+ [21] Char q: 0.5913317719082624 %
89
+ [22] Char í: 0.5612862701130941 %
90
+ [23] Char j: 0.43283670874717317 %
91
+ [24] Char z: 0.38071078784750195 %
92
+ [25] Char á: 0.37587124393418625 %
93
+ [26] Char é: 0.29632124085905936 %
94
+ [27] Char k: 0.2001353055819098 %
95
+ [28] Char x: 0.18743150280945606 %
96
+ [29] Char ñ: 0.17462687620547493 %
97
+ [30] Char ú: 0.12865120902897575 %
98
+ [31] Char w: 0.0972949974239511 %
99
+ [32] Char ü: 0.004436248587206061 %
100
+
101
+ The first 33 characters have an accumulated ratio of 0.9999263986029848.
102
+
103
+ 897 sequences found.
104
+
105
+ First 512 (typical positive ratio): 0.9970385677528184
106
+ Next 512 (512-1024): 1.0082383152741046e-06
107
+ Rest: 4.597017211338539e-17
108
+
109
+ - Processing end: 2015-12-12 18:39:02.460105
@@ -0,0 +1,151 @@
1
+ = Logs of language model for Swedish (sv) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-28 22:26:37.221506
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Kakapo (revision 36509929)
11
+ Akut hotad (revision 32517788)
12
+ Aotearoa (revision 36575359)
13
+ Art (revision 36771341)
14
+ Artepitet (revision 36771341)
15
+ Auckland (revision 35752058)
16
+ Auktorsnamn (revision 35976965)
17
+ BBC (revision 36508743)
18
+ Basalomsättning (revision 30567523)
19
+ Beilschmiedia tawa (revision 29101923)
20
+ Berguv (revision 36295501)
21
+ Betesmark (revision 34292168)
22
+ Biotop (revision 35528052)
23
+ BirdLife International (revision 36124283)
24
+ Bonaparte (revision 37325183)
25
+ British Museum (revision 36420244)
26
+ Bröstben (revision 30602527)
27
+ Dacrydium cupressinum (revision 32986501)
28
+ Digital object identifier (revision 27637223)
29
+ Djur (revision 37300775)
30
+ Djurpark (revision 37147093)
31
+ Domän (biologi) (revision 33377709)
32
+ Don Merton (revision 36509929)
33
+ Douglas Adams (revision 36556245)
34
+ Däggdjur (revision 37328286)
35
+ Ekologisk nisch (revision 33898643)
36
+ Ekosystem (revision 36598266)
37
+ Endemisk (revision 30647109)
38
+ Eukaryoter (revision 37095313)
39
+ Evolution (revision 37093592)
40
+ Familj (biologi) (revision 30280200)
41
+ Femininum (revision 30597527)
42
+ Fjäder (biologi) (revision 36364943)
43
+ Fjäderdräkt (revision 36364943)
44
+ Fladdermöss (revision 37307257)
45
+ Flygg (revision 36479633)
46
+ Frukter (revision 34088588)
47
+ Frö (revision 37333131)
48
+ Fågelläte (revision 34034723)
49
+ Fåglar (revision 37387306)
50
+ Fåglarnas liv (revision 36509929)
51
+ Genitiv (revision 37388438)
52
+ George Edward Grey (revision 36509929)
53
+ George Robert Gray (revision 20426710)
54
+ Haasts örn (revision 29175076)
55
+ Hauturu/Little Barrier Island (revision 36509929)
56
+ Hermelin (revision 36578682)
57
+ Hertz (revision 37104488)
58
+ Hjortdjur (revision 36493550)
59
+ Hund (revision 37351832)
60
+ Husdjur (revision 37384850)
61
+ Huskatt (revision 32922967)
62
+ Hāngi (revision 29609696)
63
+ IUCN (revision 30570280)
64
+ Iller (revision 30663158)
65
+ Infraröd (revision 36770733)
66
+ Internationella naturvårdsunionen (revision 30570280)
67
+ Jordbruk (revision 37352625)
68
+ Kahurangi National Park (revision 35956142)
69
+ Kamouflage (revision 36579595)
70
+ Kaniner (revision 36877621)
71
+ Kapiti Island (revision 37395588)
72
+ Katt (revision 36734686)
73
+ Kelp (revision 30312471)
74
+ Kivier (revision 36373234)
75
+ Klass (biologi) (revision 30280201)
76
+ Kroppsfett (revision 35066611)
77
+ Könsdimorfism (revision 30816932)
78
+ Könsfördelning (revision 24769321)
79
+ Lamm- och fårkött (revision 36187205)
80
+ Lek (fortplantningsbeteende) (revision 30508235)
81
+ Mandel (revision 36577529)
82
+ Maori (revision 32560474)
83
+ Maorier (revision 35862066)
84
+ Maoripapegojor (revision 36545138)
85
+ Mark Carwardine (revision 20375916)
86
+ Markpapegoja (revision 36295722)
87
+ Maskulinum (revision 32704551)
88
+ Masterton (revision 29859631)
89
+ Metrosideros umbellata (revision 29071212)
90
+ Milford Sound (revision 20284758)
91
+ Morrhår (revision 36533839)
92
+ Muskelmage (revision 31196380)
93
+ Mustela (revision 20934105)
94
+ Mårddjur (revision 37306347)
95
+ Māori (revision 32560474)
96
+ NHNZ (revision 36509929)
97
+ Nattpapegoja (revision 33486517)
98
+ Nordön (revision 24810231)
99
+ Nya Zeeland (revision 36575359)
100
+ Näbb (revision 23648463)
101
+ Ollonår (revision 36509929)
102
+ Ordning (biologi) (revision 30280196)
103
+
104
+ == End of Parsed pages ==
105
+
106
+ - Wikipedia parsing ended at: 2016-09-28 22:29:21.480287
107
+
108
+ 48 characters appeared 594415 times.
109
+
110
+ First 31 characters:
111
+ [ 0] Char a: 10.070741821791172 %
112
+ [ 1] Char e: 9.737136512369304 %
113
+ [ 2] Char r: 9.110638190489809 %
114
+ [ 3] Char n: 8.378826240925951 %
115
+ [ 4] Char t: 7.481305148759705 %
116
+ [ 5] Char s: 5.828587771169974 %
117
+ [ 6] Char i: 5.359891658184939 %
118
+ [ 7] Char l: 5.173489901836259 %
119
+ [ 8] Char o: 4.694195133029954 %
120
+ [ 9] Char d: 4.597293136949774 %
121
+ [10] Char k: 3.297359588839447 %
122
+ [11] Char m: 3.1898589369379975 %
123
+ [12] Char g: 3.004466576381821 %
124
+ [13] Char v: 2.2324470277499726 %
125
+ [14] Char f: 2.1988005013332437 %
126
+ [15] Char p: 2.06017681249632 %
127
+ [16] Char u: 2.0499146219392173 %
128
+ [17] Char ä: 2.0475593650900468 %
129
+ [18] Char h: 2.028380845032511 %
130
+ [19] Char å: 1.5443755625278637 %
131
+ [20] Char c: 1.442594820117258 %
132
+ [21] Char ö: 1.3515809661600062 %
133
+ [22] Char b: 1.268642278542769 %
134
+ [23] Char j: 0.7302978558751041 %
135
+ [24] Char y: 0.6699023409570755 %
136
+ [25] Char x: 0.2111319532649748 %
137
+ [26] Char w: 0.10262190557102362 %
138
+ [27] Char z: 0.09151855185350302 %
139
+ [28] Char é: 0.021197311642539303 %
140
+ [29] Char ā: 0.011103353717520588 %
141
+ [30] Char q: 0.007570468443764037 %
142
+
143
+ The first 31 characters have an accumulated ratio of 0.999936071599808.
144
+
145
+ 748 sequences found.
146
+
147
+ First 512 (typical positive ratio): 0.997323508584682
148
+ Next 512 (512-1024): 1.6823263208364526e-06
149
+ Rest: 1.7780915628762273e-17
150
+
151
+ - Processing end: 2016-09-28 22:29:21.590354
@@ -0,0 +1,141 @@
1
+ = Logs of language model for Thai (th) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-04 03:01:52.148282
5
+ - Maximum depth: 3
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ หน้าหลัก (revision 5512633)
11
+ 26 พฤศจิกายน (revision 5570053)
12
+ 27 พฤศจิกายน (revision 5888433)
13
+ 28 พฤศจิกายน (revision 6110206)
14
+ กล้องโทรทรรศน์อวกาศฮับเบิล (revision 5830742)
15
+ การประชุมสภาสงฆ์แห่งแคลมงต์ (revision 5463877)
16
+ ความเอนเอียงเพื่อยืนยัน (revision 6231756)
17
+ คัมภีร์พระเวท (revision 6109417)
18
+ คาบสมุทรไซนาย (revision 5661104)
19
+ จักรวรรดิโรมันตะวันออก (revision 6150148)
20
+ ชาวมุสลิม (revision 6242838)
21
+ ซุคฮอย ซู-24 (revision 6015891)
22
+ ดาวอังคาร (revision 6235017)
23
+ ดาวเคราะห์นอกระบบ (revision 5823077)
24
+ ดินแดนศักดิ์สิทธิ์ (revision 6179072)
25
+ ทฤษฎี (revision 5606447)
26
+ ทะกะอะกิ คะจิตะ (revision 6177601)
27
+ ท่าอากาศยานนานาชาติตริภูวัน (revision 6010470)
28
+ นกกาเหว่า (revision 6142782)
29
+ ประเทศอัฟกานิสถาน (revision 6216996)
30
+ ประเทศเนปาล (revision 6206980)
31
+ ปรากฏการณ์การวางกรอบ (revision 6046655)
32
+ ปารีส (revision 6222115)
33
+ พ.ศ. 1638 (revision 4723508)
34
+ พ.ศ. 2438 (revision 5737055)
35
+ พ.ศ. 2515 (revision 6197082)
36
+ พ.ศ. 2544 (revision 6189598)
37
+ พินัยกรรม (revision 5607889)
38
+ มูลนิธิวิกิมีเดีย (revision 5816103)
39
+ ระบบสุริยะ (revision 6201228)
40
+ รางวัลโนเบล (revision 5828030)
41
+ รางวัลโนเบลสาขาฟิสิกส์ (revision 6177103)
42
+ รายชื่อบทความวันนี้ในอดีต (revision 5410610)
43
+ ลักกีสไตรก์ (เพลง) (revision 6195816)
44
+ ลุฟต์ฮันซา (revision 6116038)
45
+ วิกฤตการณ์ผู้ย้ายถิ่นยุโรป (revision 6219634)
46
+ วิกิพีเดีย (revision 6086299)
47
+ วิกิพีเดียภาษาไทย (revision 6209148)
48
+ สงครามครูเสด (revision 6228828)
49
+ สงครามอังกฤษ–แซนซิบาร์ (revision 5829349)
50
+ สติ (จิตวิทยา) (revision 6039161)
51
+ สมมติฐาน (revision 6221744)
52
+ สมเด็จพระราชินีมารีแห่งโรมาเนีย (revision 6211695)
53
+ สมเด็จพระสันตะปาปาเออร์บันที่ 2 (revision 5828365)
54
+ สารานุกรม (revision 6070482)
55
+ อัลเฟรด โนเบล (revision 6214514)
56
+ อาร์เธอร์ แมคโดนัลด์ (revision 6188035)
57
+ เซนต์ปีเตอร์สเบิร์ก (revision 6162201)
58
+ เทือกเขาฮินดูกูช (revision 5218921)
59
+ เนื้อหาเสรี (revision 6160507)
60
+
61
+ == End of Parsed pages ==
62
+
63
+ - Wikipedia parsing ended at: 2015-12-04 03:05:06.181487
64
+
65
+ 105 characters appeared 401052 times.
66
+
67
+ First 64 characters:
68
+ [ 0] Char า: 8.857704237854442 %
69
+ [ 1] Char น: 6.7679502907353655 %
70
+ [ 2] Char ร: 6.739026360671434 %
71
+ [ 3] Char ก: 5.388079351306065 %
72
+ [ 4] Char อ: 5.099837427565503 %
73
+ [ 5] Char ง: 4.861713692987443 %
74
+ [ 6] Char เ: 4.5198627609387305 %
75
+ [ 7] Char ม: 4.133628556895365 %
76
+ [ 8] Char ว: 3.864336794231172 %
77
+ [ 9] Char ด: 3.3152808114658447 %
78
+ [10] Char ย: 3.195844927839781 %
79
+ [11] Char ล: 3.1312647736453125 %
80
+ [12] Char ท: 2.69615910156289 %
81
+ [13] Char ส: 2.6001615750575984 %
82
+ [14] Char ะ: 2.392457835891605 %
83
+ [15] Char ค: 2.384229476476866 %
84
+ [16] Char บ: 2.3321165335168503 %
85
+ [17] Char ต: 2.196473275285998 %
86
+ [18] Char ห: 1.983782651626223 %
87
+ [19] Char ป: 1.9192024974317545 %
88
+ [20] Char แ: 1.7813151411787 %
89
+ [21] Char จ: 1.76261432432702 %
90
+ [22] Char พ: 1.5075351824701035 %
91
+ [23] Char ข: 1.3519443862641254 %
92
+ [24] Char ใ: 1.3295034060421091 %
93
+ [25] Char ไ: 1.2227840778751882 %
94
+ [26] Char ช: 1.0407627938521689 %
95
+ [27] Char โ: 0.9382823175049619 %
96
+ [28] Char ศ: 0.8078752879925796 %
97
+ [29] Char ำ: 0.7393056262030859 %
98
+ [30] Char ถ: 0.599672860377208 %
99
+ [31] Char ซ: 0.541076967575277 %
100
+ [32] Char e: 0.43734977010462484 %
101
+ [33] Char ผ: 0.43585370475649043 %
102
+ [34] Char ณ: 0.4019428901987772 %
103
+ [35] Char a: 0.3897250231890129 %
104
+ [36] Char i: 0.3657879776188624 %
105
+ [37] Char ษ: 0.3647906007201061 %
106
+ [38] Char ภ: 0.34185093204871186 %
107
+ [39] Char ธ: 0.3181632307032505 %
108
+ [40] Char o: 0.3176645422538723 %
109
+ [41] Char n: 0.3139243788835363 %
110
+ [42] Char ญ: 0.29248077556027646 %
111
+ [43] Char r: 0.28350438347147006 %
112
+ [44] Char t: 0.2705384837876385 %
113
+ [45] Char s: 0.2488455362396896 %
114
+ [46] Char l: 0.19598456060560726 %
115
+ [47] Char ฟ: 0.19473783948216192 %
116
+ [48] Char c: 0.16356981139602844 %
117
+ [49] Char ฐ: 0.15833358267755804 %
118
+ [50] Char ฤ: 0.15284800973439852 %
119
+ [51] Char ๆ: 0.14910784636406252 %
120
+ [52] Char d: 0.13090571796176056 %
121
+ [53] Char ฮ: 0.1244227681198448 %
122
+ [54] Char h: 0.12043326052481973 %
123
+ [55] Char u: 0.12043326052481973 %
124
+ [56] Char m: 0.09599752650529109 %
125
+ [57] Char y: 0.08951457666337533 %
126
+ [58] Char ฏ: 0.08677179019179557 %
127
+ [59] Char p: 0.08253293837208142 %
128
+ [60] Char f: 0.08153556147332515 %
129
+ [61] Char S: 0.07604998853016566 %
130
+ [62] Char ฝ: 0.07330720205858592 %
131
+ [63] Char ฉ: 0.0673229406660483 %
132
+
133
+ The first 64 characters have an accumulated ratio of 0.989480167160368.
134
+
135
+ 2324 sequences found.
136
+
137
+ First 512 (typical positive ratio): 0.8815720594354438
138
+ Next 512 (512-1024): 7.480326740672033e-06
139
+ Rest: 0.026341928296264486
140
+
141
+ - Processing end: 2015-12-04 03:05:06.800467