cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,116 @@
1
+ = Logs of language model for French (fr) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-03 21:07:37.508739
5
+ - Maximum depth: 2
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ Wikipédia:Accueil_principal (revision 115957655)
11
+ Bœuf (animal) (revision 115500130)
12
+ 1500 av. J.-C. (revision 110583603)
13
+ 1898 dans les chemins de fer (revision 106801806)
14
+ 1913 dans les chemins de fer (revision 112852042)
15
+ 1974 dans les chemins de fer (revision 90170756)
16
+ 1er décembre (revision 121012781)
17
+ 2009 dans les chemins de fer (revision 107042206)
18
+ 2011 dans les chemins de fer (revision 109560866)
19
+ 24 novembre (revision 120782024)
20
+ 26 novembre (revision 120833172)
21
+ 29 novembre (revision 120918160)
22
+ 2 décembre (revision 121025437)
23
+ 30 novembre (revision 120947714)
24
+ 3 décembre (revision 121030621)
25
+ Amphibien (revision 120332329)
26
+ Angleterre (revision 120784240)
27
+ Anne-Josèphe Théroigne de Méricourt (revision 121009789)
28
+ Années 1930 (revision 120558236)
29
+ Antonio Troyo Calderón (revision 121028881)
30
+ António Costa (revision 120993829)
31
+ Attentat du 24 novembre 2015 à Tunis (revision 121015161)
32
+ Balard (métro de Paris) (revision 118979088)
33
+ Bois de Vincennes (revision 120822909)
34
+ Buse à tête blanche (revision 121009499)
35
+ Californie (revision 120922479)
36
+ Charenton-le-Pont (revision 120210025)
37
+ Charenton - Écoles (métro de Paris) (revision 108644873)
38
+ Chronique médiévale (revision 100253272)
39
+ Concorde (métro de Paris) (revision 120856751)
40
+ Conférence de Paris de 2015 sur le climat (revision 121029398)
41
+ Crise de la dette publique grecque (revision 120905208)
42
+ Crise entre la Colombie et le Venezuela de 2015 (revision 120857143)
43
+ Crise migratoire en Europe (revision 121002308)
44
+ Crise russo-turque de 2015 (revision 121030214)
45
+ Créteil (revision 120684618)
46
+ Créteil - Préfecture (métro de Paris) (revision 113486387)
47
+ Deuxième guerre civile libyenne (revision 121027704)
48
+ Devise (monnaie) (revision 121015771)
49
+ Droits de tirage spéciaux (revision 121009135)
50
+ Décembre 2015 (revision 121010045)
51
+ Département français (revision 120993190)
52
+ Eldar Riazanov (revision 120996396)
53
+ Enfants verts de Woolpit (revision 121002303)
54
+ Ernst Larsen (revision 121026772)
55
+ Fatima Mernissi (revision 120992271)
56
+ Fejervarya cancrivora (revision 120353807)
57
+ Fonds monétaire international (revision 120754406)
58
+ Français (revision 120883858)
59
+ Freyja (revision 121028677)
60
+ Fusillade du 2 décembre 2015 en Californie (revision 121030353)
61
+
62
+ == End of Parsed pages ==
63
+
64
+ - Wikipedia parsing ended at: 2015-12-03 21:10:27.682316
65
+
66
+ 56 characters appeared 728239 times.
67
+
68
+ First 38 characters:
69
+ [ 0] Char e: 14.339660468609894 %
70
+ [ 1] Char s: 7.954806045817375 %
71
+ [ 2] Char a: 7.864176458552756 %
72
+ [ 3] Char n: 7.572102015959047 %
73
+ [ 4] Char i: 7.34154583866011 %
74
+ [ 5] Char r: 7.020222756540091 %
75
+ [ 6] Char t: 6.833608197308851 %
76
+ [ 7] Char l: 5.9446143367768 %
77
+ [ 8] Char o: 5.386418469760614 %
78
+ [ 9] Char u: 5.024861343597363 %
79
+ [10] Char d: 4.169235649285468 %
80
+ [11] Char c: 3.4240132703686568 %
81
+ [12] Char p: 2.8882001650557028 %
82
+ [13] Char m: 2.803063280049544 %
83
+ [14] Char é: 2.498355622261373 %
84
+ [15] Char g: 1.277739862874688 %
85
+ [16] Char v: 1.1729665672945284 %
86
+ [17] Char f: 1.1614318925517584 %
87
+ [18] Char b: 0.9925312981040565 %
88
+ [19] Char h: 0.8580974103282026 %
89
+ [20] Char q: 0.7740590657737364 %
90
+ [21] Char x: 0.43570860665248634 %
91
+ [22] Char y: 0.41044217626356183 %
92
+ [23] Char è: 0.4100302235941771 %
93
+ [24] Char à: 0.363479571953713 %
94
+ [25] Char j: 0.29591933417463223 %
95
+ [26] Char k: 0.1359443808969308 %
96
+ [27] Char ç: 0.11685724054877589 %
97
+ [28] Char ê: 0.11218844362908331 %
98
+ [29] Char z: 0.10738232915292918 %
99
+ [30] Char w: 0.08239053387692777 %
100
+ [31] Char ô: 0.04792382720507965 %
101
+ [32] Char â: 0.03364280133307884 %
102
+ [33] Char î: 0.029385957082770905 %
103
+ [34] Char û: 0.024854477719539875 %
104
+ [35] Char œ: 0.021146903695078125 %
105
+ [36] Char ï: 0.017851282340001016 %
106
+ [37] Char ù: 0.015242248767231636 %
107
+
108
+ The first 38 characters have an accumulated ratio of 0.999621003544166.
109
+
110
+ 914 sequences found.
111
+
112
+ First 512 (typical positive ratio): 0.997057879992383
113
+ Next 512 (512-1024): 1.3731755646154627e-06
114
+ Rest: 3.8163916471489756e-17
115
+
116
+ - Processing end: 2015-12-03 21:10:27.987730
@@ -0,0 +1,159 @@
1
+ = Logs of language model for German (de) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-03 22:42:29.154759
5
+ - Maximum depth: 3
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Wikipedia:Hauptseite (revision 140459035)
11
+ 1740 (revision 145584733)
12
+ 1890 (revision 148575121)
13
+ 1925 (revision 148682812)
14
+ 1965 (revision 148411693)
15
+ 3. Dezember (revision 148684818)
16
+ Bundeswehreinsatz in Syrien (revision 148714599)
17
+ Clara Klabunde (revision 148697193)
18
+ Day Tripper (revision 145956669)
19
+ Dezember 2015 (revision 148713161)
20
+ Edwar al-Charrat (revision 148656295)
21
+ Enzyklika (revision 148704406)
22
+ Enzyklopädie (revision 148364925)
23
+ Facebook Inc. (revision 148280344)
24
+ Franz Neubauer (CSU) (revision 148710968)
25
+ Freie Inhalte (revision 148123311)
26
+ Gabriele Ferzetti (revision 148715582)
27
+ Georg von Waldburg zu Zeil und Trauchburg (revision 148710609)
28
+ Jim Loscutoff (revision 148690370)
29
+ Katarina Witt (revision 148713884)
30
+ Klavierkonzert (Gershwin) (revision 143900338)
31
+ Ludolf Camphausen (revision 145088962)
32
+ Mark Zuckerberg (revision 148714452)
33
+ Montenegro (revision 148692773)
34
+ NATO (revision 148697872)
35
+ NATO-Osterweiterung (revision 148697354)
36
+ Nekrolog 2015 (revision 148711617)
37
+ Peter-Ulrich-Haus (revision 148654149)
38
+ Philanthropie (revision 145561255)
39
+ Präsidentschaftswahl in Burkina Faso 2015 (revision 148677453)
40
+ Québec (Stadt) (revision 148716893)
41
+ Rivka Zohar (revision 148708850)
42
+ Roch Marc Kaboré (revision 148673951)
43
+ Rubber Soul (revision 148665720)
44
+ Salve Regina (Latry) (revision 148713279)
45
+ Schießerei in San Bernardino (revision 148711974)
46
+ Single (Musik) (revision 146450210)
47
+ The Giving Pledge (revision 148711856)
48
+ Ubi primum (Benedikt XIV.) (revision 136691297)
49
+ VTech (revision 148704025)
50
+ Walter Damrosch (revision 148716127)
51
+ We Can Work It Out (revision 148706519)
52
+ 1. August (revision 148089156)
53
+ 1. Januar (revision 148659041)
54
+ 1. Juni (revision 148375663)
55
+ 1. November (revision 147888516)
56
+ 10. August (revision 148079904)
57
+ 10. November (revision 148658709)
58
+ 10. September (revision 148201788)
59
+ 11. August (revision 148315737)
60
+ 11. Oktober (revision 148087353)
61
+ 12. Januar (revision 147377586)
62
+ 12. September (revision 148359994)
63
+ 13. Dezember (revision 148614781)
64
+ 13. September (revision 148320520)
65
+ 14. August (revision 148513270)
66
+ 14. Dezember (revision 147968142)
67
+ 15. April (revision 146544147)
68
+ 15. August (revision 147827975)
69
+ 16. April (revision 148712866)
70
+ 16. Dezember (revision 148392316)
71
+ 16. Februar (revision 148221712)
72
+ 16. Jahrhundert (revision 147390194)
73
+ 16. Juli (revision 147928181)
74
+ 1652 (revision 142931287)
75
+ 1654 (revision 145531451)
76
+ 1656 (revision 144194148)
77
+ 1657 (revision 147492859)
78
+ 1662 (revision 147548355)
79
+ 1665 (revision 147757128)
80
+ 1666 (revision 147843417)
81
+ 1667 (revision 148566099)
82
+ 1668 (revision 145304760)
83
+ 1670 (revision 147643990)
84
+ 1672 (revision 145296252)
85
+ 1673 (revision 147879655)
86
+ 1674 (revision 146784434)
87
+ 1679 (revision 146069377)
88
+ 1685 (revision 148596629)
89
+ 1688 (revision 140370621)
90
+ 1692 (revision 146892539)
91
+ 1693 (revision 147464373)
92
+ 17. August (revision 148288443)
93
+ 17. Februar (revision 145814425)
94
+ 17. Jahrhundert (revision 147869798)
95
+ 17. Oktober (revision 148327370)
96
+ 1700er (revision 127393249)
97
+ 1707 (revision 148288721)
98
+ 1710er (revision 134739897)
99
+ 1720er (revision 127302296)
100
+ 1730 (revision 148694277)
101
+ 1730er (revision 127393280)
102
+ 1731 (revision 147730204)
103
+ 1735 (revision 145436596)
104
+ 1736 (revision 145680122)
105
+ 1737 (revision 146645905)
106
+ 1738 (revision 145094942)
107
+ 1739 (revision 147843445)
108
+ 1740er (revision 127393296)
109
+ 1741 (revision 146530178)
110
+ 1742 (revision 147010984)
111
+
112
+ == End of Parsed pages ==
113
+
114
+ - Wikipedia parsing ended at: 2015-12-03 22:50:46.517106
115
+
116
+ 59 characters appeared 1746165 times.
117
+
118
+ First 31 characters:
119
+ [ 0] Char e: 14.27997926885489 %
120
+ [ 1] Char r: 8.696257226550754 %
121
+ [ 2] Char n: 8.464091308667852 %
122
+ [ 3] Char i: 8.258784250056554 %
123
+ [ 4] Char s: 6.690833913175444 %
124
+ [ 5] Char a: 6.370703799469123 %
125
+ [ 6] Char t: 5.925728668253001 %
126
+ [ 7] Char h: 4.540979804314025 %
127
+ [ 8] Char d: 4.367284878576767 %
128
+ [ 9] Char l: 4.083634708060234 %
129
+ [10] Char u: 3.899917819908199 %
130
+ [11] Char o: 3.6450163644329145 %
131
+ [12] Char c: 3.392405643223865 %
132
+ [13] Char m: 2.578565026787274 %
133
+ [14] Char g: 2.543631329227192 %
134
+ [15] Char b: 1.9455206123132693 %
135
+ [16] Char k: 1.7604292836014925 %
136
+ [17] Char f: 1.6422273954637734 %
137
+ [18] Char p: 1.519329502080273 %
138
+ [19] Char w: 1.0273370500496803 %
139
+ [20] Char z: 1.0037997554641171 %
140
+ [21] Char v: 0.9010603236234834 %
141
+ [22] Char ä: 0.4926224039538073 %
142
+ [23] Char j: 0.4661644231787947 %
143
+ [24] Char ü: 0.4094687500894818 %
144
+ [25] Char y: 0.34229296773214446 %
145
+ [26] Char ö: 0.3044958523392692 %
146
+ [27] Char ß: 0.14477440562604335 %
147
+ [28] Char x: 0.09918879372796958 %
148
+ [29] Char é: 0.07633871942227682 %
149
+ [30] Char q: 0.06099079983850323 %
150
+
151
+ The first 31 characters have an accumulated ratio of 0.9993385504806246.
152
+
153
+ 1188 sequences found.
154
+
155
+ First 512 (typical positive ratio): 0.9934041448127945
156
+ Next 512 (512-1024): 1.1453671331174316e-06
157
+ Rest: 0.0001130256702826099
158
+
159
+ - Processing end: 2015-12-03 22:50:46.681265
@@ -0,0 +1,272 @@
1
+ = Logs of language model for Greek (el) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-05-25 15:16:42.898905
5
+ - Maximum depth: 5
6
+ - Max number of pages: 200
7
+
8
+ == Parsed pages ==
9
+
10
+ Πύλη:Κύρια (revision 5511929)
11
+ 14 Σεπτεμβρίου (revision 5808678)
12
+ 16 Σεπτεμβρίου (revision 5810117)
13
+ 1771 (revision 4940722)
14
+ 1829 (revision 5863423)
15
+ 1921 (revision 5819621)
16
+ 1948 (revision 5785943)
17
+ 1965 (revision 5846907)
18
+ 1970 (revision 5816968)
19
+ 1973 (revision 5423504)
20
+ 25 Μαΐου (revision 5865973)
21
+ Eurovision (revision 5865484)
22
+ Scorpions (revision 5586116)
23
+ Wiki (revision 5859059)
24
+ Wikimedia (revision 5771416)
25
+ Αγία Πετρούπολη (revision 5782933)
26
+ Αγγλική γλώσσα (revision 5851128)
27
+ Αλεξάντρ Πούσκιν (revision 5790131)
28
+ Βέλος ΙΙ (Αντιτορπιλικό) (revision 5178914)
29
+ Βραζιλία (revision 5857981)
30
+ Γαλλική γλώσσα (revision 5851119)
31
+ Γαλλική εισβολή στην Ρωσία (revision 5858523)
32
+ Γενικές εκλογές στη Δομινικανή Δημοκρατία 2016 (revision 5848770)
33
+ Γηραιότερο πρόσωπο στον κόσμο (revision 5852034)
34
+ Διαγωνισμός Τραγουδιού Eurovision 2016 (revision 5863783)
35
+ Δικτατορία των Συνταγματαρχών (revision 5864405)
36
+ Δομινικανή Δημοκρατία (revision 5848627)
37
+ Εγκυκλοπαίδεια (revision 5566281)
38
+ Ελεύθερο περιεχόμενο (revision 5824058)
39
+ Ελλάδα (revision 5863759)
40
+ Ελληνική γλώσσα (revision 5790854)
41
+ Ιππικό (revision 5376587)
42
+ Ιταλία (revision 5781867)
43
+ Κίεβο (revision 5794613)
44
+ Κατάληψη του Παρισιού (1814) (revision 5729368)
45
+ Κλάους Μάιν (revision 5668218)
46
+ Μάχη της Λειψίας (revision 5729316)
47
+ Μάχη της Σαλτάνοφκα (revision 5865460)
48
+ Μάχη του Μποροντίνο (revision 5670322)
49
+ Μαξ Βερστάπεν (revision 5864745)
50
+ Μπλουζ (revision 5846428)
51
+ Νίκος Καχτίτσης (revision 5723615)
52
+ Νικολάι Νικολάεβιτς Ραέφσκι (revision 5865460)
53
+ Ντίλμα Ρούσεφ (revision 5843412)
54
+ Ομοσπονδιακό Σοβιέτ της Ρωσικής Αυτοκρατορίας (revision 5865460)
55
+ Ουκρανία (revision 5847651)
56
+ Πάτρα (revision 5800331)
57
+ Ποδόσφαιρο (revision 5864952)
58
+ Πριμέρα Ντιβιζιόν (revision 5846965)
59
+ Ρωσική Αυτοκρατορία (revision 5858419)
60
+ Ρωσική γλώσσα (revision 5818960)
61
+ Ρώσοι (revision 5376764)
62
+ Σουζάνα Μούσατ Τζόουνς (revision 5848866)
63
+ Στοκχόλμη (revision 5670508)
64
+ Στρατηγός (revision 5464718)
65
+ Τζακ Στάινμπεργκερ (revision 5820361)
66
+ Τζαμάλα (revision 5863755)
67
+ ΦΚ Μπαρτσελόνα (revision 5862032)
68
+ Φόρμουλα Ένα (revision 5809160)
69
+ 10 Σεπτεμβρίου (revision 5841838)
70
+ 11 Σεπτεμβρίου (revision 5796866)
71
+ 12 Σεπτεμβρίου (revision 5795991)
72
+ 1321 (revision 5811404)
73
+ 13 Σεπτεμβρίου (revision 5830505)
74
+ 1435 (revision 5600729)
75
+ 1498 (revision 5831868)
76
+ 1523 (revision 5863396)
77
+ 1527 (revision 5579042)
78
+ 1580 (revision 5742938)
79
+ 15 Σεπτεμβρίου (revision 5817369)
80
+ 1712 (revision 5699806)
81
+ 1741 (revision 5817896)
82
+ 1752 (revision 5666171)
83
+ 1760 (revision 5490201)
84
+ 1769 (revision 5336004)
85
+ 17 Σεπτεμβρίου (revision 5843911)
86
+ 1812 (revision 5703237)
87
+ 1814 (revision 5751122)
88
+ 1851 (revision 5854460)
89
+ 1878 (revision 5863501)
90
+ 1889 (revision 5795061)
91
+ 1890 (revision 5705460)
92
+ 1898 (revision 5863504)
93
+ 18 Σεπτεμβρίου (revision 5661544)
94
+ 1901 (revision 5865687)
95
+ 1902 (revision 5779111)
96
+ 1905 (revision 5862599)
97
+ 1910 (revision 5794858)
98
+ 1916 (revision 5800363)
99
+ 1917 (revision 5865701)
100
+ 1925 (revision 5854774)
101
+ 1927 (revision 5839595)
102
+ 1928 (revision 5814308)
103
+ 1933 (revision 5854834)
104
+ 1936 (revision 5854290)
105
+ 1937 (revision 5794891)
106
+ 1943 (revision 5807315)
107
+ 1944 (revision 5865804)
108
+ 1950 (revision 5807377)
109
+ 1956 (revision 5795994)
110
+ 1960 (revision 5795065)
111
+ 1963 (revision 5863751)
112
+ 1966 (revision 5707508)
113
+ 1969 (revision 5668647)
114
+ 1980 (revision 5832053)
115
+ 1981 (revision 5817635)
116
+ 1982 (revision 5788879)
117
+ 1983 (revision 5812702)
118
+ 1984 (revision 5749754)
119
+ 1989 (revision 5846909)
120
+ 1994 (revision 5863999)
121
+ 1999 (revision 5795003)
122
+ 19 Σεπτεμβρίου (revision 5850863)
123
+ 1 Σεπτεμβρίου (revision 5630491)
124
+ 2000 (revision 5779037)
125
+ 2001 (revision 5779042)
126
+ 2005 (revision 5779066)
127
+ 2006 (revision 5808681)
128
+ 2009 (revision 5827105)
129
+ 2011 (revision 5808660)
130
+ 2016 (revision 5801621)
131
+ 20 Σεπτεμβρίου (revision 5808561)
132
+ 21 Σεπτεμβρίου (revision 5751207)
133
+ 22 Σεπτεμβρίου (revision 5807133)
134
+ 23 Σεπτεμβρίου (revision 5800012)
135
+ 24 Σεπτεμβρίου (revision 5662618)
136
+ 258 (revision 4952368)
137
+ 25 Σεπτεμβρίου (revision 5817621)
138
+ 26 Σεπτεμβρίου (revision 5817637)
139
+ 27 Σεπτεμβρίου (revision 5817648)
140
+ 28 Σεπτεμβρίου (revision 5817677)
141
+ 29 Σεπτεμβρίου (revision 5703562)
142
+ 2 Σεπτεμβρίου (revision 5701639)
143
+ 30 Σεπτεμβρίου (revision 5838312)
144
+ 326 (revision 5818811)
145
+ 3 Σεπτεμβρίου (revision 5816313)
146
+ 407 (revision 4952524)
147
+ 4 Σεπτεμβρίου (revision 5816970)
148
+ 5 Σεπτεμβρίου (revision 5817185)
149
+ 628 (revision 5398024)
150
+ 680 (revision 5365010)
151
+ 685 (revision 5819296)
152
+ 6 Σεπτεμβρίου (revision 5765157)
153
+ 775 (revision 5373211)
154
+ 786 (revision 5398031)
155
+ 7 Σεπτεμβρίου (revision 5749649)
156
+ 81 (revision 5397958)
157
+ 891 (revision 4952139)
158
+ 8 Σεπτεμβρίου (revision 5788878)
159
+ 9 Σεπτεμβρίου (revision 5817240)
160
+ CIA (revision 5857678)
161
+ Miyavi (revision 4944860)
162
+ Άρμεν Κούπτσιος (revision 5766774)
163
+ Έιμι Γουάινχαουζ (revision 5809279)
164
+ Έρβιν Θάλμπεργκ (revision 5716376)
165
+ Ίων Δραγούμης (revision 5818568)
166
+ Αγία Ελένη (revision 5821916)
167
+ Αλεξάντερ φον Χούμπολτ (revision 5773636)
168
+ Αλμπέρτο Κόρντα (revision 5800055)
169
+ Απρίλιος (revision 5766829)
170
+ Αυτοκρατορία των Σασσανιδών (revision 5859880)
171
+ Αύγουστος (revision 5461793)
172
+ Β΄ Παγκόσμιος Πόλεμος (revision 5848530)
173
+ Βέρμαχτ (revision 5212228)
174
+ Βασίλης Λάσκος (revision 5695445)
175
+ Βενεζουέλα (revision 5847962)
176
+ Βρετανική Αυτοκρατορία (revision 5606306)
177
+ Βόρεια Ελλάδα (revision 5670938)
178
+ Γαλλία (revision 5776756)
179
+ Γεώργιος Καρατζαφέρης (revision 5803114)
180
+ Γιάννης Λάτσης (revision 5692530)
181
+ Γιάννος Κρανιδιώτης (revision 5574536)
182
+ Γιώργος Παπασιδέρης (μουσικός) (revision 5722203)
183
+ Γκέοργκ Φρήντριχ Χαίντελ (revision 5807098)
184
+ Γκρέις Κέλι (revision 5807168)
185
+ Γρηγοριανό Hμερολόγιο (revision 5793842)
186
+ Γρηγοριανό ημερολόγιο (revision 5793842)
187
+ Γρηγόρης Λαμπράκης (revision 5752808)
188
+ Δάντης Αλιγκέρι (revision 5648882)
189
+ Δήμος Βιάννου (revision 4816422)
190
+ Δεκέμβριος (revision 5461807)
191
+ Δομιτιανός (revision 5735554)
192
+ Δράμα (πόλη) (revision 5857326)
193
+ Ενιαία Δημοκρατική Αριστερά (revision 5742309)
194
+ Ετόρε Σότσας (revision 5785872)
195
+ Ζιλ Αντριαμαχαζό (revision 5819706)
196
+ Η.Π.Α. (revision 5845171)
197
+ Ηράκλειος (revision 5778827)
198
+ Θεσσαλονίκη (revision 5844955)
199
+ Θεόδωρος Ρούζβελτ (revision 5815087)
200
+ Ιανουάριος (revision 5615044)
201
+ Ιερουσαλήμ (revision 5824734)
202
+ Ιησούς Χριστός (revision 5859687)
203
+ Ιούλιος (revision 5712711)
204
+ Ιούνιος (revision 5461799)
205
+ Ιράκ (revision 5820378)
206
+ Ιράν (revision 5861249)
207
+ Ισιδώρα Ντάνκαν (revision 5044778)
208
+ Ιωάννης ο Χρυσόστομος (revision 5824898)
209
+
210
+ == End of Parsed pages ==
211
+
212
+ - Wikipedia parsing ended at: 2016-05-25 15:21:50.071087
213
+
214
+ 63 characters appeared 1875535 times.
215
+
216
+ First 46 characters:
217
+ [ 0] Char α: 9.004097497514042 %
218
+ [ 1] Char ο: 8.311015256980008 %
219
+ [ 2] Char τ: 7.94493304577094 %
220
+ [ 3] Char ι: 6.338831320129989 %
221
+ [ 4] Char ν: 5.836627948825269 %
222
+ [ 5] Char ε: 5.635565318695733 %
223
+ [ 6] Char ρ: 4.717907157157825 %
224
+ [ 7] Char σ: 4.307197679595422 %
225
+ [ 8] Char κ: 4.061294510632965 %
226
+ [ 9] Char ς: 3.766551943845356 %
227
+ [10] Char η: 3.7565281373048225 %
228
+ [11] Char π: 3.4156653968067783 %
229
+ [12] Char υ: 3.30956233821283 %
230
+ [13] Char μ: 3.1442761665338157 %
231
+ [14] Char λ: 3.0899983204792236 %
232
+ [15] Char ί: 2.429973314280992 %
233
+ [16] Char ό: 2.076100952528212 %
234
+ [17] Char ά: 1.922651403466211 %
235
+ [18] Char γ: 1.8994047031913561 %
236
+ [19] Char έ: 1.6641651582081913 %
237
+ [20] Char δ: 1.508582884350332 %
238
+ [21] Char ω: 1.2410325587099147 %
239
+ [22] Char ή: 1.2077087337746297 %
240
+ [23] Char χ: 1.0482342371643292 %
241
+ [24] Char ύ: 0.9225101104484854 %
242
+ [25] Char β: 0.8928652357860558 %
243
+ [26] Char θ: 0.8681256281541001 %
244
+ [27] Char φ: 0.806756472153279 %
245
+ [28] Char ώ: 0.6969211451665791 %
246
+ [29] Char ζ: 0.35515199663029484 %
247
+ [30] Char e: 0.35488540603081253 %
248
+ [31] Char ξ: 0.314736861748781 %
249
+ [32] Char a: 0.2909036621550651 %
250
+ [33] Char i: 0.2884510286398281 %
251
+ [34] Char o: 0.24137112877125727 %
252
+ [35] Char r: 0.23262695710823847 %
253
+ [36] Char n: 0.2206303801315358 %
254
+ [37] Char t: 0.21668483925919804 %
255
+ [38] Char s: 0.2013825388489151 %
256
+ [39] Char l: 0.14865091827131993 %
257
+ [40] Char d: 0.1359078876160669 %
258
+ [41] Char c: 0.12124540464454144 %
259
+ [42] Char h: 0.1166600463334462 %
260
+ [43] Char u: 0.10381037943840024 %
261
+ [44] Char m: 0.09074744006376848 %
262
+ [45] Char ψ: 0.08669526295163779 %
263
+
264
+ The first 46 characters have an accumulated ratio of 0.993456267145108.
265
+
266
+ 1579 sequences found.
267
+
268
+ First 512 (typical positive ratio): 0.958419074626211
269
+ Next 512 (512-1024): 0.006969211451665791
270
+ Rest: 0.0018920066107342773
271
+
272
+ - Processing end: 2016-05-25 15:21:50.812982