cchardet 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,116 @@
1
+ = Logs of language model for French (fr) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-03 21:07:37.508739
5
+ - Maximum depth: 2
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ Wikipédia:Accueil_principal (revision 115957655)
11
+ Bœuf (animal) (revision 115500130)
12
+ 1500 av. J.-C. (revision 110583603)
13
+ 1898 dans les chemins de fer (revision 106801806)
14
+ 1913 dans les chemins de fer (revision 112852042)
15
+ 1974 dans les chemins de fer (revision 90170756)
16
+ 1er décembre (revision 121012781)
17
+ 2009 dans les chemins de fer (revision 107042206)
18
+ 2011 dans les chemins de fer (revision 109560866)
19
+ 24 novembre (revision 120782024)
20
+ 26 novembre (revision 120833172)
21
+ 29 novembre (revision 120918160)
22
+ 2 décembre (revision 121025437)
23
+ 30 novembre (revision 120947714)
24
+ 3 décembre (revision 121030621)
25
+ Amphibien (revision 120332329)
26
+ Angleterre (revision 120784240)
27
+ Anne-Josèphe Théroigne de Méricourt (revision 121009789)
28
+ Années 1930 (revision 120558236)
29
+ Antonio Troyo Calderón (revision 121028881)
30
+ António Costa (revision 120993829)
31
+ Attentat du 24 novembre 2015 à Tunis (revision 121015161)
32
+ Balard (métro de Paris) (revision 118979088)
33
+ Bois de Vincennes (revision 120822909)
34
+ Buse à tête blanche (revision 121009499)
35
+ Californie (revision 120922479)
36
+ Charenton-le-Pont (revision 120210025)
37
+ Charenton - Écoles (métro de Paris) (revision 108644873)
38
+ Chronique médiévale (revision 100253272)
39
+ Concorde (métro de Paris) (revision 120856751)
40
+ Conférence de Paris de 2015 sur le climat (revision 121029398)
41
+ Crise de la dette publique grecque (revision 120905208)
42
+ Crise entre la Colombie et le Venezuela de 2015 (revision 120857143)
43
+ Crise migratoire en Europe (revision 121002308)
44
+ Crise russo-turque de 2015 (revision 121030214)
45
+ Créteil (revision 120684618)
46
+ Créteil - Préfecture (métro de Paris) (revision 113486387)
47
+ Deuxième guerre civile libyenne (revision 121027704)
48
+ Devise (monnaie) (revision 121015771)
49
+ Droits de tirage spéciaux (revision 121009135)
50
+ Décembre 2015 (revision 121010045)
51
+ Département français (revision 120993190)
52
+ Eldar Riazanov (revision 120996396)
53
+ Enfants verts de Woolpit (revision 121002303)
54
+ Ernst Larsen (revision 121026772)
55
+ Fatima Mernissi (revision 120992271)
56
+ Fejervarya cancrivora (revision 120353807)
57
+ Fonds monétaire international (revision 120754406)
58
+ Français (revision 120883858)
59
+ Freyja (revision 121028677)
60
+ Fusillade du 2 décembre 2015 en Californie (revision 121030353)
61
+
62
+ == End of Parsed pages ==
63
+
64
+ - Wikipedia parsing ended at: 2015-12-03 21:10:27.682316
65
+
66
+ 56 characters appeared 728239 times.
67
+
68
+ First 38 characters:
69
+ [ 0] Char e: 14.339660468609894 %
70
+ [ 1] Char s: 7.954806045817375 %
71
+ [ 2] Char a: 7.864176458552756 %
72
+ [ 3] Char n: 7.572102015959047 %
73
+ [ 4] Char i: 7.34154583866011 %
74
+ [ 5] Char r: 7.020222756540091 %
75
+ [ 6] Char t: 6.833608197308851 %
76
+ [ 7] Char l: 5.9446143367768 %
77
+ [ 8] Char o: 5.386418469760614 %
78
+ [ 9] Char u: 5.024861343597363 %
79
+ [10] Char d: 4.169235649285468 %
80
+ [11] Char c: 3.4240132703686568 %
81
+ [12] Char p: 2.8882001650557028 %
82
+ [13] Char m: 2.803063280049544 %
83
+ [14] Char é: 2.498355622261373 %
84
+ [15] Char g: 1.277739862874688 %
85
+ [16] Char v: 1.1729665672945284 %
86
+ [17] Char f: 1.1614318925517584 %
87
+ [18] Char b: 0.9925312981040565 %
88
+ [19] Char h: 0.8580974103282026 %
89
+ [20] Char q: 0.7740590657737364 %
90
+ [21] Char x: 0.43570860665248634 %
91
+ [22] Char y: 0.41044217626356183 %
92
+ [23] Char è: 0.4100302235941771 %
93
+ [24] Char à: 0.363479571953713 %
94
+ [25] Char j: 0.29591933417463223 %
95
+ [26] Char k: 0.1359443808969308 %
96
+ [27] Char ç: 0.11685724054877589 %
97
+ [28] Char ê: 0.11218844362908331 %
98
+ [29] Char z: 0.10738232915292918 %
99
+ [30] Char w: 0.08239053387692777 %
100
+ [31] Char ô: 0.04792382720507965 %
101
+ [32] Char â: 0.03364280133307884 %
102
+ [33] Char î: 0.029385957082770905 %
103
+ [34] Char û: 0.024854477719539875 %
104
+ [35] Char œ: 0.021146903695078125 %
105
+ [36] Char ï: 0.017851282340001016 %
106
+ [37] Char ù: 0.015242248767231636 %
107
+
108
+ The first 38 characters have an accumulated ratio of 0.999621003544166.
109
+
110
+ 914 sequences found.
111
+
112
+ First 512 (typical positive ratio): 0.997057879992383
113
+ Next 512 (512-1024): 1.3731755646154627e-06
114
+ Rest: 3.8163916471489756e-17
115
+
116
+ - Processing end: 2015-12-03 21:10:27.987730
@@ -0,0 +1,159 @@
1
+ = Logs of language model for German (de) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-03 22:42:29.154759
5
+ - Maximum depth: 3
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Wikipedia:Hauptseite (revision 140459035)
11
+ 1740 (revision 145584733)
12
+ 1890 (revision 148575121)
13
+ 1925 (revision 148682812)
14
+ 1965 (revision 148411693)
15
+ 3. Dezember (revision 148684818)
16
+ Bundeswehreinsatz in Syrien (revision 148714599)
17
+ Clara Klabunde (revision 148697193)
18
+ Day Tripper (revision 145956669)
19
+ Dezember 2015 (revision 148713161)
20
+ Edwar al-Charrat (revision 148656295)
21
+ Enzyklika (revision 148704406)
22
+ Enzyklopädie (revision 148364925)
23
+ Facebook Inc. (revision 148280344)
24
+ Franz Neubauer (CSU) (revision 148710968)
25
+ Freie Inhalte (revision 148123311)
26
+ Gabriele Ferzetti (revision 148715582)
27
+ Georg von Waldburg zu Zeil und Trauchburg (revision 148710609)
28
+ Jim Loscutoff (revision 148690370)
29
+ Katarina Witt (revision 148713884)
30
+ Klavierkonzert (Gershwin) (revision 143900338)
31
+ Ludolf Camphausen (revision 145088962)
32
+ Mark Zuckerberg (revision 148714452)
33
+ Montenegro (revision 148692773)
34
+ NATO (revision 148697872)
35
+ NATO-Osterweiterung (revision 148697354)
36
+ Nekrolog 2015 (revision 148711617)
37
+ Peter-Ulrich-Haus (revision 148654149)
38
+ Philanthropie (revision 145561255)
39
+ Präsidentschaftswahl in Burkina Faso 2015 (revision 148677453)
40
+ Québec (Stadt) (revision 148716893)
41
+ Rivka Zohar (revision 148708850)
42
+ Roch Marc Kaboré (revision 148673951)
43
+ Rubber Soul (revision 148665720)
44
+ Salve Regina (Latry) (revision 148713279)
45
+ Schießerei in San Bernardino (revision 148711974)
46
+ Single (Musik) (revision 146450210)
47
+ The Giving Pledge (revision 148711856)
48
+ Ubi primum (Benedikt XIV.) (revision 136691297)
49
+ VTech (revision 148704025)
50
+ Walter Damrosch (revision 148716127)
51
+ We Can Work It Out (revision 148706519)
52
+ 1. August (revision 148089156)
53
+ 1. Januar (revision 148659041)
54
+ 1. Juni (revision 148375663)
55
+ 1. November (revision 147888516)
56
+ 10. August (revision 148079904)
57
+ 10. November (revision 148658709)
58
+ 10. September (revision 148201788)
59
+ 11. August (revision 148315737)
60
+ 11. Oktober (revision 148087353)
61
+ 12. Januar (revision 147377586)
62
+ 12. September (revision 148359994)
63
+ 13. Dezember (revision 148614781)
64
+ 13. September (revision 148320520)
65
+ 14. August (revision 148513270)
66
+ 14. Dezember (revision 147968142)
67
+ 15. April (revision 146544147)
68
+ 15. August (revision 147827975)
69
+ 16. April (revision 148712866)
70
+ 16. Dezember (revision 148392316)
71
+ 16. Februar (revision 148221712)
72
+ 16. Jahrhundert (revision 147390194)
73
+ 16. Juli (revision 147928181)
74
+ 1652 (revision 142931287)
75
+ 1654 (revision 145531451)
76
+ 1656 (revision 144194148)
77
+ 1657 (revision 147492859)
78
+ 1662 (revision 147548355)
79
+ 1665 (revision 147757128)
80
+ 1666 (revision 147843417)
81
+ 1667 (revision 148566099)
82
+ 1668 (revision 145304760)
83
+ 1670 (revision 147643990)
84
+ 1672 (revision 145296252)
85
+ 1673 (revision 147879655)
86
+ 1674 (revision 146784434)
87
+ 1679 (revision 146069377)
88
+ 1685 (revision 148596629)
89
+ 1688 (revision 140370621)
90
+ 1692 (revision 146892539)
91
+ 1693 (revision 147464373)
92
+ 17. August (revision 148288443)
93
+ 17. Februar (revision 145814425)
94
+ 17. Jahrhundert (revision 147869798)
95
+ 17. Oktober (revision 148327370)
96
+ 1700er (revision 127393249)
97
+ 1707 (revision 148288721)
98
+ 1710er (revision 134739897)
99
+ 1720er (revision 127302296)
100
+ 1730 (revision 148694277)
101
+ 1730er (revision 127393280)
102
+ 1731 (revision 147730204)
103
+ 1735 (revision 145436596)
104
+ 1736 (revision 145680122)
105
+ 1737 (revision 146645905)
106
+ 1738 (revision 145094942)
107
+ 1739 (revision 147843445)
108
+ 1740er (revision 127393296)
109
+ 1741 (revision 146530178)
110
+ 1742 (revision 147010984)
111
+
112
+ == End of Parsed pages ==
113
+
114
+ - Wikipedia parsing ended at: 2015-12-03 22:50:46.517106
115
+
116
+ 59 characters appeared 1746165 times.
117
+
118
+ First 31 characters:
119
+ [ 0] Char e: 14.27997926885489 %
120
+ [ 1] Char r: 8.696257226550754 %
121
+ [ 2] Char n: 8.464091308667852 %
122
+ [ 3] Char i: 8.258784250056554 %
123
+ [ 4] Char s: 6.690833913175444 %
124
+ [ 5] Char a: 6.370703799469123 %
125
+ [ 6] Char t: 5.925728668253001 %
126
+ [ 7] Char h: 4.540979804314025 %
127
+ [ 8] Char d: 4.367284878576767 %
128
+ [ 9] Char l: 4.083634708060234 %
129
+ [10] Char u: 3.899917819908199 %
130
+ [11] Char o: 3.6450163644329145 %
131
+ [12] Char c: 3.392405643223865 %
132
+ [13] Char m: 2.578565026787274 %
133
+ [14] Char g: 2.543631329227192 %
134
+ [15] Char b: 1.9455206123132693 %
135
+ [16] Char k: 1.7604292836014925 %
136
+ [17] Char f: 1.6422273954637734 %
137
+ [18] Char p: 1.519329502080273 %
138
+ [19] Char w: 1.0273370500496803 %
139
+ [20] Char z: 1.0037997554641171 %
140
+ [21] Char v: 0.9010603236234834 %
141
+ [22] Char ä: 0.4926224039538073 %
142
+ [23] Char j: 0.4661644231787947 %
143
+ [24] Char ü: 0.4094687500894818 %
144
+ [25] Char y: 0.34229296773214446 %
145
+ [26] Char ö: 0.3044958523392692 %
146
+ [27] Char ß: 0.14477440562604335 %
147
+ [28] Char x: 0.09918879372796958 %
148
+ [29] Char é: 0.07633871942227682 %
149
+ [30] Char q: 0.06099079983850323 %
150
+
151
+ The first 31 characters have an accumulated ratio of 0.9993385504806246.
152
+
153
+ 1188 sequences found.
154
+
155
+ First 512 (typical positive ratio): 0.9934041448127945
156
+ Next 512 (512-1024): 1.1453671331174316e-06
157
+ Rest: 0.0001130256702826099
158
+
159
+ - Processing end: 2015-12-03 22:50:46.681265
@@ -0,0 +1,272 @@
1
+ = Logs of language model for Greek (el) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-05-25 15:16:42.898905
5
+ - Maximum depth: 5
6
+ - Max number of pages: 200
7
+
8
+ == Parsed pages ==
9
+
10
+ Πύλη:Κύρια (revision 5511929)
11
+ 14 Σεπτεμβρίου (revision 5808678)
12
+ 16 Σεπτεμβρίου (revision 5810117)
13
+ 1771 (revision 4940722)
14
+ 1829 (revision 5863423)
15
+ 1921 (revision 5819621)
16
+ 1948 (revision 5785943)
17
+ 1965 (revision 5846907)
18
+ 1970 (revision 5816968)
19
+ 1973 (revision 5423504)
20
+ 25 Μαΐου (revision 5865973)
21
+ Eurovision (revision 5865484)
22
+ Scorpions (revision 5586116)
23
+ Wiki (revision 5859059)
24
+ Wikimedia (revision 5771416)
25
+ Αγία Πετρούπολη (revision 5782933)
26
+ Αγγλική γλώσσα (revision 5851128)
27
+ Αλεξάντρ Πούσκιν (revision 5790131)
28
+ Βέλος ΙΙ (Αντιτορπιλικό) (revision 5178914)
29
+ Βραζιλία (revision 5857981)
30
+ Γαλλική γλώσσα (revision 5851119)
31
+ Γαλλική εισβολή στην Ρωσία (revision 5858523)
32
+ Γενικές εκλογές στη Δομινικανή Δημοκρατία 2016 (revision 5848770)
33
+ Γηραιότερο πρόσωπο στον κόσμο (revision 5852034)
34
+ Διαγωνισμός Τραγουδιού Eurovision 2016 (revision 5863783)
35
+ Δικτατορία των Συνταγματαρχών (revision 5864405)
36
+ Δομινικανή Δημοκρατία (revision 5848627)
37
+ Εγκυκλοπαίδεια (revision 5566281)
38
+ Ελεύθερο περιεχόμενο (revision 5824058)
39
+ Ελλάδα (revision 5863759)
40
+ Ελληνική γλώσσα (revision 5790854)
41
+ Ιππικό (revision 5376587)
42
+ Ιταλία (revision 5781867)
43
+ Κίεβο (revision 5794613)
44
+ Κατάληψη του Παρισιού (1814) (revision 5729368)
45
+ Κλάους Μάιν (revision 5668218)
46
+ Μάχη της Λειψίας (revision 5729316)
47
+ Μάχη της Σαλτάνοφκα (revision 5865460)
48
+ Μάχη του Μποροντίνο (revision 5670322)
49
+ Μαξ Βερστάπεν (revision 5864745)
50
+ Μπλουζ (revision 5846428)
51
+ Νίκος Καχτίτσης (revision 5723615)
52
+ Νικολάι Νικολάεβιτς Ραέφσκι (revision 5865460)
53
+ Ντίλμα Ρούσεφ (revision 5843412)
54
+ Ομοσπονδιακό Σοβιέτ της Ρωσικής Αυτοκρατορίας (revision 5865460)
55
+ Ουκρανία (revision 5847651)
56
+ Πάτρα (revision 5800331)
57
+ Ποδόσφαιρο (revision 5864952)
58
+ Πριμέρα Ντιβιζιόν (revision 5846965)
59
+ Ρωσική Αυτοκρατορία (revision 5858419)
60
+ Ρωσική γλώσσα (revision 5818960)
61
+ Ρώσοι (revision 5376764)
62
+ Σουζάνα Μούσατ Τζόουνς (revision 5848866)
63
+ Στοκχόλμη (revision 5670508)
64
+ Στρατηγός (revision 5464718)
65
+ Τζακ Στάινμπεργκερ (revision 5820361)
66
+ Τζαμάλα (revision 5863755)
67
+ ΦΚ Μπαρτσελόνα (revision 5862032)
68
+ Φόρμουλα Ένα (revision 5809160)
69
+ 10 Σεπτεμβρίου (revision 5841838)
70
+ 11 Σεπτεμβρίου (revision 5796866)
71
+ 12 Σεπτεμβρίου (revision 5795991)
72
+ 1321 (revision 5811404)
73
+ 13 Σεπτεμβρίου (revision 5830505)
74
+ 1435 (revision 5600729)
75
+ 1498 (revision 5831868)
76
+ 1523 (revision 5863396)
77
+ 1527 (revision 5579042)
78
+ 1580 (revision 5742938)
79
+ 15 Σεπτεμβρίου (revision 5817369)
80
+ 1712 (revision 5699806)
81
+ 1741 (revision 5817896)
82
+ 1752 (revision 5666171)
83
+ 1760 (revision 5490201)
84
+ 1769 (revision 5336004)
85
+ 17 Σεπτεμβρίου (revision 5843911)
86
+ 1812 (revision 5703237)
87
+ 1814 (revision 5751122)
88
+ 1851 (revision 5854460)
89
+ 1878 (revision 5863501)
90
+ 1889 (revision 5795061)
91
+ 1890 (revision 5705460)
92
+ 1898 (revision 5863504)
93
+ 18 Σεπτεμβρίου (revision 5661544)
94
+ 1901 (revision 5865687)
95
+ 1902 (revision 5779111)
96
+ 1905 (revision 5862599)
97
+ 1910 (revision 5794858)
98
+ 1916 (revision 5800363)
99
+ 1917 (revision 5865701)
100
+ 1925 (revision 5854774)
101
+ 1927 (revision 5839595)
102
+ 1928 (revision 5814308)
103
+ 1933 (revision 5854834)
104
+ 1936 (revision 5854290)
105
+ 1937 (revision 5794891)
106
+ 1943 (revision 5807315)
107
+ 1944 (revision 5865804)
108
+ 1950 (revision 5807377)
109
+ 1956 (revision 5795994)
110
+ 1960 (revision 5795065)
111
+ 1963 (revision 5863751)
112
+ 1966 (revision 5707508)
113
+ 1969 (revision 5668647)
114
+ 1980 (revision 5832053)
115
+ 1981 (revision 5817635)
116
+ 1982 (revision 5788879)
117
+ 1983 (revision 5812702)
118
+ 1984 (revision 5749754)
119
+ 1989 (revision 5846909)
120
+ 1994 (revision 5863999)
121
+ 1999 (revision 5795003)
122
+ 19 Σεπτεμβρίου (revision 5850863)
123
+ 1 Σεπτεμβρίου (revision 5630491)
124
+ 2000 (revision 5779037)
125
+ 2001 (revision 5779042)
126
+ 2005 (revision 5779066)
127
+ 2006 (revision 5808681)
128
+ 2009 (revision 5827105)
129
+ 2011 (revision 5808660)
130
+ 2016 (revision 5801621)
131
+ 20 Σεπτεμβρίου (revision 5808561)
132
+ 21 Σεπτεμβρίου (revision 5751207)
133
+ 22 Σεπτεμβρίου (revision 5807133)
134
+ 23 Σεπτεμβρίου (revision 5800012)
135
+ 24 Σεπτεμβρίου (revision 5662618)
136
+ 258 (revision 4952368)
137
+ 25 Σεπτεμβρίου (revision 5817621)
138
+ 26 Σεπτεμβρίου (revision 5817637)
139
+ 27 Σεπτεμβρίου (revision 5817648)
140
+ 28 Σεπτεμβρίου (revision 5817677)
141
+ 29 Σεπτεμβρίου (revision 5703562)
142
+ 2 Σεπτεμβρίου (revision 5701639)
143
+ 30 Σεπτεμβρίου (revision 5838312)
144
+ 326 (revision 5818811)
145
+ 3 Σεπτεμβρίου (revision 5816313)
146
+ 407 (revision 4952524)
147
+ 4 Σεπτεμβρίου (revision 5816970)
148
+ 5 Σεπτεμβρίου (revision 5817185)
149
+ 628 (revision 5398024)
150
+ 680 (revision 5365010)
151
+ 685 (revision 5819296)
152
+ 6 Σεπτεμβρίου (revision 5765157)
153
+ 775 (revision 5373211)
154
+ 786 (revision 5398031)
155
+ 7 Σεπτεμβρίου (revision 5749649)
156
+ 81 (revision 5397958)
157
+ 891 (revision 4952139)
158
+ 8 Σεπτεμβρίου (revision 5788878)
159
+ 9 Σεπτεμβρίου (revision 5817240)
160
+ CIA (revision 5857678)
161
+ Miyavi (revision 4944860)
162
+ Άρμεν Κούπτσιος (revision 5766774)
163
+ Έιμι Γουάινχαουζ (revision 5809279)
164
+ Έρβιν Θάλμπεργκ (revision 5716376)
165
+ Ίων Δραγούμης (revision 5818568)
166
+ Αγία Ελένη (revision 5821916)
167
+ Αλεξάντερ φον Χούμπολτ (revision 5773636)
168
+ Αλμπέρτο Κόρντα (revision 5800055)
169
+ Απρίλιος (revision 5766829)
170
+ Αυτοκρατορία των Σασσανιδών (revision 5859880)
171
+ Αύγουστος (revision 5461793)
172
+ Β΄ Παγκόσμιος Πόλεμος (revision 5848530)
173
+ Βέρμαχτ (revision 5212228)
174
+ Βασίλης Λάσκος (revision 5695445)
175
+ Βενεζουέλα (revision 5847962)
176
+ Βρετανική Αυτοκρατορία (revision 5606306)
177
+ Βόρεια Ελλάδα (revision 5670938)
178
+ Γαλλία (revision 5776756)
179
+ Γεώργιος Καρατζαφέρης (revision 5803114)
180
+ Γιάννης Λάτσης (revision 5692530)
181
+ Γιάννος Κρανιδιώτης (revision 5574536)
182
+ Γιώργος Παπασιδέρης (μουσικός) (revision 5722203)
183
+ Γκέοργκ Φρήντριχ Χαίντελ (revision 5807098)
184
+ Γκρέις Κέλι (revision 5807168)
185
+ Γρηγοριανό Hμερολόγιο (revision 5793842)
186
+ Γρηγοριανό ημερολόγιο (revision 5793842)
187
+ Γρηγόρης Λαμπράκης (revision 5752808)
188
+ Δάντης Αλιγκέρι (revision 5648882)
189
+ Δήμος Βιάννου (revision 4816422)
190
+ Δεκέμβριος (revision 5461807)
191
+ Δομιτιανός (revision 5735554)
192
+ Δράμα (πόλη) (revision 5857326)
193
+ Ενιαία Δημοκρατική Αριστερά (revision 5742309)
194
+ Ετόρε Σότσας (revision 5785872)
195
+ Ζιλ Αντριαμαχαζό (revision 5819706)
196
+ Η.Π.Α. (revision 5845171)
197
+ Ηράκλειος (revision 5778827)
198
+ Θεσσαλονίκη (revision 5844955)
199
+ Θεόδωρος Ρούζβελτ (revision 5815087)
200
+ Ιανουάριος (revision 5615044)
201
+ Ιερουσαλήμ (revision 5824734)
202
+ Ιησούς Χριστός (revision 5859687)
203
+ Ιούλιος (revision 5712711)
204
+ Ιούνιος (revision 5461799)
205
+ Ιράκ (revision 5820378)
206
+ Ιράν (revision 5861249)
207
+ Ισιδώρα Ντάνκαν (revision 5044778)
208
+ Ιωάννης ο Χρυσόστομος (revision 5824898)
209
+
210
+ == End of Parsed pages ==
211
+
212
+ - Wikipedia parsing ended at: 2016-05-25 15:21:50.071087
213
+
214
+ 63 characters appeared 1875535 times.
215
+
216
+ First 46 characters:
217
+ [ 0] Char α: 9.004097497514042 %
218
+ [ 1] Char ο: 8.311015256980008 %
219
+ [ 2] Char τ: 7.94493304577094 %
220
+ [ 3] Char ι: 6.338831320129989 %
221
+ [ 4] Char ν: 5.836627948825269 %
222
+ [ 5] Char ε: 5.635565318695733 %
223
+ [ 6] Char ρ: 4.717907157157825 %
224
+ [ 7] Char σ: 4.307197679595422 %
225
+ [ 8] Char κ: 4.061294510632965 %
226
+ [ 9] Char ς: 3.766551943845356 %
227
+ [10] Char η: 3.7565281373048225 %
228
+ [11] Char π: 3.4156653968067783 %
229
+ [12] Char υ: 3.30956233821283 %
230
+ [13] Char μ: 3.1442761665338157 %
231
+ [14] Char λ: 3.0899983204792236 %
232
+ [15] Char ί: 2.429973314280992 %
233
+ [16] Char ό: 2.076100952528212 %
234
+ [17] Char ά: 1.922651403466211 %
235
+ [18] Char γ: 1.8994047031913561 %
236
+ [19] Char έ: 1.6641651582081913 %
237
+ [20] Char δ: 1.508582884350332 %
238
+ [21] Char ω: 1.2410325587099147 %
239
+ [22] Char ή: 1.2077087337746297 %
240
+ [23] Char χ: 1.0482342371643292 %
241
+ [24] Char ύ: 0.9225101104484854 %
242
+ [25] Char β: 0.8928652357860558 %
243
+ [26] Char θ: 0.8681256281541001 %
244
+ [27] Char φ: 0.806756472153279 %
245
+ [28] Char ώ: 0.6969211451665791 %
246
+ [29] Char ζ: 0.35515199663029484 %
247
+ [30] Char e: 0.35488540603081253 %
248
+ [31] Char ξ: 0.314736861748781 %
249
+ [32] Char a: 0.2909036621550651 %
250
+ [33] Char i: 0.2884510286398281 %
251
+ [34] Char o: 0.24137112877125727 %
252
+ [35] Char r: 0.23262695710823847 %
253
+ [36] Char n: 0.2206303801315358 %
254
+ [37] Char t: 0.21668483925919804 %
255
+ [38] Char s: 0.2013825388489151 %
256
+ [39] Char l: 0.14865091827131993 %
257
+ [40] Char d: 0.1359078876160669 %
258
+ [41] Char c: 0.12124540464454144 %
259
+ [42] Char h: 0.1166600463334462 %
260
+ [43] Char u: 0.10381037943840024 %
261
+ [44] Char m: 0.09074744006376848 %
262
+ [45] Char ψ: 0.08669526295163779 %
263
+
264
+ The first 46 characters have an accumulated ratio of 0.993456267145108.
265
+
266
+ 1579 sequences found.
267
+
268
+ First 512 (typical positive ratio): 0.958419074626211
269
+ Next 512 (512-1024): 0.006969211451665791
270
+ Rest: 0.0018920066107342773
271
+
272
+ - Processing end: 2016-05-25 15:21:50.812982