cchardet 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,109 @@
1
+ = Logs of language model for Hungarian (hu) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-12 18:01:21.560682
5
+ - Maximum depth: 2
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ Kezdőlap (revision 12748721)
11
+ 1722 (revision 16471860)
12
+ 1780 (revision 16407861)
13
+ 1800 (revision 15028835)
14
+ 1831 (revision 16469576)
15
+ 1848–49-es forradalom és szabadságharc (revision 16955214)
16
+ 1875 (revision 16798555)
17
+ 1895 (revision 16649417)
18
+ 1900 (revision 16961019)
19
+ 1905 (revision 16601113)
20
+ 1915 (revision 16792868)
21
+ 1940 (revision 16936087)
22
+ 1950 (revision 16820817)
23
+ 1970 (revision 16093156)
24
+ 1985 (revision 16463340)
25
+ 1995 (revision 16945805)
26
+ 1998 (revision 16542908)
27
+ 2003 (revision 16943939)
28
+ 2015 (revision 16960983)
29
+ 73. Golden Globe-gála (revision 16937296)
30
+ Akacuki (revision 16960353)
31
+ Akasztottak erdeje (regény) (revision 16918702)
32
+ Alan Hodgkinson (revision 16953214)
33
+ Alfred Bernhard Nobel (revision 16654409)
34
+ Alkotmány (revision 16784843)
35
+ André-Marie Ampère (revision 16865419)
36
+ Angela Merkel (revision 16960753)
37
+ Anne Baxter (revision 15572176)
38
+ Az irgalmasság rendkívüli szentéve (revision 16951018)
39
+ Az év embereinek listája (revision 16961722)
40
+ Bencések (revision 16853524)
41
+ Boeing 747–400 (revision 16947261)
42
+ Chantal Szent Johanna Franciska (revision 16371923)
43
+ December 12. (revision 15637986)
44
+ December 13. (revision 16546152)
45
+ Dinamó (revision 15949492)
46
+ Dionne Warwick (revision 16522754)
47
+ Elektrodinamika (revision 14888277)
48
+ Elektromosság (revision 16051899)
49
+ Enciklopédia (revision 16556513)
50
+ Eric Maskin (revision 16907781)
51
+ Európai migrációs válság (revision 16922218)
52
+ Eötvös Loránd (revision 16960057)
53
+ Eötvös Loránd Tudományegyetem (revision 16684410)
54
+ Fellner Jakab (revision 16960223)
55
+ Feltaláló (revision 13609621)
56
+ Ferenc pápa (revision 16928970)
57
+ Frank Sinatra (revision 16927399)
58
+ François Jean Dominique Arago (revision 16197941)
59
+ Gabriella (revision 16906500)
60
+
61
+ == End of Parsed pages ==
62
+
63
+ - Wikipedia parsing ended at: 2015-12-12 18:02:46.729734
64
+
65
+ 55 characters appeared 375370 times.
66
+
67
+ First 32 characters:
68
+ [ 0] Char e: 9.710685457015744 %
69
+ [ 1] Char a: 8.803314063457389 %
70
+ [ 2] Char t: 7.322375256413672 %
71
+ [ 3] Char s: 6.666222660308496 %
72
+ [ 4] Char l: 5.73967019207715 %
73
+ [ 5] Char r: 5.4341050163838345 %
74
+ [ 6] Char n: 5.39920611663159 %
75
+ [ 7] Char i: 4.773689959240216 %
76
+ [ 8] Char o: 4.347976663025815 %
77
+ [ 9] Char k: 4.289634227562138 %
78
+ [10] Char z: 4.244611982843594 %
79
+ [11] Char á: 3.7855982097663636 %
80
+ [12] Char m: 3.2144284306151265 %
81
+ [13] Char g: 3.0727016010869277 %
82
+ [14] Char é: 3.0295441830727015 %
83
+ [15] Char b: 2.287609558568879 %
84
+ [16] Char d: 1.9966965926952074 %
85
+ [17] Char v: 1.8832085675466872 %
86
+ [18] Char y: 1.8453792258305137 %
87
+ [19] Char u: 1.5155713029810587 %
88
+ [20] Char h: 1.2960545595012922 %
89
+ [21] Char p: 1.288861656498921 %
90
+ [22] Char j: 1.2363801049631031 %
91
+ [23] Char c: 1.0951860830647095 %
92
+ [24] Char f: 1.0256546873751233 %
93
+ [25] Char ö: 1.020859418706876 %
94
+ [26] Char ó: 0.9955510562911262 %
95
+ [27] Char ő: 0.8399712283879905 %
96
+ [28] Char í: 0.6340410794682579 %
97
+ [29] Char ü: 0.4211844313610571 %
98
+ [30] Char ú: 0.3295415190345526 %
99
+ [31] Char ű: 0.2056637451048299 %
100
+
101
+ The first 32 characters have an accumulated ratio of 0.9975117883688093.
102
+
103
+ 1084 sequences found.
104
+
105
+ First 512 (typical positive ratio): 0.9748272224933486
106
+ Next 512 (512-1024): 5.328076298052588e-06
107
+ Rest: 0.0001889139024889644
108
+
109
+ - Processing end: 2015-12-12 18:02:46.902033
@@ -0,0 +1,156 @@
1
+ = Logs of language model for Irish (ga) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-27 00:31:16.489602
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Tracy Caldwell Dyson (revision 812158)
11
+ 14 Lúnasa (revision 716575)
12
+ 1969 (revision 810361)
13
+ California (revision 790976)
14
+ Ceimic (revision 759983)
15
+ Ceimic fhisiciúil (revision 656896)
16
+ NASA (revision 806394)
17
+ Rúisis (revision 771746)
18
+ SAM (revision 807668)
19
+ Spáinnis (revision 812323)
20
+ Stáisiún Idirnáisiúnta Spáis (revision 806394)
21
+ Tointeálaí spáis (revision 761309)
22
+ 10 Lúnasa (revision 649045)
23
+ 11 Lúnasa (revision 776455)
24
+ 12 Lúnasa (revision 716531)
25
+ 13 Lúnasa (revision 716546)
26
+ 1598 (revision 703178)
27
+ 15 Lúnasa (revision 776986)
28
+ 16 Lúnasa (revision 648836)
29
+ 1740 (revision 791225)
30
+ 1771 (revision 776762)
31
+ 17 Lúnasa (revision 777131)
32
+ 1823 (revision 791774)
33
+ 1832 (revision 794492)
34
+ 1898 (revision 805176)
35
+ 18 Lúnasa (revision 777242)
36
+ 1911 (revision 801932)
37
+ 1956 (revision 797081)
38
+ 1962 (revision 801511)
39
+ 1966 (revision 807415)
40
+ 19 Lúnasa (revision 648524)
41
+ 1 Lúnasa (revision 647726)
42
+ 2001 (revision 801012)
43
+ 2004 (revision 795759)
44
+ 2016 (revision 812091)
45
+ 20 Lúnasa (revision 777924)
46
+ 21 Lúnasa (revision 647805)
47
+ 22 Lúnasa (revision 778960)
48
+ 23 Lúnasa (revision 778453)
49
+ 24 Lúnasa (revision 778495)
50
+ 25 Lúnasa (revision 778551)
51
+ 26 Lúnasa (revision 649051)
52
+ 27 Lúnasa (revision 778763)
53
+ 28 Lúnasa (revision 778813)
54
+ 29 Lúnasa (revision 778959)
55
+ 2 Lúnasa (revision 774393)
56
+ 30 Lúnasa (revision 648308)
57
+ 31 Lúnasa (revision 649053)
58
+ 3 Lúnasa (revision 647811)
59
+ 4 Lúnasa (revision 786284)
60
+ 5 Lúnasa (revision 776845)
61
+ 6 Lúnasa (revision 647834)
62
+ 7 Lúnasa (revision 775859)
63
+ 8 Lúnasa (revision 648745)
64
+ 9 Lúnasa (revision 648522)
65
+ AK Parti (revision 792248)
66
+ An Phacastáin (revision 759339)
67
+ An Tuirc (revision 811970)
68
+ Aoine (revision 717430)
69
+ Bertolt Brecht (revision 800584)
70
+ Czesław Miłosz (revision 780306)
71
+ Céadaoin (revision 717606)
72
+ Dan Boyle (revision 797926)
73
+ Domhnach (revision 717663)
74
+ Déardaoin (revision 647860)
75
+ Féilire (revision 648837)
76
+ Halle Berry (revision 759955)
77
+ Henry Bagenal (revision 716575)
78
+ Iúil (revision 647071)
79
+ Luan (revision 717791)
80
+ Lúnasa (revision 810265)
81
+ Meán Fómhair (revision 779166)
82
+ Pápa Pius VII (revision 758126)
83
+ Satharn (revision 784525)
84
+ Walter Scott (revision 759029)
85
+ Áth Buí (revision 716575)
86
+ 11 Márta (revision 716519)
87
+ 17 Márta (revision 798614)
88
+ 1882 (revision 801198)
89
+ 1886 (revision 776624)
90
+ 1890 (revision 801200)
91
+ 1891 (revision 796677)
92
+ 1903 (revision 812849)
93
+ 1922 (revision 801227)
94
+ 1930í (revision 740221)
95
+ 1940í (revision 740219)
96
+ 1950í (revision 740217)
97
+ 1960í (revision 772724)
98
+ 1967 (revision 796983)
99
+ 1968 (revision 810926)
100
+ 1970 (revision 812852)
101
+ 1970í (revision 740213)
102
+ 1971 (revision 809746)
103
+ 1972 (revision 789490)
104
+ 1980í (revision 740211)
105
+ 1990í (revision 740208)
106
+ 19ú haois (revision 739964)
107
+ 1 Bealtaine (revision 647679)
108
+
109
+ == End of Parsed pages ==
110
+
111
+ - Wikipedia parsing ended at: 2016-09-27 00:33:40.157338
112
+
113
+ 44 characters appeared 183561 times.
114
+
115
+ First 31 characters:
116
+ [ 0] Char a: 15.192769705983297 %
117
+ [ 1] Char i: 10.534372769814938 %
118
+ [ 2] Char n: 8.106297089250985 %
119
+ [ 3] Char h: 7.243368689427493 %
120
+ [ 4] Char r: 6.442544985045844 %
121
+ [ 5] Char e: 6.198484427520007 %
122
+ [ 6] Char s: 5.622654049607488 %
123
+ [ 7] Char t: 4.776068990689743 %
124
+ [ 8] Char c: 4.543448771797931 %
125
+ [ 9] Char l: 4.1953356105054995 %
126
+ [10] Char o: 3.9469168287381304 %
127
+ [11] Char d: 3.2169142682813887 %
128
+ [12] Char g: 2.811054635788648 %
129
+ [13] Char m: 2.6269196615838877 %
130
+ [14] Char á: 2.2749930540801153 %
131
+ [15] Char u: 2.1932763495513754 %
132
+ [16] Char b: 2.0478206154902185 %
133
+ [17] Char í: 1.6599386579938005 %
134
+ [18] Char é: 1.2829522611012143 %
135
+ [19] Char f: 1.1494816437042727 %
136
+ [20] Char ú: 1.0525111543301682 %
137
+ [21] Char p: 0.9059658642086281 %
138
+ [22] Char ó: 0.8890777452726886 %
139
+ [23] Char v: 0.2522322279787101 %
140
+ [24] Char y: 0.23479933101257894 %
141
+ [25] Char k: 0.18195586208399386 %
142
+ [26] Char w: 0.1688811893593955 %
143
+ [27] Char j: 0.09697048937410452 %
144
+ [28] Char z: 0.07735848028720697 %
145
+ [29] Char x: 0.0343210159020707 %
146
+ [30] Char q: 0.010895560603831969 %
147
+
148
+ The first 31 characters have an accumulated ratio of 0.9997058198636966.
149
+
150
+ 701 sequences found.
151
+
152
+ First 512 (typical positive ratio): 0.9974076651249096
153
+ Next 512 (512-1024): 5.447780301915984e-06
154
+ Rest: -2.7755575615628914e-17
155
+
156
+ - Processing end: 2016-09-27 00:33:40.258886
@@ -0,0 +1,162 @@
1
+ = Logs of language model for Italian (it) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-21 18:43:12.831409
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Pieve Ligure (revision 83186252)
11
+ 010 (prefisso) (revision 76157203)
12
+ 1000 (revision 83185341)
13
+ 1143 (revision 70627567)
14
+ 1162 (revision 70627612)
15
+ 118 - Emergenza sanitaria (revision 83267411)
16
+ 1201 (revision 77523243)
17
+ 1202 (revision 76764411)
18
+ 1374 (revision 78259457)
19
+ 1404 (revision 70628069)
20
+ 1520 (revision 76854924)
21
+ 1537 (revision 70628296)
22
+ 1582 (revision 80626188)
23
+ 1584 (revision 76837051)
24
+ 1600 (revision 76869356)
25
+ 1619 (revision 70628455)
26
+ 1742 (revision 70628675)
27
+ 1748 (revision 70628682)
28
+ 1749 (revision 70628684)
29
+ 1750 (revision 70628690)
30
+ 1754 (revision 70628697)
31
+ 1775 (revision 70628734)
32
+ 1797 (revision 78338823)
33
+ 1798 (revision 82047236)
34
+ 1803 (revision 77502534)
35
+ 1805 (revision 79369853)
36
+ 1809 (revision 70628789)
37
+ 1810 (revision 82930218)
38
+ 1814 (revision 78338825)
39
+ 1815 (revision 82669615)
40
+ 1816 (revision 83185384)
41
+ 1818 (revision 72407239)
42
+ 1823 (revision 74880156)
43
+ 1859 (revision 83185401)
44
+ 1860 (revision 83185403)
45
+ 1861 (revision 83185412)
46
+ 1868 (revision 83185430)
47
+ 1874 (revision 83185441)
48
+ 1897 (revision 83185267)
49
+ 1908 (revision 83185631)
50
+ 1909 (revision 83185630)
51
+ 1913 (revision 83185626)
52
+ 1915 (revision 83185625)
53
+ 1917 (revision 83185270)
54
+ 1920 (revision 83185621)
55
+ 1921 (revision 83185619)
56
+ 1923 (revision 83185616)
57
+ 1925 (revision 83185614)
58
+ 1926 (revision 83185612)
59
+ 1928 (revision 83185610)
60
+ 1929 (revision 83185609)
61
+ 1939 (revision 83185598)
62
+ 1946 (revision 83185590)
63
+ 1947 (revision 83185589)
64
+ 1948 (revision 83185587)
65
+ 1951 (revision 83185584)
66
+ 1956 (revision 83185478)
67
+ 1960 (revision 83185487)
68
+ 1964 (revision 83185493)
69
+ 1965 (revision 83185494)
70
+ 1969 (revision 83185500)
71
+ 1970 (revision 83185503)
72
+ 1971 (revision 83185505)
73
+ 1975 (revision 83185510)
74
+ 1976 (revision 83185513)
75
+ 1977 (revision 83185514)
76
+ 1980 (revision 83185518)
77
+ 1981 (revision 83308867)
78
+ 1983 (revision 83185524)
79
+ 1985 (revision 83185526)
80
+ 1988 (revision 83185280)
81
+ 1990 (revision 83185531)
82
+ 1995 (revision 83185538)
83
+ 1999 (revision 83326325)
84
+ 2000 (revision 83185544)
85
+ 2001 (revision 83309058)
86
+ 2002 (revision 83185545)
87
+ 2003 (revision 83185546)
88
+ 2004 (revision 83185283)
89
+ 2005 (revision 83185285)
90
+ 2006 (revision 83185547)
91
+ 2007 (revision 83185549)
92
+ 2008 (revision 83185551)
93
+ 2009 (revision 83185552)
94
+ 2010 (revision 83185287)
95
+ 2012 (revision 83185289)
96
+ 712 (revision 70630167)
97
+ 749 (revision 78272323)
98
+ ATP (Provincia di Genova) (revision 82754117)
99
+ Abbazia di San Colombano (revision 83062997)
100
+ Abbazia di San Fruttuoso (revision 83288120)
101
+ Acacia dealbata (revision 83036867)
102
+ Acquedotto (revision 82973825)
103
+ Affresco (revision 82000422)
104
+ Agricoltura (revision 82578266)
105
+ Allevamento (revision 82971452)
106
+ Altitudine (revision 82971213)
107
+ Angelo (revision 82333116)
108
+ Anni 1960 (revision 83161222)
109
+ Anni 1970 (revision 81663175)
110
+ Antica Roma (revision 83125874)
111
+
112
+ == End of Parsed pages ==
113
+
114
+ - Wikipedia parsing ended at: 2016-09-21 18:46:08.840718
115
+
116
+ 59 characters appeared 823241 times.
117
+
118
+ First 34 characters:
119
+ [ 0] Char i: 11.823147778111148 %
120
+ [ 1] Char a: 11.252112078965942 %
121
+ [ 2] Char e: 10.910170897707962 %
122
+ [ 3] Char o: 8.936386793174782 %
123
+ [ 4] Char n: 7.317055394471364 %
124
+ [ 5] Char l: 6.931263141655967 %
125
+ [ 6] Char r: 6.521784021932824 %
126
+ [ 7] Char t: 6.386708145002497 %
127
+ [ 8] Char s: 4.572415610981475 %
128
+ [ 9] Char c: 4.116291584116923 %
129
+ [10] Char d: 3.9770856893667834 %
130
+ [11] Char u: 2.8944136650142545 %
131
+ [12] Char m: 2.762860450342002 %
132
+ [13] Char p: 2.6809889206198427 %
133
+ [14] Char g: 2.1493098618751985 %
134
+ [15] Char v: 1.5369739845318686 %
135
+ [16] Char b: 1.2855287819727153 %
136
+ [17] Char f: 0.9932692856648295 %
137
+ [18] Char z: 0.9664241698360504 %
138
+ [19] Char h: 0.7159507361756764 %
139
+ [20] Char q: 0.2416060424590126 %
140
+ [21] Char k: 0.18876610858788617 %
141
+ [22] Char à: 0.15596890825408355 %
142
+ [23] Char y: 0.12462936126844994 %
143
+ [24] Char è: 0.11600491229178332 %
144
+ [25] Char w: 0.10628722330398996 %
145
+ [26] Char x: 0.10312897438295712 %
146
+ [27] Char j: 0.07555503188009344 %
147
+ [28] Char ù: 0.05575524056746445 %
148
+ [29] Char ò: 0.03304014255849745 %
149
+ [30] Char é: 0.021014502436103158 %
150
+ [31] Char ì: 0.0191924357508919 %
151
+ [32] Char á: 0.004737373381549267 %
152
+ [33] Char ó: 0.003644133370422513 %
153
+
154
+ The first 34 characters have an accumulated ratio of 0.9997947138201325.
155
+
156
+ 872 sequences found.
157
+
158
+ First 512 (typical positive ratio): 0.9989484485502651
159
+ Next 512 (512-1024): 1.214711123474171e-06
160
+ Rest: -4.336808689942018e-17
161
+
162
+ - Processing end: 2016-09-21 18:46:08.920456
@@ -0,0 +1,162 @@
1
+ = Logs of language model for Latvian (lv) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-21 00:16:33.485953
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Zigfrīds Anna Meierovics (revision 2546984)
11
+ 1. Saeima (revision 2511127)
12
+ 1. Saeimas deputāti (revision 2303859)
13
+ 1. Saeimas frakcijas (revision 2429725)
14
+ 1. Saeimas vēlēšanas (revision 2464758)
15
+ 1887. gads (revision 2583253)
16
+ 1919. gada Parīzes miera konference (revision 2482078)
17
+ 1920 (revision 2401222)
18
+ 1921 (revision 2473337)
19
+ 1922 (revision 2486819)
20
+ 1923 (revision 2544643)
21
+ 1924 (revision 2539361)
22
+ 1925 (revision 2486795)
23
+ 22. augusts (revision 2583254)
24
+ 31. jūlijs (revision 2559648)
25
+ 5. februāris (revision 2581966)
26
+ ASV (revision 2549746)
27
+ Agrārā reforma Latvijā (revision 2473423)
28
+ Agudas Izrael (Latvija) (revision 2311143)
29
+ Aigars Kalvītis (revision 2545858)
30
+ Alberts Kviesis (revision 2546934)
31
+ Aleksandrs Bočagovs (revision 2329526)
32
+ Aleksandrs Dauge (revision 2546805)
33
+ Aleksandrs Jaunbērzs (revision 2462254)
34
+ Aleksandrs Kerenskis (revision 2461214)
35
+ Aleksandrs Millerāns (revision 2309419)
36
+ Aleksandrs Neibergs (revision 2491897)
37
+ Alfrēds Birznieks (revision 2567317)
38
+ Alfrēds Jēkabs Bērziņš (revision 2564068)
39
+ Alfrēds Riekstiņš (politiķis) (revision 2586148)
40
+ Andrejs Bērziņš (revision 2564283)
41
+ Andrejs Kurcijs (revision 2564338)
42
+ Andrejs Petrevics (revision 2460269)
43
+ Andrejs Sīmanis (revision 2547079)
44
+ Andrejs Veckalns (revision 2564224)
45
+ Andrievs Niedra (revision 2546988)
46
+ Andris Bērziņš (politiķis, 1951) (revision 2218488)
47
+ Andris Šķēle (revision 2457423)
48
+ Angļu valoda (revision 2447598)
49
+ Ansis Buševics (revision 2578312)
50
+ Ansis Rudevics (revision 2414854)
51
+ Antante (revision 2581862)
52
+ Antons Dzenis (revision 2564295)
53
+ Antons Laizāns (revision 2467408)
54
+ Antons Rubins (1885) (revision 2465396)
55
+ Antons Velkme (revision 2564425)
56
+ Ants Pīps (revision 2564383)
57
+ Apollo (portāls) (revision 2371202)
58
+ Apolonija Laurinoviča (revision 2466232)
59
+ Aprīļa pučs (revision 2150686)
60
+ Apvienotā Karaliste (revision 2566258)
61
+ Aristīds Briāns (revision 2536819)
62
+ Arons Nuroks (revision 2337085)
63
+ Arturs Alberings (revision 2442531)
64
+ Arturs Ozols (inženieris) (revision 2491399)
65
+ Artūrs Balfūrs (revision 2309461)
66
+ Artūrs Vīgants (revision 2461471)
67
+ Artūrs Žers (revision 2564230)
68
+ Arveds Bergs (revision 2564118)
69
+ Arveds Švābe (revision 2586288)
70
+ Arvīds Kalniņš (revision 2545254)
71
+ Aspazija (revision 2574081)
72
+ Augusts Briedis (revision 2546879)
73
+ Augusts Kalniņš (revision 2436647)
74
+ Augusts Kirhenšteins (revision 2547109)
75
+ Austroungārija (revision 2524307)
76
+ Autoritatīvā vadība (revision 2385793)
77
+ Balfūra nota (revision 2538973)
78
+ Baltijas Antante (revision 2541901)
79
+ Baltijas pārkrievošana (revision 2570657)
80
+ Bermontiāde (revision 2499160)
81
+ Bernards Kublinskis (revision 2441386)
82
+ Bezpartijiskais nacionālais centrs (revision 2438819)
83
+ Beļģija (revision 2579008)
84
+ Brestļitovskas miera līgums (revision 2569020)
85
+ Brizules muiža (revision 2584564)
86
+ Bruno Kalniņš (revision 2566572)
87
+ Brīvības piemineklis (revision 2578595)
88
+ Bulduru konference (revision 2193449)
89
+ Ceire-Cion (revision 2311779)
90
+ Celmiņa 1. Ministru kabinets (revision 2112830)
91
+ Delfi (portāls) (revision 2544918)
92
+ Demokrātiskais Centrs (revision 2113060)
93
+ Demokrātu savienība (revision 2179593)
94
+ Diena (laikraksts) (revision 2548854)
95
+ Donats Bicāns (revision 2479349)
96
+ Dubulti (Jūrmala) (revision 2456811)
97
+ Durbe (revision 2381790)
98
+ Dāvids Komisārs (revision 2574685)
99
+ Džovanni Džoliti (revision 2538055)
100
+ Ebreju bloks (revision 2311643)
101
+ Ebreju nacionāldemokrātu partija (revision 2312288)
102
+ Eduards Grantskalns (revision 2565167)
103
+ Eduards Jaunzems (revision 2452579)
104
+ Eduards Laimiņš (revision 2449521)
105
+ Eduards Radziņš (revision 2564393)
106
+
107
+ == End of Parsed pages ==
108
+
109
+ - Wikipedia parsing ended at: 2016-09-21 00:19:18.361533
110
+
111
+ 55 characters appeared 354745 times.
112
+
113
+ First 39 characters:
114
+ [ 0] Char a: 11.905171320244119 %
115
+ [ 1] Char i: 9.3977364022044 %
116
+ [ 2] Char s: 8.224217395594017 %
117
+ [ 3] Char e: 6.367108768270166 %
118
+ [ 4] Char r: 5.854064186951191 %
119
+ [ 5] Char t: 5.831230884156225 %
120
+ [ 6] Char u: 4.939604504644181 %
121
+ [ 7] Char n: 4.463769750102186 %
122
+ [ 8] Char ā: 3.9498794909019157 %
123
+ [ 9] Char l: 3.8030134321836813 %
124
+ [10] Char o: 3.6296494665182033 %
125
+ [11] Char k: 3.524785409237621 %
126
+ [12] Char m: 3.2739009711201 %
127
+ [13] Char d: 3.177775585279567 %
128
+ [14] Char v: 3.0046935122411873 %
129
+ [15] Char p: 2.827101157169234 %
130
+ [16] Char j: 2.8166711299665956 %
131
+ [17] Char b: 2.0279355593454453 %
132
+ [18] Char ī: 1.8855797826607845 %
133
+ [19] Char g: 1.6146809680192813 %
134
+ [20] Char z: 1.5343415692962552 %
135
+ [21] Char ē: 1.4593581304880971 %
136
+ [22] Char c: 1.2231321089796898 %
137
+ [23] Char š: 0.8876798827326671 %
138
+ [24] Char ņ: 0.46596851259355315 %
139
+ [25] Char f: 0.4203019070036223 %
140
+ [26] Char ļ: 0.34700982395805435 %
141
+ [27] Char ū: 0.30162511099522193 %
142
+ [28] Char h: 0.20070755049401684 %
143
+ [29] Char ž: 0.18774048964749326 %
144
+ [30] Char ķ: 0.14207388405756247 %
145
+ [31] Char ģ: 0.1268516821942522 %
146
+ [32] Char č: 0.08287643236691145 %
147
+ [33] Char w: 0.0324176521163089 %
148
+ [34] Char y: 0.02734358482853881 %
149
+ [35] Char x: 0.015785987117506943 %
150
+ [36] Char ö: 0.005074067287770088 %
151
+ [37] Char é: 0.003946496779376736 %
152
+ [38] Char q: 0.0031008188980817205 %
153
+
154
+ The first 39 characters have an accumulated ratio of 0.9998590536864506.
155
+
156
+ 970 sequences found.
157
+
158
+ First 512 (typical positive ratio): 0.9904102202220861
159
+ Next 512 (512-1024): 0.0018774048964749328
160
+ Rest: -1.734723475976807e-17
161
+
162
+ - Processing end: 2016-09-21 00:19:18.484318