cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,109 @@
1
+ = Logs of language model for Hungarian (hu) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-12 18:01:21.560682
5
+ - Maximum depth: 2
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ Kezdőlap (revision 12748721)
11
+ 1722 (revision 16471860)
12
+ 1780 (revision 16407861)
13
+ 1800 (revision 15028835)
14
+ 1831 (revision 16469576)
15
+ 1848–49-es forradalom és szabadságharc (revision 16955214)
16
+ 1875 (revision 16798555)
17
+ 1895 (revision 16649417)
18
+ 1900 (revision 16961019)
19
+ 1905 (revision 16601113)
20
+ 1915 (revision 16792868)
21
+ 1940 (revision 16936087)
22
+ 1950 (revision 16820817)
23
+ 1970 (revision 16093156)
24
+ 1985 (revision 16463340)
25
+ 1995 (revision 16945805)
26
+ 1998 (revision 16542908)
27
+ 2003 (revision 16943939)
28
+ 2015 (revision 16960983)
29
+ 73. Golden Globe-gála (revision 16937296)
30
+ Akacuki (revision 16960353)
31
+ Akasztottak erdeje (regény) (revision 16918702)
32
+ Alan Hodgkinson (revision 16953214)
33
+ Alfred Bernhard Nobel (revision 16654409)
34
+ Alkotmány (revision 16784843)
35
+ André-Marie Ampère (revision 16865419)
36
+ Angela Merkel (revision 16960753)
37
+ Anne Baxter (revision 15572176)
38
+ Az irgalmasság rendkívüli szentéve (revision 16951018)
39
+ Az év embereinek listája (revision 16961722)
40
+ Bencések (revision 16853524)
41
+ Boeing 747–400 (revision 16947261)
42
+ Chantal Szent Johanna Franciska (revision 16371923)
43
+ December 12. (revision 15637986)
44
+ December 13. (revision 16546152)
45
+ Dinamó (revision 15949492)
46
+ Dionne Warwick (revision 16522754)
47
+ Elektrodinamika (revision 14888277)
48
+ Elektromosság (revision 16051899)
49
+ Enciklopédia (revision 16556513)
50
+ Eric Maskin (revision 16907781)
51
+ Európai migrációs válság (revision 16922218)
52
+ Eötvös Loránd (revision 16960057)
53
+ Eötvös Loránd Tudományegyetem (revision 16684410)
54
+ Fellner Jakab (revision 16960223)
55
+ Feltaláló (revision 13609621)
56
+ Ferenc pápa (revision 16928970)
57
+ Frank Sinatra (revision 16927399)
58
+ François Jean Dominique Arago (revision 16197941)
59
+ Gabriella (revision 16906500)
60
+
61
+ == End of Parsed pages ==
62
+
63
+ - Wikipedia parsing ended at: 2015-12-12 18:02:46.729734
64
+
65
+ 55 characters appeared 375370 times.
66
+
67
+ First 32 characters:
68
+ [ 0] Char e: 9.710685457015744 %
69
+ [ 1] Char a: 8.803314063457389 %
70
+ [ 2] Char t: 7.322375256413672 %
71
+ [ 3] Char s: 6.666222660308496 %
72
+ [ 4] Char l: 5.73967019207715 %
73
+ [ 5] Char r: 5.4341050163838345 %
74
+ [ 6] Char n: 5.39920611663159 %
75
+ [ 7] Char i: 4.773689959240216 %
76
+ [ 8] Char o: 4.347976663025815 %
77
+ [ 9] Char k: 4.289634227562138 %
78
+ [10] Char z: 4.244611982843594 %
79
+ [11] Char á: 3.7855982097663636 %
80
+ [12] Char m: 3.2144284306151265 %
81
+ [13] Char g: 3.0727016010869277 %
82
+ [14] Char é: 3.0295441830727015 %
83
+ [15] Char b: 2.287609558568879 %
84
+ [16] Char d: 1.9966965926952074 %
85
+ [17] Char v: 1.8832085675466872 %
86
+ [18] Char y: 1.8453792258305137 %
87
+ [19] Char u: 1.5155713029810587 %
88
+ [20] Char h: 1.2960545595012922 %
89
+ [21] Char p: 1.288861656498921 %
90
+ [22] Char j: 1.2363801049631031 %
91
+ [23] Char c: 1.0951860830647095 %
92
+ [24] Char f: 1.0256546873751233 %
93
+ [25] Char ö: 1.020859418706876 %
94
+ [26] Char ó: 0.9955510562911262 %
95
+ [27] Char ő: 0.8399712283879905 %
96
+ [28] Char í: 0.6340410794682579 %
97
+ [29] Char ü: 0.4211844313610571 %
98
+ [30] Char ú: 0.3295415190345526 %
99
+ [31] Char ű: 0.2056637451048299 %
100
+
101
+ The first 32 characters have an accumulated ratio of 0.9975117883688093.
102
+
103
+ 1084 sequences found.
104
+
105
+ First 512 (typical positive ratio): 0.9748272224933486
106
+ Next 512 (512-1024): 5.328076298052588e-06
107
+ Rest: 0.0001889139024889644
108
+
109
+ - Processing end: 2015-12-12 18:02:46.902033
@@ -0,0 +1,156 @@
1
+ = Logs of language model for Irish (ga) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-27 00:31:16.489602
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Tracy Caldwell Dyson (revision 812158)
11
+ 14 Lúnasa (revision 716575)
12
+ 1969 (revision 810361)
13
+ California (revision 790976)
14
+ Ceimic (revision 759983)
15
+ Ceimic fhisiciúil (revision 656896)
16
+ NASA (revision 806394)
17
+ Rúisis (revision 771746)
18
+ SAM (revision 807668)
19
+ Spáinnis (revision 812323)
20
+ Stáisiún Idirnáisiúnta Spáis (revision 806394)
21
+ Tointeálaí spáis (revision 761309)
22
+ 10 Lúnasa (revision 649045)
23
+ 11 Lúnasa (revision 776455)
24
+ 12 Lúnasa (revision 716531)
25
+ 13 Lúnasa (revision 716546)
26
+ 1598 (revision 703178)
27
+ 15 Lúnasa (revision 776986)
28
+ 16 Lúnasa (revision 648836)
29
+ 1740 (revision 791225)
30
+ 1771 (revision 776762)
31
+ 17 Lúnasa (revision 777131)
32
+ 1823 (revision 791774)
33
+ 1832 (revision 794492)
34
+ 1898 (revision 805176)
35
+ 18 Lúnasa (revision 777242)
36
+ 1911 (revision 801932)
37
+ 1956 (revision 797081)
38
+ 1962 (revision 801511)
39
+ 1966 (revision 807415)
40
+ 19 Lúnasa (revision 648524)
41
+ 1 Lúnasa (revision 647726)
42
+ 2001 (revision 801012)
43
+ 2004 (revision 795759)
44
+ 2016 (revision 812091)
45
+ 20 Lúnasa (revision 777924)
46
+ 21 Lúnasa (revision 647805)
47
+ 22 Lúnasa (revision 778960)
48
+ 23 Lúnasa (revision 778453)
49
+ 24 Lúnasa (revision 778495)
50
+ 25 Lúnasa (revision 778551)
51
+ 26 Lúnasa (revision 649051)
52
+ 27 Lúnasa (revision 778763)
53
+ 28 Lúnasa (revision 778813)
54
+ 29 Lúnasa (revision 778959)
55
+ 2 Lúnasa (revision 774393)
56
+ 30 Lúnasa (revision 648308)
57
+ 31 Lúnasa (revision 649053)
58
+ 3 Lúnasa (revision 647811)
59
+ 4 Lúnasa (revision 786284)
60
+ 5 Lúnasa (revision 776845)
61
+ 6 Lúnasa (revision 647834)
62
+ 7 Lúnasa (revision 775859)
63
+ 8 Lúnasa (revision 648745)
64
+ 9 Lúnasa (revision 648522)
65
+ AK Parti (revision 792248)
66
+ An Phacastáin (revision 759339)
67
+ An Tuirc (revision 811970)
68
+ Aoine (revision 717430)
69
+ Bertolt Brecht (revision 800584)
70
+ Czesław Miłosz (revision 780306)
71
+ Céadaoin (revision 717606)
72
+ Dan Boyle (revision 797926)
73
+ Domhnach (revision 717663)
74
+ Déardaoin (revision 647860)
75
+ Féilire (revision 648837)
76
+ Halle Berry (revision 759955)
77
+ Henry Bagenal (revision 716575)
78
+ Iúil (revision 647071)
79
+ Luan (revision 717791)
80
+ Lúnasa (revision 810265)
81
+ Meán Fómhair (revision 779166)
82
+ Pápa Pius VII (revision 758126)
83
+ Satharn (revision 784525)
84
+ Walter Scott (revision 759029)
85
+ Áth Buí (revision 716575)
86
+ 11 Márta (revision 716519)
87
+ 17 Márta (revision 798614)
88
+ 1882 (revision 801198)
89
+ 1886 (revision 776624)
90
+ 1890 (revision 801200)
91
+ 1891 (revision 796677)
92
+ 1903 (revision 812849)
93
+ 1922 (revision 801227)
94
+ 1930í (revision 740221)
95
+ 1940í (revision 740219)
96
+ 1950í (revision 740217)
97
+ 1960í (revision 772724)
98
+ 1967 (revision 796983)
99
+ 1968 (revision 810926)
100
+ 1970 (revision 812852)
101
+ 1970í (revision 740213)
102
+ 1971 (revision 809746)
103
+ 1972 (revision 789490)
104
+ 1980í (revision 740211)
105
+ 1990í (revision 740208)
106
+ 19ú haois (revision 739964)
107
+ 1 Bealtaine (revision 647679)
108
+
109
+ == End of Parsed pages ==
110
+
111
+ - Wikipedia parsing ended at: 2016-09-27 00:33:40.157338
112
+
113
+ 44 characters appeared 183561 times.
114
+
115
+ First 31 characters:
116
+ [ 0] Char a: 15.192769705983297 %
117
+ [ 1] Char i: 10.534372769814938 %
118
+ [ 2] Char n: 8.106297089250985 %
119
+ [ 3] Char h: 7.243368689427493 %
120
+ [ 4] Char r: 6.442544985045844 %
121
+ [ 5] Char e: 6.198484427520007 %
122
+ [ 6] Char s: 5.622654049607488 %
123
+ [ 7] Char t: 4.776068990689743 %
124
+ [ 8] Char c: 4.543448771797931 %
125
+ [ 9] Char l: 4.1953356105054995 %
126
+ [10] Char o: 3.9469168287381304 %
127
+ [11] Char d: 3.2169142682813887 %
128
+ [12] Char g: 2.811054635788648 %
129
+ [13] Char m: 2.6269196615838877 %
130
+ [14] Char á: 2.2749930540801153 %
131
+ [15] Char u: 2.1932763495513754 %
132
+ [16] Char b: 2.0478206154902185 %
133
+ [17] Char í: 1.6599386579938005 %
134
+ [18] Char é: 1.2829522611012143 %
135
+ [19] Char f: 1.1494816437042727 %
136
+ [20] Char ú: 1.0525111543301682 %
137
+ [21] Char p: 0.9059658642086281 %
138
+ [22] Char ó: 0.8890777452726886 %
139
+ [23] Char v: 0.2522322279787101 %
140
+ [24] Char y: 0.23479933101257894 %
141
+ [25] Char k: 0.18195586208399386 %
142
+ [26] Char w: 0.1688811893593955 %
143
+ [27] Char j: 0.09697048937410452 %
144
+ [28] Char z: 0.07735848028720697 %
145
+ [29] Char x: 0.0343210159020707 %
146
+ [30] Char q: 0.010895560603831969 %
147
+
148
+ The first 31 characters have an accumulated ratio of 0.9997058198636966.
149
+
150
+ 701 sequences found.
151
+
152
+ First 512 (typical positive ratio): 0.9974076651249096
153
+ Next 512 (512-1024): 5.447780301915984e-06
154
+ Rest: -2.7755575615628914e-17
155
+
156
+ - Processing end: 2016-09-27 00:33:40.258886
@@ -0,0 +1,162 @@
1
+ = Logs of language model for Italian (it) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-21 18:43:12.831409
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Pieve Ligure (revision 83186252)
11
+ 010 (prefisso) (revision 76157203)
12
+ 1000 (revision 83185341)
13
+ 1143 (revision 70627567)
14
+ 1162 (revision 70627612)
15
+ 118 - Emergenza sanitaria (revision 83267411)
16
+ 1201 (revision 77523243)
17
+ 1202 (revision 76764411)
18
+ 1374 (revision 78259457)
19
+ 1404 (revision 70628069)
20
+ 1520 (revision 76854924)
21
+ 1537 (revision 70628296)
22
+ 1582 (revision 80626188)
23
+ 1584 (revision 76837051)
24
+ 1600 (revision 76869356)
25
+ 1619 (revision 70628455)
26
+ 1742 (revision 70628675)
27
+ 1748 (revision 70628682)
28
+ 1749 (revision 70628684)
29
+ 1750 (revision 70628690)
30
+ 1754 (revision 70628697)
31
+ 1775 (revision 70628734)
32
+ 1797 (revision 78338823)
33
+ 1798 (revision 82047236)
34
+ 1803 (revision 77502534)
35
+ 1805 (revision 79369853)
36
+ 1809 (revision 70628789)
37
+ 1810 (revision 82930218)
38
+ 1814 (revision 78338825)
39
+ 1815 (revision 82669615)
40
+ 1816 (revision 83185384)
41
+ 1818 (revision 72407239)
42
+ 1823 (revision 74880156)
43
+ 1859 (revision 83185401)
44
+ 1860 (revision 83185403)
45
+ 1861 (revision 83185412)
46
+ 1868 (revision 83185430)
47
+ 1874 (revision 83185441)
48
+ 1897 (revision 83185267)
49
+ 1908 (revision 83185631)
50
+ 1909 (revision 83185630)
51
+ 1913 (revision 83185626)
52
+ 1915 (revision 83185625)
53
+ 1917 (revision 83185270)
54
+ 1920 (revision 83185621)
55
+ 1921 (revision 83185619)
56
+ 1923 (revision 83185616)
57
+ 1925 (revision 83185614)
58
+ 1926 (revision 83185612)
59
+ 1928 (revision 83185610)
60
+ 1929 (revision 83185609)
61
+ 1939 (revision 83185598)
62
+ 1946 (revision 83185590)
63
+ 1947 (revision 83185589)
64
+ 1948 (revision 83185587)
65
+ 1951 (revision 83185584)
66
+ 1956 (revision 83185478)
67
+ 1960 (revision 83185487)
68
+ 1964 (revision 83185493)
69
+ 1965 (revision 83185494)
70
+ 1969 (revision 83185500)
71
+ 1970 (revision 83185503)
72
+ 1971 (revision 83185505)
73
+ 1975 (revision 83185510)
74
+ 1976 (revision 83185513)
75
+ 1977 (revision 83185514)
76
+ 1980 (revision 83185518)
77
+ 1981 (revision 83308867)
78
+ 1983 (revision 83185524)
79
+ 1985 (revision 83185526)
80
+ 1988 (revision 83185280)
81
+ 1990 (revision 83185531)
82
+ 1995 (revision 83185538)
83
+ 1999 (revision 83326325)
84
+ 2000 (revision 83185544)
85
+ 2001 (revision 83309058)
86
+ 2002 (revision 83185545)
87
+ 2003 (revision 83185546)
88
+ 2004 (revision 83185283)
89
+ 2005 (revision 83185285)
90
+ 2006 (revision 83185547)
91
+ 2007 (revision 83185549)
92
+ 2008 (revision 83185551)
93
+ 2009 (revision 83185552)
94
+ 2010 (revision 83185287)
95
+ 2012 (revision 83185289)
96
+ 712 (revision 70630167)
97
+ 749 (revision 78272323)
98
+ ATP (Provincia di Genova) (revision 82754117)
99
+ Abbazia di San Colombano (revision 83062997)
100
+ Abbazia di San Fruttuoso (revision 83288120)
101
+ Acacia dealbata (revision 83036867)
102
+ Acquedotto (revision 82973825)
103
+ Affresco (revision 82000422)
104
+ Agricoltura (revision 82578266)
105
+ Allevamento (revision 82971452)
106
+ Altitudine (revision 82971213)
107
+ Angelo (revision 82333116)
108
+ Anni 1960 (revision 83161222)
109
+ Anni 1970 (revision 81663175)
110
+ Antica Roma (revision 83125874)
111
+
112
+ == End of Parsed pages ==
113
+
114
+ - Wikipedia parsing ended at: 2016-09-21 18:46:08.840718
115
+
116
+ 59 characters appeared 823241 times.
117
+
118
+ First 34 characters:
119
+ [ 0] Char i: 11.823147778111148 %
120
+ [ 1] Char a: 11.252112078965942 %
121
+ [ 2] Char e: 10.910170897707962 %
122
+ [ 3] Char o: 8.936386793174782 %
123
+ [ 4] Char n: 7.317055394471364 %
124
+ [ 5] Char l: 6.931263141655967 %
125
+ [ 6] Char r: 6.521784021932824 %
126
+ [ 7] Char t: 6.386708145002497 %
127
+ [ 8] Char s: 4.572415610981475 %
128
+ [ 9] Char c: 4.116291584116923 %
129
+ [10] Char d: 3.9770856893667834 %
130
+ [11] Char u: 2.8944136650142545 %
131
+ [12] Char m: 2.762860450342002 %
132
+ [13] Char p: 2.6809889206198427 %
133
+ [14] Char g: 2.1493098618751985 %
134
+ [15] Char v: 1.5369739845318686 %
135
+ [16] Char b: 1.2855287819727153 %
136
+ [17] Char f: 0.9932692856648295 %
137
+ [18] Char z: 0.9664241698360504 %
138
+ [19] Char h: 0.7159507361756764 %
139
+ [20] Char q: 0.2416060424590126 %
140
+ [21] Char k: 0.18876610858788617 %
141
+ [22] Char à: 0.15596890825408355 %
142
+ [23] Char y: 0.12462936126844994 %
143
+ [24] Char è: 0.11600491229178332 %
144
+ [25] Char w: 0.10628722330398996 %
145
+ [26] Char x: 0.10312897438295712 %
146
+ [27] Char j: 0.07555503188009344 %
147
+ [28] Char ù: 0.05575524056746445 %
148
+ [29] Char ò: 0.03304014255849745 %
149
+ [30] Char é: 0.021014502436103158 %
150
+ [31] Char ì: 0.0191924357508919 %
151
+ [32] Char á: 0.004737373381549267 %
152
+ [33] Char ó: 0.003644133370422513 %
153
+
154
+ The first 34 characters have an accumulated ratio of 0.9997947138201325.
155
+
156
+ 872 sequences found.
157
+
158
+ First 512 (typical positive ratio): 0.9989484485502651
159
+ Next 512 (512-1024): 1.214711123474171e-06
160
+ Rest: -4.336808689942018e-17
161
+
162
+ - Processing end: 2016-09-21 18:46:08.920456
@@ -0,0 +1,162 @@
1
+ = Logs of language model for Latvian (lv) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-09-21 00:16:33.485953
5
+ - Maximum depth: 5
6
+ - Max number of pages: 100
7
+
8
+ == Parsed pages ==
9
+
10
+ Zigfrīds Anna Meierovics (revision 2546984)
11
+ 1. Saeima (revision 2511127)
12
+ 1. Saeimas deputāti (revision 2303859)
13
+ 1. Saeimas frakcijas (revision 2429725)
14
+ 1. Saeimas vēlēšanas (revision 2464758)
15
+ 1887. gads (revision 2583253)
16
+ 1919. gada Parīzes miera konference (revision 2482078)
17
+ 1920 (revision 2401222)
18
+ 1921 (revision 2473337)
19
+ 1922 (revision 2486819)
20
+ 1923 (revision 2544643)
21
+ 1924 (revision 2539361)
22
+ 1925 (revision 2486795)
23
+ 22. augusts (revision 2583254)
24
+ 31. jūlijs (revision 2559648)
25
+ 5. februāris (revision 2581966)
26
+ ASV (revision 2549746)
27
+ Agrārā reforma Latvijā (revision 2473423)
28
+ Agudas Izrael (Latvija) (revision 2311143)
29
+ Aigars Kalvītis (revision 2545858)
30
+ Alberts Kviesis (revision 2546934)
31
+ Aleksandrs Bočagovs (revision 2329526)
32
+ Aleksandrs Dauge (revision 2546805)
33
+ Aleksandrs Jaunbērzs (revision 2462254)
34
+ Aleksandrs Kerenskis (revision 2461214)
35
+ Aleksandrs Millerāns (revision 2309419)
36
+ Aleksandrs Neibergs (revision 2491897)
37
+ Alfrēds Birznieks (revision 2567317)
38
+ Alfrēds Jēkabs Bērziņš (revision 2564068)
39
+ Alfrēds Riekstiņš (politiķis) (revision 2586148)
40
+ Andrejs Bērziņš (revision 2564283)
41
+ Andrejs Kurcijs (revision 2564338)
42
+ Andrejs Petrevics (revision 2460269)
43
+ Andrejs Sīmanis (revision 2547079)
44
+ Andrejs Veckalns (revision 2564224)
45
+ Andrievs Niedra (revision 2546988)
46
+ Andris Bērziņš (politiķis, 1951) (revision 2218488)
47
+ Andris Šķēle (revision 2457423)
48
+ Angļu valoda (revision 2447598)
49
+ Ansis Buševics (revision 2578312)
50
+ Ansis Rudevics (revision 2414854)
51
+ Antante (revision 2581862)
52
+ Antons Dzenis (revision 2564295)
53
+ Antons Laizāns (revision 2467408)
54
+ Antons Rubins (1885) (revision 2465396)
55
+ Antons Velkme (revision 2564425)
56
+ Ants Pīps (revision 2564383)
57
+ Apollo (portāls) (revision 2371202)
58
+ Apolonija Laurinoviča (revision 2466232)
59
+ Aprīļa pučs (revision 2150686)
60
+ Apvienotā Karaliste (revision 2566258)
61
+ Aristīds Briāns (revision 2536819)
62
+ Arons Nuroks (revision 2337085)
63
+ Arturs Alberings (revision 2442531)
64
+ Arturs Ozols (inženieris) (revision 2491399)
65
+ Artūrs Balfūrs (revision 2309461)
66
+ Artūrs Vīgants (revision 2461471)
67
+ Artūrs Žers (revision 2564230)
68
+ Arveds Bergs (revision 2564118)
69
+ Arveds Švābe (revision 2586288)
70
+ Arvīds Kalniņš (revision 2545254)
71
+ Aspazija (revision 2574081)
72
+ Augusts Briedis (revision 2546879)
73
+ Augusts Kalniņš (revision 2436647)
74
+ Augusts Kirhenšteins (revision 2547109)
75
+ Austroungārija (revision 2524307)
76
+ Autoritatīvā vadība (revision 2385793)
77
+ Balfūra nota (revision 2538973)
78
+ Baltijas Antante (revision 2541901)
79
+ Baltijas pārkrievošana (revision 2570657)
80
+ Bermontiāde (revision 2499160)
81
+ Bernards Kublinskis (revision 2441386)
82
+ Bezpartijiskais nacionālais centrs (revision 2438819)
83
+ Beļģija (revision 2579008)
84
+ Brestļitovskas miera līgums (revision 2569020)
85
+ Brizules muiža (revision 2584564)
86
+ Bruno Kalniņš (revision 2566572)
87
+ Brīvības piemineklis (revision 2578595)
88
+ Bulduru konference (revision 2193449)
89
+ Ceire-Cion (revision 2311779)
90
+ Celmiņa 1. Ministru kabinets (revision 2112830)
91
+ Delfi (portāls) (revision 2544918)
92
+ Demokrātiskais Centrs (revision 2113060)
93
+ Demokrātu savienība (revision 2179593)
94
+ Diena (laikraksts) (revision 2548854)
95
+ Donats Bicāns (revision 2479349)
96
+ Dubulti (Jūrmala) (revision 2456811)
97
+ Durbe (revision 2381790)
98
+ Dāvids Komisārs (revision 2574685)
99
+ Džovanni Džoliti (revision 2538055)
100
+ Ebreju bloks (revision 2311643)
101
+ Ebreju nacionāldemokrātu partija (revision 2312288)
102
+ Eduards Grantskalns (revision 2565167)
103
+ Eduards Jaunzems (revision 2452579)
104
+ Eduards Laimiņš (revision 2449521)
105
+ Eduards Radziņš (revision 2564393)
106
+
107
+ == End of Parsed pages ==
108
+
109
+ - Wikipedia parsing ended at: 2016-09-21 00:19:18.361533
110
+
111
+ 55 characters appeared 354745 times.
112
+
113
+ First 39 characters:
114
+ [ 0] Char a: 11.905171320244119 %
115
+ [ 1] Char i: 9.3977364022044 %
116
+ [ 2] Char s: 8.224217395594017 %
117
+ [ 3] Char e: 6.367108768270166 %
118
+ [ 4] Char r: 5.854064186951191 %
119
+ [ 5] Char t: 5.831230884156225 %
120
+ [ 6] Char u: 4.939604504644181 %
121
+ [ 7] Char n: 4.463769750102186 %
122
+ [ 8] Char ā: 3.9498794909019157 %
123
+ [ 9] Char l: 3.8030134321836813 %
124
+ [10] Char o: 3.6296494665182033 %
125
+ [11] Char k: 3.524785409237621 %
126
+ [12] Char m: 3.2739009711201 %
127
+ [13] Char d: 3.177775585279567 %
128
+ [14] Char v: 3.0046935122411873 %
129
+ [15] Char p: 2.827101157169234 %
130
+ [16] Char j: 2.8166711299665956 %
131
+ [17] Char b: 2.0279355593454453 %
132
+ [18] Char ī: 1.8855797826607845 %
133
+ [19] Char g: 1.6146809680192813 %
134
+ [20] Char z: 1.5343415692962552 %
135
+ [21] Char ē: 1.4593581304880971 %
136
+ [22] Char c: 1.2231321089796898 %
137
+ [23] Char š: 0.8876798827326671 %
138
+ [24] Char ņ: 0.46596851259355315 %
139
+ [25] Char f: 0.4203019070036223 %
140
+ [26] Char ļ: 0.34700982395805435 %
141
+ [27] Char ū: 0.30162511099522193 %
142
+ [28] Char h: 0.20070755049401684 %
143
+ [29] Char ž: 0.18774048964749326 %
144
+ [30] Char ķ: 0.14207388405756247 %
145
+ [31] Char ģ: 0.1268516821942522 %
146
+ [32] Char č: 0.08287643236691145 %
147
+ [33] Char w: 0.0324176521163089 %
148
+ [34] Char y: 0.02734358482853881 %
149
+ [35] Char x: 0.015785987117506943 %
150
+ [36] Char ö: 0.005074067287770088 %
151
+ [37] Char é: 0.003946496779376736 %
152
+ [38] Char q: 0.0031008188980817205 %
153
+
154
+ The first 39 characters have an accumulated ratio of 0.9998590536864506.
155
+
156
+ 970 sequences found.
157
+
158
+ First 512 (typical positive ratio): 0.9904102202220861
159
+ Next 512 (512-1024): 0.0018774048964749328
160
+ Rest: -1.734723475976807e-17
161
+
162
+ - Processing end: 2016-09-21 00:19:18.484318