cchardet 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/README.md +35 -0
- data/Rakefile +15 -0
- data/cchardet.gemspec +30 -0
- data/ext/cchardet/extconf.rb +26 -0
- data/ext/uchardet/.gitignore +1 -0
- data/ext/uchardet/.gitlab-ci.yml +106 -0
- data/ext/uchardet/AUTHORS +16 -0
- data/ext/uchardet/CMakeLists.txt +74 -0
- data/ext/uchardet/COPYING +1316 -0
- data/ext/uchardet/INSTALL +26 -0
- data/ext/uchardet/README.md +295 -0
- data/ext/uchardet/build-mac/uchardet.cpp +7 -0
- data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
- data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
- data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
- data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
- data/ext/uchardet/doc/CMakeLists.txt +6 -0
- data/ext/uchardet/doc/README.maintainer +59 -0
- data/ext/uchardet/doc/uchardet.1 +18 -0
- data/ext/uchardet/script/BuildLangModel.py +533 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
- data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
- data/ext/uchardet/script/README +63 -0
- data/ext/uchardet/script/charsets/codepoints.py +53 -0
- data/ext/uchardet/script/charsets/db.py +73 -0
- data/ext/uchardet/script/charsets/ibm852.py +72 -0
- data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
- data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
- data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
- data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
- data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
- data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
- data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
- data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
- data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
- data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
- data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
- data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
- data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
- data/ext/uchardet/script/charsets/tis-620.py +77 -0
- data/ext/uchardet/script/charsets/viscii.py +72 -0
- data/ext/uchardet/script/charsets/windows-1250.py +75 -0
- data/ext/uchardet/script/charsets/windows-1252.py +76 -0
- data/ext/uchardet/script/charsets/windows-1253.py +72 -0
- data/ext/uchardet/script/charsets/windows-1256.py +75 -0
- data/ext/uchardet/script/charsets/windows-1257.py +72 -0
- data/ext/uchardet/script/charsets/windows-1258.py +72 -0
- data/ext/uchardet/script/debug.sh +9 -0
- data/ext/uchardet/script/header-template.cpp +38 -0
- data/ext/uchardet/script/langs/ar.py +59 -0
- data/ext/uchardet/script/langs/cs.py +80 -0
- data/ext/uchardet/script/langs/da.py +69 -0
- data/ext/uchardet/script/langs/de.py +69 -0
- data/ext/uchardet/script/langs/el.py +55 -0
- data/ext/uchardet/script/langs/eo.py +67 -0
- data/ext/uchardet/script/langs/es.py +69 -0
- data/ext/uchardet/script/langs/et.py +57 -0
- data/ext/uchardet/script/langs/fi.py +60 -0
- data/ext/uchardet/script/langs/fr.py +79 -0
- data/ext/uchardet/script/langs/ga.py +60 -0
- data/ext/uchardet/script/langs/hr.py +59 -0
- data/ext/uchardet/script/langs/hu.py +66 -0
- data/ext/uchardet/script/langs/it.py +56 -0
- data/ext/uchardet/script/langs/lt.py +70 -0
- data/ext/uchardet/script/langs/lv.py +69 -0
- data/ext/uchardet/script/langs/mt.py +80 -0
- data/ext/uchardet/script/langs/pl.py +81 -0
- data/ext/uchardet/script/langs/pt.py +80 -0
- data/ext/uchardet/script/langs/ro.py +65 -0
- data/ext/uchardet/script/langs/sk.py +80 -0
- data/ext/uchardet/script/langs/sl.py +59 -0
- data/ext/uchardet/script/langs/sv.py +56 -0
- data/ext/uchardet/script/langs/th.py +55 -0
- data/ext/uchardet/script/langs/tr.py +67 -0
- data/ext/uchardet/script/langs/vi.py +64 -0
- data/ext/uchardet/script/release.sh +8 -0
- data/ext/uchardet/script/win32.sh +7 -0
- data/ext/uchardet/src/Big5Freq.tab +943 -0
- data/ext/uchardet/src/CMakeLists.txt +160 -0
- data/ext/uchardet/src/CharDistribution.cpp +109 -0
- data/ext/uchardet/src/CharDistribution.h +242 -0
- data/ext/uchardet/src/EUCKRFreq.tab +614 -0
- data/ext/uchardet/src/EUCTWFreq.tab +447 -0
- data/ext/uchardet/src/GB2312Freq.tab +491 -0
- data/ext/uchardet/src/JISFreq.tab +589 -0
- data/ext/uchardet/src/JpCntx.cpp +230 -0
- data/ext/uchardet/src/JpCntx.h +140 -0
- data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
- data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
- data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
- data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
- data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
- data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
- data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
- data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
- data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
- data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
- data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
- data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
- data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
- data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
- data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
- data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
- data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
- data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
- data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
- data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
- data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
- data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
- data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
- data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
- data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
- data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
- data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
- data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
- data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
- data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
- data/ext/uchardet/src/nsBig5Prober.h +75 -0
- data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
- data/ext/uchardet/src/nsCharSetProber.h +77 -0
- data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
- data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
- data/ext/uchardet/src/nsEUCJPProber.h +79 -0
- data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
- data/ext/uchardet/src/nsEUCKRProber.h +81 -0
- data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
- data/ext/uchardet/src/nsEUCTWProber.h +75 -0
- data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
- data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
- data/ext/uchardet/src/nsEscSM.cpp +267 -0
- data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
- data/ext/uchardet/src/nsGB2312Prober.h +77 -0
- data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
- data/ext/uchardet/src/nsHebrewProber.h +177 -0
- data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
- data/ext/uchardet/src/nsLatin1Prober.h +73 -0
- data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
- data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
- data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
- data/ext/uchardet/src/nsPkgInt.h +89 -0
- data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
- data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
- data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
- data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
- data/ext/uchardet/src/nsSJISProber.cpp +98 -0
- data/ext/uchardet/src/nsSJISProber.h +81 -0
- data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
- data/ext/uchardet/src/nsUTF8Prober.h +66 -0
- data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
- data/ext/uchardet/src/nsUniversalDetector.h +91 -0
- data/ext/uchardet/src/nscore.h +59 -0
- data/ext/uchardet/src/prmem.h +49 -0
- data/ext/uchardet/src/symbols.cmake +41 -0
- data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
- data/ext/uchardet/src/tools/uchardet.cpp +254 -0
- data/ext/uchardet/src/uchardet.cpp +274 -0
- data/ext/uchardet/src/uchardet.h +136 -0
- data/ext/uchardet/test/CMakeLists.txt +47 -0
- data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
- data/ext/uchardet/test/ar/utf-8.txt +3 -0
- data/ext/uchardet/test/ar/windows-1256.txt +3 -0
- data/ext/uchardet/test/bg/windows-1251.txt +3 -0
- data/ext/uchardet/test/cs/ibm852.txt +4 -0
- data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
- data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
- data/ext/uchardet/test/cs/utf-8.txt +4 -0
- data/ext/uchardet/test/cs/windows-1250.txt +4 -0
- data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
- data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
- data/ext/uchardet/test/da/utf-8.txt +10 -0
- data/ext/uchardet/test/da/windows-1252.txt +10 -0
- data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
- data/ext/uchardet/test/de/windows-1252.txt +11 -0
- data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
- data/ext/uchardet/test/el/utf-8.txt +3 -0
- data/ext/uchardet/test/el/windows-1253.txt +5 -0
- data/ext/uchardet/test/en/ascii.txt +4 -0
- data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
- data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
- data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
- data/ext/uchardet/test/es/utf-8.txt +5 -0
- data/ext/uchardet/test/es/windows-1252.txt +5 -0
- data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
- data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
- data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
- data/ext/uchardet/test/et/utf-8.txt +6 -0
- data/ext/uchardet/test/et/windows-1252.txt +6 -0
- data/ext/uchardet/test/et/windows-1257.txt +6 -0
- data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
- data/ext/uchardet/test/fi/utf-8.txt +8 -0
- data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
- data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
- data/ext/uchardet/test/fr/utf-16.be +0 -0
- data/ext/uchardet/test/fr/utf-32.le +0 -0
- data/ext/uchardet/test/fr/utf-8.txt +14 -0
- data/ext/uchardet/test/fr/windows-1252.txt +3 -0
- data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
- data/ext/uchardet/test/ga/utf-8.txt +6 -0
- data/ext/uchardet/test/ga/windows-1252.txt +6 -0
- data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
- data/ext/uchardet/test/he/utf-8.txt +3 -0
- data/ext/uchardet/test/he/windows-1255.txt +1 -0
- data/ext/uchardet/test/hr/ibm852.txt +4 -0
- data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
- data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
- data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
- data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
- data/ext/uchardet/test/hr/utf-8.txt +4 -0
- data/ext/uchardet/test/hr/windows-1250.txt +4 -0
- data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
- data/ext/uchardet/test/hu/windows-1250.txt +1 -0
- data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
- data/ext/uchardet/test/it/utf-8.txt +18 -0
- data/ext/uchardet/test/ja/euc-jp.txt +10 -0
- data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
- data/ext/uchardet/test/ja/shift_jis.txt +1 -0
- data/ext/uchardet/test/ja/utf-16be.txt +0 -0
- data/ext/uchardet/test/ja/utf-16le.txt +0 -0
- data/ext/uchardet/test/ja/utf-8.txt +9 -0
- data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
- data/ext/uchardet/test/ko/uhc.smi +16 -0
- data/ext/uchardet/test/ko/utf-16.le +0 -0
- data/ext/uchardet/test/ko/utf-32.be +0 -0
- data/ext/uchardet/test/ko/utf-8.txt +3 -0
- data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
- data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
- data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
- data/ext/uchardet/test/lt/utf-8.txt +3 -0
- data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
- data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
- data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
- data/ext/uchardet/test/lv/utf-8.txt +6 -0
- data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
- data/ext/uchardet/test/mt/utf-8.txt +4 -0
- data/ext/uchardet/test/pl/ibm852.txt +3 -0
- data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
- data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
- data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
- data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
- data/ext/uchardet/test/pl/utf-8.txt +3 -0
- data/ext/uchardet/test/pl/windows-1250.txt +3 -0
- data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
- data/ext/uchardet/test/pt/utf-8.txt +6 -0
- data/ext/uchardet/test/ro/ibm852.txt +9 -0
- data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
- data/ext/uchardet/test/ro/utf-8.txt +9 -0
- data/ext/uchardet/test/ro/windows-1250.txt +9 -0
- data/ext/uchardet/test/ru/ibm855.txt +5 -0
- data/ext/uchardet/test/ru/ibm866.txt +11 -0
- data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
- data/ext/uchardet/test/ru/koi8-r.txt +1 -0
- data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
- data/ext/uchardet/test/ru/windows-1251.txt +4 -0
- data/ext/uchardet/test/sk/ibm852.txt +3 -0
- data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
- data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
- data/ext/uchardet/test/sk/utf-8.txt +3 -0
- data/ext/uchardet/test/sk/windows-1250.txt +3 -0
- data/ext/uchardet/test/sl/ibm852.txt +9 -0
- data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
- data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
- data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
- data/ext/uchardet/test/sl/utf-8.txt +9 -0
- data/ext/uchardet/test/sl/windows-1250.txt +9 -0
- data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
- data/ext/uchardet/test/sv/utf-8.txt +10 -0
- data/ext/uchardet/test/sv/windows-1252.txt +10 -0
- data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
- data/ext/uchardet/test/th/tis-620.txt +5 -0
- data/ext/uchardet/test/th/utf-8.txt +1 -0
- data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
- data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
- data/ext/uchardet/test/uchardet-tests.c +130 -0
- data/ext/uchardet/test/vi/utf-8.txt +4 -0
- data/ext/uchardet/test/vi/viscii.txt +4 -0
- data/ext/uchardet/test/vi/windows-1258.txt +4 -0
- data/ext/uchardet/test/zh/big5.txt +1 -0
- data/ext/uchardet/test/zh/euc-tw.txt +1 -0
- data/ext/uchardet/test/zh/gb18030.txt +1 -0
- data/ext/uchardet/test/zh/utf-8.txt +1 -0
- data/ext/uchardet/uchardet.doap +51 -0
- data/ext/uchardet/uchardet.pc.in +10 -0
- data/lib/cchardet.rb +56 -0
- data/lib/cchardet/lib_finder.rb +32 -0
- data/lib/cchardet/version.rb +5 -0
- metadata +362 -0
@@ -0,0 +1,158 @@
|
|
1
|
+
= Logs of language model for Danish (da) =
|
2
|
+
|
3
|
+
- Generated by BuildLangModel.py
|
4
|
+
- Started: 2016-02-19 17:53:58.564190
|
5
|
+
- Maximum depth: 4
|
6
|
+
- Max number of pages: 100
|
7
|
+
|
8
|
+
== Parsed pages ==
|
9
|
+
|
10
|
+
Forside (revision 2692411)
|
11
|
+
16. februar (revision 6877446)
|
12
|
+
17. februar (revision 8454583)
|
13
|
+
1878 (revision 8280505)
|
14
|
+
19. februar (revision 8206479)
|
15
|
+
1922 (revision 8455105)
|
16
|
+
1926 (revision 8425271)
|
17
|
+
1942 (revision 8443554)
|
18
|
+
1945 (revision 8448461)
|
19
|
+
1948 (revision 8454392)
|
20
|
+
1985 (revision 8409096)
|
21
|
+
2. verdenskrig (revision 8433181)
|
22
|
+
23. oktober (revision 6877825)
|
23
|
+
26. oktober (revision 7849938)
|
24
|
+
3C 273 (revision 8443798)
|
25
|
+
A-bus (revision 8427319)
|
26
|
+
Aktuelle begivenheder (revision 8440596)
|
27
|
+
B-52 Stratofortress (revision 8422571)
|
28
|
+
Borgerkrigen i Syrien (revision 8447763)
|
29
|
+
Boutros Boutros-Ghali (revision 8453935)
|
30
|
+
Brasilien (revision 8452750)
|
31
|
+
Cusco (region) (revision 7693764)
|
32
|
+
Danmark (revision 8451178)
|
33
|
+
Danmark i Eurovision Song Contest (revision 8453514)
|
34
|
+
Dansk (sprog) (revision 8455750)
|
35
|
+
Dansk Melodi Grand Prix 2016 (revision 8452164)
|
36
|
+
Dobbeltmordet på Peter Bangs Vej (revision 8334648)
|
37
|
+
Encyklopædi (revision 8446641)
|
38
|
+
Eritrea-sagen (revision 8452285)
|
39
|
+
Eurovision Song Contest 2014 (revision 8445804)
|
40
|
+
Eurovision Song Contest 2016 (revision 8453588)
|
41
|
+
Flygtningekrisen i Europa 2015 (revision 8452286)
|
42
|
+
Fonograf (revision 8177165)
|
43
|
+
Formel 1 (revision 8450846)
|
44
|
+
Formel 1 2016 (revision 8456463)
|
45
|
+
Frederik 6. (revision 8438503)
|
46
|
+
Første observation af gravitationsbølger (revision 8451269)
|
47
|
+
Grammofon (revision 8375093)
|
48
|
+
Guadalcanal (revision 7796248)
|
49
|
+
Harper Lee (revision 8456583)
|
50
|
+
Hartkorn (revision 8437552)
|
51
|
+
IC4 (revision 8446402)
|
52
|
+
IC4-sagen (revision 8434463)
|
53
|
+
Islamisk Stat (revision 8439228)
|
54
|
+
Jonathan Leunbach (revision 8452603)
|
55
|
+
Juliane Marie af Braunschweig-Wolfenbüttel (revision 8437957)
|
56
|
+
Kaliumklorid (revision 8452216)
|
57
|
+
Kejserriget Japan (revision 8044942)
|
58
|
+
Kevin Magnussen (revision 8455302)
|
59
|
+
København (revision 8427847)
|
60
|
+
LIGO (revision 8451266)
|
61
|
+
Latinamerika (revision 7692181)
|
62
|
+
Leonid Hurwicz (revision 8445727)
|
63
|
+
Lighthouse X (revision 8452940)
|
64
|
+
Linkoban (revision 8455879)
|
65
|
+
Machu Picchu (revision 8406907)
|
66
|
+
Matador (tv-serie) (revision 8454648)
|
67
|
+
Middelaldercentret (revision 8449194)
|
68
|
+
Nobelprisen (revision 8409809)
|
69
|
+
Nykøbing Falster (revision 8452825)
|
70
|
+
Nyligt afdøde (revision 8456580)
|
71
|
+
Overvågning (revision 8455039)
|
72
|
+
Panorama (foto) (revision 8448393)
|
73
|
+
Peru (revision 8437485)
|
74
|
+
Peter Lauritsen (revision 8456097)
|
75
|
+
Professor (revision 8415451)
|
76
|
+
Renault F1 (revision 8450843)
|
77
|
+
S-bus (revision 8455589)
|
78
|
+
Salomonøerne (revision 8238961)
|
79
|
+
Slaget om Belgien (1940) (revision 8430013)
|
80
|
+
Slaget om Guadalcanal (revision 7762887)
|
81
|
+
Slaget om Henderson Field (revision 8445480)
|
82
|
+
Slaget om Iwo Jima (revision 8145239)
|
83
|
+
Soldiers of Love (Lighthouse X-sang) (revision 8452929)
|
84
|
+
Solen (revision 8276478)
|
85
|
+
Stillehavskrigen (revision 8430649)
|
86
|
+
Stockholm (revision 8358042)
|
87
|
+
Søslaget ved Guadalcanal (revision 7772812)
|
88
|
+
Thomas Edison (revision 8282441)
|
89
|
+
Togulykken ved Bad Aibling (revision 8455364)
|
90
|
+
Topografi (revision 6886168)
|
91
|
+
USA (revision 8448088)
|
92
|
+
United States Army (revision 8401635)
|
93
|
+
United States Marine Corps (revision 8401667)
|
94
|
+
Vestallierede (revision 6961443)
|
95
|
+
Wikimedia (revision 8263252)
|
96
|
+
Wikipedia (revision 8267051)
|
97
|
+
Zikavirus (revision 8454832)
|
98
|
+
1. februar (revision 8404985)
|
99
|
+
10. februar (revision 6877431)
|
100
|
+
11. februar (revision 6877433)
|
101
|
+
12. februar (revision 6877437)
|
102
|
+
13. februar (revision 6877438)
|
103
|
+
14. februar (revision 6877441)
|
104
|
+
1497 (revision 7369489)
|
105
|
+
15. februar (revision 7329463)
|
106
|
+
1560 (revision 7874693)
|
107
|
+
1568 (revision 7369703)
|
108
|
+
1620 (revision 7423903)
|
109
|
+
1688 (revision 7367090)
|
110
|
+
18. februar (revision 6877450)
|
111
|
+
|
112
|
+
== End of Parsed pages ==
|
113
|
+
|
114
|
+
- Wikipedia parsing ended at: 2016-02-19 17:56:42.162636
|
115
|
+
|
116
|
+
53 characters appeared 1301488 times.
|
117
|
+
|
118
|
+
First 30 characters:
|
119
|
+
[ 0] Char e: 15.272749345364689 %
|
120
|
+
[ 1] Char r: 8.48482659847805 %
|
121
|
+
[ 2] Char n: 7.695652975670924 %
|
122
|
+
[ 3] Char t: 6.977014002434137 %
|
123
|
+
[ 4] Char a: 6.780469739252302 %
|
124
|
+
[ 5] Char i: 6.164636170291236 %
|
125
|
+
[ 6] Char s: 6.0942551909814 %
|
126
|
+
[ 7] Char d: 5.953493232361728 %
|
127
|
+
[ 8] Char l: 5.076650725938311 %
|
128
|
+
[ 9] Char o: 4.883026197706011 %
|
129
|
+
[10] Char g: 4.012253666572415 %
|
130
|
+
[11] Char k: 3.232607599916403 %
|
131
|
+
[12] Char m: 3.0863135119186653 %
|
132
|
+
[13] Char f: 2.701600014752345 %
|
133
|
+
[14] Char v: 2.13970470722742 %
|
134
|
+
[15] Char b: 1.982423195603801 %
|
135
|
+
[16] Char u: 1.8339777239590376 %
|
136
|
+
[17] Char p: 1.5789619266562582 %
|
137
|
+
[18] Char h: 1.3433085821767086 %
|
138
|
+
[19] Char ø: 0.8730775850411222 %
|
139
|
+
[20] Char y: 0.859938777768216 %
|
140
|
+
[21] Char å: 0.7699648402443973 %
|
141
|
+
[22] Char æ: 0.7208671920140639 %
|
142
|
+
[23] Char j: 0.644108896893402 %
|
143
|
+
[24] Char c: 0.5698093259407694 %
|
144
|
+
[25] Char w: 0.11087309295206717 %
|
145
|
+
[26] Char z: 0.05309307500338075 %
|
146
|
+
[27] Char x: 0.032424424965885205 %
|
147
|
+
[28] Char é: 0.032193919575132464 %
|
148
|
+
[29] Char q: 0.012139950579644223 %
|
149
|
+
|
150
|
+
The first 30 characters have an accumulated ratio of 0.9997241618823994.
|
151
|
+
|
152
|
+
964 sequences found.
|
153
|
+
|
154
|
+
First 512 (typical positive ratio): 0.9968082796759031
|
155
|
+
Next 512 (512-1024): 7.68351302509128e-07
|
156
|
+
Rest: 3.903127820947816e-17
|
157
|
+
|
158
|
+
- Processing end: 2016-02-19 17:56:42.304278
|
@@ -0,0 +1,110 @@
|
|
1
|
+
= Logs of language model for Esperanto (eo) =
|
2
|
+
|
3
|
+
- Generated by BuildLangModel.py
|
4
|
+
- Started: 2015-12-04 01:22:51.466573
|
5
|
+
- Maximum depth: 3
|
6
|
+
- Max number of pages: 50
|
7
|
+
|
8
|
+
== Parsed pages ==
|
9
|
+
|
10
|
+
Vikipedio:Ĉefpaĝo (revision 5524911)
|
11
|
+
10-a de novembro (revision 5792999)
|
12
|
+
12-a de novembro (revision 5793854)
|
13
|
+
13-a de novembro (revision 5795088)
|
14
|
+
18-a de novembro (revision 5796972)
|
15
|
+
2-a de novembro (revision 5772615)
|
16
|
+
20-a de novembro (revision 5799664)
|
17
|
+
2015 (revision 5791963)
|
18
|
+
22-a de novembro (revision 5799355)
|
19
|
+
24-a de novembro (revision 5800563)
|
20
|
+
4-a de decembro (revision 5806422)
|
21
|
+
4-a de novembro (revision 5789811)
|
22
|
+
5-a de novembro (revision 5789774)
|
23
|
+
6-a de novembro (revision 5790336)
|
24
|
+
7-a de novembro (revision 5791066)
|
25
|
+
8-a de novembro (revision 5791337)
|
26
|
+
9-a de novembro (revision 5791916)
|
27
|
+
A Night at the Opera (Queen) (revision 5184272)
|
28
|
+
Abdelhamid Abaaoud (revision 5800134)
|
29
|
+
André Glucksmann (revision 5792591)
|
30
|
+
Anglio (revision 5693468)
|
31
|
+
Argentino (revision 5804665)
|
32
|
+
Atencoj de novembro 2015 en Parizo (revision 5800135)
|
33
|
+
Aung San Suu Kyi (revision 5791362)
|
34
|
+
Austin FX4 (revision 5583207)
|
35
|
+
Azilo (revision 5751210)
|
36
|
+
Aŭstrio (revision 5804014)
|
37
|
+
Bahio (revision 5773065)
|
38
|
+
Bamako (revision 5798202)
|
39
|
+
Bataclan (revision 5795605)
|
40
|
+
Bejruto (revision 5774306)
|
41
|
+
Birmo (revision 5790386)
|
42
|
+
Blonda (revision 5441229)
|
43
|
+
Bohemian rhapsody (revision 5654078)
|
44
|
+
Cayetano Redondo (revision 5591025)
|
45
|
+
Ciro la 2-a (revision 5774667)
|
46
|
+
DJ Abdel (revision 5628860)
|
47
|
+
Daniela Mercury (revision 5764721)
|
48
|
+
Decembro de 2015 (revision 5626904)
|
49
|
+
Dilatkoeficiento (revision 5806460)
|
50
|
+
Eksproprietigo (revision 5586845)
|
51
|
+
Elektroniko (revision 5788966)
|
52
|
+
Elle s'appelait Sarah (filmo) (revision 5475154)
|
53
|
+
Esperanto (revision 5804190)
|
54
|
+
Federaciero (revision 5696168)
|
55
|
+
Fondaĵo Vikimedio (revision 5772681)
|
56
|
+
Francio (revision 5759775)
|
57
|
+
François Hollande (revision 5627721)
|
58
|
+
|
59
|
+
== End of Parsed pages ==
|
60
|
+
|
61
|
+
- Wikipedia parsing ended at: 2015-12-04 01:27:38.176708
|
62
|
+
|
63
|
+
56 characters appeared 342524 times.
|
64
|
+
|
65
|
+
First 35 characters:
|
66
|
+
[ 0] Char a: 12.557952143499435 %
|
67
|
+
[ 1] Char o: 9.84719318938235 %
|
68
|
+
[ 2] Char e: 9.10242785906973 %
|
69
|
+
[ 3] Char i: 8.362333734278474 %
|
70
|
+
[ 4] Char n: 7.6359612757062285 %
|
71
|
+
[ 5] Char r: 6.630192336887342 %
|
72
|
+
[ 6] Char t: 5.70821314710794 %
|
73
|
+
[ 7] Char l: 5.610409781504361 %
|
74
|
+
[ 8] Char s: 5.004320865107262 %
|
75
|
+
[ 9] Char k: 3.8855671427403626 %
|
76
|
+
[10] Char d: 3.7194473963868226 %
|
77
|
+
[11] Char j: 3.28531723324497 %
|
78
|
+
[12] Char u: 2.8465158645817517 %
|
79
|
+
[13] Char m: 2.787833845219605 %
|
80
|
+
[14] Char p: 2.6582078920017285 %
|
81
|
+
[15] Char g: 1.6825098387266293 %
|
82
|
+
[16] Char v: 1.4048650605505015 %
|
83
|
+
[17] Char c: 1.3823848839789328 %
|
84
|
+
[18] Char b: 1.1406499982482978 %
|
85
|
+
[19] Char f: 1.077296773364786 %
|
86
|
+
[20] Char z: 0.7342551178895493 %
|
87
|
+
[21] Char h: 0.6735294461118053 %
|
88
|
+
[22] Char ĝ: 0.53572888323154 %
|
89
|
+
[23] Char ŭ: 0.4268314045147202 %
|
90
|
+
[24] Char ĉ: 0.33545094650301877 %
|
91
|
+
[25] Char y: 0.17079095187490512 %
|
92
|
+
[26] Char ŝ: 0.15327393116978666 %
|
93
|
+
[27] Char w: 0.1442234704721421 %
|
94
|
+
[28] Char ĵ: 0.1039343228503696 %
|
95
|
+
[29] Char á: 0.0814541462788009 %
|
96
|
+
[30] Char ó: 0.05430276418586727 %
|
97
|
+
[31] Char é: 0.053718863495696656 %
|
98
|
+
[32] Char q: 0.04350060141771087 %
|
99
|
+
[33] Char x: 0.040873048311943105 %
|
100
|
+
[34] Char ĥ: 0.03824549520617533 %
|
101
|
+
|
102
|
+
The first 35 characters have an accumulated ratio of 0.9991971365510156.
|
103
|
+
|
104
|
+
989 sequences found.
|
105
|
+
|
106
|
+
First 512 (typical positive ratio): 0.9942980632768038
|
107
|
+
Next 512 (512-1024): 0.0015327393116978665
|
108
|
+
Rest: -5.0306980803327406e-17
|
109
|
+
|
110
|
+
- Processing end: 2015-12-04 01:27:38.307198
|
@@ -0,0 +1,159 @@
|
|
1
|
+
= Logs of language model for Estonian (et) =
|
2
|
+
|
3
|
+
- Generated by BuildLangModel.py
|
4
|
+
- Started: 2016-09-26 23:45:22.351942
|
5
|
+
- Maximum depth: 5
|
6
|
+
- Max number of pages: 100
|
7
|
+
|
8
|
+
== Parsed pages ==
|
9
|
+
|
10
|
+
Harilik pohl (revision 4248853)
|
11
|
+
A-vitamiin (revision 4330862)
|
12
|
+
Aasta keskmine sademete hulk (revision 4266801)
|
13
|
+
Aasta keskmine õhutemperatuur (revision 3902142)
|
14
|
+
Ahm (revision 4343671)
|
15
|
+
Ain Raal (revision 4464651)
|
16
|
+
Alalehed (revision 2892741)
|
17
|
+
Alamliik (revision 3522810)
|
18
|
+
Alaska (revision 4216575)
|
19
|
+
Aleksander Heintalu (revision 4445156)
|
20
|
+
Aleuudid (revision 4335893)
|
21
|
+
Ameerika jänes (revision 4325220)
|
22
|
+
Ameerika valgejänes (revision 4355263)
|
23
|
+
Anneli Sihvart (revision 4211078)
|
24
|
+
Arbutiin (revision 4451788)
|
25
|
+
Baribal (revision 4268462)
|
26
|
+
Bensoehape (revision 3810308)
|
27
|
+
Binaarne nomenklatuur (revision 3970950)
|
28
|
+
C-vitamiin (revision 4444353)
|
29
|
+
Droog (revision 4352968)
|
30
|
+
E-vitamiin (revision 4336726)
|
31
|
+
Eesti (revision 4474984)
|
32
|
+
Eesti Entsüklopeediakirjastus (revision 4012421)
|
33
|
+
Eesti köök (revision 4314947)
|
34
|
+
Ellips (revision 4272113)
|
35
|
+
Emakakael (botaanika) (revision 3521516)
|
36
|
+
Euraasia (revision 3710768)
|
37
|
+
Fenoloogia (revision 3512905)
|
38
|
+
Folaadid (revision 4266628)
|
39
|
+
Fosfor (revision 4270122)
|
40
|
+
Fotosüntees (revision 4380600)
|
41
|
+
Fruktoos (revision 4285660)
|
42
|
+
Glükoos (revision 4047315)
|
43
|
+
Gneiss (revision 4333338)
|
44
|
+
Graniit (revision 4435351)
|
45
|
+
Gröönimaa (revision 4331557)
|
46
|
+
Halljänes (revision 4051603)
|
47
|
+
Haned (revision 4127680)
|
48
|
+
Happeline keskkond (revision 2966453)
|
49
|
+
Heilongjiang (revision 4342364)
|
50
|
+
Hendrik Relve (revision 4342591)
|
51
|
+
Hiina (revision 4448121)
|
52
|
+
Holland (revision 4307885)
|
53
|
+
Hunt (revision 4427752)
|
54
|
+
Hõimkond (revision 3489569)
|
55
|
+
Hüdrofiilsus (revision 4309797)
|
56
|
+
Ida-Euroopa (revision 4337624)
|
57
|
+
Ida-sinilind (revision 4248853)
|
58
|
+
Ida-vöötorav (revision 3520679)
|
59
|
+
Igihaljus (revision 3536500)
|
60
|
+
Ilves (revision 4404632)
|
61
|
+
Imetaja (revision 4289188)
|
62
|
+
Indiaanlased (revision 4479868)
|
63
|
+
Indrek Rohtmets (revision 4218674)
|
64
|
+
Itaalia (revision 4404119)
|
65
|
+
Jaapan (revision 4465542)
|
66
|
+
Jilin (revision 3894473)
|
67
|
+
Jood (revision 4025060)
|
68
|
+
Juurestik (revision 3341159)
|
69
|
+
Jääkaru (revision 4372399)
|
70
|
+
Jõhvikas (revision 4391549)
|
71
|
+
Kaalium (revision 4486067)
|
72
|
+
Kaheidulehelised (revision 4031352)
|
73
|
+
Kaheli õiekate (revision 3063362)
|
74
|
+
Kahesuguline õis (revision 3383221)
|
75
|
+
Kaitsestaatus (revision 3527096)
|
76
|
+
Kajakas (revision 4456839)
|
77
|
+
Kalorsus (revision 3843290)
|
78
|
+
Kaltsium (revision 4339861)
|
79
|
+
Kanada (revision 4434682)
|
80
|
+
Kanalised (revision 3616579)
|
81
|
+
Kanarbikulaadsed (revision 4318215)
|
82
|
+
Kanarbikulised (revision 3534760)
|
83
|
+
Karboksüülhapped (revision 3659011)
|
84
|
+
Karoteen (revision 4347634)
|
85
|
+
Kasvuperiood (revision 4231717)
|
86
|
+
Katteseemnetaimed (revision 4176294)
|
87
|
+
Kaukasus (revision 4476003)
|
88
|
+
Kesk-Euroopa (revision 3580746)
|
89
|
+
Kimalane (revision 4261145)
|
90
|
+
Kiudained (toit) (revision 3538655)
|
91
|
+
Klass (bioloogia) (revision 3489567)
|
92
|
+
Kliima (revision 4160781)
|
93
|
+
Korea (revision 4329396)
|
94
|
+
Kroom (revision 4030460)
|
95
|
+
Kroonlehed (revision 3543291)
|
96
|
+
Kuusepüü (revision 4028988)
|
97
|
+
Kvertsetiin (revision 4448461)
|
98
|
+
Laanemets (revision 4001157)
|
99
|
+
Laanepüü (revision 4475093)
|
100
|
+
Laiuskraad (revision 3990366)
|
101
|
+
Leesikas (revision 4420533)
|
102
|
+
Lehed (revision 4471821)
|
103
|
+
Leheroots (revision 3595351)
|
104
|
+
Liik (bioloogia) (revision 4320981)
|
105
|
+
Liiv (revision 4399494)
|
106
|
+
Liivakivi (revision 4330598)
|
107
|
+
Linnaeus (revision 4276836)
|
108
|
+
Linnud (revision 4479668)
|
109
|
+
|
110
|
+
== End of Parsed pages ==
|
111
|
+
|
112
|
+
- Wikipedia parsing ended at: 2016-09-26 23:47:54.476445
|
113
|
+
|
114
|
+
55 characters appeared 433559 times.
|
115
|
+
|
116
|
+
First 33 characters:
|
117
|
+
[ 0] Char a: 12.486881831538499 %
|
118
|
+
[ 1] Char i: 10.26503889897338 %
|
119
|
+
[ 2] Char e: 10.177622884082673 %
|
120
|
+
[ 3] Char s: 8.710233209320991 %
|
121
|
+
[ 4] Char t: 6.56634967789851 %
|
122
|
+
[ 5] Char l: 6.051540851418146 %
|
123
|
+
[ 6] Char u: 5.423944607308348 %
|
124
|
+
[ 7] Char n: 5.131020230233947 %
|
125
|
+
[ 8] Char k: 4.663033174262327 %
|
126
|
+
[ 9] Char o: 4.526950195936424 %
|
127
|
+
[10] Char d: 4.167368224393911 %
|
128
|
+
[11] Char r: 3.6740097656835635 %
|
129
|
+
[12] Char m: 3.552688330769284 %
|
130
|
+
[13] Char v: 2.4700213811730354 %
|
131
|
+
[14] Char p: 1.9229216784797456 %
|
132
|
+
[15] Char g: 1.865259399528092 %
|
133
|
+
[16] Char h: 1.8043680329551455 %
|
134
|
+
[17] Char j: 1.6860450365463524 %
|
135
|
+
[18] Char ä: 1.0247740215287884 %
|
136
|
+
[19] Char b: 0.9255949017319443 %
|
137
|
+
[20] Char õ: 0.9246723052687178 %
|
138
|
+
[21] Char ü: 0.6536595941959457 %
|
139
|
+
[22] Char f: 0.37342091849090897 %
|
140
|
+
[23] Char c: 0.34851081398379463 %
|
141
|
+
[24] Char ö: 0.24333481717597835 %
|
142
|
+
[25] Char y: 0.1287022066200909 %
|
143
|
+
[26] Char x: 0.06781084004714467 %
|
144
|
+
[27] Char w: 0.04082489349777078 %
|
145
|
+
[28] Char q: 0.020989069538401926 %
|
146
|
+
[29] Char š: 0.018913227496142396 %
|
147
|
+
[30] Char z: 0.017529332801302706 %
|
148
|
+
[31] Char ō: 0.010379210211297655 %
|
149
|
+
[32] Char ž: 0.009687262863877812 %
|
150
|
+
|
151
|
+
The first 33 characters have an accumulated ratio of 0.9995410082595447.
|
152
|
+
|
153
|
+
853 sequences found.
|
154
|
+
|
155
|
+
First 512 (typical positive ratio): 0.9972721312183132
|
156
|
+
Next 512 (512-1024): 9.687262863877811e-05
|
157
|
+
Rest: -5.204170427930421e-18
|
158
|
+
|
159
|
+
- Processing end: 2016-09-26 23:47:54.561846
|
@@ -0,0 +1,156 @@
|
|
1
|
+
= Logs of language model for Finnish (fi) =
|
2
|
+
|
3
|
+
- Generated by BuildLangModel.py
|
4
|
+
- Started: 2016-09-21 18:12:24.181917
|
5
|
+
- Maximum depth: 5
|
6
|
+
- Max number of pages: 100
|
7
|
+
|
8
|
+
== Parsed pages ==
|
9
|
+
|
10
|
+
Yhdistynyt kuningaskunta (revision 15843357)
|
11
|
+
1. toukokuuta (revision 15910178)
|
12
|
+
1700-luku (revision 15493702)
|
13
|
+
1707 (revision 15106709)
|
14
|
+
1800-luku (revision 15708929)
|
15
|
+
2014 (revision 15891601)
|
16
|
+
409 (revision 12809782)
|
17
|
+
5. marraskuuta (revision 15421719)
|
18
|
+
927 (revision 12785964)
|
19
|
+
Aasia (revision 15948161)
|
20
|
+
Abhasia (revision 15730328)
|
21
|
+
Adolf Hitler (revision 15951829)
|
22
|
+
Afrikka (revision 15934209)
|
23
|
+
Agatha Christie (revision 15760740)
|
24
|
+
Aikavyöhyke (revision 15800313)
|
25
|
+
Ajoneuvon kansallisuustunnus (revision 15897445)
|
26
|
+
Akrotiri ja Dhekelia (revision 14625383)
|
27
|
+
Alamaat (revision 15913741)
|
28
|
+
Alan Turing (revision 15904871)
|
29
|
+
Alankomaat (revision 15936643)
|
30
|
+
Albania (revision 15767604)
|
31
|
+
Alec Guinness (revision 15363805)
|
32
|
+
Alexander Fleming (revision 15023225)
|
33
|
+
Alfred Hitchcock (revision 15892843)
|
34
|
+
Alfred Tennyson (revision 15856114)
|
35
|
+
Allen Jones (revision 12871703)
|
36
|
+
Andorra (revision 15913862)
|
37
|
+
Andrew Lloyd Webber (revision 14978349)
|
38
|
+
Anglit (revision 15902350)
|
39
|
+
Anguilla (revision 15854041)
|
40
|
+
Anne Brontë (revision 14287992)
|
41
|
+
Anthony Eden (revision 14391831)
|
42
|
+
Antigua ja Barbuda (revision 15196967)
|
43
|
+
Arabian Lawrence (revision 15736417)
|
44
|
+
Argentiina (revision 15676474)
|
45
|
+
Armenia (revision 15634470)
|
46
|
+
Arthur Conan Doyle (revision 15402837)
|
47
|
+
Arts and Crafts (revision 15806930)
|
48
|
+
Aurinko (revision 15934252)
|
49
|
+
Australia (revision 15934255)
|
50
|
+
Avara luonto (revision 15815943)
|
51
|
+
Azerbaidžan (revision 15946891)
|
52
|
+
BBC (revision 15866026)
|
53
|
+
BKT (revision 15656549)
|
54
|
+
Bahama (revision 15516869)
|
55
|
+
Bangladesh (revision 15883994)
|
56
|
+
Bank of England (revision 14481173)
|
57
|
+
Barbados (revision 15839821)
|
58
|
+
Barbara Hepworth (revision 15106880)
|
59
|
+
Bath (revision 15869900)
|
60
|
+
Beatrix Potter (revision 15057380)
|
61
|
+
Belfast (revision 15715934)
|
62
|
+
Belgia (revision 15932391)
|
63
|
+
Belize (revision 15665086)
|
64
|
+
Ben Nevis (revision 15610196)
|
65
|
+
Bengalin kieli (revision 15551820)
|
66
|
+
Benjamin Britten (revision 15081615)
|
67
|
+
Bermuda (revision 15632621)
|
68
|
+
Bertrand Russell (revision 14631969)
|
69
|
+
Bhutan (revision 15377394)
|
70
|
+
Big Ben (revision 14897401)
|
71
|
+
Big Brother (revision 14641391)
|
72
|
+
Birmingham (revision 15855259)
|
73
|
+
Black Sabbath (revision 15839917)
|
74
|
+
Bosnia ja Hertsegovina (revision 15934266)
|
75
|
+
Botswana (revision 15524955)
|
76
|
+
Bristol (revision 15891889)
|
77
|
+
Bristolin kanaali (revision 15849713)
|
78
|
+
Bristolin kansainvälinen lentoasema (revision 14452870)
|
79
|
+
Britannia (provinssi) (revision 14557442)
|
80
|
+
Britannian avoin golfturnaus (revision 14293265)
|
81
|
+
Britannian kuninkaallinen perhe (revision 15522149)
|
82
|
+
Britannian talous (revision 15470242)
|
83
|
+
Britannian väestö (revision 15661241)
|
84
|
+
Brittein saaret (revision 15805422)
|
85
|
+
Brittiläinen Antarktiksen alue (revision 15836227)
|
86
|
+
Brittiläinen Intia (revision 15593126)
|
87
|
+
Brittiläinen Intian valtameren alue (revision 14272903)
|
88
|
+
Brittiläinen imperiumi (revision 15906600)
|
89
|
+
Brittiläinen kansainyhteisö (revision 15894379)
|
90
|
+
Brittiläinen keittiö (revision 13393533)
|
91
|
+
Brittiläinen kulttuuri (revision 15951407)
|
92
|
+
Brittiläiset Neitsytsaaret (revision 15910520)
|
93
|
+
Brittiläiset merentakaiset alueet (revision 15836213)
|
94
|
+
Brunei (revision 15580824)
|
95
|
+
Bruttokansantuote (revision 15656549)
|
96
|
+
Bulgaria (revision 15944101)
|
97
|
+
Burma (revision 15627218)
|
98
|
+
Cambridge (revision 14641664)
|
99
|
+
Cambridgen yliopisto (revision 15493340)
|
100
|
+
Canterburyn tarinoita (revision 15232140)
|
101
|
+
Cardiff (revision 15840398)
|
102
|
+
Caymansaaret (revision 15914575)
|
103
|
+
Channel 4 (revision 15882475)
|
104
|
+
Charles Babbage (revision 15203616)
|
105
|
+
Charles Chaplin (revision 15674652)
|
106
|
+
Charles Darwin (revision 15894085)
|
107
|
+
Charles Dickens (revision 15699592)
|
108
|
+
Charles Dickensin joulutarina (revision 15116247)
|
109
|
+
|
110
|
+
== End of Parsed pages ==
|
111
|
+
|
112
|
+
- Wikipedia parsing ended at: 2016-09-21 18:15:05.189221
|
113
|
+
|
114
|
+
61 characters appeared 940364 times.
|
115
|
+
|
116
|
+
First 30 characters:
|
117
|
+
[ 0] Char a: 12.508773198463574 %
|
118
|
+
[ 1] Char i: 10.969475649854738 %
|
119
|
+
[ 2] Char n: 8.815841525196626 %
|
120
|
+
[ 3] Char t: 8.80169806585535 %
|
121
|
+
[ 4] Char e: 7.8206949649284745 %
|
122
|
+
[ 5] Char s: 7.595782058862313 %
|
123
|
+
[ 6] Char l: 5.963541777439374 %
|
124
|
+
[ 7] Char o: 5.439808414613916 %
|
125
|
+
[ 8] Char u: 5.0102938861972595 %
|
126
|
+
[ 9] Char k: 4.589712068943515 %
|
127
|
+
[10] Char r: 3.1231523112326713 %
|
128
|
+
[11] Char ä: 3.041800834570443 %
|
129
|
+
[12] Char m: 3.0392486313810396 %
|
130
|
+
[13] Char v: 2.156292669647073 %
|
131
|
+
[14] Char h: 1.996141919512019 %
|
132
|
+
[15] Char j: 1.9248929138078446 %
|
133
|
+
[16] Char p: 1.6324529650220552 %
|
134
|
+
[17] Char y: 1.6323466232224966 %
|
135
|
+
[18] Char d: 1.1981530556252684 %
|
136
|
+
[19] Char b: 0.6835650875618378 %
|
137
|
+
[20] Char g: 0.5793501239945382 %
|
138
|
+
[21] Char c: 0.5056552569005194 %
|
139
|
+
[22] Char ö: 0.38931732818355447 %
|
140
|
+
[23] Char f: 0.215023118707224 %
|
141
|
+
[24] Char w: 0.2106631049253268 %
|
142
|
+
[25] Char z: 0.06593191572625068 %
|
143
|
+
[26] Char x: 0.024458613898447838 %
|
144
|
+
[27] Char š: 0.010421496356729947 %
|
145
|
+
[28] Char ž: 0.007869293167326695 %
|
146
|
+
[29] Char q: 0.007762951367768225 %
|
147
|
+
|
148
|
+
The first 30 characters have an accumulated ratio of 0.9996012182516557.
|
149
|
+
|
150
|
+
919 sequences found.
|
151
|
+
|
152
|
+
First 512 (typical positive ratio): 0.9985378147555799
|
153
|
+
Next 512 (512-1024): 1.0634179955846884e-06
|
154
|
+
Rest: 3.881443777498106e-17
|
155
|
+
|
156
|
+
- Processing end: 2016-09-21 18:15:05.307164
|