cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,130 @@
1
+ /* ***** BEGIN LICENSE BLOCK *****
2
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3
+ *
4
+ * The contents of this file are subject to the Mozilla Public License Version
5
+ * 1.1 (the "License"); you may not use this file except in compliance with
6
+ * the License. You may obtain a copy of the License at
7
+ * http://www.mozilla.org/MPL/
8
+ *
9
+ * Software distributed under the License is distributed on an "AS IS" basis,
10
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11
+ * for the specific language governing rights and limitations under the
12
+ * License.
13
+ *
14
+ * The Original Code is Mozilla Universal charset detector code.
15
+ *
16
+ * The Initial Developer of the Original Code is
17
+ * Netscape Communications Corporation.
18
+ * Portions created by the Initial Developer are Copyright (C) 2001
19
+ * the Initial Developer. All Rights Reserved.
20
+ *
21
+ * Contributor(s):
22
+ * Jehan <jehan@girinstud.io>
23
+ *
24
+ * Alternatively, the contents of this file may be used under the terms of
25
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
26
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
+ * in which case the provisions of the GPL or the LGPL are applicable instead
28
+ * of those above. If you wish to allow use of your version of this file only
29
+ * under the terms of either the GPL or the LGPL, and not to allow others to
30
+ * use your version of this file under the terms of the MPL, indicate your
31
+ * decision by deleting the provisions above and replace them with the notice
32
+ * and other provisions required by the GPL or the LGPL. If you do not delete
33
+ * the provisions above, a recipient may use your version of this file under
34
+ * the terms of any one of the MPL, the GPL or the LGPL.
35
+ *
36
+ * ***** END LICENSE BLOCK ***** */
37
+
38
+ #include <ctype.h>
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <string.h>
42
+
43
+ #include "../src/uchardet.h"
44
+
45
+ #define BUFFER_SIZE 65536
46
+
47
+ char *
48
+ detect(FILE *fp)
49
+ {
50
+ uchardet_t handle = uchardet_new();
51
+ char *charset;
52
+ char buffer[BUFFER_SIZE];
53
+ int i;
54
+
55
+ while (!feof(fp))
56
+ {
57
+ size_t len = fread(buffer, 1, BUFFER_SIZE, fp);
58
+ int retval = uchardet_handle_data(handle, buffer, len);
59
+ if (retval != 0)
60
+ {
61
+ fprintf(stderr,
62
+ "uchardet-tests: handle data error.\n");
63
+ exit(1);
64
+ }
65
+ }
66
+ uchardet_data_end(handle);
67
+
68
+ charset = strdup(uchardet_get_encoding(handle, 0));
69
+ for (i = 0; charset[i]; i++)
70
+ {
71
+ /* Our test files are lowercase. */
72
+ charset[i] = tolower(charset[i]);
73
+ }
74
+
75
+ uchardet_delete(handle);
76
+
77
+ return charset;
78
+ }
79
+
80
/*
 * Test driver. Detects the charset of the file named by argv[1] and compares
 * it with the charset encoded in the file name: the part of the base name
 * (after the last '/') that precedes the first '.', e.g. "fr/iso-8859-1.txt"
 * expects "iso-8859-1".
 *
 * Returns 0 when the detected charset equals the expected one, and a
 * non-zero value on mismatch, wrong usage or any error.
 */
int
main(int argc, char ** argv)
{
    FILE *f;
    char *filename;
    char *expected_charset;
    char *charset;
    int   failed;

    if (argc != 2)
    {
        /* The test program expects exactly 1 argument. */
        fprintf(stderr,
                "uchardet-tests expects exactly 1 argument\n");
        return 1;
    }

    /* Work on a private copy: strtok() below writes into the string. */
    filename = strdup(argv[1]);
    if (filename == NULL)
    {
        fprintf(stderr,
                "uchardet-tests: out of memory.\n");
        return 1;
    }

    /*
     * Binary mode: the detector must see the raw bytes. Text mode may
     * translate line endings or stop early on platforms that distinguish
     * the two, which corrupts UTF-16/UTF-32 test data.
     */
    f = fopen(filename, "rb");
    if (f == NULL)
    {
        /* Error opening the test file. */
        fprintf(stderr,
                "uchardet-tests: error opening the test file \"%s\"\n",
                filename);
        free(filename);
        return 1;
    }

    /* Keep only the base name, then cut it at the first '.'. */
    expected_charset = strrchr(filename, '/');
    expected_charset = (expected_charset == NULL) ? filename
                                                  : expected_charset + 1;
    expected_charset = strtok(expected_charset, ".");
    if (expected_charset == NULL)
    {
        /* Empty or dots-only base name: there is nothing to compare with. */
        fprintf(stderr,
                "uchardet-tests: cannot derive the expected charset from \"%s\"\n",
                argv[1]);
        fclose(f);
        free(filename);
        return 1;
    }

    charset = detect(f);
    fclose(f);

    /* In a unit test, 0 means success, other returned values mean failure. */
    failed = (strcmp(charset, expected_charset) != 0);

    free(charset);
    free(filename);

    return failed;
}
@@ -0,0 +1,4 @@
1
+ Chữ Quốc ngữ là hệ chữ viết thống nhất chính thức hiện nay của tiếng Việt, sử
2
+ dụng ký tự La Tinh, dựa trên các bảng chữ cái của nhóm ngôn ngữ Rôman,[1] đặc
3
+ biệt là bảng chữ cái Bồ Đào Nha,[2] với các dấu phụ chủ yếu từ bảng chữ cái Hy
4
+ Lạp.
@@ -0,0 +1,4 @@
1
+ Ch� Qu�c ng� l� h� ch� vi�t th�ng nh�t ch�nh th�c hi�n nay c�a ti�ng Vi�t, s�
2
+ d�ng k� t� La Tinh, d�a tr�n c�c b�ng ch� c�i c�a nh�m ng�n ng� R�man,[1] �c
3
+ bi�t l� b�ng ch� c�i B� ��o Nha,[2] v�i c�c d�u ph� ch� y�u t� b�ng ch� c�i Hy
4
+ L�p.
@@ -0,0 +1,4 @@
1
+ Ch�� Qu��c ng�� l� h�� ch�� vi��t th��ng nh��t ch�nh th��c hi��n nay cu�a ti��ng Vi��t, s��
2
+ du�ng ky� t�� La Tinh, d��a tr�n c�c ba�ng ch�� c�i cu�a nh�m ng�n ng�� R�man,[1] ���c
3
+ bi��t l� ba�ng ch�� c�i B�� ��o Nha,[2] v��i c�c d��u phu� chu� y��u t�� ba�ng ch�� c�i Hy
4
+ La�p.
@@ -0,0 +1 @@
1
+ �c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤���c�餤��
@@ -0,0 +1 @@
1
+ EUC-TWҳƺ��������������������ġ����CNS 11643����?��?������ƺ����������������EUC-TW��ŷ���
@@ -0,0 +1 @@
1
+ ����������������������������������������������������������
@@ -0,0 +1 @@
1
+ 汉字漢字統一編碼萬國碼
@@ -0,0 +1,51 @@
1
+ <Project xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
2
+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
3
+ xmlns:foaf="http://xmlns.com/foaf/0.1/"
4
+ xmlns:gnome="http://api.gnome.org/doap-extensions#"
5
+ xmlns="http://usefulinc.com/ns/doap#">
6
+
7
+ <name xml:lang="en">uchardet</name>
8
+
9
+ <shortdesc xml:lang="en">Universal Charset Detector</shortdesc>
10
+
11
+ <description xml:lang="en">
12
+ uchardet is an encoding detector library, which takes a sequence of bytes
13
+ in an unknown character encoding without any additional information, and
14
+ attempts to determine the encoding of the text. Returned encoding names
15
+ are iconv-compatible.
16
+
17
+ uchardet started as a C language binding of the original C++
18
+ implementation of the universal charset detection library by Mozilla. It
19
+ can now detect more charsets, and more reliably than the original
20
+ implementation.
21
+ </description>
22
+ <description xml:lang="fr">
23
+ uchardet est une bibliothèque de détection de codage, prenant une séquence
24
+ d'octets en entrée, représentant un texte, et tente d'en déterminer le
25
+ codage. Le nom du codage retourné est compatible iconv.
26
+
27
+ uchardet était originellement un binding en C de l'implémentation
28
+ originelle en C++ par Mozilla. L'implémentation actuelle peut détecter
29
+ plus de codages de caractères que l'originale.
30
+ </description>
31
+
32
+ <homepage rdf:resource="https://www.freedesktop.org/wiki/Software/uchardet/" />
33
+
34
+ <download-page rdf:resource="https://www.freedesktop.org/software/uchardet/releases/" />
35
+
36
+ <bug-database rdf:resource="https://gitlab.freedesktop.org/uchardet/uchardet/-/issues" />
37
+
38
+ <programming-language>C</programming-language>
39
+ <programming-language>C++</programming-language>
40
+ <programming-language>Python</programming-language>
41
+
42
+ <maintainer>
43
+ <foaf:Person>
44
+ <foaf:name>Jehan</foaf:name>
45
+ <gnome:userid>jehanp</gnome:userid>
46
+ </foaf:Person>
47
+ </maintainer>
48
+
49
+ <!-- TODO: generate MAINTAINERS from this file -->
50
+
51
+ </Project>
@@ -0,0 +1,10 @@
1
+ libdir=@CMAKE_INSTALL_FULL_LIBDIR@
2
+ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
3
+
4
+ Name: uchardet
5
+ Description: An encoding detector library ported from Mozilla
6
+ Version: @UCHARDET_VERSION@
7
+ Requires:
8
+ Libs: -L${libdir} -luchardet
9
+ Libs.private: -lstdc++
10
+ Cflags: -I${includedir}/uchardet
data/lib/cchardet.rb ADDED
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fiddle"
4
+ require "fiddle/import"
5
+
6
+ require_relative "cchardet/lib_finder"
7
+ require_relative "cchardet/version"
8
+
9
# Fiddle bindings for the uchardet character-encoding detection library.
module CChardet
  extend Fiddle::Importer

  lib_finder = CChardet::LibFinder.new
  # True when the library built inside this gem's source tree exists and is
  # therefore the one being loaded; false when a system library is used.
  IN_GEM_LIB = lib_finder.built_in_gem?.freeze

  dlload lib_finder.lib_path

  typealias "uchardet_t", "void *"

  extern "uchardet_t uchardet_new(void)"
  extern "void uchardet_delete(uchardet_t ud)"
  extern "int uchardet_handle_data(uchardet_t ud, const char *data, size_t len)"
  extern "void uchardet_data_end(uchardet_t ud)"
  extern "void uchardet_reset(uchardet_t ud)"
  if IN_GEM_LIB
    # The candidate-based functions are only bound for the in-gem library.
    extern "size_t uchardet_get_candidates(uchardet_t ud)"
    extern "float uchardet_get_confidence(uchardet_t ud, size_t candidate)"
    extern "const char *uchardet_get_encoding(uchardet_t ud, size_t candidate)"
    extern "const char *uchardet_get_language(uchardet_t ud, size_t candidate)"
    extern "void uchardet_weigh_language(uchardet_t ud, const char *language, float weight)"
    extern "void uchardet_set_default_weight(uchardet_t ud, float weight)"
  else
    extern "const char *uchardet_get_charset(uchardet_t ud)"
  end

  # Detect the character encoding of the bytes in +str+.
  #
  # Without the in-gem library, returns a single Hash: { encoding: String }.
  # With it, returns an Array with one Hash per candidate, each holding
  # :encoding (String), :confidence (Float) and :language (String, or nil
  # when the library returns a NULL pointer for the language).
  #
  # Raises RuntimeError when uchardet_handle_data reports a non-zero status.
  # The detector handle is always released, even when an error is raised.
  def self.detect(str)
    uchardet_obj = uchardet_new
    status = uchardet_handle_data(uchardet_obj, str, str.bytesize)
    # A non-zero status means the detector could not process the data, so
    # any result read afterwards would be meaningless.
    raise "uchardet_handle_data failed with status #{status}" unless status.zero?

    uchardet_data_end(uchardet_obj)

    return { encoding: uchardet_get_charset(uchardet_obj).to_s } unless IN_GEM_LIB

    Array.new(uchardet_get_candidates(uchardet_obj)) do |i|
      encoding = uchardet_get_encoding(uchardet_obj, i).to_s
      confidence = uchardet_get_confidence(uchardet_obj, i)
      lang_ptr = uchardet_get_language(uchardet_obj, i)

      {
        encoding: encoding,
        confidence: confidence,
        language: lang_ptr.null? ? nil : lang_ptr.to_s
      }
    end
  ensure
    # uchardet_obj is still nil if uchardet_new itself raised.
    uchardet_delete(uchardet_obj) if uchardet_obj
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module CChardet
  # Works out which libuchardet shared library file should be loaded.
  class LibFinder
    # Directories searched, in order, for a system-installed library.
    PREFIXES = [
      "/usr/lib",
      "/usr/lib/x86_64-linux-gnu/",
      "/usr/local/lib",
      "/opt/local/lib"
    ].freeze

    # Path of the library to load: the one built inside this gem source tree
    # when it exists, otherwise the first `libuchardet.so.0` found under
    # PREFIXES. Raises RuntimeError when neither is present.
    def lib_path
      return lib_in_gem_path if built_in_gem?

      system_lib = PREFIXES
                   .map { |dir| "#{dir}/libuchardet.so.0" }
                   .find { |path| File.exist?(path) }
      return system_lib if system_lib

      raise "could not find `libuchardet.so.0` in #{PREFIXES}. Install it, or install this gem with `--with-unreleased-uchardet` to build the included version."
    end

    # Whether the library file built inside this gem source tree exists.
    def built_in_gem?
      File.exist?(lib_in_gem_path)
    end

    # Absolute path where the in-gem build of the library is expected
    # (computed once and memoized).
    def lib_in_gem_path
      @lib_in_gem_path ||= File.expand_path("#{__dir__}/../../ext/uchardet/src/libuchardet.so")
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace of the cchardet gem; this file only defines the gem's version.
+ module CChardet
4
# Version string of the gem.
+ VERSION = "0.2.0"
5
+ end
metadata ADDED
@@ -0,0 +1,362 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cchardet
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Eddie Lebow
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-03-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - elebow@users.noreply.github.com
16
+ executables: []
17
+ extensions:
18
+ - ext/cchardet/extconf.rb
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".gitignore"
22
+ - ".gitmodules"
23
+ - ".rubocop.yml"
24
+ - CHANGELOG.md
25
+ - Gemfile
26
+ - README.md
27
+ - Rakefile
28
+ - cchardet.gemspec
29
+ - ext/cchardet/extconf.rb
30
+ - ext/uchardet/.gitignore
31
+ - ext/uchardet/.gitlab-ci.yml
32
+ - ext/uchardet/AUTHORS
33
+ - ext/uchardet/CMakeLists.txt
34
+ - ext/uchardet/COPYING
35
+ - ext/uchardet/INSTALL
36
+ - ext/uchardet/README.md
37
+ - ext/uchardet/build-mac/uchardet.cpp
38
+ - ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj
39
+ - ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata
40
+ - ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout
41
+ - ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme
42
+ - ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme
43
+ - ext/uchardet/doc/CMakeLists.txt
44
+ - ext/uchardet/doc/README.maintainer
45
+ - ext/uchardet/doc/uchardet.1
46
+ - ext/uchardet/script/BuildLangModel.py
47
+ - ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log
48
+ - ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log
49
+ - ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log
50
+ - ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log
51
+ - ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log
52
+ - ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log
53
+ - ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log
54
+ - ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log
55
+ - ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log
56
+ - ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log
57
+ - ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log
58
+ - ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log
59
+ - ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log
60
+ - ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log
61
+ - ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log
62
+ - ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log
63
+ - ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log
64
+ - ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log
65
+ - ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log
66
+ - ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log
67
+ - ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log
68
+ - ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log
69
+ - ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log
70
+ - ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log
71
+ - ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log
72
+ - ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log
73
+ - ext/uchardet/script/README
74
+ - ext/uchardet/script/charsets/codepoints.py
75
+ - ext/uchardet/script/charsets/db.py
76
+ - ext/uchardet/script/charsets/ibm852.py
77
+ - ext/uchardet/script/charsets/iso-8859-1.py
78
+ - ext/uchardet/script/charsets/iso-8859-10.py
79
+ - ext/uchardet/script/charsets/iso-8859-11.py
80
+ - ext/uchardet/script/charsets/iso-8859-13.py
81
+ - ext/uchardet/script/charsets/iso-8859-15.py
82
+ - ext/uchardet/script/charsets/iso-8859-16.py
83
+ - ext/uchardet/script/charsets/iso-8859-2.py
84
+ - ext/uchardet/script/charsets/iso-8859-3.py
85
+ - ext/uchardet/script/charsets/iso-8859-4.py
86
+ - ext/uchardet/script/charsets/iso-8859-6.py
87
+ - ext/uchardet/script/charsets/iso-8859-7.py
88
+ - ext/uchardet/script/charsets/iso-8859-9.py
89
+ - ext/uchardet/script/charsets/mac-centraleurope.py
90
+ - ext/uchardet/script/charsets/tis-620.py
91
+ - ext/uchardet/script/charsets/viscii.py
92
+ - ext/uchardet/script/charsets/windows-1250.py
93
+ - ext/uchardet/script/charsets/windows-1252.py
94
+ - ext/uchardet/script/charsets/windows-1253.py
95
+ - ext/uchardet/script/charsets/windows-1256.py
96
+ - ext/uchardet/script/charsets/windows-1257.py
97
+ - ext/uchardet/script/charsets/windows-1258.py
98
+ - ext/uchardet/script/debug.sh
99
+ - ext/uchardet/script/header-template.cpp
100
+ - ext/uchardet/script/langs/ar.py
101
+ - ext/uchardet/script/langs/cs.py
102
+ - ext/uchardet/script/langs/da.py
103
+ - ext/uchardet/script/langs/de.py
104
+ - ext/uchardet/script/langs/el.py
105
+ - ext/uchardet/script/langs/eo.py
106
+ - ext/uchardet/script/langs/es.py
107
+ - ext/uchardet/script/langs/et.py
108
+ - ext/uchardet/script/langs/fi.py
109
+ - ext/uchardet/script/langs/fr.py
110
+ - ext/uchardet/script/langs/ga.py
111
+ - ext/uchardet/script/langs/hr.py
112
+ - ext/uchardet/script/langs/hu.py
113
+ - ext/uchardet/script/langs/it.py
114
+ - ext/uchardet/script/langs/lt.py
115
+ - ext/uchardet/script/langs/lv.py
116
+ - ext/uchardet/script/langs/mt.py
117
+ - ext/uchardet/script/langs/pl.py
118
+ - ext/uchardet/script/langs/pt.py
119
+ - ext/uchardet/script/langs/ro.py
120
+ - ext/uchardet/script/langs/sk.py
121
+ - ext/uchardet/script/langs/sl.py
122
+ - ext/uchardet/script/langs/sv.py
123
+ - ext/uchardet/script/langs/th.py
124
+ - ext/uchardet/script/langs/tr.py
125
+ - ext/uchardet/script/langs/vi.py
126
+ - ext/uchardet/script/release.sh
127
+ - ext/uchardet/script/win32.sh
128
+ - ext/uchardet/src/Big5Freq.tab
129
+ - ext/uchardet/src/CMakeLists.txt
130
+ - ext/uchardet/src/CharDistribution.cpp
131
+ - ext/uchardet/src/CharDistribution.h
132
+ - ext/uchardet/src/EUCKRFreq.tab
133
+ - ext/uchardet/src/EUCTWFreq.tab
134
+ - ext/uchardet/src/GB2312Freq.tab
135
+ - ext/uchardet/src/JISFreq.tab
136
+ - ext/uchardet/src/JpCntx.cpp
137
+ - ext/uchardet/src/JpCntx.h
138
+ - ext/uchardet/src/LangModels/LangArabicModel.cpp
139
+ - ext/uchardet/src/LangModels/LangBulgarianModel.cpp
140
+ - ext/uchardet/src/LangModels/LangCroatianModel.cpp
141
+ - ext/uchardet/src/LangModels/LangCzechModel.cpp
142
+ - ext/uchardet/src/LangModels/LangDanishModel.cpp
143
+ - ext/uchardet/src/LangModels/LangEsperantoModel.cpp
144
+ - ext/uchardet/src/LangModels/LangEstonianModel.cpp
145
+ - ext/uchardet/src/LangModels/LangFinnishModel.cpp
146
+ - ext/uchardet/src/LangModels/LangFrenchModel.cpp
147
+ - ext/uchardet/src/LangModels/LangGermanModel.cpp
148
+ - ext/uchardet/src/LangModels/LangGreekModel.cpp
149
+ - ext/uchardet/src/LangModels/LangHebrewModel.cpp
150
+ - ext/uchardet/src/LangModels/LangHungarianModel.cpp
151
+ - ext/uchardet/src/LangModels/LangIrishModel.cpp
152
+ - ext/uchardet/src/LangModels/LangItalianModel.cpp
153
+ - ext/uchardet/src/LangModels/LangLatvianModel.cpp
154
+ - ext/uchardet/src/LangModels/LangLithuanianModel.cpp
155
+ - ext/uchardet/src/LangModels/LangMalteseModel.cpp
156
+ - ext/uchardet/src/LangModels/LangPolishModel.cpp
157
+ - ext/uchardet/src/LangModels/LangPortugueseModel.cpp
158
+ - ext/uchardet/src/LangModels/LangRomanianModel.cpp
159
+ - ext/uchardet/src/LangModels/LangRussianModel.cpp
160
+ - ext/uchardet/src/LangModels/LangSlovakModel.cpp
161
+ - ext/uchardet/src/LangModels/LangSloveneModel.cpp
162
+ - ext/uchardet/src/LangModels/LangSpanishModel.cpp
163
+ - ext/uchardet/src/LangModels/LangSwedishModel.cpp
164
+ - ext/uchardet/src/LangModels/LangThaiModel.cpp
165
+ - ext/uchardet/src/LangModels/LangTurkishModel.cpp
166
+ - ext/uchardet/src/LangModels/LangVietnameseModel.cpp
167
+ - ext/uchardet/src/nsBig5Prober.cpp
168
+ - ext/uchardet/src/nsBig5Prober.h
169
+ - ext/uchardet/src/nsCharSetProber.cpp
170
+ - ext/uchardet/src/nsCharSetProber.h
171
+ - ext/uchardet/src/nsCodingStateMachine.h
172
+ - ext/uchardet/src/nsEUCJPProber.cpp
173
+ - ext/uchardet/src/nsEUCJPProber.h
174
+ - ext/uchardet/src/nsEUCKRProber.cpp
175
+ - ext/uchardet/src/nsEUCKRProber.h
176
+ - ext/uchardet/src/nsEUCTWProber.cpp
177
+ - ext/uchardet/src/nsEUCTWProber.h
178
+ - ext/uchardet/src/nsEscCharsetProber.cpp
179
+ - ext/uchardet/src/nsEscCharsetProber.h
180
+ - ext/uchardet/src/nsEscSM.cpp
181
+ - ext/uchardet/src/nsGB2312Prober.cpp
182
+ - ext/uchardet/src/nsGB2312Prober.h
183
+ - ext/uchardet/src/nsHebrewProber.cpp
184
+ - ext/uchardet/src/nsHebrewProber.h
185
+ - ext/uchardet/src/nsLatin1Prober.cpp
186
+ - ext/uchardet/src/nsLatin1Prober.h
187
+ - ext/uchardet/src/nsMBCSGroupProber.cpp
188
+ - ext/uchardet/src/nsMBCSGroupProber.h
189
+ - ext/uchardet/src/nsMBCSSM.cpp
190
+ - ext/uchardet/src/nsPkgInt.h
191
+ - ext/uchardet/src/nsSBCSGroupProber.cpp
192
+ - ext/uchardet/src/nsSBCSGroupProber.h
193
+ - ext/uchardet/src/nsSBCharSetProber.cpp
194
+ - ext/uchardet/src/nsSBCharSetProber.h
195
+ - ext/uchardet/src/nsSJISProber.cpp
196
+ - ext/uchardet/src/nsSJISProber.h
197
+ - ext/uchardet/src/nsUTF8Prober.cpp
198
+ - ext/uchardet/src/nsUTF8Prober.h
199
+ - ext/uchardet/src/nsUniversalDetector.cpp
200
+ - ext/uchardet/src/nsUniversalDetector.h
201
+ - ext/uchardet/src/nscore.h
202
+ - ext/uchardet/src/prmem.h
203
+ - ext/uchardet/src/symbols.cmake
204
+ - ext/uchardet/src/tools/CMakeLists.txt
205
+ - ext/uchardet/src/tools/uchardet.cpp
206
+ - ext/uchardet/src/uchardet.cpp
207
+ - ext/uchardet/src/uchardet.h
208
+ - ext/uchardet/test/CMakeLists.txt
209
+ - ext/uchardet/test/ar/iso-8859-6.txt
210
+ - ext/uchardet/test/ar/utf-8.txt
211
+ - ext/uchardet/test/ar/windows-1256.txt
212
+ - ext/uchardet/test/bg/windows-1251.txt
213
+ - ext/uchardet/test/cs/ibm852.txt
214
+ - ext/uchardet/test/cs/iso-8859-2.txt
215
+ - ext/uchardet/test/cs/mac-centraleurope.txt
216
+ - ext/uchardet/test/cs/utf-8.txt
217
+ - ext/uchardet/test/cs/windows-1250.txt
218
+ - ext/uchardet/test/da/iso-8859-1.txt
219
+ - ext/uchardet/test/da/iso-8859-15.txt
220
+ - ext/uchardet/test/da/utf-8.txt
221
+ - ext/uchardet/test/da/windows-1252.txt
222
+ - ext/uchardet/test/de/iso-8859-1.txt
223
+ - ext/uchardet/test/de/windows-1252.txt
224
+ - ext/uchardet/test/el/iso-8859-7.txt
225
+ - ext/uchardet/test/el/utf-8.txt
226
+ - ext/uchardet/test/el/windows-1253.txt
227
+ - ext/uchardet/test/en/ascii.txt
228
+ - ext/uchardet/test/eo/iso-8859-3.txt
229
+ - ext/uchardet/test/es/iso-8859-1.txt
230
+ - ext/uchardet/test/es/iso-8859-15.txt
231
+ - ext/uchardet/test/es/utf-8.txt
232
+ - ext/uchardet/test/es/windows-1252.txt
233
+ - ext/uchardet/test/et/iso-8859-13.txt
234
+ - ext/uchardet/test/et/iso-8859-15.txt
235
+ - ext/uchardet/test/et/iso-8859-4.txt
236
+ - ext/uchardet/test/et/utf-8.txt
237
+ - ext/uchardet/test/et/windows-1252.txt
238
+ - ext/uchardet/test/et/windows-1257.txt
239
+ - ext/uchardet/test/fi/iso-8859-1.txt
240
+ - ext/uchardet/test/fi/utf-8.txt
241
+ - ext/uchardet/test/fr/iso-8859-1.txt
242
+ - ext/uchardet/test/fr/iso-8859-15.txt
243
+ - ext/uchardet/test/fr/utf-16.be
244
+ - ext/uchardet/test/fr/utf-32.le
245
+ - ext/uchardet/test/fr/utf-8.txt
246
+ - ext/uchardet/test/fr/windows-1252.txt
247
+ - ext/uchardet/test/ga/iso-8859-1.txt
248
+ - ext/uchardet/test/ga/utf-8.txt
249
+ - ext/uchardet/test/ga/windows-1252.txt
250
+ - ext/uchardet/test/he/iso-8859-8.txt
251
+ - ext/uchardet/test/he/utf-8.txt
252
+ - ext/uchardet/test/he/windows-1255.txt
253
+ - ext/uchardet/test/hr/ibm852.txt
254
+ - ext/uchardet/test/hr/iso-8859-13.txt
255
+ - ext/uchardet/test/hr/iso-8859-16.txt
256
+ - ext/uchardet/test/hr/iso-8859-2.txt
257
+ - ext/uchardet/test/hr/mac-centraleurope.txt
258
+ - ext/uchardet/test/hr/utf-8.txt
259
+ - ext/uchardet/test/hr/windows-1250.txt
260
+ - ext/uchardet/test/hu/iso-8859-2.txt
261
+ - ext/uchardet/test/hu/windows-1250.txt
262
+ - ext/uchardet/test/it/iso-8859-1.txt
263
+ - ext/uchardet/test/it/utf-8.txt
264
+ - ext/uchardet/test/ja/euc-jp.txt
265
+ - ext/uchardet/test/ja/iso-2022-jp.txt
266
+ - ext/uchardet/test/ja/shift_jis.txt
267
+ - ext/uchardet/test/ja/utf-16be.txt
268
+ - ext/uchardet/test/ja/utf-16le.txt
269
+ - ext/uchardet/test/ja/utf-8.txt
270
+ - ext/uchardet/test/ko/iso-2022-kr.txt
271
+ - ext/uchardet/test/ko/uhc.smi
272
+ - ext/uchardet/test/ko/utf-16.le
273
+ - ext/uchardet/test/ko/utf-32.be
274
+ - ext/uchardet/test/ko/utf-8.txt
275
+ - ext/uchardet/test/lt/iso-8859-10.txt
276
+ - ext/uchardet/test/lt/iso-8859-13.txt
277
+ - ext/uchardet/test/lt/iso-8859-4.txt
278
+ - ext/uchardet/test/lt/utf-8.txt
279
+ - ext/uchardet/test/lv/iso-8859-10.txt
280
+ - ext/uchardet/test/lv/iso-8859-13.txt
281
+ - ext/uchardet/test/lv/iso-8859-4.txt
282
+ - ext/uchardet/test/lv/utf-8.txt
283
+ - ext/uchardet/test/mt/iso-8859-3.txt
284
+ - ext/uchardet/test/mt/utf-8.txt
285
+ - ext/uchardet/test/pl/ibm852.txt
286
+ - ext/uchardet/test/pl/iso-8859-13.txt
287
+ - ext/uchardet/test/pl/iso-8859-16.txt
288
+ - ext/uchardet/test/pl/iso-8859-2.txt
289
+ - ext/uchardet/test/pl/mac-centraleurope.txt
290
+ - ext/uchardet/test/pl/utf-8.txt
291
+ - ext/uchardet/test/pl/windows-1250.txt
292
+ - ext/uchardet/test/pt/iso-8859-1.txt
293
+ - ext/uchardet/test/pt/utf-8.txt
294
+ - ext/uchardet/test/ro/ibm852.txt
295
+ - ext/uchardet/test/ro/iso-8859-16.txt
296
+ - ext/uchardet/test/ro/utf-8.txt
297
+ - ext/uchardet/test/ro/windows-1250.txt
298
+ - ext/uchardet/test/ru/ibm855.txt
299
+ - ext/uchardet/test/ru/ibm866.txt
300
+ - ext/uchardet/test/ru/iso-8859-5.txt
301
+ - ext/uchardet/test/ru/koi8-r.txt
302
+ - ext/uchardet/test/ru/mac-cyrillic.txt
303
+ - ext/uchardet/test/ru/windows-1251.txt
304
+ - ext/uchardet/test/sk/ibm852.txt
305
+ - ext/uchardet/test/sk/iso-8859-2.txt
306
+ - ext/uchardet/test/sk/mac-centraleurope.txt
307
+ - ext/uchardet/test/sk/utf-8.txt
308
+ - ext/uchardet/test/sk/windows-1250.txt
309
+ - ext/uchardet/test/sl/ibm852.txt
310
+ - ext/uchardet/test/sl/iso-8859-16.txt
311
+ - ext/uchardet/test/sl/iso-8859-2.txt
312
+ - ext/uchardet/test/sl/mac-centraleurope.txt
313
+ - ext/uchardet/test/sl/utf-8.txt
314
+ - ext/uchardet/test/sl/windows-1250.txt
315
+ - ext/uchardet/test/sv/iso-8859-1.txt
316
+ - ext/uchardet/test/sv/utf-8.txt
317
+ - ext/uchardet/test/sv/windows-1252.txt
318
+ - ext/uchardet/test/th/iso-8859-11.txt
319
+ - ext/uchardet/test/th/tis-620.txt
320
+ - ext/uchardet/test/th/utf-8.txt
321
+ - ext/uchardet/test/tr/iso-8859-3.txt
322
+ - ext/uchardet/test/tr/iso-8859-9.txt
323
+ - ext/uchardet/test/uchardet-tests.c
324
+ - ext/uchardet/test/vi/utf-8.txt
325
+ - ext/uchardet/test/vi/viscii.txt
326
+ - ext/uchardet/test/vi/windows-1258.txt
327
+ - ext/uchardet/test/zh/big5.txt
328
+ - ext/uchardet/test/zh/euc-tw.txt
329
+ - ext/uchardet/test/zh/gb18030.txt
330
+ - ext/uchardet/test/zh/utf-8.txt
331
+ - ext/uchardet/uchardet.doap
332
+ - ext/uchardet/uchardet.pc.in
333
+ - lib/cchardet.rb
334
+ - lib/cchardet/lib_finder.rb
335
+ - lib/cchardet/version.rb
336
+ homepage: https://github.com/elebow/cchardet
337
+ licenses:
338
+ - public domain
339
+ metadata:
340
+ homepage_uri: https://github.com/elebow/cchardet
341
+ source_code_uri: https://github.com/elebow/cchardet
342
+ changelog_uri: https://github.com/elebow/cchardet/blob/trunk/CHANGELOG.md
343
+ post_install_message:
344
+ rdoc_options: []
345
+ require_paths:
346
+ - lib
347
+ required_ruby_version: !ruby/object:Gem::Requirement
348
+ requirements:
349
+ - - ">="
350
+ - !ruby/object:Gem::Version
351
+ version: 2.3.0
352
+ required_rubygems_version: !ruby/object:Gem::Requirement
353
+ requirements:
354
+ - - ">="
355
+ - !ruby/object:Gem::Version
356
+ version: '0'
357
+ requirements: []
358
+ rubygems_version: 3.2.3
359
+ signing_key:
360
+ specification_version: 4
361
+ summary: Ruby bindings for freedesktop.org's libuchardet, for encoding detection.
362
+ test_files: []