cchardet 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/.rubocop.yml +11 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +10 -0
  7. data/README.md +35 -0
  8. data/Rakefile +15 -0
  9. data/cchardet.gemspec +30 -0
  10. data/ext/cchardet/extconf.rb +26 -0
  11. data/ext/uchardet/.gitignore +1 -0
  12. data/ext/uchardet/.gitlab-ci.yml +106 -0
  13. data/ext/uchardet/AUTHORS +16 -0
  14. data/ext/uchardet/CMakeLists.txt +74 -0
  15. data/ext/uchardet/COPYING +1316 -0
  16. data/ext/uchardet/INSTALL +26 -0
  17. data/ext/uchardet/README.md +295 -0
  18. data/ext/uchardet/build-mac/uchardet.cpp +7 -0
  19. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.pbxproj +543 -0
  20. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  21. data/ext/uchardet/build-mac/uchardet.xcodeproj/project.xcworkspace/xcshareddata/uchardet.xccheckout +41 -0
  22. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet-ios.xcscheme +77 -0
  23. data/ext/uchardet/build-mac/uchardet.xcodeproj/xcshareddata/xcschemes/uchardet.xcscheme +77 -0
  24. data/ext/uchardet/doc/CMakeLists.txt +6 -0
  25. data/ext/uchardet/doc/README.maintainer +59 -0
  26. data/ext/uchardet/doc/uchardet.1 +18 -0
  27. data/ext/uchardet/script/BuildLangModel.py +533 -0
  28. data/ext/uchardet/script/BuildLangModelLogs/LangArabicModel.log +142 -0
  29. data/ext/uchardet/script/BuildLangModelLogs/LangCroatianModel.log +157 -0
  30. data/ext/uchardet/script/BuildLangModelLogs/LangCzechModel.log +161 -0
  31. data/ext/uchardet/script/BuildLangModelLogs/LangDanishModel.log +158 -0
  32. data/ext/uchardet/script/BuildLangModelLogs/LangEsperantoModel.log +110 -0
  33. data/ext/uchardet/script/BuildLangModelLogs/LangEstonianModel.log +159 -0
  34. data/ext/uchardet/script/BuildLangModelLogs/LangFinnishModel.log +156 -0
  35. data/ext/uchardet/script/BuildLangModelLogs/LangFrenchModel.log +116 -0
  36. data/ext/uchardet/script/BuildLangModelLogs/LangGermanModel.log +159 -0
  37. data/ext/uchardet/script/BuildLangModelLogs/LangGreekModel.log +272 -0
  38. data/ext/uchardet/script/BuildLangModelLogs/LangHungarianModel.log +109 -0
  39. data/ext/uchardet/script/BuildLangModelLogs/LangIrishModel.log +156 -0
  40. data/ext/uchardet/script/BuildLangModelLogs/LangItalianModel.log +162 -0
  41. data/ext/uchardet/script/BuildLangModelLogs/LangLatvianModel.log +162 -0
  42. data/ext/uchardet/script/BuildLangModelLogs/LangLithuanianModel.log +162 -0
  43. data/ext/uchardet/script/BuildLangModelLogs/LangMalteseModel.log +147 -0
  44. data/ext/uchardet/script/BuildLangModelLogs/LangPolishModel.log +154 -0
  45. data/ext/uchardet/script/BuildLangModelLogs/LangPortugueseModel.log +166 -0
  46. data/ext/uchardet/script/BuildLangModelLogs/LangRomanianModel.log +153 -0
  47. data/ext/uchardet/script/BuildLangModelLogs/LangSlovakModel.log +158 -0
  48. data/ext/uchardet/script/BuildLangModelLogs/LangSloveneModel.log +148 -0
  49. data/ext/uchardet/script/BuildLangModelLogs/LangSpanishModel.log +109 -0
  50. data/ext/uchardet/script/BuildLangModelLogs/LangSwedishModel.log +151 -0
  51. data/ext/uchardet/script/BuildLangModelLogs/LangThaiModel.log +141 -0
  52. data/ext/uchardet/script/BuildLangModelLogs/LangTurkishModel.log +113 -0
  53. data/ext/uchardet/script/BuildLangModelLogs/LangVietnameseModel.log +121 -0
  54. data/ext/uchardet/script/README +63 -0
  55. data/ext/uchardet/script/charsets/codepoints.py +53 -0
  56. data/ext/uchardet/script/charsets/db.py +73 -0
  57. data/ext/uchardet/script/charsets/ibm852.py +72 -0
  58. data/ext/uchardet/script/charsets/iso-8859-1.py +85 -0
  59. data/ext/uchardet/script/charsets/iso-8859-10.py +73 -0
  60. data/ext/uchardet/script/charsets/iso-8859-11.py +77 -0
  61. data/ext/uchardet/script/charsets/iso-8859-13.py +72 -0
  62. data/ext/uchardet/script/charsets/iso-8859-15.py +80 -0
  63. data/ext/uchardet/script/charsets/iso-8859-16.py +83 -0
  64. data/ext/uchardet/script/charsets/iso-8859-2.py +73 -0
  65. data/ext/uchardet/script/charsets/iso-8859-3.py +75 -0
  66. data/ext/uchardet/script/charsets/iso-8859-4.py +73 -0
  67. data/ext/uchardet/script/charsets/iso-8859-6.py +73 -0
  68. data/ext/uchardet/script/charsets/iso-8859-7.py +73 -0
  69. data/ext/uchardet/script/charsets/iso-8859-9.py +76 -0
  70. data/ext/uchardet/script/charsets/mac-centraleurope.py +72 -0
  71. data/ext/uchardet/script/charsets/tis-620.py +77 -0
  72. data/ext/uchardet/script/charsets/viscii.py +72 -0
  73. data/ext/uchardet/script/charsets/windows-1250.py +75 -0
  74. data/ext/uchardet/script/charsets/windows-1252.py +76 -0
  75. data/ext/uchardet/script/charsets/windows-1253.py +72 -0
  76. data/ext/uchardet/script/charsets/windows-1256.py +75 -0
  77. data/ext/uchardet/script/charsets/windows-1257.py +72 -0
  78. data/ext/uchardet/script/charsets/windows-1258.py +72 -0
  79. data/ext/uchardet/script/debug.sh +9 -0
  80. data/ext/uchardet/script/header-template.cpp +38 -0
  81. data/ext/uchardet/script/langs/ar.py +59 -0
  82. data/ext/uchardet/script/langs/cs.py +80 -0
  83. data/ext/uchardet/script/langs/da.py +69 -0
  84. data/ext/uchardet/script/langs/de.py +69 -0
  85. data/ext/uchardet/script/langs/el.py +55 -0
  86. data/ext/uchardet/script/langs/eo.py +67 -0
  87. data/ext/uchardet/script/langs/es.py +69 -0
  88. data/ext/uchardet/script/langs/et.py +57 -0
  89. data/ext/uchardet/script/langs/fi.py +60 -0
  90. data/ext/uchardet/script/langs/fr.py +79 -0
  91. data/ext/uchardet/script/langs/ga.py +60 -0
  92. data/ext/uchardet/script/langs/hr.py +59 -0
  93. data/ext/uchardet/script/langs/hu.py +66 -0
  94. data/ext/uchardet/script/langs/it.py +56 -0
  95. data/ext/uchardet/script/langs/lt.py +70 -0
  96. data/ext/uchardet/script/langs/lv.py +69 -0
  97. data/ext/uchardet/script/langs/mt.py +80 -0
  98. data/ext/uchardet/script/langs/pl.py +81 -0
  99. data/ext/uchardet/script/langs/pt.py +80 -0
  100. data/ext/uchardet/script/langs/ro.py +65 -0
  101. data/ext/uchardet/script/langs/sk.py +80 -0
  102. data/ext/uchardet/script/langs/sl.py +59 -0
  103. data/ext/uchardet/script/langs/sv.py +56 -0
  104. data/ext/uchardet/script/langs/th.py +55 -0
  105. data/ext/uchardet/script/langs/tr.py +67 -0
  106. data/ext/uchardet/script/langs/vi.py +64 -0
  107. data/ext/uchardet/script/release.sh +8 -0
  108. data/ext/uchardet/script/win32.sh +7 -0
  109. data/ext/uchardet/src/Big5Freq.tab +943 -0
  110. data/ext/uchardet/src/CMakeLists.txt +160 -0
  111. data/ext/uchardet/src/CharDistribution.cpp +109 -0
  112. data/ext/uchardet/src/CharDistribution.h +242 -0
  113. data/ext/uchardet/src/EUCKRFreq.tab +614 -0
  114. data/ext/uchardet/src/EUCTWFreq.tab +447 -0
  115. data/ext/uchardet/src/GB2312Freq.tab +491 -0
  116. data/ext/uchardet/src/JISFreq.tab +589 -0
  117. data/ext/uchardet/src/JpCntx.cpp +230 -0
  118. data/ext/uchardet/src/JpCntx.h +140 -0
  119. data/ext/uchardet/src/LangModels/LangArabicModel.cpp +267 -0
  120. data/ext/uchardet/src/LangModels/LangBulgarianModel.cpp +249 -0
  121. data/ext/uchardet/src/LangModels/LangCroatianModel.cpp +298 -0
  122. data/ext/uchardet/src/LangModels/LangCzechModel.cpp +285 -0
  123. data/ext/uchardet/src/LangModels/LangDanishModel.cpp +201 -0
  124. data/ext/uchardet/src/LangModels/LangEsperantoModel.cpp +142 -0
  125. data/ext/uchardet/src/LangModels/LangEstonianModel.cpp +268 -0
  126. data/ext/uchardet/src/LangModels/LangFinnishModel.cpp +297 -0
  127. data/ext/uchardet/src/LangModels/LangFrenchModel.cpp +209 -0
  128. data/ext/uchardet/src/LangModels/LangGermanModel.cpp +170 -0
  129. data/ext/uchardet/src/LangModels/LangGreekModel.cpp +231 -0
  130. data/ext/uchardet/src/LangModels/LangHebrewModel.cpp +220 -0
  131. data/ext/uchardet/src/LangModels/LangHungarianModel.cpp +171 -0
  132. data/ext/uchardet/src/LangModels/LangIrishModel.cpp +234 -0
  133. data/ext/uchardet/src/LangModels/LangItalianModel.cpp +269 -0
  134. data/ext/uchardet/src/LangModels/LangLatvianModel.cpp +210 -0
  135. data/ext/uchardet/src/LangModels/LangLithuanianModel.cpp +209 -0
  136. data/ext/uchardet/src/LangModels/LangMalteseModel.cpp +138 -0
  137. data/ext/uchardet/src/LangModels/LangPolishModel.cpp +304 -0
  138. data/ext/uchardet/src/LangModels/LangPortugueseModel.cpp +241 -0
  139. data/ext/uchardet/src/LangModels/LangRomanianModel.cpp +236 -0
  140. data/ext/uchardet/src/LangModels/LangRussianModel.cpp +367 -0
  141. data/ext/uchardet/src/LangModels/LangSlovakModel.cpp +293 -0
  142. data/ext/uchardet/src/LangModels/LangSloveneModel.cpp +264 -0
  143. data/ext/uchardet/src/LangModels/LangSpanishModel.cpp +204 -0
  144. data/ext/uchardet/src/LangModels/LangSwedishModel.cpp +266 -0
  145. data/ext/uchardet/src/LangModels/LangThaiModel.cpp +267 -0
  146. data/ext/uchardet/src/LangModels/LangTurkishModel.cpp +175 -0
  147. data/ext/uchardet/src/LangModels/LangVietnameseModel.cpp +249 -0
  148. data/ext/uchardet/src/nsBig5Prober.cpp +88 -0
  149. data/ext/uchardet/src/nsBig5Prober.h +75 -0
  150. data/ext/uchardet/src/nsCharSetProber.cpp +125 -0
  151. data/ext/uchardet/src/nsCharSetProber.h +77 -0
  152. data/ext/uchardet/src/nsCodingStateMachine.h +105 -0
  153. data/ext/uchardet/src/nsEUCJPProber.cpp +99 -0
  154. data/ext/uchardet/src/nsEUCJPProber.h +79 -0
  155. data/ext/uchardet/src/nsEUCKRProber.cpp +91 -0
  156. data/ext/uchardet/src/nsEUCKRProber.h +81 -0
  157. data/ext/uchardet/src/nsEUCTWProber.cpp +91 -0
  158. data/ext/uchardet/src/nsEUCTWProber.h +75 -0
  159. data/ext/uchardet/src/nsEscCharsetProber.cpp +101 -0
  160. data/ext/uchardet/src/nsEscCharsetProber.h +70 -0
  161. data/ext/uchardet/src/nsEscSM.cpp +267 -0
  162. data/ext/uchardet/src/nsGB2312Prober.cpp +96 -0
  163. data/ext/uchardet/src/nsGB2312Prober.h +77 -0
  164. data/ext/uchardet/src/nsHebrewProber.cpp +194 -0
  165. data/ext/uchardet/src/nsHebrewProber.h +177 -0
  166. data/ext/uchardet/src/nsLatin1Prober.cpp +182 -0
  167. data/ext/uchardet/src/nsLatin1Prober.h +73 -0
  168. data/ext/uchardet/src/nsMBCSGroupProber.cpp +242 -0
  169. data/ext/uchardet/src/nsMBCSGroupProber.h +81 -0
  170. data/ext/uchardet/src/nsMBCSSM.cpp +513 -0
  171. data/ext/uchardet/src/nsPkgInt.h +89 -0
  172. data/ext/uchardet/src/nsSBCSGroupProber.cpp +343 -0
  173. data/ext/uchardet/src/nsSBCSGroupProber.h +71 -0
  174. data/ext/uchardet/src/nsSBCharSetProber.cpp +160 -0
  175. data/ext/uchardet/src/nsSBCharSetProber.h +258 -0
  176. data/ext/uchardet/src/nsSJISProber.cpp +98 -0
  177. data/ext/uchardet/src/nsSJISProber.h +81 -0
  178. data/ext/uchardet/src/nsUTF8Prober.cpp +87 -0
  179. data/ext/uchardet/src/nsUTF8Prober.h +66 -0
  180. data/ext/uchardet/src/nsUniversalDetector.cpp +339 -0
  181. data/ext/uchardet/src/nsUniversalDetector.h +91 -0
  182. data/ext/uchardet/src/nscore.h +59 -0
  183. data/ext/uchardet/src/prmem.h +49 -0
  184. data/ext/uchardet/src/symbols.cmake +41 -0
  185. data/ext/uchardet/src/tools/CMakeLists.txt +23 -0
  186. data/ext/uchardet/src/tools/uchardet.cpp +254 -0
  187. data/ext/uchardet/src/uchardet.cpp +274 -0
  188. data/ext/uchardet/src/uchardet.h +136 -0
  189. data/ext/uchardet/test/CMakeLists.txt +47 -0
  190. data/ext/uchardet/test/ar/iso-8859-6.txt +3 -0
  191. data/ext/uchardet/test/ar/utf-8.txt +3 -0
  192. data/ext/uchardet/test/ar/windows-1256.txt +3 -0
  193. data/ext/uchardet/test/bg/windows-1251.txt +3 -0
  194. data/ext/uchardet/test/cs/ibm852.txt +4 -0
  195. data/ext/uchardet/test/cs/iso-8859-2.txt +4 -0
  196. data/ext/uchardet/test/cs/mac-centraleurope.txt +4 -0
  197. data/ext/uchardet/test/cs/utf-8.txt +4 -0
  198. data/ext/uchardet/test/cs/windows-1250.txt +4 -0
  199. data/ext/uchardet/test/da/iso-8859-1.txt +7 -0
  200. data/ext/uchardet/test/da/iso-8859-15.txt +10 -0
  201. data/ext/uchardet/test/da/utf-8.txt +10 -0
  202. data/ext/uchardet/test/da/windows-1252.txt +10 -0
  203. data/ext/uchardet/test/de/iso-8859-1.txt +11 -0
  204. data/ext/uchardet/test/de/windows-1252.txt +11 -0
  205. data/ext/uchardet/test/el/iso-8859-7.txt +3 -0
  206. data/ext/uchardet/test/el/utf-8.txt +3 -0
  207. data/ext/uchardet/test/el/windows-1253.txt +5 -0
  208. data/ext/uchardet/test/en/ascii.txt +4 -0
  209. data/ext/uchardet/test/eo/iso-8859-3.txt +7 -0
  210. data/ext/uchardet/test/es/iso-8859-1.txt +5 -0
  211. data/ext/uchardet/test/es/iso-8859-15.txt +5 -0
  212. data/ext/uchardet/test/es/utf-8.txt +5 -0
  213. data/ext/uchardet/test/es/windows-1252.txt +5 -0
  214. data/ext/uchardet/test/et/iso-8859-13.txt +6 -0
  215. data/ext/uchardet/test/et/iso-8859-15.txt +6 -0
  216. data/ext/uchardet/test/et/iso-8859-4.txt +6 -0
  217. data/ext/uchardet/test/et/utf-8.txt +6 -0
  218. data/ext/uchardet/test/et/windows-1252.txt +6 -0
  219. data/ext/uchardet/test/et/windows-1257.txt +6 -0
  220. data/ext/uchardet/test/fi/iso-8859-1.txt +8 -0
  221. data/ext/uchardet/test/fi/utf-8.txt +8 -0
  222. data/ext/uchardet/test/fr/iso-8859-1.txt +5 -0
  223. data/ext/uchardet/test/fr/iso-8859-15.txt +16 -0
  224. data/ext/uchardet/test/fr/utf-16.be +0 -0
  225. data/ext/uchardet/test/fr/utf-32.le +0 -0
  226. data/ext/uchardet/test/fr/utf-8.txt +14 -0
  227. data/ext/uchardet/test/fr/windows-1252.txt +3 -0
  228. data/ext/uchardet/test/ga/iso-8859-1.txt +6 -0
  229. data/ext/uchardet/test/ga/utf-8.txt +6 -0
  230. data/ext/uchardet/test/ga/windows-1252.txt +6 -0
  231. data/ext/uchardet/test/he/iso-8859-8.txt +2 -0
  232. data/ext/uchardet/test/he/utf-8.txt +3 -0
  233. data/ext/uchardet/test/he/windows-1255.txt +1 -0
  234. data/ext/uchardet/test/hr/ibm852.txt +4 -0
  235. data/ext/uchardet/test/hr/iso-8859-13.txt +4 -0
  236. data/ext/uchardet/test/hr/iso-8859-16.txt +4 -0
  237. data/ext/uchardet/test/hr/iso-8859-2.txt +4 -0
  238. data/ext/uchardet/test/hr/mac-centraleurope.txt +4 -0
  239. data/ext/uchardet/test/hr/utf-8.txt +4 -0
  240. data/ext/uchardet/test/hr/windows-1250.txt +4 -0
  241. data/ext/uchardet/test/hu/iso-8859-2.txt +3 -0
  242. data/ext/uchardet/test/hu/windows-1250.txt +1 -0
  243. data/ext/uchardet/test/it/iso-8859-1.txt +18 -0
  244. data/ext/uchardet/test/it/utf-8.txt +18 -0
  245. data/ext/uchardet/test/ja/euc-jp.txt +10 -0
  246. data/ext/uchardet/test/ja/iso-2022-jp.txt +8 -0
  247. data/ext/uchardet/test/ja/shift_jis.txt +1 -0
  248. data/ext/uchardet/test/ja/utf-16be.txt +0 -0
  249. data/ext/uchardet/test/ja/utf-16le.txt +0 -0
  250. data/ext/uchardet/test/ja/utf-8.txt +9 -0
  251. data/ext/uchardet/test/ko/iso-2022-kr.txt +8 -0
  252. data/ext/uchardet/test/ko/uhc.smi +16 -0
  253. data/ext/uchardet/test/ko/utf-16.le +0 -0
  254. data/ext/uchardet/test/ko/utf-32.be +0 -0
  255. data/ext/uchardet/test/ko/utf-8.txt +3 -0
  256. data/ext/uchardet/test/lt/iso-8859-10.txt +3 -0
  257. data/ext/uchardet/test/lt/iso-8859-13.txt +3 -0
  258. data/ext/uchardet/test/lt/iso-8859-4.txt +3 -0
  259. data/ext/uchardet/test/lt/utf-8.txt +3 -0
  260. data/ext/uchardet/test/lv/iso-8859-10.txt +6 -0
  261. data/ext/uchardet/test/lv/iso-8859-13.txt +6 -0
  262. data/ext/uchardet/test/lv/iso-8859-4.txt +6 -0
  263. data/ext/uchardet/test/lv/utf-8.txt +6 -0
  264. data/ext/uchardet/test/mt/iso-8859-3.txt +4 -0
  265. data/ext/uchardet/test/mt/utf-8.txt +4 -0
  266. data/ext/uchardet/test/pl/ibm852.txt +3 -0
  267. data/ext/uchardet/test/pl/iso-8859-13.txt +3 -0
  268. data/ext/uchardet/test/pl/iso-8859-16.txt +3 -0
  269. data/ext/uchardet/test/pl/iso-8859-2.txt +3 -0
  270. data/ext/uchardet/test/pl/mac-centraleurope.txt +3 -0
  271. data/ext/uchardet/test/pl/utf-8.txt +3 -0
  272. data/ext/uchardet/test/pl/windows-1250.txt +3 -0
  273. data/ext/uchardet/test/pt/iso-8859-1.txt +6 -0
  274. data/ext/uchardet/test/pt/utf-8.txt +6 -0
  275. data/ext/uchardet/test/ro/ibm852.txt +9 -0
  276. data/ext/uchardet/test/ro/iso-8859-16.txt +9 -0
  277. data/ext/uchardet/test/ro/utf-8.txt +9 -0
  278. data/ext/uchardet/test/ro/windows-1250.txt +9 -0
  279. data/ext/uchardet/test/ru/ibm855.txt +5 -0
  280. data/ext/uchardet/test/ru/ibm866.txt +11 -0
  281. data/ext/uchardet/test/ru/iso-8859-5.txt +3 -0
  282. data/ext/uchardet/test/ru/koi8-r.txt +1 -0
  283. data/ext/uchardet/test/ru/mac-cyrillic.txt +9 -0
  284. data/ext/uchardet/test/ru/windows-1251.txt +4 -0
  285. data/ext/uchardet/test/sk/ibm852.txt +3 -0
  286. data/ext/uchardet/test/sk/iso-8859-2.txt +3 -0
  287. data/ext/uchardet/test/sk/mac-centraleurope.txt +3 -0
  288. data/ext/uchardet/test/sk/utf-8.txt +3 -0
  289. data/ext/uchardet/test/sk/windows-1250.txt +3 -0
  290. data/ext/uchardet/test/sl/ibm852.txt +9 -0
  291. data/ext/uchardet/test/sl/iso-8859-16.txt +9 -0
  292. data/ext/uchardet/test/sl/iso-8859-2.txt +9 -0
  293. data/ext/uchardet/test/sl/mac-centraleurope.txt +9 -0
  294. data/ext/uchardet/test/sl/utf-8.txt +9 -0
  295. data/ext/uchardet/test/sl/windows-1250.txt +9 -0
  296. data/ext/uchardet/test/sv/iso-8859-1.txt +10 -0
  297. data/ext/uchardet/test/sv/utf-8.txt +10 -0
  298. data/ext/uchardet/test/sv/windows-1252.txt +10 -0
  299. data/ext/uchardet/test/th/iso-8859-11.txt +5 -0
  300. data/ext/uchardet/test/th/tis-620.txt +5 -0
  301. data/ext/uchardet/test/th/utf-8.txt +1 -0
  302. data/ext/uchardet/test/tr/iso-8859-3.txt +13 -0
  303. data/ext/uchardet/test/tr/iso-8859-9.txt +13 -0
  304. data/ext/uchardet/test/uchardet-tests.c +130 -0
  305. data/ext/uchardet/test/vi/utf-8.txt +4 -0
  306. data/ext/uchardet/test/vi/viscii.txt +4 -0
  307. data/ext/uchardet/test/vi/windows-1258.txt +4 -0
  308. data/ext/uchardet/test/zh/big5.txt +1 -0
  309. data/ext/uchardet/test/zh/euc-tw.txt +1 -0
  310. data/ext/uchardet/test/zh/gb18030.txt +1 -0
  311. data/ext/uchardet/test/zh/utf-8.txt +1 -0
  312. data/ext/uchardet/uchardet.doap +51 -0
  313. data/ext/uchardet/uchardet.pc.in +10 -0
  314. data/lib/cchardet.rb +56 -0
  315. data/lib/cchardet/lib_finder.rb +32 -0
  316. data/lib/cchardet/version.rb +5 -0
  317. metadata +362 -0
@@ -0,0 +1,113 @@
1
+ = Logs of language model for Turkish (tr) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2015-12-04 02:22:03.929245
5
+ - Maximum depth: 3
6
+ - Max number of pages: 50
7
+
8
+ == Parsed pages ==
9
+
10
+ Ana_Sayfa (revision 16293313)
11
+ 1048 (revision 12894005)
12
+ 1131 (revision 14840814)
13
+ 16. yüzyıl (revision 15185081)
14
+ 1859 (revision 16014427)
15
+ 1866 (revision 16120346)
16
+ 1869 (revision 12888270)
17
+ 1892 (revision 13955858)
18
+ 1895 (revision 15334635)
19
+ 1902 (revision 16283638)
20
+ 1906 (revision 15874323)
21
+ 1918 (revision 16099474)
22
+ 1926 (revision 16180584)
23
+ 1927 (revision 15370980)
24
+ 1940 (revision 15370990)
25
+ 1943 (revision 16091797)
26
+ 1944 (revision 16247827)
27
+ 1945 (revision 16281147)
28
+ 1948 (revision 15443886)
29
+ 1961 (revision 15799529)
30
+ 1964 (revision 16085332)
31
+ 1975 (revision 15006928)
32
+ 1980 (revision 16213240)
33
+ 1981 (revision 16295456)
34
+ 1983 (revision 16327128)
35
+ 1993 (revision 16300456)
36
+ 2002 (revision 16297206)
37
+ 2015 (revision 16328338)
38
+ 24 Ekim (revision 16213661)
39
+ 4 Aralık (revision 16341162)
40
+ ABD (revision 16325951)
41
+ ABD Senatosu (revision 15970439)
42
+ Adam Horowitz (revision 14362106)
43
+ Akçe (revision 16261547)
44
+ Altın Takım (revision 13503001)
45
+ American Broadcasting Company (revision 16055235)
46
+ Amerika Birleşik Devletleri (revision 16325951)
47
+ Ana Sayfa/Kardeş projeler (revision 16293313)
48
+ Ana Sayfa/Kategoriler (revision 16293313)
49
+ Aptullah Kuran (revision 15744893)
50
+ Avrupa (revision 16299756)
51
+ Ayasofya (revision 16305207)
52
+ BM Güvenlik Konseyi (revision 16085518)
53
+ Birleşmiş Milletler (revision 16258474)
54
+ Budapeşte (revision 16219173)
55
+ CIA (revision 16054325)
56
+ Charlie Pace (revision 16129416)
57
+ Cuma (revision 14197127)
58
+ Desmond Hume (revision 16035300)
59
+ Diğerleri (Lost) (revision 16329444)
60
+
61
+ == End of Parsed pages ==
62
+
63
+ - Wikipedia parsing ended at: 2015-12-04 02:24:44.728803
64
+
65
+ 48 characters appeared 267623 times.
66
+
67
+ First 36 characters:
68
+ [ 0] Char a: 12.311722086666691 %
69
+ [ 1] Char e: 8.716365932673948 %
70
+ [ 2] Char i: 8.507863673899479 %
71
+ [ 3] Char n: 7.322987934519828 %
72
+ [ 4] Char r: 6.979220769515326 %
73
+ [ 5] Char l: 6.609297407173524 %
74
+ [ 6] Char ı: 4.514933320379788 %
75
+ [ 7] Char d: 4.3475336574210734 %
76
+ [ 8] Char t: 4.2634601659797555 %
77
+ [ 9] Char k: 4.240293248338147 %
78
+ [10] Char s: 3.929781819948211 %
79
+ [11] Char m: 3.429451130881875 %
80
+ [12] Char u: 3.0998830444319063 %
81
+ [13] Char y: 2.9212735826143494 %
82
+ [14] Char o: 2.7135186437638024 %
83
+ [15] Char b: 2.3129551645411643 %
84
+ [16] Char ü: 1.8305601536489764 %
85
+ [17] Char ş: 1.5988909772328985 %
86
+ [18] Char z: 1.2267256551193282 %
87
+ [19] Char h: 1.1983274980102607 %
88
+ [20] Char v: 1.194964558352608 %
89
+ [21] Char c: 1.143773143563894 %
90
+ [22] Char g: 1.1004285879763698 %
91
+ [23] Char p: 1.0178497363828969 %
92
+ [24] Char ç: 0.8295251155543433 %
93
+ [25] Char ğ: 0.8205572764672693 %
94
+ [26] Char f: 0.7047226882592303 %
95
+ [27] Char ö: 0.6710932916827029 %
96
+ [28] Char j: 0.1296600068006113 %
97
+ [29] Char w: 0.11359262843627041 %
98
+ [30] Char â: 0.07846859201189733 %
99
+ [31] Char î: 0.04147625577771716 %
100
+ [32] Char x: 0.024287897527492032 %
101
+ [33] Char é: 0.014946398478456635 %
102
+ [34] Char q: 0.01083613889688106 %
103
+ [35] Char û: 0.009341499049035397 %
104
+
105
+ The first 36 characters have an accumulated ratio of 0.99980569681978.
106
+
107
+ 935 sequences found.
108
+
109
+ First 512 (typical positive ratio): 0.991865243864388
110
+ Next 512 (512-1024): 3.7365996196141585e-06
111
+ Rest: 2.949029909160572e-17
112
+
113
+ - Processing end: 2015-12-04 02:24:44.883537
@@ -0,0 +1,121 @@
1
+ = Logs of language model for Vietnamese (vi) =
2
+
3
+ - Generated by BuildLangModel.py
4
+ - Started: 2016-02-13 03:37:17.480303
5
+ - Maximum depth: 3
6
+ - Max number of pages: 40
7
+
8
+ == Parsed pages ==
9
+
10
+ Chữ_Quốc_ngữ (revision 22887853)
11
+ 1651 (revision 21455247)
12
+ 1773 (revision 21354755)
13
+ 1815 (revision 21361292)
14
+ 1838 (revision 21361314)
15
+ 1865 (revision 21361338)
16
+ 1869 (revision 21361342)
17
+ 1888 (revision 21389506)
18
+ 1902 (revision 21354811)
19
+ 1918 (revision 21354828)
20
+ 1919 (revision 21354829)
21
+ 1938 (revision 21354849)
22
+ 1945 (revision 21354857)
23
+ 22 tháng 2 (revision 21376086)
24
+ 26 tháng 11 (revision 22579845)
25
+ 28 tháng 12 (revision 22475308)
26
+ A (revision 22549334)
27
+ ASCII (revision 22528409)
28
+ Alexandre de Rhodes (revision 22859954)
29
+ Antonio Barbosa (revision 22145269)
30
+ B (revision 22836557)
31
+ BBC (revision 22863903)
32
+ Biên khảo (revision 22531516)
33
+ Bán nguyên âm (revision 22655600)
34
+ Bình luận (revision 22117664)
35
+ Bảng chữ cái Bồ Đào Nha (revision 22887853)
36
+ Bảng chữ cái Hy Lạp (revision 21362081)
37
+ Bảng chữ cái Latinh (revision 22442448)
38
+ Bắc Kỳ (revision 22393289)
39
+ Bồ Đào Nha (revision 22620858)
40
+ C (revision 21341881)
41
+ Cao Xuân Dục (revision 22620201)
42
+ Chính tả (revision 22187359)
43
+ Chính tả tiếng Việt (revision 20897580)
44
+ Chữ Hán (revision 22889609)
45
+ Chữ Nôm (revision 22781506)
46
+ Chữ cái (revision 22169220)
47
+ Công giáo (revision 22173119)
48
+ D (revision 21447691)
49
+
50
+ == End of Parsed pages ==
51
+
52
+ - Wikipedia parsing ended at: 2016-02-13 03:42:06.560479
53
+
54
+ 101 characters appeared 222814 times.
55
+
56
+ First 55 characters:
57
+ [ 0] Char n: 11.262308472537633 %
58
+ [ 1] Char h: 8.881398834902654 %
59
+ [ 2] Char t: 7.022898022565907 %
60
+ [ 3] Char c: 6.365398942615815 %
61
+ [ 4] Char i: 6.198443544840091 %
62
+ [ 5] Char g: 5.591210606155808 %
63
+ [ 6] Char a: 3.5998635633308496 %
64
+ [ 7] Char u: 2.8499106878382867 %
65
+ [ 8] Char m: 2.615185760320267 %
66
+ [ 9] Char o: 2.6012728105056238 %
67
+ [10] Char đ: 2.222032726848403 %
68
+ [11] Char r: 2.1102803234985235 %
69
+ [12] Char à: 2.0447548179198796 %
70
+ [13] Char v: 1.9437737305555307 %
71
+ [14] Char l: 1.9119085874316697 %
72
+ [15] Char á: 1.7539292863105551 %
73
+ [16] Char p: 1.6453185167897888 %
74
+ [17] Char b: 1.541195795596327 %
75
+ [18] Char ư: 1.4397659033992478 %
76
+ [19] Char s: 1.3760356171515256 %
77
+ [20] Char y: 1.280440187779942 %
78
+ [21] Char e: 1.2454334108269678 %
79
+ [22] Char d: 1.1251537156552103 %
80
+ [23] Char ế: 1.071745940560288 %
81
+ [24] Char k: 1.0695019163966357 %
82
+ [25] Char â: 0.9658280000359044 %
83
+ [26] Char ữ: 0.9604423420431392 %
84
+ [27] Char ê: 0.8374698178749989 %
85
+ [28] Char ệ: 0.7459136319979893 %
86
+ [29] Char ô: 0.7073164163831717 %
87
+ [30] Char ạ: 0.6727584442629277 %
88
+ [31] Char ộ: 0.6705144200992756 %
89
+ [32] Char ố: 0.6476253736300233 %
90
+ [33] Char ó: 0.6072329386842837 %
91
+ [34] Char ả: 0.5484395055965963 %
92
+ [35] Char ủ: 0.5475418959311353 %
93
+ [36] Char q: 0.5138815334763525 %
94
+ [37] Char ợ: 0.48560682901433483 %
95
+ [38] Char ờ: 0.4851580241816044 %
96
+ [39] Char ể: 0.4748355130288043 %
97
+ [40] Char ớ: 0.4676546357051173 %
98
+ [41] Char ấ: 0.418286104104769 %
99
+ [42] Char ị: 0.40212913012647317 %
100
+ [43] Char ầ: 0.3904602044754818 %
101
+ [44] Char ọ: 0.3801376933226817 %
102
+ [45] Char ề: 0.3787912788244904 %
103
+ [46] Char ơ: 0.3590438661843511 %
104
+ [47] Char í: 0.35679984202069887 %
105
+ [48] Char ụ: 0.35276059852612496 %
106
+ [49] Char ậ: 0.3469261357006292 %
107
+ [50] Char ì: 0.32762752789322036 %
108
+ [51] Char ă: 0.3253835037295682 %
109
+ [52] Char ứ: 0.29665999443482005 %
110
+ [53] Char ồ: 0.29665999443482005 %
111
+ [54] Char x: 0.2939671654384374 %
112
+
113
+ The first 55 characters have an accumulated ratio of 0.9603301408349568.
114
+
115
+ 1494 sequences found.
116
+
117
+ First 512 (typical positive ratio): 0.9321889118082535
118
+ Next 512 (512-1024): 0.009604423420431392
119
+ Rest: 0.0068905733918831966
120
+
121
+ - Processing end: 2016-02-13 03:42:07.174723
@@ -0,0 +1,63 @@
1
+ # Supporting new or Updating languages #
2
+
3
+ We generate statistical language data using Wikipedia as natural
4
+ language text resource.
5
+
6
+ Right now, we have automated scripts only to generate statistical data
7
+ for single-byte encodings. Multi-byte encodings usually require more
8
+ in-depth knowledge of their specifications.
9
+
10
+ ## New single-byte encoding ##
11
+
12
+ Uchardet uses language data, and therefore rather than supporting a
13
+ charset, we in fact support a (language, charset) pair. So for
14
+ instance if uchardet supports (French, ISO-8859-15), it should be able
15
+ to recognize French text encoded in ISO-8859-15, but may fail at
16
+ detecting ISO-8859-15 for non-supported languages.
17
+
18
+ This is why, though less flexible, it also makes uchardet much more
19
+ accurate than other detection systems, as well as making it an efficient
20
+ language recognition system.
21
+ Since many single-byte charsets actually share the same layout (or very
22
+ similar ones), it is actually impossible to have an accurate single-byte
23
+ encoding detector for random text.
24
+
25
+ Therefore you need to describe the language and the codepoint layouts of
26
+ every charset you want to add support for.
27
+
28
+ I recommend having a look at langs/fr.py which is heavily commented as
29
+ a base of a new language description, and charsets/windows-1252.py as a
30
+ base for a new charset layout (note that charset layouts can be shared
31
+ between languages. If yours is already there, you have nothing to do).
32
+ The important names in the charset file are:
33
+
34
+ - `name`: an iconv-compatible name.
35
+ - `charmap`: fill it with CTR (control character), SYM (symbol), NUM
36
+ (number), LET (letter), RET (return/line feed), ILL (illegal codepoint).
37
+
38
+ ## Tools ##
39
+
40
+ You must install Python 3 and the [`Wikipedia` Python
41
+ tool](https://github.com/goldsmith/Wikipedia).
42
+
43
+ ## Run script ##
44
+
45
+ Let's say you added (or modified) support for French (`fr`), run:
46
+
47
+ > ./BuildLangModel.py fr --max-page=100 --max-depth=4
48
+
49
+ The options can be changed to any value. Bigger values mean the script
50
+ will process more data, so more processing time now, but uchardet may
51
+ possibly be more accurate in the end.
52
+
53
+ ## Updating core code ##
54
+
55
+ If you were only updating data for a language model, you have nothing
56
+ else to do. Just build `uchardet` again and test it.
57
+
58
+ If you were creating new models though, you will have to add these in
59
+ src/nsSBCSGroupProber.cpp and src/nsSBCharSetProber.h, and increase the
60
+ value of `NUM_OF_SBCS_PROBERS` in src/nsSBCSGroupProber.h.
61
+ Finally add the new file in src/CMakeLists.txt.
62
+
63
+ I will be looking to make this step more straightforward in the future.
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # ##### BEGIN LICENSE BLOCK #####
5
+ # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
+ #
7
+ # The contents of this file are subject to the Mozilla Public License Version
8
+ # 1.1 (the "License"); you may not use this file except in compliance with
9
+ # the License. You may obtain a copy of the License at
10
+ # http://www.mozilla.org/MPL/
11
+ #
12
+ # Software distributed under the License is distributed on an "AS IS" basis,
13
+ # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
+ # for the specific language governing rights and limitations under the
15
+ # License.
16
+ #
17
+ # The Original Code is Mozilla Universal charset detector code.
18
+ #
19
+ # The Initial Developer of the Original Code is
20
+ # Netscape Communications Corporation.
21
+ # Portions created by the Initial Developer are Copyright (C) 2001
22
+ # the Initial Developer. All Rights Reserved.
23
+ #
24
+ # Contributor(s):
25
+ # Jehan <jehan@girinstud.io>
26
+ #
27
+ # Alternatively, the contents of this file may be used under the terms of
28
+ # either the GNU General Public License Version 2 or later (the "GPL"), or
29
+ # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
+ # in which case the provisions of the GPL or the LGPL are applicable instead
31
+ # of those above. If you wish to allow use of your version of this file only
32
+ # under the terms of either the GPL or the LGPL, and not to allow others to
33
+ # use your version of this file under the terms of the MPL, indicate your
34
+ # decision by deleting the provisions above and replace them with the notice
35
+ # and other provisions required by the GPL or the LGPL. If you do not delete
36
+ # the provisions above, a recipient may use your version of this file under
37
+ # the terms of any one of the MPL, the GPL or the LGPL.
38
+ #
39
+ # ##### END LICENSE BLOCK #####
40
+
41
+ # Illegal codepoints.
42
+ ILL = 255
43
+ # Control characters
44
+ CTR = 254
45
+ # Symbols and punctuation.
46
+ SYM = 253
47
+ # Return/Line feeds.
48
+ RET = 252
49
+ # Numbers 0-9.
50
+ NUM = 251
51
+
52
+ # Letters (should be all the rest).
53
+ LET = 0
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # ##### BEGIN LICENSE BLOCK #####
5
+ # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
+ #
7
+ # The contents of this file are subject to the Mozilla Public License Version
8
+ # 1.1 (the "License"); you may not use this file except in compliance with
9
+ # the License. You may obtain a copy of the License at
10
+ # http://www.mozilla.org/MPL/
11
+ #
12
+ # Software distributed under the License is distributed on an "AS IS" basis,
13
+ # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
+ # for the specific language governing rights and limitations under the
15
+ # License.
16
+ #
17
+ # The Original Code is Mozilla Universal charset detector code.
18
+ #
19
+ # The Initial Developer of the Original Code is
20
+ # Netscape Communications Corporation.
21
+ # Portions created by the Initial Developer are Copyright (C) 2001
22
+ # the Initial Developer. All Rights Reserved.
23
+ #
24
+ # Contributor(s):
25
+ # Jehan <jehan@girinstud.io>
26
+ #
27
+ # Alternatively, the contents of this file may be used under the terms of
28
+ # either the GNU General Public License Version 2 or later (the "GPL"), or
29
+ # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
+ # in which case the provisions of the GPL or the LGPL are applicable instead
31
+ # of those above. If you wish to allow use of your version of this file only
32
+ # under the terms of either the GPL or the LGPL, and not to allow others to
33
+ # use your version of this file under the terms of the MPL, indicate your
34
+ # decision by deleting the provisions above and replace them with the notice
35
+ # and other provisions required by the GPL or the LGPL. If you do not delete
36
+ # the provisions above, a recipient may use your version of this file under
37
+ # the terms of any one of the MPL, the GPL or the LGPL.
38
+ #
39
+ # ##### END LICENSE BLOCK #####
40
+
41
+ import importlib
42
+ import sys
43
+ import os
44
+
45
def load(charset_names):
    '''
    Load a list of charsets.

    This function will return a dictionary of charsets from our
    charset database.

    Each name is lowercased and imported as a module from the
    ``../charsets`` directory relative to this file's directory. The
    module search path is narrowed to that single directory while the
    imports run and is restored afterwards, whether or not they succeed.

    :param charset_names: a list of supported charset names.
    :return: a dictionary with all the loaded charsets, keyed by the
             ``name`` attribute of each imported module.
    :rtype: dict
    :raises SystemExit: with status 1 when a charset module cannot be
                        imported (a message is printed to stderr first).
    '''
    charsets = {}

    # Temporarily change the search path for modules.
    sys_path_backup = sys.path
    current_dir = os.path.dirname(os.path.realpath(__file__))
    sys.path = [os.path.join(current_dir, os.pardir, 'charsets')]

    try:
        for name in charset_names:
            try:
                charset = importlib.import_module(name.lower())
            except ImportError:
                print('Unknown charset "{}": '
                      'file "charsets/{}.py" does not exist.'.format(name, name.lower()),
                      file=sys.stderr)
                # sys.exit() rather than the bare exit() helper: the latter
                # is injected by the `site` module and may be missing.
                sys.exit(1)
            charsets[charset.name] = charset
    finally:
        # Set back the default module paths, even when leaving through
        # SystemExit or an unexpected exception.
        sys.path = sys_path_backup
    return charsets
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # ##### BEGIN LICENSE BLOCK #####
5
+ # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6
+ #
7
+ # The contents of this file are subject to the Mozilla Public License Version
8
+ # 1.1 (the "License"); you may not use this file except in compliance with
9
+ # the License. You may obtain a copy of the License at
10
+ # http://www.mozilla.org/MPL/
11
+ #
12
+ # Software distributed under the License is distributed on an "AS IS" basis,
13
+ # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
+ # for the specific language governing rights and limitations under the
15
+ # License.
16
+ #
17
+ # The Original Code is Mozilla Universal charset detector code.
18
+ #
19
+ # The Initial Developer of the Original Code is
20
+ # Netscape Communications Corporation.
21
+ # Portions created by the Initial Developer are Copyright (C) 2001
22
+ # the Initial Developer. All Rights Reserved.
23
+ #
24
+ # Contributor(s):
25
+ # Jehan <jehan@girinstud.io>
26
+ #
27
+ # Alternatively, the contents of this file may be used under the terms of
28
+ # either the GNU General Public License Version 2 or later (the "GPL"), or
29
+ # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
+ # in which case the provisions of the GPL or the LGPL are applicable instead
31
+ # of those above. If you wish to allow use of your version of this file only
32
+ # under the terms of either the GPL or the LGPL, and not to allow others to
33
+ # use your version of this file under the terms of the MPL, indicate your
34
+ # decision by deleting the provisions above and replace them with the notice
35
+ # and other provisions required by the GPL or the LGPL. If you do not delete
36
+ # the provisions above, a recipient may use your version of this file under
37
+ # the terms of any one of the MPL, the GPL or the LGPL.
38
+ #
39
+ # ##### END LICENSE BLOCK #####
40
+
41
+ from codepoints import *
42
+
43
# Canonical charset name, plus the other names it is known by.
name = 'IBM852'
aliases = ['CP852']

# Language codes associated with this charset, split into two groups.
# NOTE(review): presumably 'complete' means the charset covers the whole
# alphabet of the language and 'incomplete' only part of it; confirm
# against the script that consumes this table.
language = {
    'complete': [
        'bs', 'hr', 'cs', 'de', 'hu', 'pl', 'sr', 'sk', 'sl',
        'hsb', 'dsb', 'tk',
    ],
    'incomplete': ['ro'],
}
52
+
53
# Category of every byte value in this charset: 256 entries laid out as a
# 16x16 grid. The row label gives the high hex digit of the byte and the
# column label the low hex digit. The category names come from the
# `codepoints` module.
charmap = [
  # X0   X1   X2   X3   X4   X5   X6   X7   X8   X9   XA   XB   XC   XD   XE   XF
    CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, RET, CTR, CTR, RET, CTR, CTR,  # 0X
    CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR, CTR,  # 1X
    SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM,  # 2X
    NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, SYM, SYM, SYM, SYM, SYM, SYM,  # 3X
    SYM, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,  # 4X
    LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, SYM, SYM, SYM, SYM, SYM,  # 5X
    SYM, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,  # 6X
    LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, SYM, SYM, SYM, SYM, CTR,  # 7X
    LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,  # 8X
    LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, SYM, LET,  # 9X
    LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, SYM, LET, LET, LET, SYM, SYM,  # AX
    SYM, SYM, SYM, SYM, SYM, LET, LET, LET, LET, SYM, SYM, SYM, SYM, LET, LET, SYM,  # BX
    SYM, SYM, SYM, SYM, SYM, SYM, LET, LET, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM,  # CX
    LET, LET, LET, LET, LET, LET, LET, LET, LET, SYM, SYM, SYM, SYM, LET, LET, SYM,  # DX
    LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, LET, SYM,  # EX
    SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, SYM, LET, LET, LET, SYM, SYM,  # FX
]