democracy 0.0.56

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of democracy might be problematic. Click here for more details.

Files changed (647) hide show
  1. checksums.yaml +7 -0
  2. data/LIBERTY +310 -0
  3. data/README +310 -0
  4. data/README2 +14 -0
  5. data/READ_THIS_TO_CHANGE_THINGS +310 -0
  6. data/Resume.pdf +0 -0
  7. data/Ruby Installer.exe +0 -0
  8. data/VERSION_NUMBER +1 -0
  9. data/WE_NEEDNT_BE_ENSLAVED +310 -0
  10. data/_.html +460 -0
  11. data/__index.html +11 -0
  12. data/a.html +21 -0
  13. data/ac +73 -0
  14. data/all_code.cpp +280 -0
  15. data/all_code.js +30 -0
  16. data/app_topics +93 -0
  17. data/background.jpg +0 -0
  18. data/better_string +26 -0
  19. data/bin/abs +0 -0
  20. data/bin/ac +34 -0
  21. data/bin/add +0 -0
  22. data/bin/append +14 -0
  23. data/bin/args +0 -0
  24. data/bin/autorequire +85 -0
  25. data/bin/big_num +155 -0
  26. data/bin/black +0 -0
  27. data/bin/blue +0 -0
  28. data/bin/bold +0 -0
  29. data/bin/c_repl +76 -0
  30. data/bin/close +3 -0
  31. data/bin/copy +0 -0
  32. data/bin/count +0 -0
  33. data/bin/cpy +0 -0
  34. data/bin/cyan +0 -0
  35. data/bin/democracyinstall.exe +0 -0
  36. data/bin/div +0 -0
  37. data/bin/down_all_links +50 -0
  38. data/bin/dump +0 -0
  39. data/bin/email +127 -0
  40. data/bin/emerald-browser +0 -0
  41. data/bin/emoji +176 -0
  42. data/bin/executable +1 -0
  43. data/bin/exp +0 -0
  44. data/bin/files +0 -0
  45. data/bin/floor +0 -0
  46. data/bin/foreach +49 -0
  47. data/bin/g+ +557 -0
  48. data/bin/get_all_links +11 -0
  49. data/bin/github +55 -0
  50. data/bin/green +0 -0
  51. data/bin/gsearch +159 -0
  52. data/bin/gsub +0 -0
  53. data/bin/gsub_in_place +0 -0
  54. data/bin/last_nth +0 -0
  55. data/bin/lines +0 -0
  56. data/bin/magenta +0 -0
  57. data/bin/mkfile +47 -0
  58. data/bin/mul +0 -0
  59. data/bin/nth +0 -0
  60. data/bin/open +0 -0
  61. data/bin/play +0 -0
  62. data/bin/prepend +14 -0
  63. data/bin/processes +3 -0
  64. data/bin/qt_download +21 -0
  65. data/bin/record +121 -0
  66. data/bin/red +0 -0
  67. data/bin/replace_in_place +0 -0
  68. data/bin/replace_not_in_place +0 -0
  69. data/bin/selectlines +0 -0
  70. data/bin/sort_in_place +0 -0
  71. data/bin/splitfiles +16 -0
  72. data/bin/squeeze +17 -0
  73. data/bin/string +161 -0
  74. data/bin/strip_extensions +13 -0
  75. data/bin/sub +0 -0
  76. data/bin/t +63 -0
  77. data/bin/tracefile +555 -0
  78. data/bin/underline +0 -0
  79. data/bin/undump +0 -0
  80. data/bin/viu +0 -0
  81. data/bin/white +0 -0
  82. data/bin/yellow +0 -0
  83. data/bin/youtube_video +45 -0
  84. data/bin/youtube_video2 +45 -0
  85. data/code.js +28490 -0
  86. data/crystal-gobject/.git/HEAD +1 -0
  87. data/crystal-gobject/.git/config +11 -0
  88. data/crystal-gobject/.git/description +1 -0
  89. data/crystal-gobject/.git/hooks/applypatch-msg.sample +15 -0
  90. data/crystal-gobject/.git/hooks/commit-msg.sample +24 -0
  91. data/crystal-gobject/.git/hooks/fsmonitor-watchman.sample +109 -0
  92. data/crystal-gobject/.git/hooks/post-update.sample +8 -0
  93. data/crystal-gobject/.git/hooks/pre-applypatch.sample +14 -0
  94. data/crystal-gobject/.git/hooks/pre-commit.sample +49 -0
  95. data/crystal-gobject/.git/hooks/pre-merge-commit.sample +13 -0
  96. data/crystal-gobject/.git/hooks/pre-push.sample +53 -0
  97. data/crystal-gobject/.git/hooks/pre-rebase.sample +169 -0
  98. data/crystal-gobject/.git/hooks/pre-receive.sample +24 -0
  99. data/crystal-gobject/.git/hooks/prepare-commit-msg.sample +42 -0
  100. data/crystal-gobject/.git/hooks/update.sample +128 -0
  101. data/crystal-gobject/.git/index +0 -0
  102. data/crystal-gobject/.git/info/exclude +6 -0
  103. data/crystal-gobject/.git/logs/HEAD +1 -0
  104. data/crystal-gobject/.git/logs/refs/heads/main +1 -0
  105. data/crystal-gobject/.git/logs/refs/remotes/origin/HEAD +1 -0
  106. data/crystal-gobject/.git/objects/pack/pack-c21cfd88c6eef3076f2c2aebb658b750e87e10b6.idx +0 -0
  107. data/crystal-gobject/.git/objects/pack/pack-c21cfd88c6eef3076f2c2aebb658b750e87e10b6.pack +0 -0
  108. data/crystal-gobject/.git/packed-refs +15 -0
  109. data/crystal-gobject/.git/refs/heads/main +1 -0
  110. data/crystal-gobject/.git/refs/remotes/origin/HEAD +1 -0
  111. data/crystal-gobject/.github/workflows/ci.yml +22 -0
  112. data/crystal-gobject/.gitignore +14 -0
  113. data/crystal-gobject/LICENSE +27 -0
  114. data/crystal-gobject/README.md +143 -0
  115. data/crystal-gobject/samples/base_class_container.cr +15 -0
  116. data/crystal-gobject/samples/connect_after.cr +35 -0
  117. data/crystal-gobject/samples/crout/crout.cr +59 -0
  118. data/crystal-gobject/samples/e +0 -0
  119. data/crystal-gobject/samples/gdk_window.cr +36 -0
  120. data/crystal-gobject/samples/glist.cr +16 -0
  121. data/crystal-gobject/samples/greeter.cr +34 -0
  122. data/crystal-gobject/samples/greeter.glade +91 -0
  123. data/crystal-gobject/samples/gtk_accel_group.cr +13 -0
  124. data/crystal-gobject/samples/gtk_application.cr +12 -0
  125. data/crystal-gobject/samples/gtk_boxes.cr +44 -0
  126. data/crystal-gobject/samples/gtk_css_styling/css_window.cr +37 -0
  127. data/crystal-gobject/samples/gtk_css_styling/css_window.css +3 -0
  128. data/crystal-gobject/samples/gtk_css_styling/css_window.glade +104 -0
  129. data/crystal-gobject/samples/gtk_editable.cr +14 -0
  130. data/crystal-gobject/samples/gtk_entry.cr +10 -0
  131. data/crystal-gobject/samples/gtk_hello_world.cr +13 -0
  132. data/crystal-gobject/samples/gtk_menu_and_actions.cr +69 -0
  133. data/crystal-gobject/samples/gtk_message_dialog.cr +9 -0
  134. data/crystal-gobject/samples/gtk_subclasses.cr +30 -0
  135. data/crystal-gobject/samples/gtk_tree_view.cr +30 -0
  136. data/crystal-gobject/samples/gtk_tree_view.glade +44 -0
  137. data/crystal-gobject/samples/hello +0 -0
  138. data/crystal-gobject/samples/libgdk-3.a +0 -0
  139. data/crystal-gobject/samples/libgdk_pixbuf-2.0.a +0 -0
  140. data/crystal-gobject/samples/libgtk-3.a +0 -0
  141. data/crystal-gobject/samples/timeout.cr +34 -0
  142. data/crystal-gobject/script/ci.sh +17 -0
  143. data/crystal-gobject/script/release.sh +31 -0
  144. data/crystal-gobject/script/run_with_timeout.cr +14 -0
  145. data/crystal-gobject/script/test +7 -0
  146. data/crystal-gobject/shard.yml +16 -0
  147. data/crystal-gobject/spec/basic_spec.cr +46 -0
  148. data/crystal-gobject/spec/libsample/Makefile +28 -0
  149. data/crystal-gobject/spec/libsample/test-subject.c +107 -0
  150. data/crystal-gobject/spec/libsample/test-subject.h +65 -0
  151. data/crystal-gobject/spec/spec_helper.cr +5 -0
  152. data/crystal-gobject/src/atk/atk.cr +2 -0
  153. data/crystal-gobject/src/closure_data_manager.cr +62 -0
  154. data/crystal-gobject/src/crout.cr +601 -0
  155. data/crystal-gobject/src/g_i_repository/dumper.cr +116 -0
  156. data/crystal-gobject/src/g_i_repository/g_i_repository.cr +46 -0
  157. data/crystal-gobject/src/g_i_repository/info/arg_info.cr +157 -0
  158. data/crystal-gobject/src/g_i_repository/info/base_info.cr +169 -0
  159. data/crystal-gobject/src/g_i_repository/info/callable_info.cr +266 -0
  160. data/crystal-gobject/src/g_i_repository/info/callback_info.cr +54 -0
  161. data/crystal-gobject/src/g_i_repository/info/constant_info.cr +115 -0
  162. data/crystal-gobject/src/g_i_repository/info/enum_info.cr +88 -0
  163. data/crystal-gobject/src/g_i_repository/info/field_info.cr +97 -0
  164. data/crystal-gobject/src/g_i_repository/info/function_info.cr +237 -0
  165. data/crystal-gobject/src/g_i_repository/info/interface_info.cr +97 -0
  166. data/crystal-gobject/src/g_i_repository/info/object_info.cr +170 -0
  167. data/crystal-gobject/src/g_i_repository/info/property_info.cr +93 -0
  168. data/crystal-gobject/src/g_i_repository/info/registered_type_info.cr +62 -0
  169. data/crystal-gobject/src/g_i_repository/info/signal_info.cr +119 -0
  170. data/crystal-gobject/src/g_i_repository/info/struct_info.cr +139 -0
  171. data/crystal-gobject/src/g_i_repository/info/type_info.cr +326 -0
  172. data/crystal-gobject/src/g_i_repository/info/union_info.cr +136 -0
  173. data/crystal-gobject/src/g_i_repository/info/v_func_info.cr +33 -0
  174. data/crystal-gobject/src/g_i_repository/info/value_info.cr +62 -0
  175. data/crystal-gobject/src/g_i_repository/repository.cr +68 -0
  176. data/crystal-gobject/src/g_i_repository/wrapper_generator.cr +103 -0
  177. data/crystal-gobject/src/g_lib/error.cr +34 -0
  178. data/crystal-gobject/src/g_lib/g_lib.cr +81 -0
  179. data/crystal-gobject/src/g_lib/list_iterator.cr +81 -0
  180. data/crystal-gobject/src/g_lib/s_list_iterator.cr +54 -0
  181. data/crystal-gobject/src/g_object/closure.cr +40 -0
  182. data/crystal-gobject/src/g_object/g_object.cr +7 -0
  183. data/crystal-gobject/src/g_object/module_functions.cr +38 -0
  184. data/crystal-gobject/src/g_object/object.cr +104 -0
  185. data/crystal-gobject/src/g_object/type.cr +68 -0
  186. data/crystal-gobject/src/g_object/value.cr +308 -0
  187. data/crystal-gobject/src/gdk/event.cr +34 -0
  188. data/crystal-gobject/src/gdk/gdk.cr +35 -0
  189. data/crystal-gobject/src/generated/g_i_repository/argument.cr +130 -0
  190. data/crystal-gobject/src/generated/g_i_repository/array_type.cr +34 -0
  191. data/crystal-gobject/src/generated/g_i_repository/attribute_iter.cr +65 -0
  192. data/crystal-gobject/src/generated/g_i_repository/base_info.cr +134 -0
  193. data/crystal-gobject/src/generated/g_i_repository/direction.cr +33 -0
  194. data/crystal-gobject/src/generated/g_i_repository/field_info_flags.cr +34 -0
  195. data/crystal-gobject/src/generated/g_i_repository/function_info_flags.cr +38 -0
  196. data/crystal-gobject/src/generated/g_i_repository/g_i_repository.cr +7 -0
  197. data/crystal-gobject/src/generated/g_i_repository/info_type.cr +50 -0
  198. data/crystal-gobject/src/generated/g_i_repository/module_functions.cr +816 -0
  199. data/crystal-gobject/src/generated/g_i_repository/repository.cr +214 -0
  200. data/crystal-gobject/src/generated/g_i_repository/repository_class.cr +55 -0
  201. data/crystal-gobject/src/generated/g_i_repository/repository_error.cr +34 -0
  202. data/crystal-gobject/src/generated/g_i_repository/repository_load_flags.cr +33 -0
  203. data/crystal-gobject/src/generated/g_i_repository/repository_private.cr +46 -0
  204. data/crystal-gobject/src/generated/g_i_repository/scope_type.cr +34 -0
  205. data/crystal-gobject/src/generated/g_i_repository/transfer.cr +33 -0
  206. data/crystal-gobject/src/generated/g_i_repository/type_tag.cr +52 -0
  207. data/crystal-gobject/src/generated/g_i_repository/typelib.cr +61 -0
  208. data/crystal-gobject/src/generated/g_i_repository/unresolved_info.cr +45 -0
  209. data/crystal-gobject/src/generated/g_i_repository/v_func_info_flags.cr +36 -0
  210. data/crystal-gobject/src/generated/g_object/param_flags.cr +44 -0
  211. data/crystal-gobject/src/generated/g_object/signal_flags.cr +42 -0
  212. data/crystal-gobject/src/generated/lib_g_i_repository.cr +302 -0
  213. data/crystal-gobject/src/generated/lib_g_lib.cr +2261 -0
  214. data/crystal-gobject/src/generated/lib_g_object.cr +874 -0
  215. data/crystal-gobject/src/generator/build_namespace.cr +22 -0
  216. data/crystal-gobject/src/generator/doc.cr +259 -0
  217. data/crystal-gobject/src/generator/dump.cr +55 -0
  218. data/crystal-gobject/src/generator/generator.cr +22 -0
  219. data/crystal-gobject/src/generator/namespace.cr +192 -0
  220. data/crystal-gobject/src/generator/stage2.cr +113 -0
  221. data/crystal-gobject/src/generator/stage3.cr +3 -0
  222. data/crystal-gobject/src/gio/gio.cr +4 -0
  223. data/crystal-gobject/src/gobject.cr +54 -0
  224. data/crystal-gobject/src/gtk/autorun.cr +16 -0
  225. data/crystal-gobject/src/gtk/gtk.cr +61 -0
  226. data/crystal-gobject/src/notify/notification.cr +158 -0
  227. data/crystal-gobject/src/notify/notify.cr +4 -0
  228. data/crystal-gobject/src/pointer_iterator.cr +49 -0
  229. data/crystal-gobject/src/wrapped_type.cr +53 -0
  230. data/democracy.gemspec +14 -0
  231. data/dictate +183 -0
  232. data/emerald-browser/.qmake.stash +23 -0
  233. data/emerald-browser/Makefile +469 -0
  234. data/emerald-browser/OUT +90 -0
  235. data/emerald-browser/a.out +0 -0
  236. data/emerald-browser/empty +0 -0
  237. data/emerald-browser/empty.c +1 -0
  238. data/emerald-browser/open +0 -0
  239. data/emerald-browser/open.c +57 -0
  240. data/emerald-browser/qmake.pro +23 -0
  241. data/emerald-browser/src/OUT +438 -0
  242. data/emerald-browser/src/a.cpp +303 -0
  243. data/emerald-browser/src/a.out +0 -0
  244. data/emerald-browser/src/ss.h +12 -0
  245. data/emerald-browser/src/t/fullscreennotification.cpp +98 -0
  246. data/emerald-browser/src/t/fullscreennotification.h +72 -0
  247. data/emerald-browser/src/t/fullscreenwindow.cpp +99 -0
  248. data/emerald-browser/src/t/fullscreenwindow.h +76 -0
  249. data/emerald-browser/src/t/main.cpp +64 -0
  250. data/emerald-browser/src/t/mainwindow.cpp +83 -0
  251. data/emerald-browser/src/t/mainwindow.h +75 -0
  252. data/emerald-browser/test/a.cpp +2 -0
  253. data/f +0 -0
  254. data/faster_googler +31 -0
  255. data/favicon.ico +0 -0
  256. data/foo +80 -0
  257. data/foo.c +39 -0
  258. data/foo.cpp +36 -0
  259. data/foo.js +25 -0
  260. data/free_icons/site/bulb +0 -0
  261. data/free_icons/site/chess +0 -0
  262. data/free_icons/site/compiz.svg +463 -0
  263. data/free_icons/site/cursor.svg +1 -0
  264. data/free_icons/site/dock +0 -0
  265. data/free_icons/site/gnu +0 -0
  266. data/free_icons/site/info.svg +200 -0
  267. data/free_icons/site/iphone.svg +222 -0
  268. data/free_icons/site/laptop +0 -0
  269. data/free_icons/site/magic.svg +856 -0
  270. data/free_icons/site/man +0 -0
  271. data/free_icons/site/mint +0 -0
  272. data/free_icons/site/mouse +0 -0
  273. data/free_icons/site/paint.svg +2640 -0
  274. data/free_icons/site/present +0 -0
  275. data/free_icons/site/python +0 -0
  276. data/free_icons/site/reality +0 -0
  277. data/free_icons/site/ruby +0 -0
  278. data/free_icons/site/skeptic +0 -0
  279. data/free_icons/site/terminal +0 -0
  280. data/free_icons/site/video.svg +358 -0
  281. data/free_icons/site/warning +0 -0
  282. data/free_icons/site/wikipedia.svg +553 -0
  283. data/free_icons/site/write.svg +287 -0
  284. data/g +0 -0
  285. data/graphical_help.c +6 -0
  286. data/gregory_coreutils/DOC +14 -0
  287. data/gregory_coreutils/build/Makefile +12 -0
  288. data/gregory_coreutils/build/code +1 -0
  289. data/gregory_coreutils/build/functions +465 -0
  290. data/gregory_coreutils/build/make +93 -0
  291. data/gregory_coreutils/build/mk +74 -0
  292. data/gregory_coreutils/build/regex +2 -0
  293. data/gregory_coreutils/build/ruby_functions +0 -0
  294. data/gregory_coreutils/utilities/abs +0 -0
  295. data/gregory_coreutils/utilities/add +0 -0
  296. data/gregory_coreutils/utilities/args +0 -0
  297. data/gregory_coreutils/utilities/black +0 -0
  298. data/gregory_coreutils/utilities/blue +0 -0
  299. data/gregory_coreutils/utilities/bold +0 -0
  300. data/gregory_coreutils/utilities/copy +0 -0
  301. data/gregory_coreutils/utilities/count +0 -0
  302. data/gregory_coreutils/utilities/cpy +0 -0
  303. data/gregory_coreutils/utilities/cyan +0 -0
  304. data/gregory_coreutils/utilities/div +0 -0
  305. data/gregory_coreutils/utilities/dump +0 -0
  306. data/gregory_coreutils/utilities/exp +0 -0
  307. data/gregory_coreutils/utilities/files +0 -0
  308. data/gregory_coreutils/utilities/floor +0 -0
  309. data/gregory_coreutils/utilities/green +0 -0
  310. data/gregory_coreutils/utilities/gsub +0 -0
  311. data/gregory_coreutils/utilities/gsub_in_place +0 -0
  312. data/gregory_coreutils/utilities/last_nth +0 -0
  313. data/gregory_coreutils/utilities/lines +0 -0
  314. data/gregory_coreutils/utilities/magenta +0 -0
  315. data/gregory_coreutils/utilities/mul +0 -0
  316. data/gregory_coreutils/utilities/nth +0 -0
  317. data/gregory_coreutils/utilities/red +0 -0
  318. data/gregory_coreutils/utilities/replace_in_place +0 -0
  319. data/gregory_coreutils/utilities/replace_not_in_place +0 -0
  320. data/gregory_coreutils/utilities/selectlines +0 -0
  321. data/gregory_coreutils/utilities/sort_in_place +0 -0
  322. data/gregory_coreutils/utilities/sub +0 -0
  323. data/gregory_coreutils/utilities/underline +0 -0
  324. data/gregory_coreutils/utilities/undump +0 -0
  325. data/gregory_coreutils/utilities/white +0 -0
  326. data/gregory_coreutils/utilities/yellow +0 -0
  327. data/gtk +3 -0
  328. data/html_file +0 -0
  329. data/html_file.html +92 -0
  330. data/i +181 -0
  331. data/index.html +38 -0
  332. data/index2.html +344 -0
  333. data/inspect.rb +209 -0
  334. data/lib/README +310 -0
  335. data/lib/__index.html +11 -0
  336. data/lib/because.rb +41 -0
  337. data/lib/code.js +28490 -0
  338. data/lib/compile +5 -0
  339. data/lib/conjunction.rb +49 -0
  340. data/lib/evidence.rb +45 -0
  341. data/lib/graphical_help +90 -0
  342. data/lib/however.rb +42 -0
  343. data/lib/index.html +49 -0
  344. data/lib/index2.html +344 -0
  345. data/lib/inputreceiver.rb +135 -0
  346. data/lib/item.rb +77 -0
  347. data/lib/itemlist.rb +50 -0
  348. data/lib/negative.rb +43 -0
  349. data/lib/neutral.rb +43 -0
  350. data/lib/outputter.rb +60 -0
  351. data/lib/p.rb +71 -0
  352. data/lib/positive.rb +41 -0
  353. data/lib/script.js +423 -0
  354. data/lib/string__.rb +110 -0
  355. data/lib/sub/file +5 -0
  356. data/man/foo.rb +6 -0
  357. data/man/gtk.html +1 -0
  358. data/man/out +7288 -0
  359. data/more/ac.desktop +12 -0
  360. data/more/res +1 -0
  361. data/more/web-speech-api/.git/HEAD +1 -0
  362. data/more/web-speech-api/.git/config +11 -0
  363. data/more/web-speech-api/.git/description +1 -0
  364. data/more/web-speech-api/.git/hooks/applypatch-msg.sample +15 -0
  365. data/more/web-speech-api/.git/hooks/commit-msg.sample +24 -0
  366. data/more/web-speech-api/.git/hooks/fsmonitor-watchman.sample +109 -0
  367. data/more/web-speech-api/.git/hooks/post-update.sample +8 -0
  368. data/more/web-speech-api/.git/hooks/pre-applypatch.sample +14 -0
  369. data/more/web-speech-api/.git/hooks/pre-commit.sample +49 -0
  370. data/more/web-speech-api/.git/hooks/pre-merge-commit.sample +13 -0
  371. data/more/web-speech-api/.git/hooks/pre-push.sample +53 -0
  372. data/more/web-speech-api/.git/hooks/pre-rebase.sample +169 -0
  373. data/more/web-speech-api/.git/hooks/pre-receive.sample +24 -0
  374. data/more/web-speech-api/.git/hooks/prepare-commit-msg.sample +42 -0
  375. data/more/web-speech-api/.git/hooks/update.sample +128 -0
  376. data/more/web-speech-api/.git/index +0 -0
  377. data/more/web-speech-api/.git/info/exclude +6 -0
  378. data/more/web-speech-api/.git/logs/HEAD +1 -0
  379. data/more/web-speech-api/.git/logs/refs/heads/master +1 -0
  380. data/more/web-speech-api/.git/logs/refs/remotes/origin/HEAD +1 -0
  381. data/more/web-speech-api/.git/objects/pack/pack-4d2adae000c513b287d53e24937e9e836954f901.idx +0 -0
  382. data/more/web-speech-api/.git/objects/pack/pack-4d2adae000c513b287d53e24937e9e836954f901.pack +0 -0
  383. data/more/web-speech-api/.git/packed-refs +2 -0
  384. data/more/web-speech-api/.git/refs/heads/master +1 -0
  385. data/more/web-speech-api/.git/refs/remotes/origin/HEAD +1 -0
  386. data/more/web-speech-api/CODE_OF_CONDUCT.md +15 -0
  387. data/more/web-speech-api/LICENSE +116 -0
  388. data/more/web-speech-api/README.md +23 -0
  389. data/more/web-speech-api/index.html +25 -0
  390. data/more/web-speech-api/phrase-matcher/index.html +30 -0
  391. data/more/web-speech-api/phrase-matcher/script.js +124 -0
  392. data/more/web-speech-api/phrase-matcher/style.css +54 -0
  393. data/more/web-speech-api/speak-easy-synthesis/img/ws128.png +0 -0
  394. data/more/web-speech-api/speak-easy-synthesis/img/ws512.png +0 -0
  395. data/more/web-speech-api/speak-easy-synthesis/index.html +43 -0
  396. data/more/web-speech-api/speak-easy-synthesis/manifest.webapp +14 -0
  397. data/more/web-speech-api/speak-easy-synthesis/script.js +87 -0
  398. data/more/web-speech-api/speak-easy-synthesis/style.css +74 -0
  399. data/more/web-speech-api/speech-color-changer/img/ws128.png +0 -0
  400. data/more/web-speech-api/speech-color-changer/img/ws512.png +0 -0
  401. data/more/web-speech-api/speech-color-changer/index.html +26 -0
  402. data/more/web-speech-api/speech-color-changer/manifest.webapp +23 -0
  403. data/more/web-speech-api/speech-color-changer/script.js +58 -0
  404. data/more/web-speech-api/speech-color-changer/style.css +38 -0
  405. data/opensrc +68 -0
  406. data/out +39 -0
  407. data/playsrc +14 -0
  408. data/program.c +11 -0
  409. data/program.prog +568 -0
  410. data/proj/a +0 -0
  411. data/proj/a.c +1 -0
  412. data/proj/a.o +0 -0
  413. data/proj/a.rs +18 -0
  414. data/proj/b.rs +22 -0
  415. data/proj/linker.sh +8 -0
  416. data/proj/save_last +4 -0
  417. data/push +16 -0
  418. data/q.cpp +175 -0
  419. data/qt/.qmake.stash +23 -0
  420. data/qt/Qt5Core.dll +0 -0
  421. data/qt/Qt5Gui.dll +0 -0
  422. data/qt/Qt5Multimedia.dll +0 -0
  423. data/qt/Qt5MultimediaWidgets.dll +0 -0
  424. data/qt/Qt5Network.dll +0 -0
  425. data/qt/Qt5PrintSupport.dll +0 -0
  426. data/qt/Qt5Qml.dll +0 -0
  427. data/qt/Qt5QmlModels.dll +0 -0
  428. data/qt/Qt5Quick.dll +0 -0
  429. data/qt/Qt5Sensors.dll +0 -0
  430. data/qt/Qt5WebChannel.dll +0 -0
  431. data/qt/Qt5WebKit.dll +0 -0
  432. data/qt/Qt5WebKitWidgets.dll +0 -0
  433. data/qt/Qt5Widgets.dll +0 -0
  434. data/qt/a.cpp +39 -0
  435. data/qt/a.out +0 -0
  436. data/qt/a.sh +7 -0
  437. data/qt/icudt66.dll +0 -0
  438. data/qt/icuin66.dll +0 -0
  439. data/qt/icuuc66.dll +0 -0
  440. data/qt/libbz2.dll +0 -0
  441. data/qt/libcrypto-1_1-x64.dll +0 -0
  442. data/qt/libfreetype-6.dll +0 -0
  443. data/qt/libgcc_s_seh-1.dll +0 -0
  444. data/qt/libglib-2.0-0.dll +0 -0
  445. data/qt/libharfbuzz-0.dll +0 -0
  446. data/qt/libiconv-2.dll +0 -0
  447. data/qt/libintl-8.dll +0 -0
  448. data/qt/libjpeg-9.dll +0 -0
  449. data/qt/liblzma-5.dll +0 -0
  450. data/qt/libpcre-1.dll +0 -0
  451. data/qt/libpcre2-16-0.dll +0 -0
  452. data/qt/libpng16-16.dll +0 -0
  453. data/qt/libsqlite3-0.dll +0 -0
  454. data/qt/libssl-1_1-x64.dll +0 -0
  455. data/qt/libstdc++-6.dll +0 -0
  456. data/qt/libwebp-7.dll +0 -0
  457. data/qt/libwinpthread-1.dll +0 -0
  458. data/qt/libxml2-2.dll +0 -0
  459. data/qt/libxslt-1.dll +0 -0
  460. data/qt/libzstd.dll +0 -0
  461. data/qt/prog +1 -0
  462. data/qt/qwindows.dll +0 -0
  463. data/qt/zlib1.dll +0 -0
  464. data/quickjs/.git/HEAD +1 -0
  465. data/quickjs/.git/config +11 -0
  466. data/quickjs/.git/description +1 -0
  467. data/quickjs/.git/hooks/applypatch-msg.sample +15 -0
  468. data/quickjs/.git/hooks/commit-msg.sample +24 -0
  469. data/quickjs/.git/hooks/fsmonitor-watchman.sample +109 -0
  470. data/quickjs/.git/hooks/post-update.sample +8 -0
  471. data/quickjs/.git/hooks/pre-applypatch.sample +14 -0
  472. data/quickjs/.git/hooks/pre-commit.sample +49 -0
  473. data/quickjs/.git/hooks/pre-merge-commit.sample +13 -0
  474. data/quickjs/.git/hooks/pre-push.sample +53 -0
  475. data/quickjs/.git/hooks/pre-rebase.sample +169 -0
  476. data/quickjs/.git/hooks/pre-receive.sample +24 -0
  477. data/quickjs/.git/hooks/prepare-commit-msg.sample +42 -0
  478. data/quickjs/.git/hooks/update.sample +128 -0
  479. data/quickjs/.git/index +0 -0
  480. data/quickjs/.git/info/exclude +6 -0
  481. data/quickjs/.git/logs/HEAD +1 -0
  482. data/quickjs/.git/logs/refs/heads/master +1 -0
  483. data/quickjs/.git/logs/refs/remotes/origin/HEAD +1 -0
  484. data/quickjs/.git/objects/pack/pack-744a13dba8773a756b22372ba65ded3aca50cdeb.idx +0 -0
  485. data/quickjs/.git/objects/pack/pack-744a13dba8773a756b22372ba65ded3aca50cdeb.pack +0 -0
  486. data/quickjs/.git/packed-refs +2 -0
  487. data/quickjs/.git/refs/heads/master +1 -0
  488. data/quickjs/.git/refs/remotes/origin/HEAD +1 -0
  489. data/quickjs/.obj/.d +1 -0
  490. data/quickjs/.obj/cutils.nolto.o +0 -0
  491. data/quickjs/.obj/cutils.o +0 -0
  492. data/quickjs/.obj/cutils.o.d +1 -0
  493. data/quickjs/.obj/hello.o +0 -0
  494. data/quickjs/.obj/hello.o.d +1 -0
  495. data/quickjs/.obj/libbf.nolto.o +0 -0
  496. data/quickjs/.obj/libbf.o +0 -0
  497. data/quickjs/.obj/libbf.o.d +1 -0
  498. data/quickjs/.obj/libregexp.nolto.o +0 -0
  499. data/quickjs/.obj/libregexp.o +0 -0
  500. data/quickjs/.obj/libregexp.o.d +2 -0
  501. data/quickjs/.obj/libunicode.nolto.o +0 -0
  502. data/quickjs/.obj/libunicode.o +0 -0
  503. data/quickjs/.obj/libunicode.o.d +1 -0
  504. data/quickjs/.obj/qjs.check.o +0 -0
  505. data/quickjs/.obj/qjs.check.o.d +1 -0
  506. data/quickjs/.obj/qjs.o +0 -0
  507. data/quickjs/.obj/qjs.o.d +1 -0
  508. data/quickjs/.obj/qjsc.o +0 -0
  509. data/quickjs/.obj/qjsc.o.d +1 -0
  510. data/quickjs/.obj/qjscalc.o +0 -0
  511. data/quickjs/.obj/qjscalc.o.d +1 -0
  512. data/quickjs/.obj/quickjs-libc.nolto.o +0 -0
  513. data/quickjs/.obj/quickjs-libc.o +0 -0
  514. data/quickjs/.obj/quickjs-libc.o.d +2 -0
  515. data/quickjs/.obj/quickjs.check.o +0 -0
  516. data/quickjs/.obj/quickjs.check.o.d +2 -0
  517. data/quickjs/.obj/quickjs.nolto.o +0 -0
  518. data/quickjs/.obj/quickjs.o +0 -0
  519. data/quickjs/.obj/quickjs.o.d +2 -0
  520. data/quickjs/.obj/repl.o +0 -0
  521. data/quickjs/.obj/repl.o.d +1 -0
  522. data/quickjs/.obj/run-test262.o +0 -0
  523. data/quickjs/.obj/run-test262.o.d +2 -0
  524. data/quickjs/COMPILE_WIN +1 -0
  525. data/quickjs/Changelog +148 -0
  526. data/quickjs/LICENSE +22 -0
  527. data/quickjs/Makefile +470 -0
  528. data/quickjs/TODO +70 -0
  529. data/quickjs/VERSION +1 -0
  530. data/quickjs/a.c +50038 -0
  531. data/quickjs/a.js +26075 -0
  532. data/quickjs/a.rb +3 -0
  533. data/quickjs/cutils.c +631 -0
  534. data/quickjs/cutils.h +297 -0
  535. data/quickjs/doc/jsbignum.texi +589 -0
  536. data/quickjs/doc/quickjs.texi +1097 -0
  537. data/quickjs/examples/fib.c +72 -0
  538. data/quickjs/examples/fib_module.js +10 -0
  539. data/quickjs/examples/hello +0 -0
  540. data/quickjs/examples/hello.js +1 -0
  541. data/quickjs/examples/hello_module.js +6 -0
  542. data/quickjs/examples/pi_bigdecimal.js +68 -0
  543. data/quickjs/examples/pi_bigfloat.js +66 -0
  544. data/quickjs/examples/pi_bigint.js +118 -0
  545. data/quickjs/examples/point.c +151 -0
  546. data/quickjs/examples/test_fib.js +6 -0
  547. data/quickjs/examples/test_point.js +40 -0
  548. data/quickjs/hello.c +44 -0
  549. data/quickjs/libbf.c +8466 -0
  550. data/quickjs/libbf.h +535 -0
  551. data/quickjs/libquickjs.a +0 -0
  552. data/quickjs/libquickjs.lto.a +0 -0
  553. data/quickjs/libregexp-opcode.h +58 -0
  554. data/quickjs/libregexp.c +2610 -0
  555. data/quickjs/libregexp.h +92 -0
  556. data/quickjs/libunicode-table.h +4368 -0
  557. data/quickjs/libunicode.c +1556 -0
  558. data/quickjs/libunicode.h +124 -0
  559. data/quickjs/libwinpthread-1.dll +0 -0
  560. data/quickjs/list.h +100 -0
  561. data/quickjs/out.c +24 -0
  562. data/quickjs/out55.c +59 -0
  563. data/quickjs/out56.c +79 -0
  564. data/quickjs/out57.c +24 -0
  565. data/quickjs/out58.c +4 -0
  566. data/quickjs/p.exe +0 -0
  567. data/quickjs/qjs +0 -0
  568. data/quickjs/qjs.c +570 -0
  569. data/quickjs/qjsc +0 -0
  570. data/quickjs/qjsc.c +762 -0
  571. data/quickjs/qjsc.exe +0 -0
  572. data/quickjs/qjscalc.c +4005 -0
  573. data/quickjs/qjscalc.js +2657 -0
  574. data/quickjs/quickjs-atom.h +273 -0
  575. data/quickjs/quickjs-libc.c +3927 -0
  576. data/quickjs/quickjs-libc.h +59 -0
  577. data/quickjs/quickjs-opcode.h +365 -0
  578. data/quickjs/quickjs.c +54036 -0
  579. data/quickjs/quickjs.h +1049 -0
  580. data/quickjs/readme.txt +1 -0
  581. data/quickjs/release.sh +158 -0
  582. data/quickjs/repl.c +2020 -0
  583. data/quickjs/repl.js +1566 -0
  584. data/quickjs/run-test262 +0 -0
  585. data/quickjs/run-test262.c +2107 -0
  586. data/quickjs/test262.conf +199 -0
  587. data/quickjs/test262_errors.txt +35 -0
  588. data/quickjs/test262o.conf +410 -0
  589. data/quickjs/test262o_errors.txt +0 -0
  590. data/quickjs/tests/bjson.c +96 -0
  591. data/quickjs/tests/microbench.js +1065 -0
  592. data/quickjs/tests/test262.patch +71 -0
  593. data/quickjs/tests/test_bignum.js +326 -0
  594. data/quickjs/tests/test_bjson.js +191 -0
  595. data/quickjs/tests/test_builtin.js +685 -0
  596. data/quickjs/tests/test_closure.js +221 -0
  597. data/quickjs/tests/test_language.js +547 -0
  598. data/quickjs/tests/test_loop.js +368 -0
  599. data/quickjs/tests/test_op_overloading.js +207 -0
  600. data/quickjs/tests/test_qjscalc.js +256 -0
  601. data/quickjs/tests/test_std.js +281 -0
  602. data/quickjs/tests/test_worker.js +62 -0
  603. data/quickjs/tests/test_worker_module.js +31 -0
  604. data/quickjs/unicode_download.sh +19 -0
  605. data/quickjs/unicode_gen.c +3057 -0
  606. data/quickjs/unicode_gen_def.h +284 -0
  607. data/readline +3 -0
  608. data/rm_com +1 -0
  609. data/run.sh +13 -0
  610. data/scaling +6 -0
  611. data/script.js +572 -0
  612. data/server_creator/__ +307 -0
  613. data/server_creator/a/a (1).tar.gz +0 -0
  614. data/server_creator/a/a.out +0 -0
  615. data/server_creator/a/hello_world +0 -0
  616. data/server_creator/a/help +57 -0
  617. data/server_creator/a/main_v1.cpp +83 -0
  618. data/server_creator/a/my_libs_gregory/.deps +0 -0
  619. data/server_creator/a/my_libs_gregory/.libs/mod_my_libs_gregory.lai +41 -0
  620. data/server_creator/a/my_libs_gregory/.libs/mod_my_libs_gregory.o +0 -0
  621. data/server_creator/a/my_libs_gregory/.libs/mod_my_libs_gregory.so +0 -0
  622. data/server_creator/a/my_libs_gregory/mod_my_libs_gregory.c +72 -0
  623. data/server_creator/a/my_libs_gregory/mod_my_libs_gregory.la +41 -0
  624. data/server_creator/a/my_libs_gregory/mod_my_libs_gregory.lo +12 -0
  625. data/server_creator/a/my_libs_gregory/mod_my_libs_gregory.slo +0 -0
  626. data/server_creator/a/nginx.conf +32 -0
  627. data/server_creator/a/six +1 -0
  628. data/server_creator/a.out +0 -0
  629. data/server_creator/all_code.cpp +448 -0
  630. data/server_creator/all_code.js +27 -0
  631. data/server_creator/code +510 -0
  632. data/server_creator/make_application +64 -0
  633. data/server_creator/prog.cpp +33 -0
  634. data/server_creator/prog.js +22 -0
  635. data/server_creator/program.js +745 -0
  636. data/server_creator/rm_com +1 -0
  637. data/server_creator/test.sh +82 -0
  638. data/speakcat.rb +9 -0
  639. data/string +48 -0
  640. data/super_trans +83 -0
  641. data/test.sh +21 -0
  642. data/todo +8 -0
  643. data/version_decrement +3 -0
  644. data/viu_license +21 -0
  645. data/windows/program.c +21 -0
  646. data/windows/program.exe +0 -0
  647. metadata +759 -0
@@ -0,0 +1,3057 @@
1
+ /*
2
+ * Generation of Unicode tables
3
+ *
4
+ * Copyright (c) 2017-2018 Fabrice Bellard
5
+ * Copyright (c) 2017-2018 Charlie Gordon
6
+ *
7
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ * of this software and associated documentation files (the "Software"), to deal
9
+ * in the Software without restriction, including without limitation the rights
10
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ * copies of the Software, and to permit persons to whom the Software is
12
+ * furnished to do so, subject to the following conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be included in
15
+ * all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
+ * THE SOFTWARE.
24
+ */
25
+ #include <stdlib.h>
26
+ #include <stdio.h>
27
+ #include <stdarg.h>
28
+ #include <inttypes.h>
29
+ #include <string.h>
30
+ #include <assert.h>
31
+ #include <ctype.h>
32
+ #include <time.h>
33
+
34
+ #include "cutils.h"
35
+
36
+ /* define it to be able to test unicode.c */
37
+ //#define USE_TEST
38
+ /* profile tests */
39
+ //#define PROFILE
40
+
41
+ //#define DUMP_CASE_CONV_TABLE
42
+ //#define DUMP_TABLE_SIZE
43
+ //#define DUMP_CC_TABLE
44
+ //#define DUMP_DECOMP_TABLE
45
+
46
+ /* Ideas:
47
+ - Generalize run length encoding + index for all tables
48
+ - remove redundant tables for ID_start, ID_continue, Case_Ignorable, Cased
49
+
50
+ Case conversion:
51
+ - use a single entry for consecutive U/LF runs
52
+ - allow EXT runs of length > 1
53
+
54
+ Decomposition:
55
+ - Greek lower case (+1f10/1f10) ?
56
+ - allow holes in B runs
57
+ - suppress more upper / lower case redundancy
58
+ */
59
+
60
+ #ifdef USE_TEST
61
+ #include "libunicode.c"
62
+ #endif
63
+
64
+ #define CHARCODE_MAX 0x10ffff
65
+ #define CC_LEN_MAX 3
66
+
67
/*
 * Allocate 'size' bytes of zero-filled memory.
 *
 * The callers in this file never check the result, so an allocation
 * failure is reported and the program exits instead of writing through
 * a NULL pointer. A zero-sized request may legitimately yield NULL.
 */
void *mallocz(size_t size)
{
    void *ptr;

    ptr = calloc(1, size);
    if (!ptr && size != 0) {
        fprintf(stderr, "mallocz: out of memory (%zu bytes)\n", size);
        exit(1);
    }
    return ptr;
}
74
+
75
/*
 * Return a pointer to the start of the n-th (0 based) ';' separated
 * field of the string 'p', or NULL if the string has fewer than n
 * separators. The returned pointer aliases 'p'; the field is not
 * terminated (it ends at the next ';' or at the end of the string).
 */
const char *get_field(const char *p, int n)
{
    int remaining = n;

    while (remaining-- > 0) {
        const char *sep = strchr(p, ';');
        if (sep == NULL)
            return NULL;
        p = sep + 1;
    }
    return p;
}
87
+
88
/*
 * Copy the n-th ';' separated field of 'p' into 'buf' as a null
 * terminated string and return 'buf'.
 *
 * The copy is truncated to buf_size - 1 characters. A missing field
 * (get_field() returning NULL) yields an empty string instead of a
 * NULL pointer dereference. Nothing is written when buf_size is 0.
 */
const char *get_field_buf(char *buf, size_t buf_size, const char *p, int n)
{
    size_t len = 0;

    if (buf_size == 0)
        return buf;
    p = get_field(p, n);
    if (p) {
        for (; *p != ';' && *p != '\0'; p++) {
            /* keep scanning after truncation, but stop storing */
            if (len < buf_size - 1)
                buf[len++] = *p;
        }
    }
    buf[len] = '\0';
    return buf;
}
101
+
102
/*
 * Append 'c' to the growable int array described by (*pbuf, *psize,
 * *plen).
 *
 * *pbuf   : array storage (may be NULL when *psize is 0)
 * *psize  : allocated capacity, in elements
 * *plen   : number of elements in use
 *
 * When the array is full its capacity grows to max(len + 1, size * 3 / 2).
 * A failed reallocation is reported and the program exits; the old
 * buffer is never overwritten with NULL.
 */
void add_char(int **pbuf, int *psize, int *plen, int c)
{
    int len = *plen;

    if (len >= *psize) {
        int new_size = *psize * 3 / 2;
        int *new_buf;

        if (new_size < len + 1)
            new_size = len + 1;
        new_buf = realloc(*pbuf, sizeof(**pbuf) * new_size);
        if (!new_buf) {
            fprintf(stderr, "add_char: out of memory\n");
            exit(1);
        }
        *pbuf = new_buf;
        *psize = new_size;
    }
    (*pbuf)[len] = c;
    *plen = len + 1;
}
118
+
119
/*
 * Parse the n-th ';' separated field of 'str' as a list of white space
 * separated hexadecimal numbers.
 *
 * Returns an array allocated with add_char() (NULL if the field is
 * missing or holds no number) and stores its length in *plen. Parsing
 * stops at the first character that is not a hexadecimal digit.
 */
int *get_field_str(int *plen, const char *str, int n)
{
    const char *p;
    char *end;
    int *buf = NULL;
    int len = 0, size = 0;

    p = get_field(str, n);
    if (!p) {
        *plen = 0;
        return NULL;
    }
    for (;;) {
        /* <ctype.h> functions require an unsigned char value */
        while (isspace((unsigned char)*p))
            p++;
        if (!isxdigit((unsigned char)*p))
            break;
        add_char(&buf, &size, &len, (int)strtoul(p, &end, 16));
        p = end;
    }
    *plen = len;
    return buf;
}
141
+
142
/*
 * Read one line of at most buf_size - 1 characters from 'f' into 'buf'
 * and strip the trailing '\n' if there is one.
 * Returns 'buf', or NULL when fgets() fails or reaches the end of file.
 */
char *get_line(char *buf, int buf_size, FILE *f)
{
    char *newline;

    if (fgets(buf, buf_size, f) == NULL)
        return NULL;
    /* fgets() stops after a newline, so the first one found is the last char */
    newline = strchr(buf, '\n');
    if (newline != NULL)
        *newline = '\0';
    return buf;
}
152
+
153
+ #define UNICODE_GENERAL_CATEGORY
154
+
155
+ typedef enum {
156
+ #define DEF(id, str) GCAT_ ## id,
157
+ #include "unicode_gen_def.h"
158
+ #undef DEF
159
+ GCAT_COUNT,
160
+ } UnicodeGCEnum1;
161
+
162
+ static const char *unicode_gc_name[] = {
163
+ #define DEF(id, str) #id,
164
+ #include "unicode_gen_def.h"
165
+ #undef DEF
166
+ };
167
+
168
+ static const char *unicode_gc_short_name[] = {
169
+ #define DEF(id, str) str,
170
+ #include "unicode_gen_def.h"
171
+ #undef DEF
172
+ };
173
+
174
+ #undef UNICODE_GENERAL_CATEGORY
175
+
176
+ #define UNICODE_SCRIPT
177
+
178
+ typedef enum {
179
+ #define DEF(id, str) SCRIPT_ ## id,
180
+ #include "unicode_gen_def.h"
181
+ #undef DEF
182
+ SCRIPT_COUNT,
183
+ } UnicodeScriptEnum1;
184
+
185
+ static const char *unicode_script_name[] = {
186
+ #define DEF(id, str) #id,
187
+ #include "unicode_gen_def.h"
188
+ #undef DEF
189
+ };
190
+
191
+ const char *unicode_script_short_name[] = {
192
+ #define DEF(id, str) str,
193
+ #include "unicode_gen_def.h"
194
+ #undef DEF
195
+ };
196
+
197
+ #undef UNICODE_SCRIPT
198
+
199
+ #define UNICODE_PROP_LIST
200
+
201
+ typedef enum {
202
+ #define DEF(id, str) PROP_ ## id,
203
+ #include "unicode_gen_def.h"
204
+ #undef DEF
205
+ PROP_COUNT,
206
+ } UnicodePropEnum1;
207
+
208
+ static const char *unicode_prop_name[] = {
209
+ #define DEF(id, str) #id,
210
+ #include "unicode_gen_def.h"
211
+ #undef DEF
212
+ };
213
+
214
+ static const char *unicode_prop_short_name[] = {
215
+ #define DEF(id, str) str,
216
+ #include "unicode_gen_def.h"
217
+ #undef DEF
218
+ };
219
+
220
+ #undef UNICODE_SPROP_LIST
221
+
222
+ typedef struct {
223
+ /* case conv */
224
+ uint8_t u_len;
225
+ uint8_t l_len;
226
+ int u_data[CC_LEN_MAX];
227
+ int l_data[CC_LEN_MAX];
228
+ int f_code;
229
+
230
+ uint8_t combining_class;
231
+ uint8_t is_compat:1;
232
+ uint8_t is_excluded:1;
233
+ uint8_t general_category;
234
+ uint8_t script;
235
+ uint8_t script_ext_len;
236
+ uint8_t *script_ext;
237
+ uint32_t prop_bitmap_tab[3];
238
+ /* decomposition */
239
+ int decomp_len;
240
+ int *decomp_data;
241
+ } CCInfo;
242
+
243
+ CCInfo *unicode_db;
244
+
245
/*
 * Look up 'name' in a table of 'tab_len' strings. Each table string is
 * a ',' separated list of alternative spellings; the comparison is
 * exact and case sensitive.
 * Returns the index of the first entry listing 'name', or -1.
 */
int find_name(const char **tab, int tab_len, const char *name)
{
    size_t name_len = strlen(name);
    int idx;

    for (idx = 0; idx < tab_len; idx++) {
        const char *alias = tab[idx];

        while (alias != NULL) {
            size_t alias_len = strcspn(alias, ",");

            if (alias_len == name_len && memcmp(alias, name, alias_len) == 0)
                return idx;
            /* step over the ',' or stop at the end of the entry */
            alias = (alias[alias_len] == ',') ? alias + alias_len + 1 : NULL;
        }
    }
    return -1;
}
268
+
269
+ static int get_prop(uint32_t c, int prop_idx)
270
+ {
271
+ return (unicode_db[c].prop_bitmap_tab[prop_idx >> 5] >> (prop_idx & 0x1f)) & 1;
272
+ }
273
+
274
+ static void set_prop(uint32_t c, int prop_idx, int val)
275
+ {
276
+ uint32_t mask;
277
+ mask = 1U << (prop_idx & 0x1f);
278
+ if (val)
279
+ unicode_db[c].prop_bitmap_tab[prop_idx >> 5] |= mask;
280
+ else
281
+ unicode_db[c].prop_bitmap_tab[prop_idx >> 5] &= ~mask;
282
+ }
283
+
284
+ void parse_unicode_data(const char *filename)
285
+ {
286
+ FILE *f;
287
+ char line[1024];
288
+ char buf1[256];
289
+ const char *p;
290
+ int code, lc, uc, last_code;
291
+ CCInfo *ci, *tab = unicode_db;
292
+
293
+ f = fopen(filename, "rb");
294
+ if (!f) {
295
+ perror(filename);
296
+ exit(1);
297
+ }
298
+
299
+ last_code = 0;
300
+ for(;;) {
301
+ if (!get_line(line, sizeof(line), f))
302
+ break;
303
+ p = line;
304
+ while (isspace(*p))
305
+ p++;
306
+ if (*p == '#')
307
+ continue;
308
+
309
+ p = get_field(line, 0);
310
+ if (!p)
311
+ continue;
312
+ code = strtoul(p, NULL, 16);
313
+ lc = 0;
314
+ uc = 0;
315
+
316
+ p = get_field(line, 12);
317
+ if (p && *p != ';') {
318
+ uc = strtoul(p, NULL, 16);
319
+ }
320
+
321
+ p = get_field(line, 13);
322
+ if (p && *p != ';') {
323
+ lc = strtoul(p, NULL, 16);
324
+ }
325
+ ci = &tab[code];
326
+ if (uc > 0 || lc > 0) {
327
+ assert(code <= CHARCODE_MAX);
328
+ if (uc > 0) {
329
+ assert(ci->u_len == 0);
330
+ ci->u_len = 1;
331
+ ci->u_data[0] = uc;
332
+ }
333
+ if (lc > 0) {
334
+ assert(ci->l_len == 0);
335
+ ci->l_len = 1;
336
+ ci->l_data[0] = lc;
337
+ }
338
+ }
339
+
340
+ {
341
+ int i;
342
+ get_field_buf(buf1, sizeof(buf1), line, 2);
343
+ i = find_name(unicode_gc_name, countof(unicode_gc_name), buf1);
344
+ if (i < 0) {
345
+ fprintf(stderr, "General category '%s' not found\n",
346
+ buf1);
347
+ exit(1);
348
+ }
349
+ ci->general_category = i;
350
+ }
351
+
352
+ p = get_field(line, 3);
353
+ if (p && *p != ';' && *p != '\0') {
354
+ int cc;
355
+ cc = strtoul(p, NULL, 0);
356
+ if (cc != 0) {
357
+ assert(code <= CHARCODE_MAX);
358
+ ci->combining_class = cc;
359
+ // printf("%05x: %d\n", code, ci->combining_class);
360
+ }
361
+ }
362
+
363
+ p = get_field(line, 5);
364
+ if (p && *p != ';' && *p != '\0') {
365
+ int size;
366
+ assert(code <= CHARCODE_MAX);
367
+ ci->is_compat = 0;
368
+ if (*p == '<') {
369
+ while (*p != '\0' && *p != '>')
370
+ p++;
371
+ if (*p == '>')
372
+ p++;
373
+ ci->is_compat = 1;
374
+ }
375
+ size = 0;
376
+ for(;;) {
377
+ while (isspace(*p))
378
+ p++;
379
+ if (!isxdigit(*p))
380
+ break;
381
+ add_char(&ci->decomp_data, &size, &ci->decomp_len, strtoul(p, (char **)&p, 16));
382
+ }
383
+ #if 0
384
+ {
385
+ int i;
386
+ static int count, d_count;
387
+
388
+ printf("%05x: %c", code, ci->is_compat ? 'C': ' ');
389
+ for(i = 0; i < ci->decomp_len; i++)
390
+ printf(" %05x", ci->decomp_data[i]);
391
+ printf("\n");
392
+ count++;
393
+ d_count += ci->decomp_len;
394
+ // printf("%d %d\n", count, d_count);
395
+ }
396
+ #endif
397
+ }
398
+
399
+ p = get_field(line, 9);
400
+ if (p && *p == 'Y') {
401
+ set_prop(code, PROP_Bidi_Mirrored, 1);
402
+ }
403
+
404
+ /* handle ranges */
405
+ get_field_buf(buf1, sizeof(buf1), line, 1);
406
+ if (strstr(buf1, " Last>")) {
407
+ int i;
408
+ // printf("range: 0x%x-%0x\n", last_code, code);
409
+ assert(ci->decomp_len == 0);
410
+ assert(ci->script_ext_len == 0);
411
+ for(i = last_code + 1; i < code; i++) {
412
+ unicode_db[i] = *ci;
413
+ }
414
+ }
415
+ last_code = code;
416
+ }
417
+
418
+ fclose(f);
419
+ }
420
+
421
+ void parse_special_casing(CCInfo *tab, const char *filename)
422
+ {
423
+ FILE *f;
424
+ char line[1024];
425
+ const char *p;
426
+ int code;
427
+ CCInfo *ci;
428
+
429
+ f = fopen(filename, "rb");
430
+ if (!f) {
431
+ perror(filename);
432
+ exit(1);
433
+ }
434
+
435
+ for(;;) {
436
+ if (!get_line(line, sizeof(line), f))
437
+ break;
438
+ p = line;
439
+ while (isspace(*p))
440
+ p++;
441
+ if (*p == '#')
442
+ continue;
443
+
444
+ p = get_field(line, 0);
445
+ if (!p)
446
+ continue;
447
+ code = strtoul(p, NULL, 16);
448
+ assert(code <= CHARCODE_MAX);
449
+ ci = &tab[code];
450
+
451
+ p = get_field(line, 4);
452
+ if (p) {
453
+ /* locale dependent casing */
454
+ while (isspace(*p))
455
+ p++;
456
+ if (*p != '#' && *p != '\0')
457
+ continue;
458
+ }
459
+
460
+
461
+ p = get_field(line, 1);
462
+ if (p && *p != ';') {
463
+ ci->l_len = 0;
464
+ for(;;) {
465
+ while (isspace(*p))
466
+ p++;
467
+ if (*p == ';')
468
+ break;
469
+ assert(ci->l_len < CC_LEN_MAX);
470
+ ci->l_data[ci->l_len++] = strtoul(p, (char **)&p, 16);
471
+ }
472
+
473
+ if (ci->l_len == 1 && ci->l_data[0] == code)
474
+ ci->l_len = 0;
475
+ }
476
+
477
+ p = get_field(line, 3);
478
+ if (p && *p != ';') {
479
+ ci->u_len = 0;
480
+ for(;;) {
481
+ while (isspace(*p))
482
+ p++;
483
+ if (*p == ';')
484
+ break;
485
+ assert(ci->u_len < CC_LEN_MAX);
486
+ ci->u_data[ci->u_len++] = strtoul(p, (char **)&p, 16);
487
+ }
488
+
489
+ if (ci->u_len == 1 && ci->u_data[0] == code)
490
+ ci->u_len = 0;
491
+ }
492
+ }
493
+
494
+ fclose(f);
495
+ }
496
+
497
+ void parse_case_folding(CCInfo *tab, const char *filename)
498
+ {
499
+ FILE *f;
500
+ char line[1024];
501
+ const char *p;
502
+ int code;
503
+ CCInfo *ci;
504
+
505
+ f = fopen(filename, "rb");
506
+ if (!f) {
507
+ perror(filename);
508
+ exit(1);
509
+ }
510
+
511
+ for(;;) {
512
+ if (!get_line(line, sizeof(line), f))
513
+ break;
514
+ p = line;
515
+ while (isspace(*p))
516
+ p++;
517
+ if (*p == '#')
518
+ continue;
519
+
520
+ p = get_field(line, 0);
521
+ if (!p)
522
+ continue;
523
+ code = strtoul(p, NULL, 16);
524
+ assert(code <= CHARCODE_MAX);
525
+ ci = &tab[code];
526
+
527
+ p = get_field(line, 1);
528
+ if (!p)
529
+ continue;
530
+ /* locale dependent casing */
531
+ while (isspace(*p))
532
+ p++;
533
+ if (*p != 'C' && *p != 'S')
534
+ continue;
535
+
536
+ p = get_field(line, 2);
537
+ assert(p != 0);
538
+ assert(ci->f_code == 0);
539
+ ci->f_code = strtoul(p, NULL, 16);
540
+ assert(ci->f_code != 0 && ci->f_code != code);
541
+ }
542
+
543
+ fclose(f);
544
+ }
545
+
546
+ void parse_composition_exclusions(const char *filename)
547
+ {
548
+ FILE *f;
549
+ char line[4096], *p;
550
+ uint32_t c0;
551
+
552
+ f = fopen(filename, "rb");
553
+ if (!f) {
554
+ perror(filename);
555
+ exit(1);
556
+ }
557
+
558
+ for(;;) {
559
+ if (!get_line(line, sizeof(line), f))
560
+ break;
561
+ p = line;
562
+ while (isspace(*p))
563
+ p++;
564
+ if (*p == '#' || *p == '@' || *p == '\0')
565
+ continue;
566
+ c0 = strtoul(p, (char **)&p, 16);
567
+ assert(c0 > 0 && c0 <= CHARCODE_MAX);
568
+ unicode_db[c0].is_excluded = TRUE;
569
+ }
570
+ fclose(f);
571
+ }
572
+
573
+ void parse_derived_core_properties(const char *filename)
574
+ {
575
+ FILE *f;
576
+ char line[4096], *p, buf[256], *q;
577
+ uint32_t c0, c1, c;
578
+ int i;
579
+
580
+ f = fopen(filename, "rb");
581
+ if (!f) {
582
+ perror(filename);
583
+ exit(1);
584
+ }
585
+
586
+ for(;;) {
587
+ if (!get_line(line, sizeof(line), f))
588
+ break;
589
+ p = line;
590
+ while (isspace(*p))
591
+ p++;
592
+ if (*p == '#' || *p == '@' || *p == '\0')
593
+ continue;
594
+ c0 = strtoul(p, (char **)&p, 16);
595
+ if (*p == '.' && p[1] == '.') {
596
+ p += 2;
597
+ c1 = strtoul(p, (char **)&p, 16);
598
+ } else {
599
+ c1 = c0;
600
+ }
601
+ assert(c1 <= CHARCODE_MAX);
602
+ p += strspn(p, " \t");
603
+ if (*p == ';') {
604
+ p++;
605
+ p += strspn(p, " \t");
606
+ q = buf;
607
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t') {
608
+ if ((q - buf) < sizeof(buf) - 1)
609
+ *q++ = *p;
610
+ p++;
611
+ }
612
+ *q = '\0';
613
+ i = find_name(unicode_prop_name,
614
+ countof(unicode_prop_name), buf);
615
+ if (i < 0) {
616
+ if (!strcmp(buf, "Grapheme_Link"))
617
+ goto next;
618
+ fprintf(stderr, "Property not found: %s\n", buf);
619
+ exit(1);
620
+ }
621
+ for(c = c0; c <= c1; c++) {
622
+ set_prop(c, i, 1);
623
+ }
624
+ next: ;
625
+ }
626
+ }
627
+ fclose(f);
628
+ }
629
+
630
+ void parse_derived_norm_properties(const char *filename)
631
+ {
632
+ FILE *f;
633
+ char line[4096], *p, buf[256], *q;
634
+ uint32_t c0, c1, c;
635
+
636
+ f = fopen(filename, "rb");
637
+ if (!f) {
638
+ perror(filename);
639
+ exit(1);
640
+ }
641
+
642
+ for(;;) {
643
+ if (!get_line(line, sizeof(line), f))
644
+ break;
645
+ p = line;
646
+ while (isspace(*p))
647
+ p++;
648
+ if (*p == '#' || *p == '@' || *p == '\0')
649
+ continue;
650
+ c0 = strtoul(p, (char **)&p, 16);
651
+ if (*p == '.' && p[1] == '.') {
652
+ p += 2;
653
+ c1 = strtoul(p, (char **)&p, 16);
654
+ } else {
655
+ c1 = c0;
656
+ }
657
+ assert(c1 <= CHARCODE_MAX);
658
+ p += strspn(p, " \t");
659
+ if (*p == ';') {
660
+ p++;
661
+ p += strspn(p, " \t");
662
+ q = buf;
663
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t') {
664
+ if ((q - buf) < sizeof(buf) - 1)
665
+ *q++ = *p;
666
+ p++;
667
+ }
668
+ *q = '\0';
669
+ if (!strcmp(buf, "Changes_When_NFKC_Casefolded")) {
670
+ for(c = c0; c <= c1; c++) {
671
+ set_prop(c, PROP_Changes_When_NFKC_Casefolded, 1);
672
+ }
673
+ }
674
+ }
675
+ }
676
+ fclose(f);
677
+ }
678
+
679
+ void parse_prop_list(const char *filename)
680
+ {
681
+ FILE *f;
682
+ char line[4096], *p, buf[256], *q;
683
+ uint32_t c0, c1, c;
684
+ int i;
685
+
686
+ f = fopen(filename, "rb");
687
+ if (!f) {
688
+ perror(filename);
689
+ exit(1);
690
+ }
691
+
692
+ for(;;) {
693
+ if (!get_line(line, sizeof(line), f))
694
+ break;
695
+ p = line;
696
+ while (isspace(*p))
697
+ p++;
698
+ if (*p == '#' || *p == '@' || *p == '\0')
699
+ continue;
700
+ c0 = strtoul(p, (char **)&p, 16);
701
+ if (*p == '.' && p[1] == '.') {
702
+ p += 2;
703
+ c1 = strtoul(p, (char **)&p, 16);
704
+ } else {
705
+ c1 = c0;
706
+ }
707
+ assert(c1 <= CHARCODE_MAX);
708
+ p += strspn(p, " \t");
709
+ if (*p == ';') {
710
+ p++;
711
+ p += strspn(p, " \t");
712
+ q = buf;
713
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t') {
714
+ if ((q - buf) < sizeof(buf) - 1)
715
+ *q++ = *p;
716
+ p++;
717
+ }
718
+ *q = '\0';
719
+ i = find_name(unicode_prop_name,
720
+ countof(unicode_prop_name), buf);
721
+ if (i < 0) {
722
+ fprintf(stderr, "Property not found: %s\n", buf);
723
+ exit(1);
724
+ }
725
+ for(c = c0; c <= c1; c++) {
726
+ set_prop(c, i, 1);
727
+ }
728
+ }
729
+ }
730
+ fclose(f);
731
+ }
732
+
733
+ void parse_scripts(const char *filename)
734
+ {
735
+ FILE *f;
736
+ char line[4096], *p, buf[256], *q;
737
+ uint32_t c0, c1, c;
738
+ int i;
739
+
740
+ f = fopen(filename, "rb");
741
+ if (!f) {
742
+ perror(filename);
743
+ exit(1);
744
+ }
745
+
746
+ for(;;) {
747
+ if (!get_line(line, sizeof(line), f))
748
+ break;
749
+ p = line;
750
+ while (isspace(*p))
751
+ p++;
752
+ if (*p == '#' || *p == '@' || *p == '\0')
753
+ continue;
754
+ c0 = strtoul(p, (char **)&p, 16);
755
+ if (*p == '.' && p[1] == '.') {
756
+ p += 2;
757
+ c1 = strtoul(p, (char **)&p, 16);
758
+ } else {
759
+ c1 = c0;
760
+ }
761
+ assert(c1 <= CHARCODE_MAX);
762
+ p += strspn(p, " \t");
763
+ if (*p == ';') {
764
+ p++;
765
+ p += strspn(p, " \t");
766
+ q = buf;
767
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t') {
768
+ if ((q - buf) < sizeof(buf) - 1)
769
+ *q++ = *p;
770
+ p++;
771
+ }
772
+ *q = '\0';
773
+ i = find_name(unicode_script_name,
774
+ countof(unicode_script_name), buf);
775
+ if (i < 0) {
776
+ fprintf(stderr, "Unknown script: '%s'\n", buf);
777
+ exit(1);
778
+ }
779
+ for(c = c0; c <= c1; c++)
780
+ unicode_db[c].script = i;
781
+ }
782
+ }
783
+ fclose(f);
784
+ }
785
+
786
+ void parse_script_extensions(const char *filename)
787
+ {
788
+ FILE *f;
789
+ char line[4096], *p, buf[256], *q;
790
+ uint32_t c0, c1, c;
791
+ int i;
792
+ uint8_t script_ext[255];
793
+ int script_ext_len;
794
+
795
+ f = fopen(filename, "rb");
796
+ if (!f) {
797
+ perror(filename);
798
+ exit(1);
799
+ }
800
+
801
+ for(;;) {
802
+ if (!get_line(line, sizeof(line), f))
803
+ break;
804
+ p = line;
805
+ while (isspace(*p))
806
+ p++;
807
+ if (*p == '#' || *p == '@' || *p == '\0')
808
+ continue;
809
+ c0 = strtoul(p, (char **)&p, 16);
810
+ if (*p == '.' && p[1] == '.') {
811
+ p += 2;
812
+ c1 = strtoul(p, (char **)&p, 16);
813
+ } else {
814
+ c1 = c0;
815
+ }
816
+ assert(c1 <= CHARCODE_MAX);
817
+ p += strspn(p, " \t");
818
+ script_ext_len = 0;
819
+ if (*p == ';') {
820
+ p++;
821
+ for(;;) {
822
+ p += strspn(p, " \t");
823
+ q = buf;
824
+ while (*p != '\0' && *p != ' ' && *p != '#' && *p != '\t') {
825
+ if ((q - buf) < sizeof(buf) - 1)
826
+ *q++ = *p;
827
+ p++;
828
+ }
829
+ *q = '\0';
830
+ if (buf[0] == '\0')
831
+ break;
832
+ i = find_name(unicode_script_short_name,
833
+ countof(unicode_script_short_name), buf);
834
+ if (i < 0) {
835
+ fprintf(stderr, "Script not found: %s\n", buf);
836
+ exit(1);
837
+ }
838
+ assert(script_ext_len < sizeof(script_ext));
839
+ script_ext[script_ext_len++] = i;
840
+ }
841
+ for(c = c0; c <= c1; c++) {
842
+ CCInfo *ci = &unicode_db[c];
843
+ ci->script_ext_len = script_ext_len;
844
+ ci->script_ext = malloc(sizeof(ci->script_ext[0]) * script_ext_len);
845
+ for(i = 0; i < script_ext_len; i++)
846
+ ci->script_ext[i] = script_ext[i];
847
+ }
848
+ }
849
+ }
850
+ fclose(f);
851
+ }
852
+
853
+ void dump_cc_info(CCInfo *ci, int i)
854
+ {
855
+ int j;
856
+ printf("%05x:", i);
857
+ if (ci->u_len != 0) {
858
+ printf(" U:");
859
+ for(j = 0; j < ci->u_len; j++)
860
+ printf(" %05x", ci->u_data[j]);
861
+ }
862
+ if (ci->l_len != 0) {
863
+ printf(" L:");
864
+ for(j = 0; j < ci->l_len; j++)
865
+ printf(" %05x", ci->l_data[j]);
866
+ }
867
+ if (ci->f_code != 0) {
868
+ printf(" F: %05x", ci->f_code);
869
+ }
870
+ printf("\n");
871
+ }
872
+
873
+ void dump_data(CCInfo *tab)
874
+ {
875
+ int i;
876
+ CCInfo *ci;
877
+ for(i = 0; i <= CHARCODE_MAX; i++) {
878
+ ci = &tab[i];
879
+ if (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0) {
880
+ dump_cc_info(ci, i);
881
+ }
882
+ }
883
+ }
884
+
885
+ BOOL is_complicated_case(const CCInfo *ci)
886
+ {
887
+ return (ci->u_len > 1 || ci->l_len > 1 ||
888
+ (ci->u_len > 0 && ci->l_len > 0) ||
889
+ (ci->f_code != 0) != ci->l_len ||
890
+ (ci->f_code != 0 && ci->l_data[0] != ci->f_code));
891
+ }
892
+
893
/* Kinds of runs stored in the case conversion table. When USE_TEST is
   defined "libunicode.c" is included above, so the enum is not
   declared again here. */
#ifndef USE_TEST
enum {
    RUN_TYPE_U,
    RUN_TYPE_L,
    RUN_TYPE_UF,
    RUN_TYPE_LF,
    RUN_TYPE_UL,
    RUN_TYPE_LSU,
    RUN_TYPE_U2L_399_EXT2,
    RUN_TYPE_UF_D20,
    RUN_TYPE_UF_D1_EXT,
    RUN_TYPE_U_EXT,
    RUN_TYPE_LF_EXT,
    RUN_TYPE_U_EXT2,
    RUN_TYPE_L_EXT2,
    RUN_TYPE_U_EXT3,
};
#endif

/* printable name of each run type, indexed by RUN_TYPE_xxx */
const char *run_type_str[] = {
    [RUN_TYPE_U]            = "U",
    [RUN_TYPE_L]            = "L",
    [RUN_TYPE_UF]           = "UF",
    [RUN_TYPE_LF]           = "LF",
    [RUN_TYPE_UL]           = "UL",
    [RUN_TYPE_LSU]          = "LSU",
    [RUN_TYPE_U2L_399_EXT2] = "U2L_399_EXT2",
    [RUN_TYPE_UF_D20]       = "UF_D20",
    [RUN_TYPE_UF_D1_EXT]    = "UF_D1_EXT",
    [RUN_TYPE_U_EXT]        = "U_EXT",
    [RUN_TYPE_LF_EXT]       = "LF_EXT",
    [RUN_TYPE_U_EXT2]       = "U_EXT2",
    [RUN_TYPE_L_EXT2]       = "L_EXT2",
    [RUN_TYPE_U_EXT3]       = "U_EXT3",
};
928
+
929
/* One run of the case conversion table, see find_run_type(). */
typedef struct {
    int code;        /* first code point of the run */
    int len;         /* number of code points in the run */
    int type;        /* RUN_TYPE_xxx */
    int data;        /* mapping of the first code point (simple runs) */
    int ext_len;     /* number of entries used in ext_data */
    int ext_data[3]; /* code points to store in the extension table */
    int data_index;  /* 'data' coming from the table */
} TableEntry;

/* bit widths in the generated table: code (17), len (7), type (4) */
940
+
941
+ void find_run_type(TableEntry *te, CCInfo *tab, int code)
942
+ {
943
+ int is_lower, len;
944
+ CCInfo *ci, *ci1, *ci2;
945
+
946
+ ci = &tab[code];
947
+ ci1 = &tab[code + 1];
948
+ ci2 = &tab[code + 2];
949
+ te->code = code;
950
+
951
+ if (ci->l_len == 1 && ci->l_data[0] == code + 2 &&
952
+ ci->f_code == ci->l_data[0] &&
953
+ ci->u_len == 0 &&
954
+
955
+ ci1->l_len == 1 && ci1->l_data[0] == code + 2 &&
956
+ ci1->f_code == ci1->l_data[0] &&
957
+ ci1->u_len == 1 && ci1->u_data[0] == code &&
958
+
959
+ ci2->l_len == 0 &&
960
+ ci2->f_code == 0 &&
961
+ ci2->u_len == 1 && ci2->u_data[0] == code) {
962
+ te->len = 3;
963
+ te->data = 0;
964
+ te->type = RUN_TYPE_LSU;
965
+ return;
966
+ }
967
+
968
+ if (is_complicated_case(ci)) {
969
+ len = 1;
970
+ while (code + len <= CHARCODE_MAX) {
971
+ ci1 = &tab[code + len];
972
+ if (ci1->u_len != 1 ||
973
+ ci1->u_data[0] != ci->u_data[0] + len ||
974
+ ci1->l_len != 0 ||
975
+ ci1->f_code != ci1->u_data[0])
976
+ break;
977
+ len++;
978
+ }
979
+ if (len > 1) {
980
+ te->len = len;
981
+ te->type = RUN_TYPE_UF;
982
+ te->data = ci->u_data[0];
983
+ return;
984
+ }
985
+
986
+ if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
987
+ ci->f_code == 0 && ci->l_len == 0) {
988
+ len = 1;
989
+ while (code + len <= CHARCODE_MAX) {
990
+ ci1 = &tab[code + len];
991
+ if (!(ci1->u_len == 2 &&
992
+ ci1->u_data[1] == 0x399 &&
993
+ ci1->u_data[0] == ci->u_data[0] + len &&
994
+ ci1->f_code == 0 &&
995
+ ci1->l_len == 0))
996
+ break;
997
+ len++;
998
+ }
999
+ te->len = len;
1000
+ te->type = RUN_TYPE_U_EXT2;
1001
+ te->ext_data[0] = ci->u_data[0];
1002
+ te->ext_data[1] = ci->u_data[1];
1003
+ te->ext_len = 2;
1004
+ return;
1005
+ }
1006
+
1007
+ if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
1008
+ ci->l_len == 1 && ci->f_code == ci->l_data[0]) {
1009
+ len = 1;
1010
+ while (code + len <= CHARCODE_MAX) {
1011
+ ci1 = &tab[code + len];
1012
+ if (!(ci1->u_len == 2 &&
1013
+ ci1->u_data[1] == 0x399 &&
1014
+ ci1->u_data[0] == ci->u_data[0] + len &&
1015
+ ci1->l_len == 1 &&
1016
+ ci1->l_data[0] == ci->l_data[0] + len &&
1017
+ ci1->f_code == ci1->l_data[0]))
1018
+ break;
1019
+ len++;
1020
+ }
1021
+ te->len = len;
1022
+ te->type = RUN_TYPE_U2L_399_EXT2;
1023
+ te->ext_data[0] = ci->u_data[0];
1024
+ te->ext_data[1] = ci->l_data[0];
1025
+ te->ext_len = 2;
1026
+ return;
1027
+ }
1028
+
1029
+ if (ci->l_len == 1 && ci->u_len == 0 && ci->f_code == 0) {
1030
+ len = 1;
1031
+ while (code + len <= CHARCODE_MAX) {
1032
+ ci1 = &tab[code + len];
1033
+ if (!(ci1->l_len == 1 &&
1034
+ ci1->l_data[0] == ci->l_data[0] + len &&
1035
+ ci1->u_len == 0 && ci1->f_code == 0))
1036
+ break;
1037
+ len++;
1038
+ }
1039
+ te->len = len;
1040
+ te->type = RUN_TYPE_L;
1041
+ te->data = ci->l_data[0];
1042
+ return;
1043
+ }
1044
+
1045
+ if (ci->l_len == 0 &&
1046
+ ci->u_len == 1 &&
1047
+ ci->u_data[0] < 0x1000 &&
1048
+ ci->f_code == ci->u_data[0] + 0x20) {
1049
+ te->len = 1;
1050
+ te->type = RUN_TYPE_UF_D20;
1051
+ te->data = ci->u_data[0];
1052
+ } else if (ci->l_len == 0 &&
1053
+ ci->u_len == 1 &&
1054
+ ci->f_code == ci->u_data[0] + 1) {
1055
+ te->len = 1;
1056
+ te->type = RUN_TYPE_UF_D1_EXT;
1057
+ te->ext_data[0] = ci->u_data[0];
1058
+ te->ext_len = 1;
1059
+ } else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_code == 0) {
1060
+ te->len = 1;
1061
+ te->type = RUN_TYPE_L_EXT2;
1062
+ te->ext_data[0] = ci->l_data[0];
1063
+ te->ext_data[1] = ci->l_data[1];
1064
+ te->ext_len = 2;
1065
+ } else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_code == 0) {
1066
+ te->len = 1;
1067
+ te->type = RUN_TYPE_U_EXT2;
1068
+ te->ext_data[0] = ci->u_data[0];
1069
+ te->ext_data[1] = ci->u_data[1];
1070
+ te->ext_len = 2;
1071
+ } else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_code == 0) {
1072
+ te->len = 1;
1073
+ te->type = RUN_TYPE_U_EXT3;
1074
+ te->ext_data[0] = ci->u_data[0];
1075
+ te->ext_data[1] = ci->u_data[1];
1076
+ te->ext_data[2] = ci->u_data[2];
1077
+ te->ext_len = 3;
1078
+ } else {
1079
+ printf("unsupported encoding case:\n");
1080
+ dump_cc_info(ci, code);
1081
+ abort();
1082
+ }
1083
+ } else {
1084
+ /* look for a run of identical conversions */
1085
+ len = 0;
1086
+ for(;;) {
1087
+ if (code >= CHARCODE_MAX || len >= 126)
1088
+ break;
1089
+ ci = &tab[code + len];
1090
+ ci1 = &tab[code + len + 1];
1091
+ if (is_complicated_case(ci) || is_complicated_case(ci1)) {
1092
+ break;
1093
+ }
1094
+ if (ci->l_len != 1 || ci->l_data[0] != code + len + 1)
1095
+ break;
1096
+ if (ci1->u_len != 1 || ci1->u_data[0] != code + len)
1097
+ break;
1098
+ len += 2;
1099
+ }
1100
+ if (len > 0) {
1101
+ te->len = len;
1102
+ te->type = RUN_TYPE_UL;
1103
+ te->data = 0;
1104
+ return;
1105
+ }
1106
+
1107
+ ci = &tab[code];
1108
+ is_lower = ci->l_len > 0;
1109
+ len = 1;
1110
+ while (code + len <= CHARCODE_MAX) {
1111
+ ci1 = &tab[code + len];
1112
+ if (is_complicated_case(ci1))
1113
+ break;
1114
+ if (is_lower) {
1115
+ if (ci1->l_len != 1 ||
1116
+ ci1->l_data[0] != ci->l_data[0] + len)
1117
+ break;
1118
+ } else {
1119
+ if (ci1->u_len != 1 ||
1120
+ ci1->u_data[0] != ci->u_data[0] + len)
1121
+ break;
1122
+ }
1123
+ len++;
1124
+ }
1125
+ te->len = len;
1126
+ if (is_lower) {
1127
+ te->type = RUN_TYPE_LF;
1128
+ te->data = ci->l_data[0];
1129
+ } else {
1130
+ te->type = RUN_TYPE_U;
1131
+ te->data = ci->u_data[0];
1132
+ }
1133
+ }
1134
+ }
1135
+
1136
+ TableEntry conv_table[1000];
1137
+ int conv_table_len;
1138
+ int ext_data[1000];
1139
+ int ext_data_len;
1140
+
1141
+ void dump_case_conv_table1(void)
1142
+ {
1143
+ int i, j;
1144
+ const TableEntry *te;
1145
+
1146
+ for(i = 0; i < conv_table_len; i++) {
1147
+ te = &conv_table[i];
1148
+ printf("%05x %02x %-10s %05x",
1149
+ te->code, te->len, run_type_str[te->type], te->data);
1150
+ for(j = 0; j < te->ext_len; j++) {
1151
+ printf(" %05x", te->ext_data[j]);
1152
+ }
1153
+ printf("\n");
1154
+ }
1155
+ printf("table_len=%d ext_len=%d\n", conv_table_len, ext_data_len);
1156
+ }
1157
+
1158
+ int find_data_index(const TableEntry *conv_table, int len, int data)
1159
+ {
1160
+ int i;
1161
+ const TableEntry *te;
1162
+ for(i = 0; i < len; i++) {
1163
+ te = &conv_table[i];
1164
+ if (te->code == data)
1165
+ return i;
1166
+ }
1167
+ return -1;
1168
+ }
1169
+
1170
+ int find_ext_data_index(int data)
1171
+ {
1172
+ int i;
1173
+ for(i = 0; i < ext_data_len; i++) {
1174
+ if (ext_data[i] == data)
1175
+ return i;
1176
+ }
1177
+ assert(ext_data_len < countof(ext_data));
1178
+ ext_data[ext_data_len++] = data;
1179
+ return ext_data_len - 1;
1180
+ }
1181
+
1182
+ void build_conv_table(CCInfo *tab)
1183
+ {
1184
+ int code, i, j;
1185
+ CCInfo *ci;
1186
+ TableEntry *te;
1187
+
1188
+ te = conv_table;
1189
+ for(code = 0; code <= CHARCODE_MAX; code++) {
1190
+ ci = &tab[code];
1191
+ if (ci->u_len == 0 && ci->l_len == 0 && ci->f_code == 0)
1192
+ continue;
1193
+ assert(te - conv_table < countof(conv_table));
1194
+ find_run_type(te, tab, code);
1195
+ #if 0
1196
+ if (te->type == RUN_TYPE_TODO) {
1197
+ printf("TODO: ");
1198
+ dump_cc_info(ci, code);
1199
+ }
1200
+ #endif
1201
+ assert(te->len <= 127);
1202
+ code += te->len - 1;
1203
+ te++;
1204
+ }
1205
+ conv_table_len = te - conv_table;
1206
+
1207
+ /* find the data index */
1208
+ for(i = 0; i < conv_table_len; i++) {
1209
+ int data_index;
1210
+ te = &conv_table[i];
1211
+
1212
+ switch(te->type) {
1213
+ case RUN_TYPE_U:
1214
+ case RUN_TYPE_L:
1215
+ case RUN_TYPE_UF:
1216
+ case RUN_TYPE_LF:
1217
+ data_index = find_data_index(conv_table, conv_table_len, te->data);
1218
+ if (data_index < 0) {
1219
+ switch(te->type) {
1220
+ case RUN_TYPE_U:
1221
+ te->type = RUN_TYPE_U_EXT;
1222
+ te->ext_len = 1;
1223
+ te->ext_data[0] = te->data;
1224
+ break;
1225
+ case RUN_TYPE_LF:
1226
+ te->type = RUN_TYPE_LF_EXT;
1227
+ te->ext_len = 1;
1228
+ te->ext_data[0] = te->data;
1229
+ break;
1230
+ default:
1231
+ printf("%05x: index not found\n", te->code);
1232
+ exit(1);
1233
+ }
1234
+ } else {
1235
+ te->data_index = data_index;
1236
+ }
1237
+ break;
1238
+ case RUN_TYPE_UF_D20:
1239
+ te->data_index = te->data;
1240
+ break;
1241
+ }
1242
+ }
1243
+
1244
+ /* find the data index for ext_data */
1245
+ for(i = 0; i < conv_table_len; i++) {
1246
+ te = &conv_table[i];
1247
+ if (te->type == RUN_TYPE_U_EXT3) {
1248
+ int p, v;
1249
+ v = 0;
1250
+ for(j = 0; j < 3; j++) {
1251
+ p = find_ext_data_index(te->ext_data[j]);
1252
+ assert(p < 16);
1253
+ v = (v << 4) | p;
1254
+ }
1255
+ te->data_index = v;
1256
+ }
1257
+ }
1258
+
1259
+ for(i = 0; i < conv_table_len; i++) {
1260
+ te = &conv_table[i];
1261
+ if (te->type == RUN_TYPE_L_EXT2 ||
1262
+ te->type == RUN_TYPE_U_EXT2 ||
1263
+ te->type == RUN_TYPE_U2L_399_EXT2) {
1264
+ int p, v;
1265
+ v = 0;
1266
+ for(j = 0; j < 2; j++) {
1267
+ p = find_ext_data_index(te->ext_data[j]);
1268
+ assert(p < 64);
1269
+ v = (v << 6) | p;
1270
+ }
1271
+ te->data_index = v;
1272
+ }
1273
+ }
1274
+
1275
+ for(i = 0; i < conv_table_len; i++) {
1276
+ te = &conv_table[i];
1277
+ if (te->type == RUN_TYPE_UF_D1_EXT ||
1278
+ te->type == RUN_TYPE_U_EXT ||
1279
+ te->type == RUN_TYPE_LF_EXT) {
1280
+ te->data_index = find_ext_data_index(te->ext_data[0]);
1281
+ }
1282
+ }
1283
+ #ifdef DUMP_CASE_CONV_TABLE
1284
+ dump_case_conv_table1();
1285
+ #endif
1286
+ }
1287
+
1288
+ void dump_case_conv_table(FILE *f)
1289
+ {
1290
+ int i;
1291
+ uint32_t v;
1292
+ const TableEntry *te;
1293
+
1294
+ fprintf(f, "static const uint32_t case_conv_table1[%u] = {", conv_table_len);
1295
+ for(i = 0; i < conv_table_len; i++) {
1296
+ if (i % 4 == 0)
1297
+ fprintf(f, "\n ");
1298
+ te = &conv_table[i];
1299
+ v = te->code << (32 - 17);
1300
+ v |= te->len << (32 - 17 - 7);
1301
+ v |= te->type << (32 - 17 - 7 - 4);
1302
+ v |= te->data_index >> 8;
1303
+ fprintf(f, " 0x%08x,", v);
1304
+ }
1305
+ fprintf(f, "\n};\n\n");
1306
+
1307
+ fprintf(f, "static const uint8_t case_conv_table2[%u] = {", conv_table_len);
1308
+ for(i = 0; i < conv_table_len; i++) {
1309
+ if (i % 8 == 0)
1310
+ fprintf(f, "\n ");
1311
+ te = &conv_table[i];
1312
+ fprintf(f, " 0x%02x,", te->data_index & 0xff);
1313
+ }
1314
+ fprintf(f, "\n};\n\n");
1315
+
1316
+ fprintf(f, "static const uint16_t case_conv_ext[%u] = {", ext_data_len);
1317
+ for(i = 0; i < ext_data_len; i++) {
1318
+ if (i % 8 == 0)
1319
+ fprintf(f, "\n ");
1320
+ fprintf(f, " 0x%04x,", ext_data[i]);
1321
+ }
1322
+ fprintf(f, "\n};\n\n");
1323
+ }
1324
+
1325
/* Compare the first 'n' ints of 'tab1' and 'tab2'.
   Returns 0 when they are all equal (including n <= 0), -1 otherwise. */
int tabcmp(const int *tab1, const int *tab2, int n)
{
    int pos = 0;

    while (pos < n && tab1[pos] == tab2[pos])
        pos++;
    return (pos < n) ? -1 : 0;
}
1334
+
1335
/* Print "<str>=" followed by each of the 'len' values of 'buf' as a
   space-prefixed, zero-padded 5-digit hex number, then a newline. */
void dump_str(const char *str, const int *buf, int len)
{
    int k = 0;

    printf("%s=", str);
    while (k < len) {
        printf(" %05x", buf[k]);
        k++;
    }
    printf("\n");
}
1343
+
1344
+ void compute_internal_props(void)
1345
+ {
1346
+ int i;
1347
+ BOOL has_ul;
1348
+
1349
+ for(i = 0; i <= CHARCODE_MAX; i++) {
1350
+ CCInfo *ci = &unicode_db[i];
1351
+ has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0);
1352
+ if (has_ul) {
1353
+ assert(get_prop(i, PROP_Cased));
1354
+ } else {
1355
+ set_prop(i, PROP_Cased1, get_prop(i, PROP_Cased));
1356
+ }
1357
+ set_prop(i, PROP_ID_Continue1,
1358
+ get_prop(i, PROP_ID_Continue) & (get_prop(i, PROP_ID_Start) ^ 1));
1359
+ set_prop(i, PROP_XID_Start1,
1360
+ get_prop(i, PROP_ID_Start) ^ get_prop(i, PROP_XID_Start));
1361
+ set_prop(i, PROP_XID_Continue1,
1362
+ get_prop(i, PROP_ID_Continue) ^ get_prop(i, PROP_XID_Continue));
1363
+ set_prop(i, PROP_Changes_When_Titlecased1,
1364
+ get_prop(i, PROP_Changes_When_Titlecased) ^ (ci->u_len != 0));
1365
+ set_prop(i, PROP_Changes_When_Casefolded1,
1366
+ get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_code != 0));
1367
+ /* XXX: reduce table size (438 bytes) */
1368
+ set_prop(i, PROP_Changes_When_NFKC_Casefolded1,
1369
+ get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_code != 0));
1370
+ #if 0
1371
+ /* TEST */
1372
+ #define M(x) (1U << GCAT_ ## x)
1373
+ {
1374
+ int b;
1375
+ b = ((M(Mn) | M(Cf) | M(Lm) | M(Sk)) >>
1376
+ unicode_db[i].general_category) & 1;
1377
+ set_prop(i, PROP_Cased1,
1378
+ get_prop(i, PROP_Case_Ignorable) ^ b);
1379
+ }
1380
+ #undef M
1381
+ #endif
1382
+ }
1383
+ }
1384
+
1385
/* Emit 'tab' (len bytes) to 'f' as the C definition
   "static const uint8_t <cname>[<len>] = {...};", eight values per
   line, followed by an empty line. */
void dump_byte_table(FILE *f, const char *cname, const uint8_t *tab, int len)
{
    int pos = 0;

    fprintf(f, "static const uint8_t %s[%d] = {", cname, len);
    while (pos < len) {
        /* start a new output line before every group of 8 values */
        if ((pos % 8) == 0)
            fputs("\n ", f);
        fprintf(f, " 0x%02x,", tab[pos]);
        pos++;
    }
    fputs("\n};\n\n", f);
}
1396
+
1397
+ #define PROP_BLOCK_LEN 32
1398
+
1399
+ void build_prop_table(FILE *f, int prop_index, BOOL add_index)
1400
+ {
1401
+ int i, j, n, v, offset, code;
1402
+ DynBuf dbuf_s, *dbuf = &dbuf_s;
1403
+ DynBuf dbuf1_s, *dbuf1 = &dbuf1_s;
1404
+ DynBuf dbuf2_s, *dbuf2 = &dbuf2_s;
1405
+ const uint32_t *buf;
1406
+ int buf_len, block_end_pos, bit;
1407
+ char cname[128];
1408
+
1409
+ dbuf_init(dbuf1);
1410
+
1411
+ for(i = 0; i <= CHARCODE_MAX;) {
1412
+ v = get_prop(i, prop_index);
1413
+ j = i + 1;
1414
+ while (j <= CHARCODE_MAX && get_prop(j, prop_index) == v) {
1415
+ j++;
1416
+ }
1417
+ n = j - i;
1418
+ if (j == (CHARCODE_MAX + 1) && v == 0)
1419
+ break; /* no need to encode last zero run */
1420
+ //printf("%05x: %d %d\n", i, n, v);
1421
+ dbuf_put_u32(dbuf1, n - 1);
1422
+ i += n;
1423
+ }
1424
+
1425
+ dbuf_init(dbuf);
1426
+ dbuf_init(dbuf2);
1427
+ buf = (uint32_t *)dbuf1->buf;
1428
+ buf_len = dbuf1->size / sizeof(buf[0]);
1429
+
1430
+ /* the first value is assumed to be 0 */
1431
+ assert(get_prop(0, prop_index) == 0);
1432
+
1433
+ block_end_pos = PROP_BLOCK_LEN;
1434
+ i = 0;
1435
+ code = 0;
1436
+ bit = 0;
1437
+ while (i < buf_len) {
1438
+ if (add_index && dbuf->size >= block_end_pos && bit == 0) {
1439
+ offset = (dbuf->size - block_end_pos);
1440
+ /* XXX: offset could be larger in case of runs of small
1441
+ lengths. Could add code to change the encoding to
1442
+ prevent it at the expense of one byte loss */
1443
+ assert(offset <= 7);
1444
+ v = code | (offset << 21);
1445
+ dbuf_putc(dbuf2, v);
1446
+ dbuf_putc(dbuf2, v >> 8);
1447
+ dbuf_putc(dbuf2, v >> 16);
1448
+ block_end_pos += PROP_BLOCK_LEN;
1449
+ }
1450
+
1451
+ v = buf[i];
1452
+ code += v + 1;
1453
+ bit ^= 1;
1454
+ if (v < 8 && (i + 1) < buf_len && buf[i + 1] < 8) {
1455
+ code += buf[i + 1] + 1;
1456
+ bit ^= 1;
1457
+ dbuf_putc(dbuf, (v << 3) | buf[i + 1]);
1458
+ i += 2;
1459
+ } else if (v < 128) {
1460
+ dbuf_putc(dbuf, 0x80 + v);
1461
+ i++;
1462
+ } else if (v < (1 << 13)) {
1463
+ dbuf_putc(dbuf, 0x40 + (v >> 8));
1464
+ dbuf_putc(dbuf, v);
1465
+ i++;
1466
+ } else {
1467
+ assert(v < (1 << 21));
1468
+ dbuf_putc(dbuf, 0x60 + (v >> 16));
1469
+ dbuf_putc(dbuf, v >> 8);
1470
+ dbuf_putc(dbuf, v);
1471
+ i++;
1472
+ }
1473
+ }
1474
+
1475
+ if (add_index) {
1476
+ /* last index entry */
1477
+ v = code;
1478
+ dbuf_putc(dbuf2, v);
1479
+ dbuf_putc(dbuf2, v >> 8);
1480
+ dbuf_putc(dbuf2, v >> 16);
1481
+ }
1482
+
1483
+ #ifdef DUMP_TABLE_SIZE
1484
+ printf("prop %s: length=%d bytes\n", unicode_prop_name[prop_index],
1485
+ (int)(dbuf->size + dbuf2->size));
1486
+ #endif
1487
+ snprintf(cname, sizeof(cname), "unicode_prop_%s_table", unicode_prop_name[prop_index]);
1488
+ dump_byte_table(f, cname, dbuf->buf, dbuf->size);
1489
+ if (add_index) {
1490
+ snprintf(cname, sizeof(cname), "unicode_prop_%s_index", unicode_prop_name[prop_index]);
1491
+ dump_byte_table(f, cname, dbuf2->buf, dbuf2->size);
1492
+ }
1493
+
1494
+ dbuf_free(dbuf);
1495
+ dbuf_free(dbuf1);
1496
+ dbuf_free(dbuf2);
1497
+ }
1498
+
1499
+ void build_flags_tables(FILE *f)
1500
+ {
1501
+ build_prop_table(f, PROP_Cased1, TRUE);
1502
+ build_prop_table(f, PROP_Case_Ignorable, TRUE);
1503
+ build_prop_table(f, PROP_ID_Start, TRUE);
1504
+ build_prop_table(f, PROP_ID_Continue1, TRUE);
1505
+ }
1506
+
1507
/*
 * Write 'len' names to 'f' as one C string constant called 'cname'.
 * Each output line is a quoted "name" or "name,short_name" (the short
 * name is skipped when empty) followed by a separate "\0" literal; the
 * padding between the two is sized so the "\0" literals line up.
 */
void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
                     const char **tab_short_name)
{
    int k, width, widest = 0;

    /* first pass: find the widest "name[,short]" text */
    for (k = 0; k < len; k++) {
        width = strlen(tab_name[k]);
        if (tab_short_name[k][0] != '\0')
            width += 1 + strlen(tab_short_name[k]);
        if (width > widest)
            widest = width;
    }

    /* generate a sequence of strings terminated by an empty string */
    fprintf(f, "static const char %s[] =\n", cname);
    k = 0;
    while (k < len) {
        fprintf(f, " \"");
        /* fprintf's return value gives the number of characters written */
        width = fprintf(f, "%s", tab_name[k]);
        if (tab_short_name[k][0] != '\0')
            width += fprintf(f, ",%s", tab_short_name[k]);
        fprintf(f, "\"%*s\"\\0\"\n", 1 + widest - width, "");
        k++;
    }
    fprintf(f, ";\n\n");
}
1534
+
1535
+ void build_general_category_table(FILE *f)
1536
+ {
1537
+ int i, v, j, n, n1;
1538
+ DynBuf dbuf_s, *dbuf = &dbuf_s;
1539
+ int cw_count, cw_len_count[4], cw_start;
1540
+
1541
+ fprintf(f, "typedef enum {\n");
1542
+ for(i = 0; i < GCAT_COUNT; i++)
1543
+ fprintf(f, " UNICODE_GC_%s,\n", unicode_gc_name[i]);
1544
+ fprintf(f, " UNICODE_GC_COUNT,\n");
1545
+ fprintf(f, "} UnicodeGCEnum;\n\n");
1546
+
1547
+ dump_name_table(f, "unicode_gc_name_table",
1548
+ unicode_gc_name, GCAT_COUNT,
1549
+ unicode_gc_short_name);
1550
+
1551
+
1552
+ dbuf_init(dbuf);
1553
+ cw_count = 0;
1554
+ for(i = 0; i < 4; i++)
1555
+ cw_len_count[i] = 0;
1556
+ for(i = 0; i <= CHARCODE_MAX;) {
1557
+ v = unicode_db[i].general_category;
1558
+ j = i + 1;
1559
+ while (j <= CHARCODE_MAX && unicode_db[j].general_category == v)
1560
+ j++;
1561
+ n = j - i;
1562
+ /* compress Lu/Ll runs */
1563
+ if (v == GCAT_Lu) {
1564
+ n1 = 1;
1565
+ while ((i + n1) <= CHARCODE_MAX && unicode_db[i + n1].general_category == (v + (n1 & 1))) {
1566
+ n1++;
1567
+ }
1568
+ if (n1 > n) {
1569
+ v = 31;
1570
+ n = n1;
1571
+ }
1572
+ }
1573
+ // printf("%05x %05x %d\n", i, n, v);
1574
+ cw_count++;
1575
+ n--;
1576
+ cw_start = dbuf->size;
1577
+ if (n < 7) {
1578
+ dbuf_putc(dbuf, (n << 5) | v);
1579
+ } else if (n < 7 + 128) {
1580
+ n1 = n - 7;
1581
+ assert(n1 < 128);
1582
+ dbuf_putc(dbuf, (0xf << 5) | v);
1583
+ dbuf_putc(dbuf, n1);
1584
+ } else if (n < 7 + 128 + (1 << 14)) {
1585
+ n1 = n - (7 + 128);
1586
+ assert(n1 < (1 << 14));
1587
+ dbuf_putc(dbuf, (0xf << 5) | v);
1588
+ dbuf_putc(dbuf, (n1 >> 8) + 128);
1589
+ dbuf_putc(dbuf, n1);
1590
+ } else {
1591
+ n1 = n - (7 + 128 + (1 << 14));
1592
+ assert(n1 < (1 << 22));
1593
+ dbuf_putc(dbuf, (0xf << 5) | v);
1594
+ dbuf_putc(dbuf, (n1 >> 16) + 128 + 64);
1595
+ dbuf_putc(dbuf, n1 >> 8);
1596
+ dbuf_putc(dbuf, n1);
1597
+ }
1598
+ cw_len_count[dbuf->size - cw_start - 1]++;
1599
+ i += n + 1;
1600
+ }
1601
+ #ifdef DUMP_TABLE_SIZE
1602
+ printf("general category: %d entries [",
1603
+ cw_count);
1604
+ for(i = 0; i < 4; i++)
1605
+ printf(" %d", cw_len_count[i]);
1606
+ printf(" ], length=%d bytes\n", (int)dbuf->size);
1607
+ #endif
1608
+
1609
+ dump_byte_table(f, "unicode_gc_table", dbuf->buf, dbuf->size);
1610
+
1611
+ dbuf_free(dbuf);
1612
+ }
1613
+
1614
+ void build_script_table(FILE *f)
1615
+ {
1616
+ int i, v, j, n, n1, type;
1617
+ DynBuf dbuf_s, *dbuf = &dbuf_s;
1618
+ int cw_count, cw_len_count[4], cw_start;
1619
+
1620
+ fprintf(f, "typedef enum {\n");
1621
+ for(i = 0; i < SCRIPT_COUNT; i++)
1622
+ fprintf(f, " UNICODE_SCRIPT_%s,\n", unicode_script_name[i]);
1623
+ fprintf(f, " UNICODE_SCRIPT_COUNT,\n");
1624
+ fprintf(f, "} UnicodeScriptEnum;\n\n");
1625
+
1626
+ i = 1;
1627
+ dump_name_table(f, "unicode_script_name_table",
1628
+ unicode_script_name + i, SCRIPT_COUNT - i,
1629
+ unicode_script_short_name + i);
1630
+
1631
+ dbuf_init(dbuf);
1632
+ cw_count = 0;
1633
+ for(i = 0; i < 4; i++)
1634
+ cw_len_count[i] = 0;
1635
+ for(i = 0; i <= CHARCODE_MAX;) {
1636
+ v = unicode_db[i].script;
1637
+ j = i + 1;
1638
+ while (j <= CHARCODE_MAX && unicode_db[j].script == v)
1639
+ j++;
1640
+ n = j - i;
1641
+ if (v == 0 && j == (CHARCODE_MAX + 1))
1642
+ break;
1643
+ // printf("%05x %05x %d\n", i, n, v);
1644
+ cw_count++;
1645
+ n--;
1646
+ cw_start = dbuf->size;
1647
+ if (v == 0)
1648
+ type = 0;
1649
+ else
1650
+ type = 1;
1651
+ if (n < 96) {
1652
+ dbuf_putc(dbuf, n | (type << 7));
1653
+ } else if (n < 96 + (1 << 12)) {
1654
+ n1 = n - 96;
1655
+ assert(n1 < (1 << 12));
1656
+ dbuf_putc(dbuf, ((n1 >> 8) + 96) | (type << 7));
1657
+ dbuf_putc(dbuf, n1);
1658
+ } else {
1659
+ n1 = n - (96 + (1 << 12));
1660
+ assert(n1 < (1 << 20));
1661
+ dbuf_putc(dbuf, ((n1 >> 16) + 112) | (type << 7));
1662
+ dbuf_putc(dbuf, n1 >> 8);
1663
+ dbuf_putc(dbuf, n1);
1664
+ }
1665
+ if (type != 0)
1666
+ dbuf_putc(dbuf, v);
1667
+
1668
+ cw_len_count[dbuf->size - cw_start - 1]++;
1669
+ i += n + 1;
1670
+ }
1671
+ #if defined(DUMP_TABLE_SIZE)
1672
+ printf("script: %d entries [",
1673
+ cw_count);
1674
+ for(i = 0; i < 4; i++)
1675
+ printf(" %d", cw_len_count[i]);
1676
+ printf(" ], length=%d bytes\n", (int)dbuf->size);
1677
+ #endif
1678
+
1679
+ dump_byte_table(f, "unicode_script_table", dbuf->buf, dbuf->size);
1680
+
1681
+ dbuf_free(dbuf);
1682
+ }
1683
+
1684
+ void build_script_ext_table(FILE *f)
1685
+ {
1686
+ int i, j, n, n1, script_ext_len;
1687
+ DynBuf dbuf_s, *dbuf = &dbuf_s;
1688
+ int cw_count;
1689
+
1690
+ dbuf_init(dbuf);
1691
+ cw_count = 0;
1692
+ for(i = 0; i <= CHARCODE_MAX;) {
1693
+ script_ext_len = unicode_db[i].script_ext_len;
1694
+ j = i + 1;
1695
+ while (j <= CHARCODE_MAX &&
1696
+ unicode_db[j].script_ext_len == script_ext_len &&
1697
+ !memcmp(unicode_db[j].script_ext, unicode_db[i].script_ext,
1698
+ script_ext_len)) {
1699
+ j++;
1700
+ }
1701
+ n = j - i;
1702
+ cw_count++;
1703
+ n--;
1704
+ if (n < 128) {
1705
+ dbuf_putc(dbuf, n);
1706
+ } else if (n < 128 + (1 << 14)) {
1707
+ n1 = n - 128;
1708
+ assert(n1 < (1 << 14));
1709
+ dbuf_putc(dbuf, (n1 >> 8) + 128);
1710
+ dbuf_putc(dbuf, n1);
1711
+ } else {
1712
+ n1 = n - (128 + (1 << 14));
1713
+ assert(n1 < (1 << 22));
1714
+ dbuf_putc(dbuf, (n1 >> 16) + 128 + 64);
1715
+ dbuf_putc(dbuf, n1 >> 8);
1716
+ dbuf_putc(dbuf, n1);
1717
+ }
1718
+ dbuf_putc(dbuf, script_ext_len);
1719
+ for(j = 0; j < script_ext_len; j++)
1720
+ dbuf_putc(dbuf, unicode_db[i].script_ext[j]);
1721
+ i += n + 1;
1722
+ }
1723
+ #ifdef DUMP_TABLE_SIZE
1724
+ printf("script_ext: %d entries",
1725
+ cw_count);
1726
+ printf(", length=%d bytes\n", (int)dbuf->size);
1727
+ #endif
1728
+
1729
+ dump_byte_table(f, "unicode_script_ext_table", dbuf->buf, dbuf->size);
1730
+
1731
+ dbuf_free(dbuf);
1732
+ }
1733
+
1734
+ /* the following properties are synthesized so no table is necessary */
1735
+ #define PROP_TABLE_COUNT PROP_ASCII
1736
+
1737
+ void build_prop_list_table(FILE *f)
1738
+ {
1739
+ int i;
1740
+
1741
+ for(i = 0; i < PROP_TABLE_COUNT; i++) {
1742
+ if (i == PROP_ID_Start ||
1743
+ i == PROP_Case_Ignorable ||
1744
+ i == PROP_ID_Continue1) {
1745
+ /* already generated */
1746
+ } else {
1747
+ build_prop_table(f, i, FALSE);
1748
+ }
1749
+ }
1750
+
1751
+ fprintf(f, "typedef enum {\n");
1752
+ for(i = 0; i < PROP_COUNT; i++)
1753
+ fprintf(f, " UNICODE_PROP_%s,\n", unicode_prop_name[i]);
1754
+ fprintf(f, " UNICODE_PROP_COUNT,\n");
1755
+ fprintf(f, "} UnicodePropertyEnum;\n\n");
1756
+
1757
+ i = PROP_ASCII_Hex_Digit;
1758
+ dump_name_table(f, "unicode_prop_name_table",
1759
+ unicode_prop_name + i, PROP_XID_Start - i + 1,
1760
+ unicode_prop_short_name + i);
1761
+
1762
+ fprintf(f, "static const uint8_t * const unicode_prop_table[] = {\n");
1763
+ for(i = 0; i < PROP_TABLE_COUNT; i++) {
1764
+ fprintf(f, " unicode_prop_%s_table,\n", unicode_prop_name[i]);
1765
+ }
1766
+ fprintf(f, "};\n\n");
1767
+
1768
+ fprintf(f, "static const uint16_t unicode_prop_len_table[] = {\n");
1769
+ for(i = 0; i < PROP_TABLE_COUNT; i++) {
1770
+ fprintf(f, " countof(unicode_prop_%s_table),\n", unicode_prop_name[i]);
1771
+ }
1772
+ fprintf(f, "};\n\n");
1773
+ }
1774
+
1775
+ #ifdef USE_TEST
1776
/* Thin wrapper: forwards to lre_case_conv() and returns its result. */
int check_conv(uint32_t *res, uint32_t c, int conv_type)
{
    int out_len;

    out_len = lre_case_conv(res, c, conv_type);
    return out_len;
}
1780
+
1781
+ void check_case_conv(void)
1782
+ {
1783
+ CCInfo *tab = unicode_db;
1784
+ uint32_t res[3];
1785
+ int l, error;
1786
+ CCInfo ci_s, *ci1, *ci = &ci_s;
1787
+ int code;
1788
+
1789
+ for(code = 0; code <= CHARCODE_MAX; code++) {
1790
+ ci1 = &tab[code];
1791
+ *ci = *ci1;
1792
+ if (ci->l_len == 0) {
1793
+ ci->l_len = 1;
1794
+ ci->l_data[0] = code;
1795
+ }
1796
+ if (ci->u_len == 0) {
1797
+ ci->u_len = 1;
1798
+ ci->u_data[0] = code;
1799
+ }
1800
+ if (ci->f_code == 0)
1801
+ ci->f_code = code;
1802
+
1803
+ error = 0;
1804
+ l = check_conv(res, code, 0);
1805
+ if (l != ci->u_len || tabcmp((int *)res, ci->u_data, l)) {
1806
+ printf("ERROR: L\n");
1807
+ error++;
1808
+ }
1809
+ l = check_conv(res, code, 1);
1810
+ if (l != ci->l_len || tabcmp((int *)res, ci->l_data, l)) {
1811
+ printf("ERROR: U\n");
1812
+ error++;
1813
+ }
1814
+ l = check_conv(res, code, 2);
1815
+ if (l != 1 || res[0] != ci->f_code) {
1816
+ printf("ERROR: F\n");
1817
+ error++;
1818
+ }
1819
+ if (error) {
1820
+ dump_cc_info(ci, code);
1821
+ exit(1);
1822
+ }
1823
+ }
1824
+ }
1825
+
1826
#ifdef PROFILE
/* Return the CLOCK_MONOTONIC time in nanoseconds. */
static int64_t get_time_ns(void)
{
    struct timespec now;
    int64_t ns;

    clock_gettime(CLOCK_MONOTONIC, &now);
    ns = (int64_t)now.tv_sec * 1000000000;
    ns += now.tv_nsec;
    return ns;
}
#endif
1834
+
1835
+
1836
+ void check_flags(void)
1837
+ {
1838
+ int c;
1839
+ BOOL flag_ref, flag;
1840
+ for(c = 0; c <= CHARCODE_MAX; c++) {
1841
+ flag_ref = get_prop(c, PROP_Cased);
1842
+ flag = lre_is_cased(c);
1843
+ if (flag != flag_ref) {
1844
+ printf("ERROR: c=%05x cased=%d ref=%d\n",
1845
+ c, flag, flag_ref);
1846
+ exit(1);
1847
+ }
1848
+
1849
+ flag_ref = get_prop(c, PROP_Case_Ignorable);
1850
+ flag = lre_is_case_ignorable(c);
1851
+ if (flag != flag_ref) {
1852
+ printf("ERROR: c=%05x case_ignorable=%d ref=%d\n",
1853
+ c, flag, flag_ref);
1854
+ exit(1);
1855
+ }
1856
+
1857
+ flag_ref = get_prop(c, PROP_ID_Start);
1858
+ flag = lre_is_id_start(c);
1859
+ if (flag != flag_ref) {
1860
+ printf("ERROR: c=%05x id_start=%d ref=%d\n",
1861
+ c, flag, flag_ref);
1862
+ exit(1);
1863
+ }
1864
+
1865
+ flag_ref = get_prop(c, PROP_ID_Continue);
1866
+ flag = lre_is_id_continue(c);
1867
+ if (flag != flag_ref) {
1868
+ printf("ERROR: c=%05x id_cont=%d ref=%d\n",
1869
+ c, flag, flag_ref);
1870
+ exit(1);
1871
+ }
1872
+ }
1873
+ #ifdef PROFILE
1874
+ {
1875
+ int64_t ti, count;
1876
+ ti = get_time_ns();
1877
+ count = 0;
1878
+ for(c = 0x20; c <= 0xffff; c++) {
1879
+ flag_ref = get_prop(c, PROP_ID_Start);
1880
+ flag = lre_is_id_start(c);
1881
+ assert(flag == flag_ref);
1882
+ count++;
1883
+ }
1884
+ ti = get_time_ns() - ti;
1885
+ printf("flags time=%0.1f ns/char\n",
1886
+ (double)ti / count);
1887
+ }
1888
+ #endif
1889
+ }
1890
+
1891
+ #endif
1892
+
1893
+ #define CC_BLOCK_LEN 32
1894
+
1895
+ void build_cc_table(FILE *f)
1896
+ {
1897
+ int i, cc, n, cc_table_len, type, n1;
1898
+ DynBuf dbuf_s, *dbuf = &dbuf_s;
1899
+ DynBuf dbuf1_s, *dbuf1 = &dbuf1_s;
1900
+ int cw_len_tab[3], cw_start, block_end_pos;
1901
+ uint32_t v;
1902
+
1903
+ dbuf_init(dbuf);
1904
+ dbuf_init(dbuf1);
1905
+ cc_table_len = 0;
1906
+ for(i = 0; i < countof(cw_len_tab); i++)
1907
+ cw_len_tab[i] = 0;
1908
+ block_end_pos = CC_BLOCK_LEN;
1909
+ for(i = 0; i <= CHARCODE_MAX;) {
1910
+ cc = unicode_db[i].combining_class;
1911
+ assert(cc <= 255);
1912
+ /* check increasing values */
1913
+ n = 1;
1914
+ while ((i + n) <= CHARCODE_MAX &&
1915
+ unicode_db[i + n].combining_class == (cc + n))
1916
+ n++;
1917
+ if (n >= 2) {
1918
+ type = 1;
1919
+ } else {
1920
+ type = 0;
1921
+ n = 1;
1922
+ while ((i + n) <= CHARCODE_MAX &&
1923
+ unicode_db[i + n].combining_class == cc)
1924
+ n++;
1925
+ }
1926
+ /* no need to encode the last run */
1927
+ if (cc == 0 && (i + n - 1) == CHARCODE_MAX)
1928
+ break;
1929
+ #ifdef DUMP_CC_TABLE
1930
+ printf("%05x %6d %d %d\n", i, n, type, cc);
1931
+ #endif
1932
+ if (type == 0) {
1933
+ if (cc == 0)
1934
+ type = 2;
1935
+ else if (cc == 230)
1936
+ type = 3;
1937
+ }
1938
+ n1 = n - 1;
1939
+
1940
+ /* add an entry to the index if necessary */
1941
+ if (dbuf->size >= block_end_pos) {
1942
+ v = i | ((dbuf->size - block_end_pos) << 21);
1943
+ dbuf_putc(dbuf1, v);
1944
+ dbuf_putc(dbuf1, v >> 8);
1945
+ dbuf_putc(dbuf1, v >> 16);
1946
+ block_end_pos += CC_BLOCK_LEN;
1947
+ }
1948
+ cw_start = dbuf->size;
1949
+ if (n1 < 48) {
1950
+ dbuf_putc(dbuf, n1 | (type << 6));
1951
+ } else if (n1 < 48 + (1 << 11)) {
1952
+ n1 -= 48;
1953
+ dbuf_putc(dbuf, ((n1 >> 8) + 48) | (type << 6));
1954
+ dbuf_putc(dbuf, n1);
1955
+ } else {
1956
+ n1 -= 48 + (1 << 11);
1957
+ assert(n1 < (1 << 20));
1958
+ dbuf_putc(dbuf, ((n1 >> 16) + 56) | (type << 6));
1959
+ dbuf_putc(dbuf, n1 >> 8);
1960
+ dbuf_putc(dbuf, n1);
1961
+ }
1962
+ cw_len_tab[dbuf->size - cw_start - 1]++;
1963
+ if (type == 0 || type == 1)
1964
+ dbuf_putc(dbuf, cc);
1965
+ cc_table_len++;
1966
+ i += n;
1967
+ }
1968
+
1969
+ /* last index entry */
1970
+ v = i;
1971
+ dbuf_putc(dbuf1, v);
1972
+ dbuf_putc(dbuf1, v >> 8);
1973
+ dbuf_putc(dbuf1, v >> 16);
1974
+
1975
+ dump_byte_table(f, "unicode_cc_table", dbuf->buf, dbuf->size);
1976
+ dump_byte_table(f, "unicode_cc_index", dbuf1->buf, dbuf1->size);
1977
+
1978
+ #if defined(DUMP_CC_TABLE) || defined(DUMP_TABLE_SIZE)
1979
+ printf("CC table: size=%d (%d entries) [",
1980
+ (int)(dbuf->size + dbuf1->size),
1981
+ cc_table_len);
1982
+ for(i = 0; i < countof(cw_len_tab); i++)
1983
+ printf(" %d", cw_len_tab[i]);
1984
+ printf(" ]\n");
1985
+ #endif
1986
+ dbuf_free(dbuf);
1987
+ dbuf_free(dbuf1);
1988
+ }
1989
+
1990
/* maximum length of decomposition: 18 chars (1), then 8 */
#ifndef USE_TEST
/* Encodings available for a run of decompositions. The order matters:
   the code selects a family with "type <= LAST_OF_FAMILY" comparisons
   and computes members as "FIRST_OF_FAMILY + length - 1". */
typedef enum {
    /* single 16 bit char */
    DECOMP_TYPE_C1 = 0,

    /* 16 bit char table, 1 to 7 chars per entry */
    DECOMP_TYPE_L1,
    DECOMP_TYPE_L2,
    DECOMP_TYPE_L3,
    DECOMP_TYPE_L4,
    DECOMP_TYPE_L5, /* XXX: not used */
    DECOMP_TYPE_L6, /* XXX: could remove */
    DECOMP_TYPE_L7, /* XXX: could remove */

    /* 18 bit char table */
    DECOMP_TYPE_LL1,
    DECOMP_TYPE_LL2,

    /* 8 bit char table */
    DECOMP_TYPE_S1,
    DECOMP_TYPE_S2,
    DECOMP_TYPE_S3,
    DECOMP_TYPE_S4,
    DECOMP_TYPE_S5,

    /* increment 16 bit char value */
    DECOMP_TYPE_I1,
    DECOMP_TYPE_I2_0,
    DECOMP_TYPE_I2_1,
    DECOMP_TYPE_I3_1,
    DECOMP_TYPE_I3_2,
    DECOMP_TYPE_I4_1,
    DECOMP_TYPE_I4_2,

    /* 16 bit base + 8 bit offset */
    DECOMP_TYPE_B1,
    DECOMP_TYPE_B2,
    DECOMP_TYPE_B3,
    DECOMP_TYPE_B4,
    DECOMP_TYPE_B5,
    DECOMP_TYPE_B6,
    DECOMP_TYPE_B7,
    DECOMP_TYPE_B8,
    DECOMP_TYPE_B18,

    DECOMP_TYPE_LS2,
    DECOMP_TYPE_PAT3,
    DECOMP_TYPE_S2_UL,
    DECOMP_TYPE_LS2_UL,
} DecompTypeEnum;
#endif
2030
+
2031
/* Printable names of the decomposition run types, in the same order as
   the DECOMP_TYPE_xxx enumeration (one family per line). */
const char *decomp_type_str[] = {
    "C1",
    "L1", "L2", "L3", "L4", "L5", "L6", "L7",
    "LL1", "LL2",
    "S1", "S2", "S3", "S4", "S5",
    "I1", "I2_0", "I2_1", "I3_1", "I3_2", "I4_1", "I4_2",
    "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B18",
    "LS2",
    "PAT3",
    "S2_UL",
    "LS2_UL",
};
2068
+
2069
+ const int decomp_incr_tab[4][4] = {
2070
+ { DECOMP_TYPE_I1, 0, -1 },
2071
+ { DECOMP_TYPE_I2_0, 0, 1, -1 },
2072
+ { DECOMP_TYPE_I3_1, 1, 2, -1 },
2073
+ { DECOMP_TYPE_I4_1, 1, 2, -1 },
2074
+ };
2075
+
2076
/*
   Size of an entry, in bits:
     code   18
     len     7
     compat  1
     type    5
     index  16
     total  47
*/

/* One candidate or selected run of the decomposition table. */
typedef struct {
    int code;            /* first code point of the run */
    uint8_t len;         /* number of code points in the run */
    uint8_t type;        /* DECOMP_TYPE_xxx */
    uint8_t c_len;       /* number of chars in each decomposition */
    uint16_t c_min;      /* base char of the "base + offset" encodings */
    uint16_t data_index; /* position of the run's data */
    int cost;            /* size in bytes from this entry to the end */
} DecompEntry;
2096
+
2097
+ int get_decomp_run_size(const DecompEntry *de)
2098
+ {
2099
+ int s;
2100
+ s = 6;
2101
+ if (de->type <= DECOMP_TYPE_C1) {
2102
+ /* nothing more */
2103
+ } else if (de->type <= DECOMP_TYPE_L7) {
2104
+ s += de->len * de->c_len * 2;
2105
+ } else if (de->type <= DECOMP_TYPE_LL2) {
2106
+ /* 18 bits per char */
2107
+ s += (de->len * de->c_len * 18 + 7) / 8;
2108
+ } else if (de->type <= DECOMP_TYPE_S5) {
2109
+ s += de->len * de->c_len;
2110
+ } else if (de->type <= DECOMP_TYPE_I4_2) {
2111
+ s += de->c_len * 2;
2112
+ } else if (de->type <= DECOMP_TYPE_B18) {
2113
+ s += 2 + de->len * de->c_len;
2114
+ } else if (de->type <= DECOMP_TYPE_LS2) {
2115
+ s += de->len * 3;
2116
+ } else if (de->type <= DECOMP_TYPE_PAT3) {
2117
+ s += 4 + de->len * 2;
2118
+ } else if (de->type <= DECOMP_TYPE_S2_UL) {
2119
+ s += de->len;
2120
+ } else if (de->type <= DECOMP_TYPE_LS2_UL) {
2121
+ s += (de->len / 2) * 3;
2122
+ } else {
2123
+ abort();
2124
+ }
2125
+ return s;
2126
+ }
2127
+
2128
+ static const uint16_t unicode_short_table[2] = { 0x2044, 0x2215 };
2129
+
2130
+ /* return -1 if not found */
2131
+ int get_short_code(int c)
2132
+ {
2133
+ int i;
2134
+ if (c < 0x80) {
2135
+ return c;
2136
+ } else if (c >= 0x300 && c < 0x350) {
2137
+ return c - 0x300 + 0x80;
2138
+ } else {
2139
+ for(i = 0; i < countof(unicode_short_table); i++) {
2140
+ if (c == unicode_short_table[i])
2141
+ return i + 0x80 + 0x50;
2142
+ }
2143
+ return -1;
2144
+ }
2145
+ }
2146
+
2147
+ static BOOL is_short(int code)
2148
+ {
2149
+ return get_short_code(code) >= 0;
2150
+ }
2151
+
2152
+ static BOOL is_short_tab(const int *tab, int len)
2153
+ {
2154
+ int i;
2155
+ for(i = 0; i < len; i++) {
2156
+ if (!is_short(tab[i]))
2157
+ return FALSE;
2158
+ }
2159
+ return TRUE;
2160
+ }
2161
+
2162
+ static BOOL is_16bit(const int *tab, int len)
2163
+ {
2164
+ int i;
2165
+ for(i = 0; i < len; i++) {
2166
+ if (tab[i] > 0xffff)
2167
+ return FALSE;
2168
+ }
2169
+ return TRUE;
2170
+ }
2171
+
2172
/* Simplified lower case guess: add 0x20 for c < 0x100 and for
   0x410..0x42f (Latin1 and Cyrillic), add 1 for everything else. */
static uint32_t to_lower_simple(uint32_t c)
{
    int gap_is_0x20 = (c < 0x100) || (c >= 0x410 && c <= 0x42f);

    return c + (gap_is_0x20 ? 0x20 : 1);
}
2181
+
2182
+ /* select best encoding with dynamic programming */
2183
+ void find_decomp_run(DecompEntry *tab_de, int i)
2184
+ {
2185
+ DecompEntry de_s, *de = &de_s;
2186
+ CCInfo *ci, *ci1, *ci2;
2187
+ int l, j, n, len_max;
2188
+
2189
+ ci = &unicode_db[i];
2190
+ l = ci->decomp_len;
2191
+ if (l == 0) {
2192
+ tab_de[i].cost = tab_de[i + 1].cost;
2193
+ return;
2194
+ }
2195
+
2196
+ /* the offset for the compose table has only 6 bits, so we must
2197
+ limit if it can be used by the compose table */
2198
+ if (!ci->is_compat && !ci->is_excluded && l == 2)
2199
+ len_max = 64;
2200
+ else
2201
+ len_max = 127;
2202
+
2203
+ tab_de[i].cost = 0x7fffffff;
2204
+
2205
+ if (!is_16bit(ci->decomp_data, l)) {
2206
+ assert(l <= 2);
2207
+
2208
+ n = 1;
2209
+ for(;;) {
2210
+ de->code = i;
2211
+ de->len = n;
2212
+ de->type = DECOMP_TYPE_LL1 + l - 1;
2213
+ de->c_len = l;
2214
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2215
+ if (de->cost < tab_de[i].cost) {
2216
+ tab_de[i] = *de;
2217
+ }
2218
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2219
+ break;
2220
+ ci1 = &unicode_db[i + n];
2221
+ /* Note: we accept a hole */
2222
+ if (!(ci1->decomp_len == 0 ||
2223
+ (ci1->decomp_len == l &&
2224
+ ci1->is_compat == ci->is_compat)))
2225
+ break;
2226
+ n++;
2227
+ }
2228
+ return;
2229
+ }
2230
+
2231
+ if (l <= 7) {
2232
+ n = 1;
2233
+ for(;;) {
2234
+ de->code = i;
2235
+ de->len = n;
2236
+ if (l == 1 && n == 1) {
2237
+ de->type = DECOMP_TYPE_C1;
2238
+ } else {
2239
+ assert(l <= 8);
2240
+ de->type = DECOMP_TYPE_L1 + l - 1;
2241
+ }
2242
+ de->c_len = l;
2243
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2244
+ if (de->cost < tab_de[i].cost) {
2245
+ tab_de[i] = *de;
2246
+ }
2247
+
2248
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2249
+ break;
2250
+ ci1 = &unicode_db[i + n];
2251
+ /* Note: we accept a hole */
2252
+ if (!(ci1->decomp_len == 0 ||
2253
+ (ci1->decomp_len == l &&
2254
+ ci1->is_compat == ci->is_compat &&
2255
+ is_16bit(ci1->decomp_data, l))))
2256
+ break;
2257
+ n++;
2258
+ }
2259
+ }
2260
+
2261
+ if (l <= 8 || l == 18) {
2262
+ int c_min, c_max, c;
2263
+ c_min = c_max = -1;
2264
+ n = 1;
2265
+ for(;;) {
2266
+ ci1 = &unicode_db[i + n - 1];
2267
+ for(j = 0; j < l; j++) {
2268
+ c = ci1->decomp_data[j];
2269
+ if (c == 0x20) {
2270
+ /* we accept space for Arabic */
2271
+ } else if (c_min == -1) {
2272
+ c_min = c_max = c;
2273
+ } else {
2274
+ c_min = min_int(c_min, c);
2275
+ c_max = max_int(c_max, c);
2276
+ }
2277
+ }
2278
+ if ((c_max - c_min) > 254)
2279
+ break;
2280
+ de->code = i;
2281
+ de->len = n;
2282
+ if (l == 18)
2283
+ de->type = DECOMP_TYPE_B18;
2284
+ else
2285
+ de->type = DECOMP_TYPE_B1 + l - 1;
2286
+ de->c_len = l;
2287
+ de->c_min = c_min;
2288
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2289
+ if (de->cost < tab_de[i].cost) {
2290
+ tab_de[i] = *de;
2291
+ }
2292
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2293
+ break;
2294
+ ci1 = &unicode_db[i + n];
2295
+ if (!(ci1->decomp_len == l &&
2296
+ ci1->is_compat == ci->is_compat))
2297
+ break;
2298
+ n++;
2299
+ }
2300
+ }
2301
+
2302
+ /* find an ascii run */
2303
+ if (l <= 5 && is_short_tab(ci->decomp_data, l)) {
2304
+ n = 1;
2305
+ for(;;) {
2306
+ de->code = i;
2307
+ de->len = n;
2308
+ de->type = DECOMP_TYPE_S1 + l - 1;
2309
+ de->c_len = l;
2310
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2311
+ if (de->cost < tab_de[i].cost) {
2312
+ tab_de[i] = *de;
2313
+ }
2314
+
2315
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2316
+ break;
2317
+ ci1 = &unicode_db[i + n];
2318
+ /* Note: we accept a hole */
2319
+ if (!(ci1->decomp_len == 0 ||
2320
+ (ci1->decomp_len == l &&
2321
+ ci1->is_compat == ci->is_compat &&
2322
+ is_short_tab(ci1->decomp_data, l))))
2323
+ break;
2324
+ n++;
2325
+ }
2326
+ }
2327
+
2328
+ /* check if a single char is increasing */
2329
+ if (l <= 4) {
2330
+ int idx1, idx;
2331
+
2332
+ for(idx1 = 1; (idx = decomp_incr_tab[l - 1][idx1]) >= 0; idx1++) {
2333
+ n = 1;
2334
+ for(;;) {
2335
+ de->code = i;
2336
+ de->len = n;
2337
+ de->type = decomp_incr_tab[l - 1][0] + idx1 - 1;
2338
+ de->c_len = l;
2339
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2340
+ if (de->cost < tab_de[i].cost) {
2341
+ tab_de[i] = *de;
2342
+ }
2343
+
2344
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2345
+ break;
2346
+ ci1 = &unicode_db[i + n];
2347
+ if (!(ci1->decomp_len == l &&
2348
+ ci1->is_compat == ci->is_compat))
2349
+ goto next1;
2350
+ for(j = 0; j < l; j++) {
2351
+ if (j == idx) {
2352
+ if (ci1->decomp_data[j] != ci->decomp_data[j] + n)
2353
+ goto next1;
2354
+ } else {
2355
+ if (ci1->decomp_data[j] != ci->decomp_data[j])
2356
+ goto next1;
2357
+ }
2358
+ }
2359
+ n++;
2360
+ }
2361
+ next1: ;
2362
+ }
2363
+ }
2364
+
2365
+ if (l == 3) {
2366
+ n = 1;
2367
+ for(;;) {
2368
+ de->code = i;
2369
+ de->len = n;
2370
+ de->type = DECOMP_TYPE_PAT3;
2371
+ de->c_len = l;
2372
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2373
+ if (de->cost < tab_de[i].cost) {
2374
+ tab_de[i] = *de;
2375
+ }
2376
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2377
+ break;
2378
+ ci1 = &unicode_db[i + n];
2379
+ if (!(ci1->decomp_len == l &&
2380
+ ci1->is_compat == ci->is_compat &&
2381
+ ci1->decomp_data[1] <= 0xffff &&
2382
+ ci1->decomp_data[0] == ci->decomp_data[0] &&
2383
+ ci1->decomp_data[l - 1] == ci->decomp_data[l - 1]))
2384
+ break;
2385
+ n++;
2386
+ }
2387
+ }
2388
+
2389
+ if (l == 2 && is_short(ci->decomp_data[1])) {
2390
+ n = 1;
2391
+ for(;;) {
2392
+ de->code = i;
2393
+ de->len = n;
2394
+ de->type = DECOMP_TYPE_LS2;
2395
+ de->c_len = l;
2396
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2397
+ if (de->cost < tab_de[i].cost) {
2398
+ tab_de[i] = *de;
2399
+ }
2400
+ if (!((i + n) <= CHARCODE_MAX && n < len_max))
2401
+ break;
2402
+ ci1 = &unicode_db[i + n];
2403
+ if (!(ci1->decomp_len == 0 ||
2404
+ (ci1->decomp_len == l &&
2405
+ ci1->is_compat == ci->is_compat &&
2406
+ ci1->decomp_data[0] <= 0xffff &&
2407
+ is_short(ci1->decomp_data[1]))))
2408
+ break;
2409
+ n++;
2410
+ }
2411
+ }
2412
+
2413
+ if (l == 2) {
2414
+ BOOL is_16bit;
2415
+
2416
+ n = 0;
2417
+ is_16bit = FALSE;
2418
+ for(;;) {
2419
+ if (!((i + n + 1) <= CHARCODE_MAX && n + 2 <= len_max))
2420
+ break;
2421
+ ci1 = &unicode_db[i + n];
2422
+ if (!(ci1->decomp_len == l &&
2423
+ ci1->is_compat == ci->is_compat &&
2424
+ is_short(ci1->decomp_data[1])))
2425
+ break;
2426
+ if (!is_16bit && !is_short(ci1->decomp_data[0]))
2427
+ is_16bit = TRUE;
2428
+ ci2 = &unicode_db[i + n + 1];
2429
+ if (!(ci2->decomp_len == l &&
2430
+ ci2->is_compat == ci->is_compat &&
2431
+ ci2->decomp_data[0] == to_lower_simple(ci1->decomp_data[0]) &&
2432
+ ci2->decomp_data[1] == ci1->decomp_data[1]))
2433
+ break;
2434
+ n += 2;
2435
+ de->code = i;
2436
+ de->len = n;
2437
+ de->type = DECOMP_TYPE_S2_UL + is_16bit;
2438
+ de->c_len = l;
2439
+ de->cost = get_decomp_run_size(de) + tab_de[i + n].cost;
2440
+ if (de->cost < tab_de[i].cost) {
2441
+ tab_de[i] = *de;
2442
+ }
2443
+ }
2444
+ }
2445
+ }
2446
+
2447
/* Store 'c' at data_buf[*pidx], low byte first, and advance *pidx by 2. */
void put16(uint8_t *data_buf, int *pidx, uint16_t c)
{
    int pos = *pidx;

    data_buf[pos] = c;
    data_buf[pos + 1] = c >> 8;
    *pidx = pos + 2;
}
2455
+
2456
+ void add_decomp_data(uint8_t *data_buf, int *pidx, DecompEntry *de)
2457
+ {
2458
+ int i, j, idx, c;
2459
+ CCInfo *ci;
2460
+
2461
+ idx = *pidx;
2462
+ de->data_index = idx;
2463
+ if (de->type <= DECOMP_TYPE_C1) {
2464
+ ci = &unicode_db[de->code];
2465
+ assert(ci->decomp_len == 1);
2466
+ de->data_index = ci->decomp_data[0];
2467
+ } else if (de->type <= DECOMP_TYPE_L7) {
2468
+ for(i = 0; i < de->len; i++) {
2469
+ ci = &unicode_db[de->code + i];
2470
+ for(j = 0; j < de->c_len; j++) {
2471
+ if (ci->decomp_len == 0)
2472
+ c = 0;
2473
+ else
2474
+ c = ci->decomp_data[j];
2475
+ put16(data_buf, &idx, c);
2476
+ }
2477
+ }
2478
+ } else if (de->type <= DECOMP_TYPE_LL2) {
2479
+ int n, p, k;
2480
+ n = (de->len * de->c_len * 18 + 7) / 8;
2481
+ p = de->len * de->c_len * 2;
2482
+ memset(data_buf + idx, 0, n);
2483
+ k = 0;
2484
+ for(i = 0; i < de->len; i++) {
2485
+ ci = &unicode_db[de->code + i];
2486
+ for(j = 0; j < de->c_len; j++) {
2487
+ if (ci->decomp_len == 0)
2488
+ c = 0;
2489
+ else
2490
+ c = ci->decomp_data[j];
2491
+ data_buf[idx + k * 2] = c;
2492
+ data_buf[idx + k * 2 + 1] = c >> 8;
2493
+ data_buf[idx + p + (k / 4)] |= (c >> 16) << ((k % 4) * 2);
2494
+ k++;
2495
+ }
2496
+ }
2497
+ idx += n;
2498
+ } else if (de->type <= DECOMP_TYPE_S5) {
2499
+ for(i = 0; i < de->len; i++) {
2500
+ ci = &unicode_db[de->code + i];
2501
+ for(j = 0; j < de->c_len; j++) {
2502
+ if (ci->decomp_len == 0)
2503
+ c = 0;
2504
+ else
2505
+ c = ci->decomp_data[j];
2506
+ c = get_short_code(c);
2507
+ assert(c >= 0);
2508
+ data_buf[idx++] = c;
2509
+ }
2510
+ }
2511
+ } else if (de->type <= DECOMP_TYPE_I4_2) {
2512
+ ci = &unicode_db[de->code];
2513
+ assert(ci->decomp_len == de->c_len);
2514
+ for(j = 0; j < de->c_len; j++)
2515
+ put16(data_buf, &idx, ci->decomp_data[j]);
2516
+ } else if (de->type <= DECOMP_TYPE_B18) {
2517
+ c = de->c_min;
2518
+ data_buf[idx++] = c;
2519
+ data_buf[idx++] = c >> 8;
2520
+ for(i = 0; i < de->len; i++) {
2521
+ ci = &unicode_db[de->code + i];
2522
+ for(j = 0; j < de->c_len; j++) {
2523
+ assert(ci->decomp_len == de->c_len);
2524
+ c = ci->decomp_data[j];
2525
+ if (c == 0x20) {
2526
+ c = 0xff;
2527
+ } else {
2528
+ c -= de->c_min;
2529
+ assert((uint32_t)c <= 254);
2530
+ }
2531
+ data_buf[idx++] = c;
2532
+ }
2533
+ }
2534
+ } else if (de->type <= DECOMP_TYPE_LS2) {
2535
+ assert(de->c_len == 2);
2536
+ for(i = 0; i < de->len; i++) {
2537
+ ci = &unicode_db[de->code + i];
2538
+ if (ci->decomp_len == 0)
2539
+ c = 0;
2540
+ else
2541
+ c = ci->decomp_data[0];
2542
+ put16(data_buf, &idx, c);
2543
+
2544
+ if (ci->decomp_len == 0)
2545
+ c = 0;
2546
+ else
2547
+ c = ci->decomp_data[1];
2548
+ c = get_short_code(c);
2549
+ assert(c >= 0);
2550
+ data_buf[idx++] = c;
2551
+ }
2552
+ } else if (de->type <= DECOMP_TYPE_PAT3) {
2553
+ ci = &unicode_db[de->code];
2554
+ assert(ci->decomp_len == 3);
2555
+ put16(data_buf, &idx, ci->decomp_data[0]);
2556
+ put16(data_buf, &idx, ci->decomp_data[2]);
2557
+ for(i = 0; i < de->len; i++) {
2558
+ ci = &unicode_db[de->code + i];
2559
+ assert(ci->decomp_len == 3);
2560
+ put16(data_buf, &idx, ci->decomp_data[1]);
2561
+ }
2562
+ } else if (de->type <= DECOMP_TYPE_S2_UL) {
2563
+ for(i = 0; i < de->len; i += 2) {
2564
+ ci = &unicode_db[de->code + i];
2565
+ c = ci->decomp_data[0];
2566
+ c = get_short_code(c);
2567
+ assert(c >= 0);
2568
+ data_buf[idx++] = c;
2569
+ c = ci->decomp_data[1];
2570
+ c = get_short_code(c);
2571
+ assert(c >= 0);
2572
+ data_buf[idx++] = c;
2573
+ }
2574
+ } else if (de->type <= DECOMP_TYPE_LS2_UL) {
2575
+ for(i = 0; i < de->len; i += 2) {
2576
+ ci = &unicode_db[de->code + i];
2577
+ c = ci->decomp_data[0];
2578
+ put16(data_buf, &idx, c);
2579
+ c = ci->decomp_data[1];
2580
+ c = get_short_code(c);
2581
+ assert(c >= 0);
2582
+ data_buf[idx++] = c;
2583
+ }
2584
+ } else {
2585
+ abort();
2586
+ }
2587
+ *pidx = idx;
2588
+ }
2589
+
2590
+ #if 0
2591
+ void dump_large_char(void)
2592
+ {
2593
+ int i, j;
2594
+ for(i = 0; i <= CHARCODE_MAX; i++) {
2595
+ CCInfo *ci = &unicode_db[i];
2596
+ for(j = 0; j < ci->decomp_len; j++) {
2597
+ if (ci->decomp_data[j] > 0xffff)
2598
+ printf("%05x\n", ci->decomp_data[j]);
2599
+ }
2600
+ }
2601
+ }
2602
+ #endif
2603
+
2604
+ void build_compose_table(FILE *f, const DecompEntry *tab_de);
2605
+
2606
+ void build_decompose_table(FILE *f)
2607
+ {
2608
+ int i, array_len, code_max, data_len, count;
2609
+ DecompEntry *tab_de, de_s, *de = &de_s;
2610
+ uint8_t *data_buf;
2611
+
2612
+ code_max = CHARCODE_MAX;
2613
+
2614
+ tab_de = mallocz((code_max + 2) * sizeof(*tab_de));
2615
+
2616
+ for(i = code_max; i >= 0; i--) {
2617
+ find_decomp_run(tab_de, i);
2618
+ }
2619
+
2620
+ /* build the data buffer */
2621
+ data_buf = malloc(100000);
2622
+ data_len = 0;
2623
+ array_len = 0;
2624
+ for(i = 0; i <= code_max; i++) {
2625
+ de = &tab_de[i];
2626
+ if (de->len != 0) {
2627
+ add_decomp_data(data_buf, &data_len, de);
2628
+ i += de->len - 1;
2629
+ array_len++;
2630
+ }
2631
+ }
2632
+
2633
+ #ifdef DUMP_DECOMP_TABLE
2634
+ /* dump */
2635
+ {
2636
+ int size, size1;
2637
+
2638
+ printf("START LEN TYPE L C SIZE\n");
2639
+ size = 0;
2640
+ for(i = 0; i <= code_max; i++) {
2641
+ de = &tab_de[i];
2642
+ if (de->len != 0) {
2643
+ size1 = get_decomp_run_size(de);
2644
+ printf("%05x %3d %6s %2d %1d %4d\n", i, de->len,
2645
+ decomp_type_str[de->type], de->c_len,
2646
+ unicode_db[i].is_compat, size1);
2647
+ i += de->len - 1;
2648
+ size += size1;
2649
+ }
2650
+ }
2651
+
2652
+ printf("array_len=%d estimated size=%d bytes actual=%d bytes\n",
2653
+ array_len, size, array_len * 6 + data_len);
2654
+ }
2655
+ #endif
2656
+
2657
+ fprintf(f, "static const uint32_t unicode_decomp_table1[%u] = {",
2658
+ array_len);
2659
+ count = 0;
2660
+ for(i = 0; i <= code_max; i++) {
2661
+ de = &tab_de[i];
2662
+ if (de->len != 0) {
2663
+ uint32_t v;
2664
+ if (count++ % 4 == 0)
2665
+ fprintf(f, "\n ");
2666
+ v = (de->code << (32 - 18)) |
2667
+ (de->len << (32 - 18 - 7)) |
2668
+ (de->type << (32 - 18 - 7 - 6)) |
2669
+ unicode_db[de->code].is_compat;
2670
+ fprintf(f, " 0x%08x,", v);
2671
+ i += de->len - 1;
2672
+ }
2673
+ }
2674
+ fprintf(f, "\n};\n\n");
2675
+
2676
+ fprintf(f, "static const uint16_t unicode_decomp_table2[%u] = {",
2677
+ array_len);
2678
+ count = 0;
2679
+ for(i = 0; i <= code_max; i++) {
2680
+ de = &tab_de[i];
2681
+ if (de->len != 0) {
2682
+ if (count++ % 8 == 0)
2683
+ fprintf(f, "\n ");
2684
+ fprintf(f, " 0x%04x,", de->data_index);
2685
+ i += de->len - 1;
2686
+ }
2687
+ }
2688
+ fprintf(f, "\n};\n\n");
2689
+
2690
+ fprintf(f, "static const uint8_t unicode_decomp_data[%u] = {",
2691
+ data_len);
2692
+ for(i = 0; i < data_len; i++) {
2693
+ if (i % 8 == 0)
2694
+ fprintf(f, "\n ");
2695
+ fprintf(f, " 0x%02x,", data_buf[i]);
2696
+ }
2697
+ fprintf(f, "\n};\n\n");
2698
+
2699
+ build_compose_table(f, tab_de);
2700
+
2701
+ free(data_buf);
2702
+
2703
+ free(tab_de);
2704
+ }
2705
+
2706
/* One canonical composition: the pair c[0], c[1] composes to 'p'. */
typedef struct {
    uint32_t c[2];
    uint32_t p;
} ComposeEntry;

/* capacity of the temporary composition table */
#define COMPOSE_LEN_MAX 10000

/* qsort() comparator: order ComposeEntry values by c[0], then by c[1].
   The composed code point 'p' does not take part in the ordering. */
static int ce_cmp(const void *p1, const void *p2)
{
    const ComposeEntry *lhs = p1;
    const ComposeEntry *rhs = p2;

    if (lhs->c[0] != rhs->c[0])
        return (lhs->c[0] < rhs->c[0]) ? -1 : 1;
    if (lhs->c[1] != rhs->c[1])
        return (lhs->c[1] < rhs->c[1]) ? -1 : 1;
    return 0;
}
2727
+
2728
+
2729
+ static int get_decomp_pos(const DecompEntry *tab_de, int c)
2730
+ {
2731
+ int i, v, k;
2732
+ const DecompEntry *de;
2733
+
2734
+ k = 0;
2735
+ for(i = 0; i <= CHARCODE_MAX; i++) {
2736
+ de = &tab_de[i];
2737
+ if (de->len != 0) {
2738
+ if (c >= de->code && c < de->code + de->len) {
2739
+ v = c - de->code;
2740
+ assert(v < 64);
2741
+ v |= k << 6;
2742
+ assert(v < 65536);
2743
+ return v;
2744
+ }
2745
+ i += de->len - 1;
2746
+ k++;
2747
+ }
2748
+ }
2749
+ return -1;
2750
+ }
2751
+
2752
+ void build_compose_table(FILE *f, const DecompEntry *tab_de)
2753
+ {
2754
+ int i, v, tab_ce_len;
2755
+ ComposeEntry *ce, *tab_ce;
2756
+
2757
+ tab_ce = malloc(sizeof(*tab_ce) * COMPOSE_LEN_MAX);
2758
+ tab_ce_len = 0;
2759
+ for(i = 0; i <= CHARCODE_MAX; i++) {
2760
+ CCInfo *ci = &unicode_db[i];
2761
+ if (ci->decomp_len == 2 && !ci->is_compat &&
2762
+ !ci->is_excluded) {
2763
+ assert(tab_ce_len < COMPOSE_LEN_MAX);
2764
+ ce = &tab_ce[tab_ce_len++];
2765
+ ce->c[0] = ci->decomp_data[0];
2766
+ ce->c[1] = ci->decomp_data[1];
2767
+ ce->p = i;
2768
+ }
2769
+ }
2770
+ qsort(tab_ce, tab_ce_len, sizeof(*tab_ce), ce_cmp);
2771
+
2772
+ #if 0
2773
+ {
2774
+ printf("tab_ce_len=%d\n", tab_ce_len);
2775
+ for(i = 0; i < tab_ce_len; i++) {
2776
+ ce = &tab_ce[i];
2777
+ printf("%05x %05x %05x\n", ce->c[0], ce->c[1], ce->p);
2778
+ }
2779
+ }
2780
+ #endif
2781
+
2782
+ fprintf(f, "static const uint16_t unicode_comp_table[%u] = {",
2783
+ tab_ce_len);
2784
+ for(i = 0; i < tab_ce_len; i++) {
2785
+ if (i % 8 == 0)
2786
+ fprintf(f, "\n ");
2787
+ v = get_decomp_pos(tab_de, tab_ce[i].p);
2788
+ if (v < 0) {
2789
+ printf("ERROR: entry for c=%04x not found\n",
2790
+ tab_ce[i].p);
2791
+ exit(1);
2792
+ }
2793
+ fprintf(f, " 0x%04x,", v);
2794
+ }
2795
+ fprintf(f, "\n};\n\n");
2796
+
2797
+ free(tab_ce);
2798
+ }
2799
+
2800
+ #ifdef USE_TEST
2801
+ void check_decompose_table(void)
2802
+ {
2803
+ int c;
2804
+ CCInfo *ci;
2805
+ int res[UNICODE_DECOMP_LEN_MAX], *ref;
2806
+ int len, ref_len, is_compat;
2807
+
2808
+ for(is_compat = 0; is_compat <= 1; is_compat++) {
2809
+ for(c = 0; c < CHARCODE_MAX; c++) {
2810
+ ci = &unicode_db[c];
2811
+ ref_len = ci->decomp_len;
2812
+ ref = ci->decomp_data;
2813
+ if (!is_compat && ci->is_compat) {
2814
+ ref_len = 0;
2815
+ }
2816
+ len = unicode_decomp_char((uint32_t *)res, c, is_compat);
2817
+ if (len != ref_len ||
2818
+ tabcmp(res, ref, ref_len) != 0) {
2819
+ printf("ERROR c=%05x compat=%d\n", c, is_compat);
2820
+ dump_str("res", res, len);
2821
+ dump_str("ref", ref, ref_len);
2822
+ exit(1);
2823
+ }
2824
+ }
2825
+ }
2826
+ }
2827
+
2828
+ void check_compose_table(void)
2829
+ {
2830
+ int i, p;
2831
+ /* XXX: we don't test all the cases */
2832
+
2833
+ for(i = 0; i <= CHARCODE_MAX; i++) {
2834
+ CCInfo *ci = &unicode_db[i];
2835
+ if (ci->decomp_len == 2 && !ci->is_compat &&
2836
+ !ci->is_excluded) {
2837
+ p = unicode_compose_pair(ci->decomp_data[0], ci->decomp_data[1]);
2838
+ if (p != i) {
2839
+ printf("ERROR compose: c=%05x %05x -> %05x ref=%05x\n",
2840
+ ci->decomp_data[0], ci->decomp_data[1], p, i);
2841
+ exit(1);
2842
+ }
2843
+ }
2844
+ }
2845
+
2846
+
2847
+
2848
+ }
2849
+
2850
+ #endif
2851
+
2852
+
2853
+
2854
+ #ifdef USE_TEST
2855
+
2856
/* Compare the result 'buf1' (len1 elements) with the reference 'buf2'
   (len2 elements). On any difference, report test number 'num' and
   label 'msg', dump the input and both strings, and exit with
   status 1. */
void check_str(const char *msg, int num, const int *in_buf, int in_len,
               const int *buf1, int len1,
               const int *buf2, int len2)
{
    if (len1 == len2 && tabcmp(buf1, buf2, len1) == 0)
        return;

    printf("%d: ERROR %s:\n", num, msg);
    dump_str(" in", in_buf, in_len);
    dump_str("res", buf1, len1);
    dump_str("ref", buf2, len2);
    exit(1);
}
2868
+
2869
+ void check_cc_table(void)
2870
+ {
2871
+ int cc, cc_ref, c;
2872
+
2873
+ for(c = 0; c <= CHARCODE_MAX; c++) {
2874
+ cc_ref = unicode_db[c].combining_class;
2875
+ cc = unicode_get_cc(c);
2876
+ if (cc != cc_ref) {
2877
+ printf("ERROR: c=%04x cc=%d cc_ref=%d\n",
2878
+ c, cc, cc_ref);
2879
+ exit(1);
2880
+ }
2881
+ }
2882
+ #ifdef PROFILE
2883
+ {
2884
+ int64_t ti, count;
2885
+
2886
+ ti = get_time_ns();
2887
+ count = 0;
2888
+ /* only do it on meaningful chars */
2889
+ for(c = 0x20; c <= 0xffff; c++) {
2890
+ cc_ref = unicode_db[c].combining_class;
2891
+ cc = unicode_get_cc(c);
2892
+ count++;
2893
+ }
2894
+ ti = get_time_ns() - ti;
2895
+ printf("cc time=%0.1f ns/char\n",
2896
+ (double)ti / count);
2897
+ }
2898
+ #endif
2899
+ }
2900
+
2901
+ void normalization_test(const char *filename)
2902
+ {
2903
+ FILE *f;
2904
+ char line[4096], *p;
2905
+ int *in_str, *nfc_str, *nfd_str, *nfkc_str, *nfkd_str;
2906
+ int in_len, nfc_len, nfd_len, nfkc_len, nfkd_len;
2907
+ int *buf, buf_len, pos;
2908
+
2909
+ f = fopen(filename, "rb");
2910
+ if (!f) {
2911
+ perror(filename);
2912
+ exit(1);
2913
+ }
2914
+ pos = 0;
2915
+ for(;;) {
2916
+ if (!get_line(line, sizeof(line), f))
2917
+ break;
2918
+ pos++;
2919
+ p = line;
2920
+ while (isspace(*p))
2921
+ p++;
2922
+ if (*p == '#' || *p == '@')
2923
+ continue;
2924
+ in_str = get_field_str(&in_len, p, 0);
2925
+ nfc_str = get_field_str(&nfc_len, p, 1);
2926
+ nfd_str = get_field_str(&nfd_len, p, 2);
2927
+ nfkc_str = get_field_str(&nfkc_len, p, 3);
2928
+ nfkd_str = get_field_str(&nfkd_len, p, 4);
2929
+
2930
+ // dump_str("in", in_str, in_len);
2931
+
2932
+ buf_len = unicode_normalize((uint32_t **)&buf, (uint32_t *)in_str, in_len, UNICODE_NFD, NULL, NULL);
2933
+ check_str("nfd", pos, in_str, in_len, buf, buf_len, nfd_str, nfd_len);
2934
+ free(buf);
2935
+
2936
+ buf_len = unicode_normalize((uint32_t **)&buf, (uint32_t *)in_str, in_len, UNICODE_NFKD, NULL, NULL);
2937
+ check_str("nfkd", pos, in_str, in_len, buf, buf_len, nfkd_str, nfkd_len);
2938
+ free(buf);
2939
+
2940
+ buf_len = unicode_normalize((uint32_t **)&buf, (uint32_t *)in_str, in_len, UNICODE_NFC, NULL, NULL);
2941
+ check_str("nfc", pos, in_str, in_len, buf, buf_len, nfc_str, nfc_len);
2942
+ free(buf);
2943
+
2944
+ buf_len = unicode_normalize((uint32_t **)&buf, (uint32_t *)in_str, in_len, UNICODE_NFKC, NULL, NULL);
2945
+ check_str("nfkc", pos, in_str, in_len, buf, buf_len, nfkc_str, nfkc_len);
2946
+ free(buf);
2947
+
2948
+ free(in_str);
2949
+ free(nfc_str);
2950
+ free(nfd_str);
2951
+ free(nfkc_str);
2952
+ free(nfkd_str);
2953
+ }
2954
+ fclose(f);
2955
+ }
2956
+ #endif
2957
+
2958
+ int main(int argc, char **argv)
2959
+ {
2960
+ const char *unicode_db_path, *outfilename;
2961
+ char filename[1024];
2962
+
2963
+ if (argc < 2) {
2964
+ printf("usage: %s unicode_db_path [output_file]\n"
2965
+ "\n"
2966
+ "If no output_file is given, a self test is done using the current unicode library\n",
2967
+ argv[0]);
2968
+ exit(1);
2969
+ }
2970
+ unicode_db_path = argv[1];
2971
+ outfilename = NULL;
2972
+ if (argc >= 3)
2973
+ outfilename = argv[2];
2974
+
2975
+ unicode_db = mallocz(sizeof(unicode_db[0]) * (CHARCODE_MAX + 1));
2976
+
2977
+ snprintf(filename, sizeof(filename), "%s/UnicodeData.txt", unicode_db_path);
2978
+
2979
+ parse_unicode_data(filename);
2980
+
2981
+ snprintf(filename, sizeof(filename), "%s/SpecialCasing.txt", unicode_db_path);
2982
+ parse_special_casing(unicode_db, filename);
2983
+
2984
+ snprintf(filename, sizeof(filename), "%s/CaseFolding.txt", unicode_db_path);
2985
+ parse_case_folding(unicode_db, filename);
2986
+
2987
+ snprintf(filename, sizeof(filename), "%s/CompositionExclusions.txt", unicode_db_path);
2988
+ parse_composition_exclusions(filename);
2989
+
2990
+ snprintf(filename, sizeof(filename), "%s/DerivedCoreProperties.txt", unicode_db_path);
2991
+ parse_derived_core_properties(filename);
2992
+
2993
+ snprintf(filename, sizeof(filename), "%s/DerivedNormalizationProps.txt", unicode_db_path);
2994
+ parse_derived_norm_properties(filename);
2995
+
2996
+ snprintf(filename, sizeof(filename), "%s/PropList.txt", unicode_db_path);
2997
+ parse_prop_list(filename);
2998
+
2999
+ snprintf(filename, sizeof(filename), "%s/Scripts.txt", unicode_db_path);
3000
+ parse_scripts(filename);
3001
+
3002
+ snprintf(filename, sizeof(filename), "%s/ScriptExtensions.txt",
3003
+ unicode_db_path);
3004
+ parse_script_extensions(filename);
3005
+
3006
+ snprintf(filename, sizeof(filename), "%s/emoji-data.txt",
3007
+ unicode_db_path);
3008
+ parse_prop_list(filename);
3009
+
3010
+ // dump_data(unicode_db);
3011
+
3012
+ build_conv_table(unicode_db);
3013
+
3014
+ // dump_table();
3015
+
3016
+ if (!outfilename) {
3017
+ #ifdef USE_TEST
3018
+ check_case_conv();
3019
+ check_flags();
3020
+ check_decompose_table();
3021
+ check_compose_table();
3022
+ check_cc_table();
3023
+ snprintf(filename, sizeof(filename), "%s/NormalizationTest.txt", unicode_db_path);
3024
+ normalization_test(filename);
3025
+ #else
3026
+ fprintf(stderr, "Tests are not compiled\n");
3027
+ exit(1);
3028
+ #endif
3029
+ } else
3030
+ {
3031
+ FILE *fo = fopen(outfilename, "wb");
3032
+
3033
+ if (!fo) {
3034
+ perror(outfilename);
3035
+ exit(1);
3036
+ }
3037
+ fprintf(fo,
3038
+ "/* Compressed unicode tables */\n"
3039
+ "/* Automatically generated file - do not edit */\n"
3040
+ "\n"
3041
+ "#include <stdint.h>\n"
3042
+ "\n");
3043
+ dump_case_conv_table(fo);
3044
+ compute_internal_props();
3045
+ build_flags_tables(fo);
3046
+ fprintf(fo, "#ifdef CONFIG_ALL_UNICODE\n\n");
3047
+ build_cc_table(fo);
3048
+ build_decompose_table(fo);
3049
+ build_general_category_table(fo);
3050
+ build_script_table(fo);
3051
+ build_script_ext_table(fo);
3052
+ build_prop_list_table(fo);
3053
+ fprintf(fo, "#endif /* CONFIG_ALL_UNICODE */\n");
3054
+ fclose(fo);
3055
+ }
3056
+ return 0;
3057
+ }