u 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. checksums.yaml +7 -0
  2. data/build/ext/u/data/attributes.rb +39 -0
  3. data/build/ext/u/data/bidi-mirroring.rb +27 -0
  4. data/build/ext/u/data/canonical-combining-class.rb +15 -0
  5. data/build/ext/u/data/case-folding.rb +39 -0
  6. data/build/ext/u/data/cased.rb +19 -0
  7. data/build/ext/u/data/compose.rb +304 -0
  8. data/build/ext/u/data/constants.rb +31 -0
  9. data/build/ext/u/data/decompose.rb +85 -0
  10. data/build/ext/u/data/general-category.rb +61 -0
  11. data/build/ext/u/data/grapheme-word-break.rb +15 -0
  12. data/build/ext/u/data/marshalled.rb +5 -0
  13. data/build/ext/u/data/script.rb +91 -0
  14. data/build/ext/u/data/soft-dotted.rb +17 -0
  15. data/build/ext/u/data/title-table.rb +30 -0
  16. data/build/ext/u/data/wide.rb +17 -0
  17. data/build/lib/u/build.rb +8 -0
  18. data/build/lib/u/build/data.rb +16 -0
  19. data/build/lib/u/build/data/bidimirroring.rb +26 -0
  20. data/build/lib/u/build/data/break.rb +14 -0
  21. data/build/lib/u/build/data/casefolding.rb +77 -0
  22. data/build/lib/u/build/data/compositionexclusions.rb +14 -0
  23. data/build/lib/u/build/data/derivedeastasianwidth.rb +15 -0
  24. data/build/lib/u/build/data/file.rb +88 -0
  25. data/build/lib/u/build/data/linebreak.rb +14 -0
  26. data/build/lib/u/build/data/proplist.rb +18 -0
  27. data/build/lib/u/build/data/scripts.rb +22 -0
  28. data/build/lib/u/build/data/specialcasing.rb +106 -0
  29. data/build/lib/u/build/data/unicode.rb +41 -0
  30. data/build/lib/u/build/data/unicode/entry.rb +27 -0
  31. data/build/lib/u/build/data/unicode/entry/decomposition.rb +29 -0
  32. data/build/lib/u/build/data/unicode/points.rb +32 -0
  33. data/build/lib/u/build/header.rb +11 -0
  34. data/build/lib/u/build/header/table.rb +19 -0
  35. data/build/lib/u/build/header/table/row.rb +64 -0
  36. data/build/lib/u/build/header/tables.rb +6 -0
  37. data/build/lib/u/build/header/tables/intervals.rb +50 -0
  38. data/build/lib/u/build/header/tables/split.rb +20 -0
  39. data/build/lib/u/build/header/tables/split/data.rb +29 -0
  40. data/build/lib/u/build/header/tables/split/part1.rb +28 -0
  41. data/build/lib/u/build/header/tables/split/part2.rb +13 -0
  42. data/build/lib/u/build/header/tables/split/row.rb +34 -0
  43. data/build/lib/u/build/header/tables/split/rows.rb +22 -0
  44. data/build/test/unit/break.rb +45 -0
  45. data/build/test/unit/case.rb +178 -0
  46. data/build/test/unit/foldcase.rb +44 -0
  47. data/build/test/unit/normalize.rb +81 -0
  48. data/ext/u/attributes.c +62 -0
  49. data/ext/u/attributes.h +5 -0
  50. data/ext/u/case.h +41 -0
  51. data/ext/u/data/attributes.h +3070 -0
  52. data/ext/u/data/bidi-mirroring.h +373 -0
  53. data/ext/u/data/canonical-combining-class.h +2157 -0
  54. data/ext/u/data/case-folding.h +171 -0
  55. data/ext/u/data/cased.h +42 -0
  56. data/ext/u/data/compose.h +1714 -0
  57. data/ext/u/data/constants.h +17 -0
  58. data/ext/u/data/decompose.h +9356 -0
  59. data/ext/u/data/general-category.h +28959 -0
  60. data/ext/u/data/grapheme-break.h +13201 -0
  61. data/ext/u/data/line-break.h +26501 -0
  62. data/ext/u/data/normalization-quick-check.h +3002 -0
  63. data/ext/u/data/script.h +2928 -0
  64. data/ext/u/data/soft-dotted.h +55 -0
  65. data/ext/u/data/title-table.h +41 -0
  66. data/ext/u/data/types.h +11117 -0
  67. data/ext/u/data/wide-cjk.h +197 -0
  68. data/ext/u/data/wide.h +59 -0
  69. data/ext/u/data/word-break.h +10001 -0
  70. data/ext/u/depend +281 -0
  71. data/ext/u/extconf.rb +158 -0
  72. data/ext/u/output.h +51 -0
  73. data/ext/{encoding/character/utf-8 → u}/private.c +11 -15
  74. data/ext/u/private.h +58 -0
  75. data/ext/u/rb_includes.h +10 -0
  76. data/ext/u/rb_private.c +98 -0
  77. data/ext/u/rb_private.h +67 -0
  78. data/ext/u/rb_u.c +251 -0
  79. data/ext/u/rb_u_buffer.c +443 -0
  80. data/ext/u/rb_u_buffer.h +24 -0
  81. data/ext/u/rb_u_re.c +43 -0
  82. data/ext/u/rb_u_re.h +15 -0
  83. data/ext/u/rb_u_string.c +478 -0
  84. data/ext/u/rb_u_string.h +173 -0
  85. data/ext/u/rb_u_string_alnum.c +10 -0
  86. data/ext/u/rb_u_string_alpha.c +10 -0
  87. data/ext/u/rb_u_string_aref.c +142 -0
  88. data/ext/u/rb_u_string_ascii_only.c +13 -0
  89. data/ext/u/rb_u_string_assigned.c +10 -0
  90. data/ext/u/rb_u_string_b.c +18 -0
  91. data/ext/u/rb_u_string_bytesize.c +10 -0
  92. data/ext/u/rb_u_string_byteslice.c +103 -0
  93. data/ext/u/rb_u_string_canonical_combining_class.c +33 -0
  94. data/ext/u/rb_u_string_case_ignorable.c +25 -0
  95. data/ext/u/rb_u_string_casecmp.c +61 -0
  96. data/ext/u/rb_u_string_cased.c +17 -0
  97. data/ext/u/rb_u_string_chomp.c +107 -0
  98. data/ext/u/rb_u_string_chop.c +33 -0
  99. data/ext/u/rb_u_string_chr.c +9 -0
  100. data/ext/u/rb_u_string_cntrl.c +10 -0
  101. data/ext/u/rb_u_string_collate.c +46 -0
  102. data/ext/u/rb_u_string_collation_key.c +18 -0
  103. data/ext/u/rb_u_string_count.c +38 -0
  104. data/ext/u/rb_u_string_defined.c +10 -0
  105. data/ext/u/rb_u_string_delete.c +62 -0
  106. data/ext/u/rb_u_string_digit.c +10 -0
  107. data/ext/u/rb_u_string_downcase.c +13 -0
  108. data/ext/u/rb_u_string_dump.c +153 -0
  109. data/ext/u/rb_u_string_each_byte.c +46 -0
  110. data/ext/u/rb_u_string_each_char.c +49 -0
  111. data/ext/u/rb_u_string_each_codepoint.c +45 -0
  112. data/ext/u/rb_u_string_each_grapheme_cluster.c +36 -0
  113. data/ext/u/rb_u_string_each_line.c +142 -0
  114. data/ext/u/rb_u_string_each_word.c +34 -0
  115. data/ext/u/rb_u_string_empty.c +11 -0
  116. data/ext/u/rb_u_string_end_with.c +31 -0
  117. data/ext/u/rb_u_string_eql.c +30 -0
  118. data/ext/u/rb_u_string_equal.c +33 -0
  119. data/ext/u/rb_u_string_foldcase.c +12 -0
  120. data/ext/u/rb_u_string_folded.c +13 -0
  121. data/ext/u/rb_u_string_format.c +1745 -0
  122. data/ext/u/rb_u_string_general_category.c +109 -0
  123. data/ext/u/rb_u_string_getbyte.c +21 -0
  124. data/ext/u/rb_u_string_graph.c +21 -0
  125. data/ext/u/rb_u_string_grapheme_break.c +61 -0
  126. data/ext/u/rb_u_string_gsub.c +164 -0
  127. data/ext/u/rb_u_string_hash.c +10 -0
  128. data/ext/u/rb_u_string_hex.c +9 -0
  129. data/ext/u/rb_u_string_include.c +10 -0
  130. data/ext/u/rb_u_string_index.c +110 -0
  131. data/ext/u/rb_u_string_inspect.c +189 -0
  132. data/ext/u/rb_u_string_internal_tr.c +148 -0
  133. data/ext/u/rb_u_string_internal_tr.h +29 -0
  134. data/ext/u/rb_u_string_justify.c +169 -0
  135. data/ext/u/rb_u_string_length.c +10 -0
  136. data/ext/u/rb_u_string_line_break.c +115 -0
  137. data/ext/u/rb_u_string_lower.c +13 -0
  138. data/ext/u/rb_u_string_lstrip.c +24 -0
  139. data/ext/u/rb_u_string_match.c +65 -0
  140. data/ext/u/rb_u_string_mirror.c +16 -0
  141. data/ext/u/rb_u_string_newline.c +21 -0
  142. data/ext/u/rb_u_string_normalize.c +70 -0
  143. data/ext/u/rb_u_string_normalized.c +28 -0
  144. data/ext/u/rb_u_string_oct.c +11 -0
  145. data/ext/u/rb_u_string_ord.c +14 -0
  146. data/ext/u/rb_u_string_partition.c +80 -0
  147. data/ext/u/rb_u_string_plus.c +33 -0
  148. data/ext/u/rb_u_string_print.c +10 -0
  149. data/ext/u/rb_u_string_punct.c +10 -0
  150. data/ext/u/rb_u_string_reverse.c +13 -0
  151. data/ext/u/rb_u_string_rindex.c +104 -0
  152. data/ext/u/rb_u_string_rpartition.c +81 -0
  153. data/ext/u/rb_u_string_rstrip.c +29 -0
  154. data/ext/u/rb_u_string_scan.c +109 -0
  155. data/ext/u/rb_u_string_script.c +253 -0
  156. data/ext/u/rb_u_string_soft_dotted.c +13 -0
  157. data/ext/u/rb_u_string_space.c +24 -0
  158. data/ext/u/rb_u_string_split.c +245 -0
  159. data/ext/u/rb_u_string_squeeze.c +75 -0
  160. data/ext/u/rb_u_string_start_with.c +31 -0
  161. data/ext/u/rb_u_string_strip.c +36 -0
  162. data/ext/u/rb_u_string_sub.c +147 -0
  163. data/ext/u/rb_u_string_times.c +35 -0
  164. data/ext/u/rb_u_string_title.c +10 -0
  165. data/ext/u/rb_u_string_titlecase.c +13 -0
  166. data/ext/u/rb_u_string_to_i.c +45 -0
  167. data/ext/u/rb_u_string_to_inum.c +364 -0
  168. data/ext/u/rb_u_string_to_inum.h +1 -0
  169. data/ext/u/rb_u_string_to_str.c +17 -0
  170. data/ext/u/rb_u_string_to_sym.c +12 -0
  171. data/ext/u/rb_u_string_tr.c +290 -0
  172. data/ext/u/rb_u_string_upcase.c +12 -0
  173. data/ext/u/rb_u_string_upper.c +13 -0
  174. data/ext/u/rb_u_string_valid.c +10 -0
  175. data/ext/u/rb_u_string_valid_encoding.c +12 -0
  176. data/ext/u/rb_u_string_wide.c +21 -0
  177. data/ext/u/rb_u_string_wide_cjk.c +21 -0
  178. data/ext/u/rb_u_string_width.c +19 -0
  179. data/ext/u/rb_u_string_word_break.c +63 -0
  180. data/ext/u/rb_u_string_xdigit.c +22 -0
  181. data/ext/u/rb_u_string_zero_width.c +16 -0
  182. data/ext/u/titled.c +55 -0
  183. data/ext/u/titled.h +1 -0
  184. data/ext/u/u.c +23 -0
  185. data/ext/u/u.h +458 -0
  186. data/ext/u/u_char_canonical_combining_class.c +31 -0
  187. data/ext/u/u_char_digit_value.c +21 -0
  188. data/ext/u/u_char_downcase.c +27 -0
  189. data/ext/u/u_char_general_category.c +31 -0
  190. data/ext/u/u_char_grapheme_break.c +28 -0
  191. data/ext/u/u_char_isalnum.c +24 -0
  192. data/ext/u/u_char_isalpha.c +21 -0
  193. data/ext/u/u_char_isassigned.c +16 -0
  194. data/ext/u/u_char_iscased.c +22 -0
  195. data/ext/u/u_char_iscaseignorable.c +29 -0
  196. data/ext/u/u_char_iscntrl.c +17 -0
  197. data/ext/u/u_char_isdefined.c +15 -0
  198. data/ext/u/u_char_isdigit.c +16 -0
  199. data/ext/u/u_char_isgraph.c +22 -0
  200. data/ext/u/u_char_islower.c +16 -0
  201. data/ext/u/u_char_isnewline.c +24 -0
  202. data/ext/u/u_char_isprint.c +21 -0
  203. data/ext/u/u_char_ispunct.c +27 -0
  204. data/ext/u/u_char_issoftdotted.c +18 -0
  205. data/ext/u/u_char_isspace.c +28 -0
  206. data/ext/u/u_char_isupper.c +16 -0
  207. data/ext/u/u_char_isvalid.c +18 -0
  208. data/ext/u/u_char_iswide.c +18 -0
  209. data/ext/u/u_char_iswide_cjk.c +22 -0
  210. data/ext/u/u_char_isxdigit.c +27 -0
  211. data/ext/u/u_char_iszerowidth.c +29 -0
  212. data/ext/u/u_char_line_break.c +29 -0
  213. data/ext/u/u_char_mirror.c +16 -0
  214. data/ext/u/u_char_normalized.c +23 -0
  215. data/ext/u/u_char_script.c +41 -0
  216. data/ext/u/u_char_to_u.c +48 -0
  217. data/ext/u/u_char_upcase.c +24 -0
  218. data/ext/u/u_char_width.c +12 -0
  219. data/ext/u/u_char_word_break.c +28 -0
  220. data/ext/u/u_char_xdigit_value.c +31 -0
  221. data/ext/u/u_collate.c +83 -0
  222. data/ext/u/u_collation_key.c +132 -0
  223. data/ext/u/u_decode.c +156 -0
  224. data/ext/u/u_downcase.c +201 -0
  225. data/ext/u/u_foldcase.c +68 -0
  226. data/ext/u/u_grapheme_clusters.c +57 -0
  227. data/ext/u/u_has_prefix.c +27 -0
  228. data/ext/u/u_index.c +93 -0
  229. data/ext/u/u_is_ascii_only.c +33 -0
  230. data/ext/u/u_locale.c +40 -0
  231. data/ext/u/u_locale.h +14 -0
  232. data/ext/u/u_mirror.c +20 -0
  233. data/ext/u/u_n_bytes.c +16 -0
  234. data/ext/u/u_n_chars.c +43 -0
  235. data/ext/u/u_normalize.c +232 -0
  236. data/ext/u/u_normalized.c +28 -0
  237. data/ext/u/u_offset_to_pointer.c +62 -0
  238. data/ext/u/u_pointer_to_offset.c +23 -0
  239. data/ext/u/u_recode.c +73 -0
  240. data/ext/u/u_reverse.c +21 -0
  241. data/ext/u/u_rindex.c +132 -0
  242. data/ext/u/u_titlecase.c +68 -0
  243. data/ext/u/u_upcase.c +89 -0
  244. data/ext/u/u_width.c +35 -0
  245. data/ext/u/u_words.c +82 -0
  246. data/ext/u/yield.h +27 -0
  247. data/lib/u-1.0.rb +20 -0
  248. data/lib/u-1.0/buffer.rb +10 -0
  249. data/lib/u-1.0/string.rb +9 -0
  250. data/lib/u-1.0/version.rb +287 -0
  251. data/test/unit/case.rb +2080 -0
  252. data/test/unit/foldcase.rb +1136 -0
  253. data/test/unit/graphemebreak.rb +407 -0
  254. data/test/unit/normalize.rb +367545 -0
  255. data/test/unit/u-1.0.rb +10 -0
  256. data/test/unit/u-1.0/buffer.rb +52 -0
  257. data/test/unit/u-1.0/string.rb +1439 -0
  258. data/test/unit/{u.rb → u-1.0/version.rb} +0 -1
  259. data/test/unit/wordbreak.rb +1083 -0
  260. metadata +603 -148
  261. data/README +0 -38
  262. data/Rakefile +0 -64
  263. data/ext/encoding/character/utf-8/break.c +0 -25
  264. data/ext/encoding/character/utf-8/data/break.h +0 -22931
  265. data/ext/encoding/character/utf-8/data/character-tables.h +0 -14358
  266. data/ext/encoding/character/utf-8/data/compose.h +0 -1607
  267. data/ext/encoding/character/utf-8/data/decompose.h +0 -10926
  268. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +0 -1070
  269. data/ext/encoding/character/utf-8/decompose.c +0 -444
  270. data/ext/encoding/character/utf-8/depend +0 -65
  271. data/ext/encoding/character/utf-8/extconf.rb +0 -67
  272. data/ext/encoding/character/utf-8/private.h +0 -51
  273. data/ext/encoding/character/utf-8/properties.c +0 -1056
  274. data/ext/encoding/character/utf-8/rb_includes.h +0 -19
  275. data/ext/encoding/character/utf-8/rb_methods.h +0 -49
  276. data/ext/encoding/character/utf-8/rb_private.h +0 -52
  277. data/ext/encoding/character/utf-8/rb_utf_aref.c +0 -111
  278. data/ext/encoding/character/utf-8/rb_utf_aset.c +0 -105
  279. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +0 -24
  280. data/ext/encoding/character/utf-8/rb_utf_chomp.c +0 -114
  281. data/ext/encoding/character/utf-8/rb_utf_chop.c +0 -44
  282. data/ext/encoding/character/utf-8/rb_utf_collate.c +0 -13
  283. data/ext/encoding/character/utf-8/rb_utf_count.c +0 -30
  284. data/ext/encoding/character/utf-8/rb_utf_delete.c +0 -60
  285. data/ext/encoding/character/utf-8/rb_utf_downcase.c +0 -13
  286. data/ext/encoding/character/utf-8/rb_utf_each_char.c +0 -27
  287. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +0 -13
  288. data/ext/encoding/character/utf-8/rb_utf_hex.c +0 -14
  289. data/ext/encoding/character/utf-8/rb_utf_index.c +0 -50
  290. data/ext/encoding/character/utf-8/rb_utf_insert.c +0 -48
  291. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +0 -332
  292. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +0 -12
  293. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +0 -142
  294. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +0 -41
  295. data/ext/encoding/character/utf-8/rb_utf_justify.c +0 -96
  296. data/ext/encoding/character/utf-8/rb_utf_length.c +0 -14
  297. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +0 -41
  298. data/ext/encoding/character/utf-8/rb_utf_normalize.c +0 -51
  299. data/ext/encoding/character/utf-8/rb_utf_oct.c +0 -14
  300. data/ext/encoding/character/utf-8/rb_utf_reverse.c +0 -13
  301. data/ext/encoding/character/utf-8/rb_utf_rindex.c +0 -88
  302. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +0 -51
  303. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +0 -70
  304. data/ext/encoding/character/utf-8/rb_utf_strip.c +0 -27
  305. data/ext/encoding/character/utf-8/rb_utf_to_i.c +0 -25
  306. data/ext/encoding/character/utf-8/rb_utf_tr.c +0 -250
  307. data/ext/encoding/character/utf-8/rb_utf_upcase.c +0 -13
  308. data/ext/encoding/character/utf-8/tables.h +0 -38
  309. data/ext/encoding/character/utf-8/unicode.c +0 -319
  310. data/ext/encoding/character/utf-8/unicode.h +0 -216
  311. data/ext/encoding/character/utf-8/utf.c +0 -1334
  312. data/lib/encoding/character/utf-8.rb +0 -201
  313. data/lib/u.rb +0 -16
  314. data/lib/u/string.rb +0 -185
  315. data/lib/u/version.rb +0 -5
  316. data/test/unit/u/string.rb +0 -91
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e341d655772c92627d05e82ba10d7ed1e51bd77c
4
+ data.tar.gz: f48cabee19ae5775bd90baa63b7759e1e6335424
5
+ SHA512:
6
+ metadata.gz: 40a124a219c3a1b0755fa5b1b1386170bcf801e88cdf1de745abe4f6de7c6d51ba35423ded5c2d4c99cbe8c192b04579fa4b69b7e2611715615fd8e81e87c7b6
7
+ data.tar.gz: e1803f13f2c604c5e48bdae6b0edf5564eb370dee52ae0c591f7bcc0677d66005fc7a3d59626f134f27e22abee42a371b5a0d07cc5e04550fcbdf5874f8a6257
@@ -0,0 +1,39 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'u/build'
4
+
5
# Writes a C header containing the split 'attr' lookup table followed by the
# special-case string table.
class Attributes
  # data::           indexable by code point; entries respond to #value.
  # special_casing:: responds to #include?, #[] and #each; its entries respond
  #                  to #offset and #to_escaped_s.
  # io::             stream the header is written to.
  def initialize(data, special_casing, io = $stdout)
    U::Build::Header.new(io) do
      attributes = U::Build::Header::Tables::Split.new(data, 'uint32_t', 'attr') { |point|
        if special_casing.include?(point)
          # Special cases store an offset into special_case_table, flagged by
          # adding 0x1000000.
          '0x%07x' % (0x1000000 + special_casing[point].offset)
        else
          '0x%04x' % (data[point].value || 0)
        end
      }
      io.puts attributes
      io.puts SpecialCaseTable.new(special_casing)
    end
  end

  private

  # C string table with one NUL-terminated record per special casing.
  class SpecialCaseTable < U::Build::Header::Table
    def initialize(special_casing)
      super '/*
* Table of special cases for case conversion; each record contains
* First, the best single character mapping to lowercase if Lu,
* and to uppercase if Ll, followed by the output mapping for the two cases
* other than the case of the codepoint, in the order Ll, Lu, Lt, encoded in
* UTF-8, separated and terminated by a NUL character.
*/
static const char special_case_table[]'
      records = []
      special_casing.each do |record|
        records.push(%( "%s\\0" /* offset %d */) % [record.to_escaped_s, record.offset])
      end
      self << records.join("\n")
    end
  end
end
37
+
38
# ARGV[0] names the marshalled Unicode data file; ARGV[1] is handed to
# SpecialCasing (presumably the SpecialCasing.txt path -- confirm with caller).
unicode_data = Marshal.load(File.binread(ARGV[0]))
special_casing = U::Build::Data::SpecialCasing.new(unicode_data, ARGV[1])
Attributes.new(unicode_data, special_casing)
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'u/build'
4
+
5
# Writes a C header containing bidi_mirroring_table.
class BidiMirroring
  # bidi_mirroring:: enumerable whose entries respond to #char and #mirrored.
  # io::             stream the header is written to.
  def initialize(bidi_mirroring, io = $stdout)
    U::Build::Header.new(io) { io.puts BidiMirroringTable.new(bidi_mirroring) }
  end

  private

  # One { ch, mirrored_ch } row per entry of the mirroring data.
  class BidiMirroringTable < U::Build::Header::Table
    def initialize(bidi_mirroring)
      super "static const struct {
\tuint32_t ch;
\tuint32_t mirrored_ch;
} bidi_mirroring_table[]"
      bidi_mirroring.each do |pair|
        cells = [pair.char, pair.mirrored].map { |point| '%#06x' % point }
        self << U::Build::Header::Table::Row.new(*cells)
      end
    end
  end
end
26
+
27
# ARGV[0] is handed to the BidiMirroring data reader.
mirroring_data = U::Build::Data::BidiMirroring.new(ARGV[0])
BidiMirroring.new(mirroring_data)
@@ -0,0 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'u/build'
4
+
5
# Writes a C header containing the split 'canonical_combining_class' table.
class CanonicalCombiningClass
  # data:: indexable by code point; entries respond to #cclass.
  # io::   stream the header is written to.
  def initialize(data, io = $stdout)
    U::Build::Header.new(io) do
      table = U::Build::Header::Tables::Split.new(data, 'uint8_t', 'canonical_combining_class') { |point|
        '%d' % data[point].cclass
      }
      io.puts table
    end
  end
end
14
+
15
# ARGV[0] names the marshalled Unicode data file.
unicode_data = Marshal.load(File.binread(ARGV[0]))
CanonicalCombiningClass.new(unicode_data)
@@ -0,0 +1,39 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'u/build'
4
+
5
# Writes a C header containing casefold_table.
class CaseFolding
  # casefolding:: enumerable whose entries respond to #char, #to_s and
  #               #to_escaped_s.
  # io::          stream the header is written to.
  def initialize(casefolding, io = $stdout)
    U::Build::Header.new(io) { io.puts CasefoldTable.new(casefolding) }
  end

  private

  # One { ch, data } row per casefolding, ordered by code point.
  class CasefoldTable < U::Build::Header::Table
    # Raises RuntimeError if any code point does not fit the uint16_t ch field.
    def initialize(casefolding)
      # The data field must hold the longest folding plus one extra byte.
      width = casefolding.map { |folding| folding.to_s.bytesize }.max + 1
      super "/*
* Table of casefolding cases that can't be derived by lowercasing.
*/
static const struct {
\tuint16_t ch;
\tchar data[#{width}];
} casefold_table[]"
      # TODO: Check whether this sort_by is actually needed (or whether the
      # casefolding data should already arrive sorted).
      casefolding.sort_by(&:char).each do |folding|
        if folding.char > 0xffff
          raise RuntimeError,
            'casefold_table.ch field too short; upgrade to uint32_t to fit values beyond 0xffff: %s' %
              folding.char
        end
        self << U::Build::Header::Table::Row.new('%#06x' % folding.char, '"%s"' % folding.to_escaped_s)
      end
    end
  end
end
35
+
36
# ARGV[0] names the marshalled Unicode data file; ARGV[1] and ARGV[2] are
# handed to the SpecialCasing and CaseFolding data readers respectively.
unicode_data = Marshal.load(File.binread(ARGV[0]))
special_casing = U::Build::Data::SpecialCasing.new(unicode_data, ARGV[1])
foldings = U::Build::Data::CaseFolding.new(unicode_data, special_casing, ARGV[2])
CaseFolding.new(foldings)
@@ -0,0 +1,19 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'u/build'
4
+
5
# Writes a C header containing the 'cased' interval table.
class Cased
  # Property names that select a code point for the table. Note that inside
  # this class body the name Cased refers to this array, not to the class.
  Cased = ['Other_Lowercase', 'Other_Uppercase'].freeze

  # prop_list:: enumerable of (code point, property names) pairs.
  # io::        stream the header is written to.
  def initialize(prop_list, io = $stdout)
    U::Build::Header.new(io) do
      selected = prop_list.select { |_point, properties|
        properties.any? { |property| Cased.include? property }
      }
      points = selected.map { |point, _properties| point }
      io.puts U::Build::Header::Tables::Intervals.new(points, 'cased')
    end
  end
end
18
+
19
# ARGV[0] is handed to the PropList data reader.
prop_list = U::Build::Data::PropList.new(ARGV[0])
Cased.new(prop_list)
@@ -0,0 +1,304 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'u/build'
4
+
5
+ class Compose
6
# Derives the composition lookup structures from +data+ and
# +composition_exclusions+, then writes the COMPOSE_* defines and the compose
# tables to +io+ inside a generated header.
def initialize(data, composition_exclusions, io = $stdout)
  pairs = Compositions.new(data, composition_exclusions)
  firsts = Firsts.new(pairs)
  seconds = Seconds.new(pairs, firsts)
  first_singles, second_singles, reversals = Singletons.create(pairs, firsts, seconds)
  values = Values.new(firsts, first_singles, seconds, second_singles)
  U::Build::Header.new(io) do
    io.puts <<EOH
#define COMPOSE_FIRST_START #{values.first_start}
#define COMPOSE_FIRST_SINGLE_START #{values.first_single_start}
#define COMPOSE_SECOND_START #{values.second_start}
#define COMPOSE_SECOND_SINGLE_START #{values.second_single_start}
#define COMPOSE_TABLE_LAST #{values.last / 256}
EOH
    # Code points without a compose value are written as '0'.
    io.puts(Table.new(0, values.last, 'uint16_t compose_data', 'compose_table') { |code|
      values.include?(code) ? values[code].to_s : '0'
    })
    io.puts SingletonTable.new(first_singles, 'compose_first_single')
    io.puts SingletonTable.new(second_singles, 'compose_second_single')
    io.puts ComposeArray.new(firsts, seconds, reversals)
  end
end
28
+
29
+ private
30
+
31
# Split table (data rows plus a part-1 index) whose cell text is supplied by
# the block given to ::new.
class Table
  # NOTE(review): the rows always start at 0; +first+ is only forwarded to
  # Part1 -- confirm this is intended.
  def initialize(first, last, data, part1)
    rows = U::Build::Header::Tables::Split::Rows.new(0, last) { |code| yield code }
    @data = U::Build::Header::Tables::Split::Data.new(data, rows)
    @part1 = Part1.new(part1, first, last, rows, 0)
  end

  # The data table followed by the part-1 table, each terminated by a newline.
  def to_s
    format("%s\n%s\n", @data, @part1)
  end

  # Part-1 index whose last page is named by the COMPOSE_TABLE_LAST macro.
  class Part1 < U::Build::Header::Tables::Split::Part1
    def self.last_page
      'COMPOSE_TABLE_LAST'
    end
  end
end
50
+
51
# The canonical two-element decompositions in the Unicode data that are usable
# for composition. An entry is kept when its code point is not excluded, its
# decomposition is canonical with at least two elements, and both the code
# point itself and the first element of its decomposition have combining
# class Starter.
class Compositions
  include Enumerable

  # Canonical combining class value required of the composite and of the
  # first element of its decomposition.
  Starter = 0

  # data::                   indexable and enumerable by code point; entries
  #                          respond to #decomposition and #cclass.
  # composition_exclusions:: responds to #include?(code).
  #
  # Raises RuntimeError if a qualifying decomposition has more than two
  # elements.
  def initialize(data, composition_exclusions)
    @entries = []
    data.each_with_index do |entry, code|
      next if composition_exclusions.include?(code)
      decomposition = entry.decomposition
      next if decomposition.length < 2 ||
        !decomposition.canonical? ||
        data[code].cclass != Starter ||
        data[decomposition.first].cclass != Starter
      unless decomposition.length == 2
        raise RuntimeError,
          'decomposition of %04X contains more than 2 elements: %d' %
            [code, decomposition.length]
      end
      @entries << [decomposition.entries, code]
    end
  end

  # Yields first, second and the composite code point for every composition.
  # Note that this deliberately differs from Enumerable#each_with_index.
  # Returns an Enumerator when called without a block.
  def each_with_index
    return enum_for(:each_with_index) unless block_given?
    @entries.each do |parts, code|
      yield parts[0], parts[1], code
    end
    self
  end

  # Yields the [first, second] pair of every composition.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?
    @entries.each do |parts, _code|
      yield parts
    end
    self
  end
end
88
+
89
# Counts how often each code point selected by the given block occurs among
# the compositions and enumerates, in sorted order, those selected more than
# once.
class Commons
  include Enumerable

  # compositions:: enumerable yielding [first, second] pairs.
  # The block receives (first, second) and returns the code point to count,
  # or nil/false to skip the pair.
  def initialize(compositions)
    @entries = Hash.new(0)
    compositions.each do |first, second|
      selection = yield(first, second)
      @entries[selection] += 1 if selection
    end
  end

  # Yields every code point that was selected more than once, in ascending
  # order. Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?
    @entries.keys.sort.each do |code|
      yield code if @entries[code] > 1
    end
    self
  end
end
106
+
107
# Maps every first element that occurs in more than one composition to a
# zero-based index, assigned in the order Commons yields the code points.
class Firsts
  include Enumerable

  # compositions:: enumerable yielding [first, second] pairs.
  def initialize(compositions)
    self.commons = Commons.new(compositions) { |first, _second| first }
  end

  # Yields each code point together with its index.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?
    @entries.each do |code, index|
      yield code, index
    end
    self
  end

  # Number of indexed code points.
  def size
    @entries.length
  end

  # Whether +code+ has been assigned an index.
  def include?(code)
    @entries.key?(code)
  end

  # Index assigned to +code+, or nil if it has none.
  def [](code)
    @entries[code]
  end

  protected

  # Replaces the index with one entry per code point yielded by +commons+.
  def commons=(commons)
    @entries = {}
    commons.each do |code|
      # The size before insertion is the next free index.
      @entries[code] = @entries.size
    end
    commons
  end
end
143
+
144
# Index of the second elements that occur more than once among the
# compositions whose first element is indexed in +firsts+.
class Seconds < Firsts
  # Does not call super: the index is built from a different selection.
  def initialize(compositions, firsts)
    shared = Commons.new(compositions) { |first, second|
      firsts.include?(first) ? second : nil
    }
    self.commons = shared
  end
end
151
+
152
# Singleton entries kept sorted by their #first code point.
class Singletons
  include Enumerable

  # Partitions the compositions into three groups and returns
  # [first_singletons, second_singletons, reversals]:
  # * first not indexed in +firsts+: a first singleton keyed by first;
  # * first and second both indexed: an entry in the reversals table under
  #   the two indices;
  # * otherwise (only first indexed): a second singleton keyed by second.
  def self.create(compositions, firsts, seconds)
    first_singletons = []
    second_singletons = []
    reversals = Reversals.new
    compositions.each_with_index do |first, second, code|
      if !firsts.include?(first)
        first_singletons << Singleton.new(first, second, code)
      elsif seconds.include?(second)
        reversals[firsts[first], seconds[second]] = code
      else
        second_singletons << Singleton.new(second, first, code)
      end
    end
    [new(first_singletons), new(second_singletons), reversals]
  end

  # entries:: objects responding to #first.
  def initialize(entries)
    @entries = entries.sort_by(&:first)
  end

  # Yields every entry in sorted order.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?
    @entries.each do |entry|
      yield entry
    end
    self
  end

  # Number of entries.
  def size
    @entries.size
  end
end
188
+
189
# Read-only record of a composition that is stored in a singleton table.
class Singleton
  attr_reader :first, :second, :code

  def initialize(first, second, code)
    @first = first
    @second = second
    @code = code
  end
end
196
+
197
# Two-dimensional lookup from a (first, second) pair to a composite code
# point. The pair itself is used as the Hash key, so keys are compared with
# Array#eql? instead of going through string formatting.
class Reversals
  def initialize
    @entries = {}
  end

  # Stores +code+ under the pair (first, second).
  def []=(first, second, code)
    @entries[[first, second]] = code
  end

  # Whether a code point has been stored under (first, second).
  def include?(first, second)
    @entries.key?([first, second])
  end

  # Code point stored under (first, second), or nil if there is none.
  def [](first, second)
    @entries[[first, second]]
  end
end
221
+
222
# Assigns every code point that takes part in composition a value in one of
# four consecutive ranges (firsts, first singletons, seconds, second
# singletons) and records where each range starts. Values start at 1.
class Values
  attr_reader :first_start, :first_single_start, :second_start, :second_single_start

  # firsts, seconds::                     yield (code, index) pairs from #each.
  # first_singletons, second_singletons:: entries respond to #first.
  def initialize(firsts, first_singletons, seconds, second_singletons)
    @values = {}
    @total = 1

    # Each group is stored relative to the running total as it stands before
    # the group is added; only afterwards does the total grow by its size.
    @first_start = @total
    @total += set(firsts).size
    @first_single_start = @total
    @total += singletons(first_singletons).size
    @second_start = @total
    @total += set(seconds).size
    @second_single_start = @total
    singletons second_singletons
  end

  # Largest code point that has a value.
  def last
    @values.keys.max
  end

  # Whether +code+ has a value.
  def include?(code)
    @values.include?(code)
  end

  # Value assigned to +code+, or nil if it has none.
  def [](code)
    @values[code]
  end

  protected

  # Stores +value+ offset by the current running total.
  def []=(code, value)
    @values[code] = value + @total
  end

  private

  # Stores the index of every (code, index) pair; returns what #each returns.
  def set(fs)
    fs.each { |code, value| self[code] = value }
  end

  # Stores each singleton's position under its #first code point; returns
  # what #each_with_index returns.
  def singletons(singletons)
    singletons.each_with_index { |singleton, position| self[singleton.first] = position }
  end
end
268
+
269
# C table with one { second, code } row per singleton. The element type is
# declared as uint16_t and switched to uint32_t as soon as a value above
# 0xffff is seen.
class SingletonTable < U::Build::Header::Table
  def initialize(singletons, name)
    super 'static const uint16_t %s[][2]' % name
    singletons.each do |singleton|
      pair = [singleton.second, singleton.code]
      @declaration = 'static const uint32_t %s[][2]' % name if pair.any? { |point| point > 0xffff }
      self << U::Build::Header::Table::Row.new(*pair.map { |point| '%#06x' % point })
    end
  end
end
278
+
279
# C matrix compose_array[firsts.size][seconds.size] giving the composite code
# point for each (first index, second index) pair; pairs without a composite
# get a blank-padded 0.
class ComposeArray < U::Build::Header::Table
  # Raises RuntimeError if a composite does not fit in a uint16_t.
  def initialize(firsts, seconds, reversals)
    @firsts, @seconds, @reversals = firsts, seconds, reversals
    super 'static const uint16_t compose_array[%d][%d]' % [firsts.size, seconds.size]
    firsts.size.times do |i|
      row = U::Build::Header::Table::Row.new
      seconds.size.times do |j|
        unless reversals.include?(i, j)
          row << ' 0'
          next
        end
        composite = reversals[i, j]
        if composite > 0xffff
          raise RuntimeError,
            'compose_array table field too short; upgrade to uint32_t to fit values beyond 0xffff: %04X' %
              composite
        end
        row << ('%#06x' % composite)
      end
      self << row
    end
  end
end
301
+ end
302
+
303
# ARGV[0] names the marshalled Unicode data file; ARGV[1] is handed to the
# CompositionExclusions data reader.
unicode_data = Marshal.load(File.binread(ARGV[0]))
exclusions = U::Build::Data::CompositionExclusions.new(ARGV[1])
Compose.new(unicode_data, exclusions)