u 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. checksums.yaml +7 -0
  2. data/build/ext/u/data/attributes.rb +39 -0
  3. data/build/ext/u/data/bidi-mirroring.rb +27 -0
  4. data/build/ext/u/data/canonical-combining-class.rb +15 -0
  5. data/build/ext/u/data/case-folding.rb +39 -0
  6. data/build/ext/u/data/cased.rb +19 -0
  7. data/build/ext/u/data/compose.rb +304 -0
  8. data/build/ext/u/data/constants.rb +31 -0
  9. data/build/ext/u/data/decompose.rb +85 -0
  10. data/build/ext/u/data/general-category.rb +61 -0
  11. data/build/ext/u/data/grapheme-word-break.rb +15 -0
  12. data/build/ext/u/data/marshalled.rb +5 -0
  13. data/build/ext/u/data/script.rb +91 -0
  14. data/build/ext/u/data/soft-dotted.rb +17 -0
  15. data/build/ext/u/data/title-table.rb +30 -0
  16. data/build/ext/u/data/wide.rb +17 -0
  17. data/build/lib/u/build.rb +8 -0
  18. data/build/lib/u/build/data.rb +16 -0
  19. data/build/lib/u/build/data/bidimirroring.rb +26 -0
  20. data/build/lib/u/build/data/break.rb +14 -0
  21. data/build/lib/u/build/data/casefolding.rb +77 -0
  22. data/build/lib/u/build/data/compositionexclusions.rb +14 -0
  23. data/build/lib/u/build/data/derivedeastasianwidth.rb +15 -0
  24. data/build/lib/u/build/data/file.rb +88 -0
  25. data/build/lib/u/build/data/linebreak.rb +14 -0
  26. data/build/lib/u/build/data/proplist.rb +18 -0
  27. data/build/lib/u/build/data/scripts.rb +22 -0
  28. data/build/lib/u/build/data/specialcasing.rb +106 -0
  29. data/build/lib/u/build/data/unicode.rb +41 -0
  30. data/build/lib/u/build/data/unicode/entry.rb +27 -0
  31. data/build/lib/u/build/data/unicode/entry/decomposition.rb +29 -0
  32. data/build/lib/u/build/data/unicode/points.rb +32 -0
  33. data/build/lib/u/build/header.rb +11 -0
  34. data/build/lib/u/build/header/table.rb +19 -0
  35. data/build/lib/u/build/header/table/row.rb +64 -0
  36. data/build/lib/u/build/header/tables.rb +6 -0
  37. data/build/lib/u/build/header/tables/intervals.rb +50 -0
  38. data/build/lib/u/build/header/tables/split.rb +20 -0
  39. data/build/lib/u/build/header/tables/split/data.rb +29 -0
  40. data/build/lib/u/build/header/tables/split/part1.rb +28 -0
  41. data/build/lib/u/build/header/tables/split/part2.rb +13 -0
  42. data/build/lib/u/build/header/tables/split/row.rb +34 -0
  43. data/build/lib/u/build/header/tables/split/rows.rb +22 -0
  44. data/build/test/unit/break.rb +45 -0
  45. data/build/test/unit/case.rb +178 -0
  46. data/build/test/unit/foldcase.rb +44 -0
  47. data/build/test/unit/normalize.rb +81 -0
  48. data/ext/u/attributes.c +62 -0
  49. data/ext/u/attributes.h +5 -0
  50. data/ext/u/case.h +41 -0
  51. data/ext/u/data/attributes.h +3070 -0
  52. data/ext/u/data/bidi-mirroring.h +373 -0
  53. data/ext/u/data/canonical-combining-class.h +2157 -0
  54. data/ext/u/data/case-folding.h +171 -0
  55. data/ext/u/data/cased.h +42 -0
  56. data/ext/u/data/compose.h +1714 -0
  57. data/ext/u/data/constants.h +17 -0
  58. data/ext/u/data/decompose.h +9356 -0
  59. data/ext/u/data/general-category.h +28959 -0
  60. data/ext/u/data/grapheme-break.h +13201 -0
  61. data/ext/u/data/line-break.h +26501 -0
  62. data/ext/u/data/normalization-quick-check.h +3002 -0
  63. data/ext/u/data/script.h +2928 -0
  64. data/ext/u/data/soft-dotted.h +55 -0
  65. data/ext/u/data/title-table.h +41 -0
  66. data/ext/u/data/types.h +11117 -0
  67. data/ext/u/data/wide-cjk.h +197 -0
  68. data/ext/u/data/wide.h +59 -0
  69. data/ext/u/data/word-break.h +10001 -0
  70. data/ext/u/depend +281 -0
  71. data/ext/u/extconf.rb +158 -0
  72. data/ext/u/output.h +51 -0
  73. data/ext/{encoding/character/utf-8 → u}/private.c +11 -15
  74. data/ext/u/private.h +58 -0
  75. data/ext/u/rb_includes.h +10 -0
  76. data/ext/u/rb_private.c +98 -0
  77. data/ext/u/rb_private.h +67 -0
  78. data/ext/u/rb_u.c +251 -0
  79. data/ext/u/rb_u_buffer.c +443 -0
  80. data/ext/u/rb_u_buffer.h +24 -0
  81. data/ext/u/rb_u_re.c +43 -0
  82. data/ext/u/rb_u_re.h +15 -0
  83. data/ext/u/rb_u_string.c +478 -0
  84. data/ext/u/rb_u_string.h +173 -0
  85. data/ext/u/rb_u_string_alnum.c +10 -0
  86. data/ext/u/rb_u_string_alpha.c +10 -0
  87. data/ext/u/rb_u_string_aref.c +142 -0
  88. data/ext/u/rb_u_string_ascii_only.c +13 -0
  89. data/ext/u/rb_u_string_assigned.c +10 -0
  90. data/ext/u/rb_u_string_b.c +18 -0
  91. data/ext/u/rb_u_string_bytesize.c +10 -0
  92. data/ext/u/rb_u_string_byteslice.c +103 -0
  93. data/ext/u/rb_u_string_canonical_combining_class.c +33 -0
  94. data/ext/u/rb_u_string_case_ignorable.c +25 -0
  95. data/ext/u/rb_u_string_casecmp.c +61 -0
  96. data/ext/u/rb_u_string_cased.c +17 -0
  97. data/ext/u/rb_u_string_chomp.c +107 -0
  98. data/ext/u/rb_u_string_chop.c +33 -0
  99. data/ext/u/rb_u_string_chr.c +9 -0
  100. data/ext/u/rb_u_string_cntrl.c +10 -0
  101. data/ext/u/rb_u_string_collate.c +46 -0
  102. data/ext/u/rb_u_string_collation_key.c +18 -0
  103. data/ext/u/rb_u_string_count.c +38 -0
  104. data/ext/u/rb_u_string_defined.c +10 -0
  105. data/ext/u/rb_u_string_delete.c +62 -0
  106. data/ext/u/rb_u_string_digit.c +10 -0
  107. data/ext/u/rb_u_string_downcase.c +13 -0
  108. data/ext/u/rb_u_string_dump.c +153 -0
  109. data/ext/u/rb_u_string_each_byte.c +46 -0
  110. data/ext/u/rb_u_string_each_char.c +49 -0
  111. data/ext/u/rb_u_string_each_codepoint.c +45 -0
  112. data/ext/u/rb_u_string_each_grapheme_cluster.c +36 -0
  113. data/ext/u/rb_u_string_each_line.c +142 -0
  114. data/ext/u/rb_u_string_each_word.c +34 -0
  115. data/ext/u/rb_u_string_empty.c +11 -0
  116. data/ext/u/rb_u_string_end_with.c +31 -0
  117. data/ext/u/rb_u_string_eql.c +30 -0
  118. data/ext/u/rb_u_string_equal.c +33 -0
  119. data/ext/u/rb_u_string_foldcase.c +12 -0
  120. data/ext/u/rb_u_string_folded.c +13 -0
  121. data/ext/u/rb_u_string_format.c +1745 -0
  122. data/ext/u/rb_u_string_general_category.c +109 -0
  123. data/ext/u/rb_u_string_getbyte.c +21 -0
  124. data/ext/u/rb_u_string_graph.c +21 -0
  125. data/ext/u/rb_u_string_grapheme_break.c +61 -0
  126. data/ext/u/rb_u_string_gsub.c +164 -0
  127. data/ext/u/rb_u_string_hash.c +10 -0
  128. data/ext/u/rb_u_string_hex.c +9 -0
  129. data/ext/u/rb_u_string_include.c +10 -0
  130. data/ext/u/rb_u_string_index.c +110 -0
  131. data/ext/u/rb_u_string_inspect.c +189 -0
  132. data/ext/u/rb_u_string_internal_tr.c +148 -0
  133. data/ext/u/rb_u_string_internal_tr.h +29 -0
  134. data/ext/u/rb_u_string_justify.c +169 -0
  135. data/ext/u/rb_u_string_length.c +10 -0
  136. data/ext/u/rb_u_string_line_break.c +115 -0
  137. data/ext/u/rb_u_string_lower.c +13 -0
  138. data/ext/u/rb_u_string_lstrip.c +24 -0
  139. data/ext/u/rb_u_string_match.c +65 -0
  140. data/ext/u/rb_u_string_mirror.c +16 -0
  141. data/ext/u/rb_u_string_newline.c +21 -0
  142. data/ext/u/rb_u_string_normalize.c +70 -0
  143. data/ext/u/rb_u_string_normalized.c +28 -0
  144. data/ext/u/rb_u_string_oct.c +11 -0
  145. data/ext/u/rb_u_string_ord.c +14 -0
  146. data/ext/u/rb_u_string_partition.c +80 -0
  147. data/ext/u/rb_u_string_plus.c +33 -0
  148. data/ext/u/rb_u_string_print.c +10 -0
  149. data/ext/u/rb_u_string_punct.c +10 -0
  150. data/ext/u/rb_u_string_reverse.c +13 -0
  151. data/ext/u/rb_u_string_rindex.c +104 -0
  152. data/ext/u/rb_u_string_rpartition.c +81 -0
  153. data/ext/u/rb_u_string_rstrip.c +29 -0
  154. data/ext/u/rb_u_string_scan.c +109 -0
  155. data/ext/u/rb_u_string_script.c +253 -0
  156. data/ext/u/rb_u_string_soft_dotted.c +13 -0
  157. data/ext/u/rb_u_string_space.c +24 -0
  158. data/ext/u/rb_u_string_split.c +245 -0
  159. data/ext/u/rb_u_string_squeeze.c +75 -0
  160. data/ext/u/rb_u_string_start_with.c +31 -0
  161. data/ext/u/rb_u_string_strip.c +36 -0
  162. data/ext/u/rb_u_string_sub.c +147 -0
  163. data/ext/u/rb_u_string_times.c +35 -0
  164. data/ext/u/rb_u_string_title.c +10 -0
  165. data/ext/u/rb_u_string_titlecase.c +13 -0
  166. data/ext/u/rb_u_string_to_i.c +45 -0
  167. data/ext/u/rb_u_string_to_inum.c +364 -0
  168. data/ext/u/rb_u_string_to_inum.h +1 -0
  169. data/ext/u/rb_u_string_to_str.c +17 -0
  170. data/ext/u/rb_u_string_to_sym.c +12 -0
  171. data/ext/u/rb_u_string_tr.c +290 -0
  172. data/ext/u/rb_u_string_upcase.c +12 -0
  173. data/ext/u/rb_u_string_upper.c +13 -0
  174. data/ext/u/rb_u_string_valid.c +10 -0
  175. data/ext/u/rb_u_string_valid_encoding.c +12 -0
  176. data/ext/u/rb_u_string_wide.c +21 -0
  177. data/ext/u/rb_u_string_wide_cjk.c +21 -0
  178. data/ext/u/rb_u_string_width.c +19 -0
  179. data/ext/u/rb_u_string_word_break.c +63 -0
  180. data/ext/u/rb_u_string_xdigit.c +22 -0
  181. data/ext/u/rb_u_string_zero_width.c +16 -0
  182. data/ext/u/titled.c +55 -0
  183. data/ext/u/titled.h +1 -0
  184. data/ext/u/u.c +23 -0
  185. data/ext/u/u.h +458 -0
  186. data/ext/u/u_char_canonical_combining_class.c +31 -0
  187. data/ext/u/u_char_digit_value.c +21 -0
  188. data/ext/u/u_char_downcase.c +27 -0
  189. data/ext/u/u_char_general_category.c +31 -0
  190. data/ext/u/u_char_grapheme_break.c +28 -0
  191. data/ext/u/u_char_isalnum.c +24 -0
  192. data/ext/u/u_char_isalpha.c +21 -0
  193. data/ext/u/u_char_isassigned.c +16 -0
  194. data/ext/u/u_char_iscased.c +22 -0
  195. data/ext/u/u_char_iscaseignorable.c +29 -0
  196. data/ext/u/u_char_iscntrl.c +17 -0
  197. data/ext/u/u_char_isdefined.c +15 -0
  198. data/ext/u/u_char_isdigit.c +16 -0
  199. data/ext/u/u_char_isgraph.c +22 -0
  200. data/ext/u/u_char_islower.c +16 -0
  201. data/ext/u/u_char_isnewline.c +24 -0
  202. data/ext/u/u_char_isprint.c +21 -0
  203. data/ext/u/u_char_ispunct.c +27 -0
  204. data/ext/u/u_char_issoftdotted.c +18 -0
  205. data/ext/u/u_char_isspace.c +28 -0
  206. data/ext/u/u_char_isupper.c +16 -0
  207. data/ext/u/u_char_isvalid.c +18 -0
  208. data/ext/u/u_char_iswide.c +18 -0
  209. data/ext/u/u_char_iswide_cjk.c +22 -0
  210. data/ext/u/u_char_isxdigit.c +27 -0
  211. data/ext/u/u_char_iszerowidth.c +29 -0
  212. data/ext/u/u_char_line_break.c +29 -0
  213. data/ext/u/u_char_mirror.c +16 -0
  214. data/ext/u/u_char_normalized.c +23 -0
  215. data/ext/u/u_char_script.c +41 -0
  216. data/ext/u/u_char_to_u.c +48 -0
  217. data/ext/u/u_char_upcase.c +24 -0
  218. data/ext/u/u_char_width.c +12 -0
  219. data/ext/u/u_char_word_break.c +28 -0
  220. data/ext/u/u_char_xdigit_value.c +31 -0
  221. data/ext/u/u_collate.c +83 -0
  222. data/ext/u/u_collation_key.c +132 -0
  223. data/ext/u/u_decode.c +156 -0
  224. data/ext/u/u_downcase.c +201 -0
  225. data/ext/u/u_foldcase.c +68 -0
  226. data/ext/u/u_grapheme_clusters.c +57 -0
  227. data/ext/u/u_has_prefix.c +27 -0
  228. data/ext/u/u_index.c +93 -0
  229. data/ext/u/u_is_ascii_only.c +33 -0
  230. data/ext/u/u_locale.c +40 -0
  231. data/ext/u/u_locale.h +14 -0
  232. data/ext/u/u_mirror.c +20 -0
  233. data/ext/u/u_n_bytes.c +16 -0
  234. data/ext/u/u_n_chars.c +43 -0
  235. data/ext/u/u_normalize.c +232 -0
  236. data/ext/u/u_normalized.c +28 -0
  237. data/ext/u/u_offset_to_pointer.c +62 -0
  238. data/ext/u/u_pointer_to_offset.c +23 -0
  239. data/ext/u/u_recode.c +73 -0
  240. data/ext/u/u_reverse.c +21 -0
  241. data/ext/u/u_rindex.c +132 -0
  242. data/ext/u/u_titlecase.c +68 -0
  243. data/ext/u/u_upcase.c +89 -0
  244. data/ext/u/u_width.c +35 -0
  245. data/ext/u/u_words.c +82 -0
  246. data/ext/u/yield.h +27 -0
  247. data/lib/u-1.0.rb +20 -0
  248. data/lib/u-1.0/buffer.rb +10 -0
  249. data/lib/u-1.0/string.rb +9 -0
  250. data/lib/u-1.0/version.rb +287 -0
  251. data/test/unit/case.rb +2080 -0
  252. data/test/unit/foldcase.rb +1136 -0
  253. data/test/unit/graphemebreak.rb +407 -0
  254. data/test/unit/normalize.rb +367545 -0
  255. data/test/unit/u-1.0.rb +10 -0
  256. data/test/unit/u-1.0/buffer.rb +52 -0
  257. data/test/unit/u-1.0/string.rb +1439 -0
  258. data/test/unit/{u.rb → u-1.0/version.rb} +0 -1
  259. data/test/unit/wordbreak.rb +1083 -0
  260. metadata +603 -148
  261. data/README +0 -38
  262. data/Rakefile +0 -64
  263. data/ext/encoding/character/utf-8/break.c +0 -25
  264. data/ext/encoding/character/utf-8/data/break.h +0 -22931
  265. data/ext/encoding/character/utf-8/data/character-tables.h +0 -14358
  266. data/ext/encoding/character/utf-8/data/compose.h +0 -1607
  267. data/ext/encoding/character/utf-8/data/decompose.h +0 -10926
  268. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +0 -1070
  269. data/ext/encoding/character/utf-8/decompose.c +0 -444
  270. data/ext/encoding/character/utf-8/depend +0 -65
  271. data/ext/encoding/character/utf-8/extconf.rb +0 -67
  272. data/ext/encoding/character/utf-8/private.h +0 -51
  273. data/ext/encoding/character/utf-8/properties.c +0 -1056
  274. data/ext/encoding/character/utf-8/rb_includes.h +0 -19
  275. data/ext/encoding/character/utf-8/rb_methods.h +0 -49
  276. data/ext/encoding/character/utf-8/rb_private.h +0 -52
  277. data/ext/encoding/character/utf-8/rb_utf_aref.c +0 -111
  278. data/ext/encoding/character/utf-8/rb_utf_aset.c +0 -105
  279. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +0 -24
  280. data/ext/encoding/character/utf-8/rb_utf_chomp.c +0 -114
  281. data/ext/encoding/character/utf-8/rb_utf_chop.c +0 -44
  282. data/ext/encoding/character/utf-8/rb_utf_collate.c +0 -13
  283. data/ext/encoding/character/utf-8/rb_utf_count.c +0 -30
  284. data/ext/encoding/character/utf-8/rb_utf_delete.c +0 -60
  285. data/ext/encoding/character/utf-8/rb_utf_downcase.c +0 -13
  286. data/ext/encoding/character/utf-8/rb_utf_each_char.c +0 -27
  287. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +0 -13
  288. data/ext/encoding/character/utf-8/rb_utf_hex.c +0 -14
  289. data/ext/encoding/character/utf-8/rb_utf_index.c +0 -50
  290. data/ext/encoding/character/utf-8/rb_utf_insert.c +0 -48
  291. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +0 -332
  292. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +0 -12
  293. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +0 -142
  294. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +0 -41
  295. data/ext/encoding/character/utf-8/rb_utf_justify.c +0 -96
  296. data/ext/encoding/character/utf-8/rb_utf_length.c +0 -14
  297. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +0 -41
  298. data/ext/encoding/character/utf-8/rb_utf_normalize.c +0 -51
  299. data/ext/encoding/character/utf-8/rb_utf_oct.c +0 -14
  300. data/ext/encoding/character/utf-8/rb_utf_reverse.c +0 -13
  301. data/ext/encoding/character/utf-8/rb_utf_rindex.c +0 -88
  302. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +0 -51
  303. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +0 -70
  304. data/ext/encoding/character/utf-8/rb_utf_strip.c +0 -27
  305. data/ext/encoding/character/utf-8/rb_utf_to_i.c +0 -25
  306. data/ext/encoding/character/utf-8/rb_utf_tr.c +0 -250
  307. data/ext/encoding/character/utf-8/rb_utf_upcase.c +0 -13
  308. data/ext/encoding/character/utf-8/tables.h +0 -38
  309. data/ext/encoding/character/utf-8/unicode.c +0 -319
  310. data/ext/encoding/character/utf-8/unicode.h +0 -216
  311. data/ext/encoding/character/utf-8/utf.c +0 -1334
  312. data/lib/encoding/character/utf-8.rb +0 -201
  313. data/lib/u.rb +0 -16
  314. data/lib/u/string.rb +0 -185
  315. data/lib/u/version.rb +0 -5
  316. data/test/unit/u/string.rb +0 -91
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Membership table of the code points listed in a single-field data file.
class U::Build::Data::CompositionExclusions
  # Reads the file at +path+ (one field per record, i.e. only the code point)
  # and remembers every code point it lists.
  def initialize(path)
    @entries = {}
    U::Build::Data::File.each(path, 1) { |listed| @entries[listed] = true }
  end

  # Returns true if +code+ was listed in the file, false otherwise.
  def include?(code)
    @entries.key?(code)
  end
end
@@ -0,0 +1,15 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Enumerable view over a two-field data file (code point; width).
#
# The file is not read at construction time; it is re-read on every call to
# #each.
class U::Build::Data::DerivedEastAsianWidth
  include Enumerable

  # Remembers +path+ for later enumeration.
  def initialize(path)
    @path = path
  end

  # Yields each code point of the file together with its width field.
  def each
    U::Build::Data::File.each(@path, 2) { |point, width| yield point, width }
  end
end
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Reader for semicolon-separated data files whose first field is either a
# single code point or a +FIRST..LAST+ range of code points, in hexadecimal.
#
# Lines that are empty or that start with +#+ are skipped, and a trailing
# +#+ comment is stripped from a line before it is split into fields.
class U::Build::Data::File
  class << self
    # Creates a reader for +path+, yields it and closes it again once the
    # block returns or raises.  +fields+ is the accepted number of fields per
    # record (including the leading code field); it is matched with +===+, so
    # both an Integer and a Range work.  Returns the value of the block.
    def open(path, fields)
      file = new(path, fields)
      begin
        yield file
      ensure
        file.close
      end
    end

    # Opens +path+ and yields every code point it lists together with the
    # remaining fields of its record.  See #each.
    def each(path, fields)
      open(path, fields) do |file|
        file.each do |point, *fs|
          yield point, *fs
        end
      end
    end

    # Opens +path+ and yields every code point from 0 through 0x10ffff,
    # listed or not.  See #each_with_unlisted for the yielded values.
    def each_with_unlisted(path, fields)
      open(path, fields) do |file|
        file.each_with_unlisted do |point, previous, unlisted, *fs|
          yield point, previous, unlisted, *fs
        end
      end
    end
  end

  # Opens +path+ for binary reading.  +fields+ is the accepted field count
  # per record (Integer or Range, compared with +===+).
  def initialize(path, fields)
    @path, @fields = path, fields
    # Name the core class explicitly; a bare File is only resolved to it
    # because this class is opened with the compact U::Build::Data::File form.
    @file = ::File.open(@path, 'rb')
  end

  # Closes the underlying file and returns self.
  def close
    @file.close
    self
  end

  # Yields +point, *fields+ for every code point listed in the file, where
  # +fields+ are the record's fields after the leading code field.  A record
  # whose code field is a +FIRST..LAST+ range is yielded once per code point
  # in that range.
  #
  # Raises RuntimeError if a record has an unexpected number of fields.  Any
  # StandardError raised while yielding is re-raised with the file name and
  # line number prepended to its message.  Returns self.
  def each
    @file.each_line.with_index do |line, index|
      next if comment_or_empty? line
      fields = line.sub(/\s*#.*\Z/, '').strip.split(/\s*;\s*/, -1)
      # The expected count is formatted with %s because @fields may be a
      # Range (for example 5..6); %d would raise TypeError for it and hide
      # this diagnostic.  For an Integer the output is unchanged.
      raise RuntimeError,
        '%s:%d: wrong number of fields: %d instead of %s' %
          [@path, index + 1, fields.size, @fields] unless
            @fields === fields.size
      code = fields.shift
      if code =~ /\A([0-9A-Fa-f]{4,6})\.\.([0-9A-Fa-f]{4,6})\Z/
        first, last = $1.hex, $2.hex
      else
        first = last = code.hex
      end
      begin
        first.upto(last) do |point|
          yield point, *fields
        end
      rescue => e
        raise e, '%s:%d: %s' % [@path, index + 1, e]
      end
    end
    self
  end

  # Like #each, but also yields the code points the file does not list, so
  # that every code point from 0 through 0x10ffff is yielded in order.
  #
  # Yields +point, previous, unlisted, *fields+:
  #
  # * For a listed point, +unlisted+ is false, +previous+ is +point - 1+ and
  #   +fields+ are the fields of its record.
  # * For a point in a gap before a listed point, +unlisted+ is true,
  #   +previous+ is the last listed point (-1 if there is none yet) and
  #   +fields+ are those of the listed point that follows the gap.
  # * For points after the last listed one, +unlisted+ is true and no fields
  #   are passed at all.
  #
  # Returns self.
  def each_with_unlisted
    previous = -1
    each do |point, *fields|
      (previous + 1).upto(point - 1) do |i|
        yield i, previous, true, *fields
      end
      yield point, point - 1, false, *fields
      previous = point
    end
    (previous + 1).upto(0x10ffff) do |i|
      yield i, previous, true
    end
    self
  end

  private

  # Not referenced inside this class.  Note that +private+ above applies to
  # methods only, so this constant remains publicly accessible.
  Code = 0

  # Truthy if +line+ starts with +#+ or consists of whitespace only.
  def comment_or_empty?(line)
    line =~ /\A(?:#|\s*\Z)/
  end
end
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Table mapping every code point to a line-break property string.
class U::Build::Data::LineBreak
  # Builds the table from the two-field file at +path+.
  #
  # Code points listed in the file get the property from their record.
  # Code points the file does not list get 'XX' when +data[point].type+ is
  # 'Cn' and 'AL' otherwise.
  def initialize(data, path)
    @entries = []
    U::Build::Data::File.each_with_unlisted(path, 2) do |point, _previous, unlisted, property|
      @entries[point] =
        if !unlisted
          property
        elsif data[point].type == 'Cn'
          'XX'
        else
          'AL'
        end
    end
  end

  # Returns the property stored for +point+.
  def [](point)
    @entries[point]
  end
end
@@ -0,0 +1,18 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Collects, per code point, all properties that a two-field data file
# (code point; property) assigns to it.
class U::Build::Data::PropList
  include Enumerable

  # Reads the file at +path+; a code point that appears in several records
  # accumulates one property per record, in file order.
  def initialize(path)
    @entries = Hash.new { |table, point| table[point] = [] }
    U::Build::Data::File.each(path, 2) do |point, property|
      @entries[point].push(property)
    end
  end

  # Yields each code point together with the array of its properties.
  def each
    @entries.each_pair { |point, properties| yield point, properties }
  end
end
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Maps code points to the script named for them in a two-field data file
# (code point; script).
class U::Build::Data::Scripts
  include Enumerable

  # Reads the file at +path+; a later record for the same code point
  # replaces an earlier one.
  def initialize(path)
    @entries = {}
    U::Build::Data::File.each(path, 2) { |point, script| @entries.store(point, script) }
  end

  # Returns the script of +point+, or 'Unknown' if the file did not list it.
  def [](point)
    @entries.fetch(point) { 'Unknown' }
  end

  # Yields each listed code point together with its script.
  def each
    @entries.each_pair { |point, script| yield point, script }
  end
end
@@ -0,0 +1,106 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Unconditional special-case mappings read from a data file, keyed by code
# point.  Enumerating yields the entries ordered by their offset.
class U::Build::Data::SpecialCasing
  include Enumerable

  # Reads the special-casing file at +path+.  +data+ is indexed by code point
  # and must return objects that respond to +type+ and +value+.
  def initialize(data, path)
    @entries = {}
    # File here is the nested SpecialCasing::File defined below.
    File.new(data, path).each do |point, entry|
      @entries[point] = entry
    end
  end

  # Returns true if +point+ has a special-case entry.
  def include?(point)
    @entries.include? point
  end

  # Returns the Entry stored for +point+, or nil if there is none.
  def [](point)
    @entries[point]
  end

  # Yields every Entry in ascending order of offset.
  def each
    @entries.values.sort.each do |entry|
      yield entry
    end
  end

  # A sequence of code points together with the running offset it was
  # assigned when it was created.  Entries compare by offset.
  #
  # TODO: Move to its own file
  class Entry
    include Comparable

    def initialize(points, offset)
      @points, @offset = points, offset
    end

    # Compares offsets; returns nil if +other+ is not of the same class.
    def <=>(other)
      self.class == other.class ?
        offset <=> other.offset :
        nil
    end

    # String form of the code points, as produced by the wrapped points.
    def to_s
      @points.to_s
    end

    # Escaped string form of the code points, as produced by the wrapped
    # points.
    def to_escaped_s
      @points.to_escaped_s
    end

    attr_reader :offset
  end

  # NOTE: +private+ affects methods only, so the File class below is still
  # reachable as a constant from outside.
  private

  # Parses the special-casing file and turns each unconditional record into
  # an Entry.
  class File
    def initialize(data, path)
      @data, @path = data, path
      @offset = 0
    end

    # Yields +point, entry+ for every record that has no condition.
    #
    # Which fields go into the entry depends on +@data[point].type+:
    # 'Lu' uses the simple value plus the lower and title fields, 'Lt' uses
    # the lower and upper fields, and 'Ll' uses the simple value plus the
    # upper and title fields.
    #
    # Raises RuntimeError if the code point has no type, has a type other
    # than 'Lu', 'Lt' or 'Ll', or if the field for its own case does not map
    # the code point to itself.
    def each
      U::Build::Data::File.each(@path, 5..6) do |point, lower, title, upper, condition, _comment|
        raise RuntimeError,
          'special case for code point that does not have a type: %04X' %
            point unless
              @data[point].type
        # We deal with conditional special cases in code.
        next unless condition.empty?
        yield point, case @data[point].type
                     when 'Lu'
                       verify upper, point, 'Lu', 'Upper'
                       entry(@data[point].value, lower, title)
                     when 'Lt'
                       verify title, point, 'Lt', 'Title'
                       entry(nil, lower, upper)
                     when 'Ll'
                       verify lower, point, 'Ll', 'Lower'
                       entry(@data[point].value, upper, title)
                     else
                       raise RuntimeError,
                         'special case entry of type %s for non-alphabetic code point: %04X' %
                           [@data[point].type, point]
                     end
      end
    end

    private

    # Raises RuntimeError unless +field+, read as hexadecimal, equals
    # +point+.  The last value in the message is the value actually found in
    # the field, so the mismatch can be seen.
    def verify(field, point, type, ccase)
      raise RuntimeError,
        'special case entry has type %s, but UCD_%s(%04X) != %04X: %04X' %
          [type, ccase, point, point, field.hex] unless
            field.hex == point
    end

    # Builds an Entry at the current offset from, in order: +single+ in
    # hexadecimal (omitted when nil), +field1+, a NUL separator and +field2+.
    # The offset then advances by the byte size of the entry's string plus
    # one (presumably for a terminator in the generated table; not visible
    # from this class).
    def entry(single, field1, field2)
      Entry.new(U::Build::Data::Unicode::Points.new([
        single ? single.to_s(16) : nil,
        field1,
        "\0",
        field2
      ].compact.join(' ')), @offset).tap{ |e|
        @offset += e.to_s.bytesize + 1
      }
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# One Entry per code point, 0 through 0x10ffff, built from a data file with
# fifteen fields per record.
class U::Build::Data::Unicode
  include Enumerable

  require 'u/build/data/unicode/points'
  require 'u/build/data/unicode/entry'

  # Indices into a record's fields once the leading code field is removed.
  Name = 0
  Category = 1
  CombiningClasses = 2
  Decomposition = 4
  DecimalValue = 5
  Upper = 11
  Lower = 12

  # Fields used for code points that the file does not list.
  Unlisted = ['', 'Cn', '0', '', '', '', '', '', '', '', '', '', '', ''].freeze

  attr_reader :pages_before_e0000, :last_char_part1_i, :last_char_part1_x, :last_char_part1_X, :last

  # Reads the file at +path+ and appends one Entry per code point, in order.
  #
  # When the enumeration first reaches 0xe0000, the number of 256-point
  # pages needed to cover everything up to the code point reported as
  # +previous+ is recorded, along with the last code point of those pages as
  # an Integer and as two hexadecimal strings.
  def initialize(path)
    @entries = []
    U::Build::Data::File.each_with_unlisted(path, 15) do |point, previous, unlisted, *fields|
      if previous < 0xe0000 && point >= 0xe0000
        @pages_before_e0000 = (previous >> 8) + 1
        @last_char_part1_i = @pages_before_e0000 * 256 - 1
        @last_char_part1_x = format('0x%04x', @last_char_part1_i)
        @last_char_part1_X = format('%04X', @last_char_part1_i)
      end
      # An unlisted point is handed the fields of the next listed record.  If
      # that record's name ends in "Last>", the point keeps those fields;
      # every other unlisted point gets the placeholder fields.
      placeholder = unlisted && fields[Name] !~ /Last>\Z/
      @entries << Entry.new(point, placeholder ? Unlisted : fields)
    end
    @last = 0x10ffff
  end

  # Returns the Entry at +index+.
  def [](index)
    @entries[index]
  end

  # Yields every Entry in code point order.
  def each
    @entries.each { |entry| yield entry }
  end
end
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# The properties of a single code point, taken from the fields of its record.
class U::Build::Data::Unicode::Entry
  require 'u/build/data/unicode/entry/decomposition'

  # +code+ is the code point and +fields+ its record, indexed by the
  # constants of U::Build::Data::Unicode.
  #
  # +value+ depends on the category stored in +type+:
  #
  # * 'Nd': the decimal value field as an Integer
  # * 'Ll': the upper-case field read as hexadecimal
  # * 'Lu': the lower-case field read as hexadecimal
  # * 'Lt': nil; +title_to_lower+ and +title_to_upper+ are set instead
  # * anything else: nil
  #
  # String#hex returns 0 for an empty field, so a missing mapping is stored
  # as 0 rather than nil.
  def initialize(code, fields)
    # Store the code point so that the +code+ reader returns it instead of
    # always returning nil.
    @code = code
    # TODO: Why not process Nl?
    case @type = fields[U::Build::Data::Unicode::Category]
    when 'Nd'
      @value = fields[U::Build::Data::Unicode::DecimalValue].to_i
    when 'Ll'
      @value = fields[U::Build::Data::Unicode::Upper].hex
    when 'Lu'
      @value = fields[U::Build::Data::Unicode::Lower].hex
    when 'Lt'
      @value = nil
      @title_to_lower = fields[U::Build::Data::Unicode::Lower].hex
      @title_to_upper = fields[U::Build::Data::Unicode::Upper].hex
    else
      @value = nil
    end
    @cclass = fields[U::Build::Data::Unicode::CombiningClasses].to_i
    @decomposition = Decomposition.new(fields[U::Build::Data::Unicode::Decomposition])
  end

  attr_reader :code, :type, :value, :title_to_lower, :title_to_upper, :cclass, :decomposition
end
@@ -0,0 +1,29 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# The decomposition field of a record: a sequence of code points that is
# either canonical or prefixed with a +<tag>+.
class U::Build::Data::Unicode::Entry::Decomposition
  include Enumerable

  # Parses +decomposition+.  A leading +<...>+ tag marks the decomposition as
  # non-canonical; the tag and the whitespace after it are dropped before the
  # code points are parsed.
  def initialize(decomposition)
    tagged = /\A<.*>\s*(.*)/.match(decomposition)
    @canonical = tagged.nil?
    @points = U::Build::Data::Unicode::Points.new(tagged ? tagged[1] : decomposition)
  end

  # True if the field carried no leading +<...>+ tag.
  def canonical?
    @canonical
  end

  # True if the decomposition contains no code points.
  def empty?
    @points.empty?
  end

  # Number of code points in the decomposition.
  def length
    @points.length
  end

  # Yields each code point of the decomposition and returns self.
  def each
    @points.each { |point| yield point }
    self
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# A sequence of code points parsed from whitespace-separated hexadecimal
# numbers.
class U::Build::Data::Unicode::Points
  include Enumerable

  # Splits +string+ on whitespace and reads every piece as hexadecimal.
  def initialize(string)
    @points = string.split(/\s+/).map(&:hex)
  end

  # True if there are no code points.
  def empty?
    @points.empty?
  end

  # Number of code points.
  def length
    @points.size
  end

  # Yields each code point as an Integer and returns self.
  def each
    @points.each { |point| yield point }
    self
  end

  # The code points packed into a string with the 'U*' directive.
  def to_s
    @points.pack('U*')
  end

  # The bytes of #to_s, each written as a backslash, an +x+ and two
  # lower-case hexadecimal digits.
  def to_escaped_s
    hex_digits = to_s.unpack('H*')[0]
    hex_digits.gsub(/../, '\\x\0')
  end
end
@@ -0,0 +1,11 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Emits the banner of a generated file and then runs the caller's block.
class U::Build::Header
  require 'u/build/header/table'
  require 'u/build/header/tables'

  # Writes the "Automatically generated file" comment followed by an empty
  # line to +io+ (standard output by default), then yields with no arguments.
  def initialize(io = $stdout)
    io.puts("/* Automatically generated file */\n\n")
    yield
  end
end
@@ -0,0 +1,19 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# A declaration followed by a brace-enclosed, comma-separated list of rows.
class U::Build::Header::Table
  require 'u/build/header/table/row'

  # Starts an empty table that will be rendered as "+declaration+ = { ... };".
  def initialize(declaration)
    @rows = []
    @declaration = declaration
  end

  # Appends +row+ and returns self so that calls can be chained.
  def <<(row)
    @rows.push(row)
    self
  end

  # Renders the table: two newlines, the declaration, then the rows joined by
  # a comma and a newline between braces, closed with a semicolon.
  def to_s
    format("\n\n%s = {\n%s\n};", @declaration, @rows.join(",\n"))
  end
end