u 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. checksums.yaml +7 -0
  2. data/build/ext/u/data/attributes.rb +39 -0
  3. data/build/ext/u/data/bidi-mirroring.rb +27 -0
  4. data/build/ext/u/data/canonical-combining-class.rb +15 -0
  5. data/build/ext/u/data/case-folding.rb +39 -0
  6. data/build/ext/u/data/cased.rb +19 -0
  7. data/build/ext/u/data/compose.rb +304 -0
  8. data/build/ext/u/data/constants.rb +31 -0
  9. data/build/ext/u/data/decompose.rb +85 -0
  10. data/build/ext/u/data/general-category.rb +61 -0
  11. data/build/ext/u/data/grapheme-word-break.rb +15 -0
  12. data/build/ext/u/data/marshalled.rb +5 -0
  13. data/build/ext/u/data/script.rb +91 -0
  14. data/build/ext/u/data/soft-dotted.rb +17 -0
  15. data/build/ext/u/data/title-table.rb +30 -0
  16. data/build/ext/u/data/wide.rb +17 -0
  17. data/build/lib/u/build.rb +8 -0
  18. data/build/lib/u/build/data.rb +16 -0
  19. data/build/lib/u/build/data/bidimirroring.rb +26 -0
  20. data/build/lib/u/build/data/break.rb +14 -0
  21. data/build/lib/u/build/data/casefolding.rb +77 -0
  22. data/build/lib/u/build/data/compositionexclusions.rb +14 -0
  23. data/build/lib/u/build/data/derivedeastasianwidth.rb +15 -0
  24. data/build/lib/u/build/data/file.rb +88 -0
  25. data/build/lib/u/build/data/linebreak.rb +14 -0
  26. data/build/lib/u/build/data/proplist.rb +18 -0
  27. data/build/lib/u/build/data/scripts.rb +22 -0
  28. data/build/lib/u/build/data/specialcasing.rb +106 -0
  29. data/build/lib/u/build/data/unicode.rb +41 -0
  30. data/build/lib/u/build/data/unicode/entry.rb +27 -0
  31. data/build/lib/u/build/data/unicode/entry/decomposition.rb +29 -0
  32. data/build/lib/u/build/data/unicode/points.rb +32 -0
  33. data/build/lib/u/build/header.rb +11 -0
  34. data/build/lib/u/build/header/table.rb +19 -0
  35. data/build/lib/u/build/header/table/row.rb +64 -0
  36. data/build/lib/u/build/header/tables.rb +6 -0
  37. data/build/lib/u/build/header/tables/intervals.rb +50 -0
  38. data/build/lib/u/build/header/tables/split.rb +20 -0
  39. data/build/lib/u/build/header/tables/split/data.rb +29 -0
  40. data/build/lib/u/build/header/tables/split/part1.rb +28 -0
  41. data/build/lib/u/build/header/tables/split/part2.rb +13 -0
  42. data/build/lib/u/build/header/tables/split/row.rb +34 -0
  43. data/build/lib/u/build/header/tables/split/rows.rb +22 -0
  44. data/build/test/unit/break.rb +45 -0
  45. data/build/test/unit/case.rb +178 -0
  46. data/build/test/unit/foldcase.rb +44 -0
  47. data/build/test/unit/normalize.rb +81 -0
  48. data/ext/u/attributes.c +62 -0
  49. data/ext/u/attributes.h +5 -0
  50. data/ext/u/case.h +41 -0
  51. data/ext/u/data/attributes.h +3070 -0
  52. data/ext/u/data/bidi-mirroring.h +373 -0
  53. data/ext/u/data/canonical-combining-class.h +2157 -0
  54. data/ext/u/data/case-folding.h +171 -0
  55. data/ext/u/data/cased.h +42 -0
  56. data/ext/u/data/compose.h +1714 -0
  57. data/ext/u/data/constants.h +17 -0
  58. data/ext/u/data/decompose.h +9356 -0
  59. data/ext/u/data/general-category.h +28959 -0
  60. data/ext/u/data/grapheme-break.h +13201 -0
  61. data/ext/u/data/line-break.h +26501 -0
  62. data/ext/u/data/normalization-quick-check.h +3002 -0
  63. data/ext/u/data/script.h +2928 -0
  64. data/ext/u/data/soft-dotted.h +55 -0
  65. data/ext/u/data/title-table.h +41 -0
  66. data/ext/u/data/types.h +11117 -0
  67. data/ext/u/data/wide-cjk.h +197 -0
  68. data/ext/u/data/wide.h +59 -0
  69. data/ext/u/data/word-break.h +10001 -0
  70. data/ext/u/depend +281 -0
  71. data/ext/u/extconf.rb +158 -0
  72. data/ext/u/output.h +51 -0
  73. data/ext/{encoding/character/utf-8 → u}/private.c +11 -15
  74. data/ext/u/private.h +58 -0
  75. data/ext/u/rb_includes.h +10 -0
  76. data/ext/u/rb_private.c +98 -0
  77. data/ext/u/rb_private.h +67 -0
  78. data/ext/u/rb_u.c +251 -0
  79. data/ext/u/rb_u_buffer.c +443 -0
  80. data/ext/u/rb_u_buffer.h +24 -0
  81. data/ext/u/rb_u_re.c +43 -0
  82. data/ext/u/rb_u_re.h +15 -0
  83. data/ext/u/rb_u_string.c +478 -0
  84. data/ext/u/rb_u_string.h +173 -0
  85. data/ext/u/rb_u_string_alnum.c +10 -0
  86. data/ext/u/rb_u_string_alpha.c +10 -0
  87. data/ext/u/rb_u_string_aref.c +142 -0
  88. data/ext/u/rb_u_string_ascii_only.c +13 -0
  89. data/ext/u/rb_u_string_assigned.c +10 -0
  90. data/ext/u/rb_u_string_b.c +18 -0
  91. data/ext/u/rb_u_string_bytesize.c +10 -0
  92. data/ext/u/rb_u_string_byteslice.c +103 -0
  93. data/ext/u/rb_u_string_canonical_combining_class.c +33 -0
  94. data/ext/u/rb_u_string_case_ignorable.c +25 -0
  95. data/ext/u/rb_u_string_casecmp.c +61 -0
  96. data/ext/u/rb_u_string_cased.c +17 -0
  97. data/ext/u/rb_u_string_chomp.c +107 -0
  98. data/ext/u/rb_u_string_chop.c +33 -0
  99. data/ext/u/rb_u_string_chr.c +9 -0
  100. data/ext/u/rb_u_string_cntrl.c +10 -0
  101. data/ext/u/rb_u_string_collate.c +46 -0
  102. data/ext/u/rb_u_string_collation_key.c +18 -0
  103. data/ext/u/rb_u_string_count.c +38 -0
  104. data/ext/u/rb_u_string_defined.c +10 -0
  105. data/ext/u/rb_u_string_delete.c +62 -0
  106. data/ext/u/rb_u_string_digit.c +10 -0
  107. data/ext/u/rb_u_string_downcase.c +13 -0
  108. data/ext/u/rb_u_string_dump.c +153 -0
  109. data/ext/u/rb_u_string_each_byte.c +46 -0
  110. data/ext/u/rb_u_string_each_char.c +49 -0
  111. data/ext/u/rb_u_string_each_codepoint.c +45 -0
  112. data/ext/u/rb_u_string_each_grapheme_cluster.c +36 -0
  113. data/ext/u/rb_u_string_each_line.c +142 -0
  114. data/ext/u/rb_u_string_each_word.c +34 -0
  115. data/ext/u/rb_u_string_empty.c +11 -0
  116. data/ext/u/rb_u_string_end_with.c +31 -0
  117. data/ext/u/rb_u_string_eql.c +30 -0
  118. data/ext/u/rb_u_string_equal.c +33 -0
  119. data/ext/u/rb_u_string_foldcase.c +12 -0
  120. data/ext/u/rb_u_string_folded.c +13 -0
  121. data/ext/u/rb_u_string_format.c +1745 -0
  122. data/ext/u/rb_u_string_general_category.c +109 -0
  123. data/ext/u/rb_u_string_getbyte.c +21 -0
  124. data/ext/u/rb_u_string_graph.c +21 -0
  125. data/ext/u/rb_u_string_grapheme_break.c +61 -0
  126. data/ext/u/rb_u_string_gsub.c +164 -0
  127. data/ext/u/rb_u_string_hash.c +10 -0
  128. data/ext/u/rb_u_string_hex.c +9 -0
  129. data/ext/u/rb_u_string_include.c +10 -0
  130. data/ext/u/rb_u_string_index.c +110 -0
  131. data/ext/u/rb_u_string_inspect.c +189 -0
  132. data/ext/u/rb_u_string_internal_tr.c +148 -0
  133. data/ext/u/rb_u_string_internal_tr.h +29 -0
  134. data/ext/u/rb_u_string_justify.c +169 -0
  135. data/ext/u/rb_u_string_length.c +10 -0
  136. data/ext/u/rb_u_string_line_break.c +115 -0
  137. data/ext/u/rb_u_string_lower.c +13 -0
  138. data/ext/u/rb_u_string_lstrip.c +24 -0
  139. data/ext/u/rb_u_string_match.c +65 -0
  140. data/ext/u/rb_u_string_mirror.c +16 -0
  141. data/ext/u/rb_u_string_newline.c +21 -0
  142. data/ext/u/rb_u_string_normalize.c +70 -0
  143. data/ext/u/rb_u_string_normalized.c +28 -0
  144. data/ext/u/rb_u_string_oct.c +11 -0
  145. data/ext/u/rb_u_string_ord.c +14 -0
  146. data/ext/u/rb_u_string_partition.c +80 -0
  147. data/ext/u/rb_u_string_plus.c +33 -0
  148. data/ext/u/rb_u_string_print.c +10 -0
  149. data/ext/u/rb_u_string_punct.c +10 -0
  150. data/ext/u/rb_u_string_reverse.c +13 -0
  151. data/ext/u/rb_u_string_rindex.c +104 -0
  152. data/ext/u/rb_u_string_rpartition.c +81 -0
  153. data/ext/u/rb_u_string_rstrip.c +29 -0
  154. data/ext/u/rb_u_string_scan.c +109 -0
  155. data/ext/u/rb_u_string_script.c +253 -0
  156. data/ext/u/rb_u_string_soft_dotted.c +13 -0
  157. data/ext/u/rb_u_string_space.c +24 -0
  158. data/ext/u/rb_u_string_split.c +245 -0
  159. data/ext/u/rb_u_string_squeeze.c +75 -0
  160. data/ext/u/rb_u_string_start_with.c +31 -0
  161. data/ext/u/rb_u_string_strip.c +36 -0
  162. data/ext/u/rb_u_string_sub.c +147 -0
  163. data/ext/u/rb_u_string_times.c +35 -0
  164. data/ext/u/rb_u_string_title.c +10 -0
  165. data/ext/u/rb_u_string_titlecase.c +13 -0
  166. data/ext/u/rb_u_string_to_i.c +45 -0
  167. data/ext/u/rb_u_string_to_inum.c +364 -0
  168. data/ext/u/rb_u_string_to_inum.h +1 -0
  169. data/ext/u/rb_u_string_to_str.c +17 -0
  170. data/ext/u/rb_u_string_to_sym.c +12 -0
  171. data/ext/u/rb_u_string_tr.c +290 -0
  172. data/ext/u/rb_u_string_upcase.c +12 -0
  173. data/ext/u/rb_u_string_upper.c +13 -0
  174. data/ext/u/rb_u_string_valid.c +10 -0
  175. data/ext/u/rb_u_string_valid_encoding.c +12 -0
  176. data/ext/u/rb_u_string_wide.c +21 -0
  177. data/ext/u/rb_u_string_wide_cjk.c +21 -0
  178. data/ext/u/rb_u_string_width.c +19 -0
  179. data/ext/u/rb_u_string_word_break.c +63 -0
  180. data/ext/u/rb_u_string_xdigit.c +22 -0
  181. data/ext/u/rb_u_string_zero_width.c +16 -0
  182. data/ext/u/titled.c +55 -0
  183. data/ext/u/titled.h +1 -0
  184. data/ext/u/u.c +23 -0
  185. data/ext/u/u.h +458 -0
  186. data/ext/u/u_char_canonical_combining_class.c +31 -0
  187. data/ext/u/u_char_digit_value.c +21 -0
  188. data/ext/u/u_char_downcase.c +27 -0
  189. data/ext/u/u_char_general_category.c +31 -0
  190. data/ext/u/u_char_grapheme_break.c +28 -0
  191. data/ext/u/u_char_isalnum.c +24 -0
  192. data/ext/u/u_char_isalpha.c +21 -0
  193. data/ext/u/u_char_isassigned.c +16 -0
  194. data/ext/u/u_char_iscased.c +22 -0
  195. data/ext/u/u_char_iscaseignorable.c +29 -0
  196. data/ext/u/u_char_iscntrl.c +17 -0
  197. data/ext/u/u_char_isdefined.c +15 -0
  198. data/ext/u/u_char_isdigit.c +16 -0
  199. data/ext/u/u_char_isgraph.c +22 -0
  200. data/ext/u/u_char_islower.c +16 -0
  201. data/ext/u/u_char_isnewline.c +24 -0
  202. data/ext/u/u_char_isprint.c +21 -0
  203. data/ext/u/u_char_ispunct.c +27 -0
  204. data/ext/u/u_char_issoftdotted.c +18 -0
  205. data/ext/u/u_char_isspace.c +28 -0
  206. data/ext/u/u_char_isupper.c +16 -0
  207. data/ext/u/u_char_isvalid.c +18 -0
  208. data/ext/u/u_char_iswide.c +18 -0
  209. data/ext/u/u_char_iswide_cjk.c +22 -0
  210. data/ext/u/u_char_isxdigit.c +27 -0
  211. data/ext/u/u_char_iszerowidth.c +29 -0
  212. data/ext/u/u_char_line_break.c +29 -0
  213. data/ext/u/u_char_mirror.c +16 -0
  214. data/ext/u/u_char_normalized.c +23 -0
  215. data/ext/u/u_char_script.c +41 -0
  216. data/ext/u/u_char_to_u.c +48 -0
  217. data/ext/u/u_char_upcase.c +24 -0
  218. data/ext/u/u_char_width.c +12 -0
  219. data/ext/u/u_char_word_break.c +28 -0
  220. data/ext/u/u_char_xdigit_value.c +31 -0
  221. data/ext/u/u_collate.c +83 -0
  222. data/ext/u/u_collation_key.c +132 -0
  223. data/ext/u/u_decode.c +156 -0
  224. data/ext/u/u_downcase.c +201 -0
  225. data/ext/u/u_foldcase.c +68 -0
  226. data/ext/u/u_grapheme_clusters.c +57 -0
  227. data/ext/u/u_has_prefix.c +27 -0
  228. data/ext/u/u_index.c +93 -0
  229. data/ext/u/u_is_ascii_only.c +33 -0
  230. data/ext/u/u_locale.c +40 -0
  231. data/ext/u/u_locale.h +14 -0
  232. data/ext/u/u_mirror.c +20 -0
  233. data/ext/u/u_n_bytes.c +16 -0
  234. data/ext/u/u_n_chars.c +43 -0
  235. data/ext/u/u_normalize.c +232 -0
  236. data/ext/u/u_normalized.c +28 -0
  237. data/ext/u/u_offset_to_pointer.c +62 -0
  238. data/ext/u/u_pointer_to_offset.c +23 -0
  239. data/ext/u/u_recode.c +73 -0
  240. data/ext/u/u_reverse.c +21 -0
  241. data/ext/u/u_rindex.c +132 -0
  242. data/ext/u/u_titlecase.c +68 -0
  243. data/ext/u/u_upcase.c +89 -0
  244. data/ext/u/u_width.c +35 -0
  245. data/ext/u/u_words.c +82 -0
  246. data/ext/u/yield.h +27 -0
  247. data/lib/u-1.0.rb +20 -0
  248. data/lib/u-1.0/buffer.rb +10 -0
  249. data/lib/u-1.0/string.rb +9 -0
  250. data/lib/u-1.0/version.rb +287 -0
  251. data/test/unit/case.rb +2080 -0
  252. data/test/unit/foldcase.rb +1136 -0
  253. data/test/unit/graphemebreak.rb +407 -0
  254. data/test/unit/normalize.rb +367545 -0
  255. data/test/unit/u-1.0.rb +10 -0
  256. data/test/unit/u-1.0/buffer.rb +52 -0
  257. data/test/unit/u-1.0/string.rb +1439 -0
  258. data/test/unit/{u.rb → u-1.0/version.rb} +0 -1
  259. data/test/unit/wordbreak.rb +1083 -0
  260. metadata +603 -148
  261. data/README +0 -38
  262. data/Rakefile +0 -64
  263. data/ext/encoding/character/utf-8/break.c +0 -25
  264. data/ext/encoding/character/utf-8/data/break.h +0 -22931
  265. data/ext/encoding/character/utf-8/data/character-tables.h +0 -14358
  266. data/ext/encoding/character/utf-8/data/compose.h +0 -1607
  267. data/ext/encoding/character/utf-8/data/decompose.h +0 -10926
  268. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +0 -1070
  269. data/ext/encoding/character/utf-8/decompose.c +0 -444
  270. data/ext/encoding/character/utf-8/depend +0 -65
  271. data/ext/encoding/character/utf-8/extconf.rb +0 -67
  272. data/ext/encoding/character/utf-8/private.h +0 -51
  273. data/ext/encoding/character/utf-8/properties.c +0 -1056
  274. data/ext/encoding/character/utf-8/rb_includes.h +0 -19
  275. data/ext/encoding/character/utf-8/rb_methods.h +0 -49
  276. data/ext/encoding/character/utf-8/rb_private.h +0 -52
  277. data/ext/encoding/character/utf-8/rb_utf_aref.c +0 -111
  278. data/ext/encoding/character/utf-8/rb_utf_aset.c +0 -105
  279. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +0 -24
  280. data/ext/encoding/character/utf-8/rb_utf_chomp.c +0 -114
  281. data/ext/encoding/character/utf-8/rb_utf_chop.c +0 -44
  282. data/ext/encoding/character/utf-8/rb_utf_collate.c +0 -13
  283. data/ext/encoding/character/utf-8/rb_utf_count.c +0 -30
  284. data/ext/encoding/character/utf-8/rb_utf_delete.c +0 -60
  285. data/ext/encoding/character/utf-8/rb_utf_downcase.c +0 -13
  286. data/ext/encoding/character/utf-8/rb_utf_each_char.c +0 -27
  287. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +0 -13
  288. data/ext/encoding/character/utf-8/rb_utf_hex.c +0 -14
  289. data/ext/encoding/character/utf-8/rb_utf_index.c +0 -50
  290. data/ext/encoding/character/utf-8/rb_utf_insert.c +0 -48
  291. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +0 -332
  292. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +0 -12
  293. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +0 -142
  294. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +0 -41
  295. data/ext/encoding/character/utf-8/rb_utf_justify.c +0 -96
  296. data/ext/encoding/character/utf-8/rb_utf_length.c +0 -14
  297. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +0 -41
  298. data/ext/encoding/character/utf-8/rb_utf_normalize.c +0 -51
  299. data/ext/encoding/character/utf-8/rb_utf_oct.c +0 -14
  300. data/ext/encoding/character/utf-8/rb_utf_reverse.c +0 -13
  301. data/ext/encoding/character/utf-8/rb_utf_rindex.c +0 -88
  302. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +0 -51
  303. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +0 -70
  304. data/ext/encoding/character/utf-8/rb_utf_strip.c +0 -27
  305. data/ext/encoding/character/utf-8/rb_utf_to_i.c +0 -25
  306. data/ext/encoding/character/utf-8/rb_utf_tr.c +0 -250
  307. data/ext/encoding/character/utf-8/rb_utf_upcase.c +0 -13
  308. data/ext/encoding/character/utf-8/tables.h +0 -38
  309. data/ext/encoding/character/utf-8/unicode.c +0 -319
  310. data/ext/encoding/character/utf-8/unicode.h +0 -216
  311. data/ext/encoding/character/utf-8/utf.c +0 -1334
  312. data/lib/encoding/character/utf-8.rb +0 -201
  313. data/lib/u.rb +0 -16
  314. data/lib/u/string.rb +0 -185
  315. data/lib/u/version.rb +0 -5
  316. data/test/unit/u/string.rb +0 -91
@@ -0,0 +1,43 @@
1
+ #include <ruby.h>
2
+ #include <stdbool.h>
3
+ #include <stddef.h>
4
+ #include <stdint.h>
5
+ #include "u.h"
6
+ #include "private.h"
7
+ #include "rb_private.h"
8
+ #include "rb_u_re.h"
9
+
10
+ VALUE
11
+ rb_u_pattern_argument(VALUE pattern, bool quote)
12
+ {
13
+ VALUE string;
14
+
15
+ switch (TYPE(pattern)) {
16
+ case T_REGEXP:
17
+ return pattern;
18
+ case T_STRING:
19
+ string = pattern;
20
+ break;
21
+ default:
22
+ string = rb_check_string_type(pattern);
23
+ if (NIL_P(string))
24
+ Check_Type(pattern, T_REGEXP);
25
+ }
26
+
27
+ return rb_reg_regcomp(quote ? rb_reg_quote(string) : string);
28
+ }
29
+
30
+ #ifndef HAVE_RB_REG_BACKREF_NUMBER
31
+ static int
32
+ rb_reg_backref_number(UNUSED(VALUE match), VALUE backref)
33
+ {
34
+ return NUM2INT(backref);
35
+ }
36
+ #endif
37
+
38
+ VALUE
39
+ rb_u_pattern_match_reference(VALUE reference)
40
+ {
41
+ VALUE match = rb_backref_get();
42
+ return rb_reg_nth_match(rb_reg_backref_number(match, reference), match);
43
+ }
@@ -0,0 +1,15 @@
1
+ #ifdef HAVE_RUBY_RE_H
2
+ # include <ruby/re.h>
3
+ #else
4
+ # include <re.h>
5
+ #endif
6
+
7
+
8
+ #ifdef HAVE_BROKEN_RMATCH_REGS
9
+ # undef RMATCH_REGS
10
+ # define RMATCH_REGS(obj) ((R_CAST(RMatch)(obj))->regs)
11
+ #endif
12
+
13
+
14
+ VALUE rb_u_pattern_argument(VALUE pattern, bool quote);
15
+ VALUE rb_u_pattern_match_reference(VALUE reference);
@@ -0,0 +1,478 @@
1
+ /* -*- coding: utf-8 -*- */
2
+
3
+ #include <ruby.h>
4
+ #ifdef HAVE_RUBY_ENCODING_H
5
+ # include <ruby/encoding.h>
6
+ #endif
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+ #include <limits.h>
11
+ #include "u.h"
12
+ #include "private.h"
13
+ #include "rb_private.h"
14
+ #include "rb_u_string.h"
15
+
16
+ VALUE rb_cUString;
17
+
18
+ static void
19
+ rb_u_string_mark(const struct rb_u_string *string)
20
+ {
21
+ rb_gc_mark(string->rb);
22
+ }
23
+
24
+ static void
25
+ rb_u_string_free(struct rb_u_string *string)
26
+ {
27
+ free((char *)string->c);
28
+ free(string);
29
+ }
30
+
31
+ static VALUE
32
+ rb_u_string_set_rb(VALUE self, VALUE rb)
33
+ {
34
+ struct rb_u_string *string = RVAL2USTRING(self);
35
+ if (NIL_P(rb)) {
36
+ string->rb = rb;
37
+ return self;
38
+ }
39
+ #ifdef HAVE_RUBY_ENCODING_H
40
+ rb = rb_str_encode(rb, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
41
+ #endif
42
+ string->rb = rb_str_freeze(rb);
43
+ OBJ_INFECT(self, string->rb);
44
+ return self;
45
+ }
46
+
47
+ static VALUE
48
+ rb_u_string_create(VALUE rb, const char *str, long length)
49
+ {
50
+ struct rb_u_string *string = ALLOC(struct rb_u_string);
51
+ string->rb = Qnil;
52
+ string->c = str;
53
+ string->length = length;
54
+ return rb_u_string_set_rb(USTRING2RVAL(string), rb);
55
+ }
56
+
57
+ static VALUE
58
+ rb_u_string_alloc(UNUSED(VALUE klass))
59
+ {
60
+ return rb_u_string_create(Qnil, NULL, 0);
61
+ }
62
+
63
+ VALUE
64
+ rb_u_string_new_uninfected(const char *str, long length)
65
+ {
66
+ return rb_u_string_new_c(Qnil, str, length);
67
+ }
68
+
69
+ VALUE
70
+ rb_u_string_new_uninfected_own(const char *str, long length)
71
+ {
72
+ return rb_u_string_new_c_own(Qnil, str, length);
73
+ }
74
+
75
+ VALUE
76
+ rb_u_string_new_c(VALUE self, const char *str, long length)
77
+ {
78
+ char *copy = ALLOC_N(char, length + 1);
79
+ MEMCPY(copy, str, char, length);
80
+ copy[length] = '\0';
81
+ return rb_u_string_new_c_own(self, copy, length);
82
+ }
83
+
84
+ VALUE
85
+ rb_u_string_new_c_own(VALUE self, const char *str, long length)
86
+ {
87
+ VALUE result = rb_u_string_create(Qnil, str, length);
88
+ OBJ_INFECT(result, self);
89
+ return result;
90
+ }
91
+
92
+ VALUE
93
+ rb_u_string_new_rb(VALUE str)
94
+ {
95
+ return rb_u_string_create(str, NULL, 0);
96
+ }
97
+
98
+ VALUE
99
+ rb_u_string_new_subsequence(VALUE self, long begin, long length)
100
+ {
101
+ const struct rb_u_string *string = RVAL2USTRING(self);
102
+ return rb_u_string_new_c(self, USTRING_STR(string) + begin, length);
103
+ }
104
+
105
+ VALUE
106
+ rb_u_string_new_empty(VALUE self)
107
+ {
108
+ return rb_u_string_new_subsequence(self, 0, 0);
109
+ }
110
+
111
+ VALUE
112
+ rb_u_string_check_type(VALUE str)
113
+ {
114
+ if (RTEST(rb_obj_is_kind_of(str, rb_cUString)))
115
+ return str;
116
+
117
+ return rb_check_string_type(str);
118
+ }
119
+
120
+ VALUE
121
+ rb_u_string_validate_type(VALUE str)
122
+ {
123
+ VALUE converted = rb_u_string_check_type(str);
124
+
125
+ if (NIL_P(converted))
126
+ rb_u_raise(rb_eTypeError,
127
+ "type mismatch: %s given",
128
+ rb_obj_classname(str));
129
+
130
+ return converted;
131
+ }
132
+
133
+ VALUE
134
+ rb_u_string_object_as_string(VALUE object)
135
+ {
136
+ if (TYPE(object) == T_STRING || RTEST(rb_obj_is_kind_of(object, rb_cUString)))
137
+ return object;
138
+
139
+ static ID id_to_s;
140
+ if (id_to_s == 0)
141
+ id_to_s = rb_intern("to_s");
142
+
143
+ VALUE str = rb_funcall(object, id_to_s, 0);
144
+ if (TYPE(str) != T_STRING)
145
+ return rb_any_to_s(object);
146
+
147
+ if (OBJ_TAINTED(object))
148
+ OBJ_TAINT(str);
149
+
150
+ return str;
151
+ }
152
+
153
+ /* @!visibility public
154
+ * @overload new(string = nil)
155
+ *
156
+ * Sets up a U::String wrapping STRING after encoding it as UTF-8 and
157
+ * freezing it.
158
+ *
159
+ * @param [String, nil] string
160
+ */
161
+ static VALUE
162
+ rb_u_string_initialize(int argc, VALUE *argv, VALUE self)
163
+ {
164
+ VALUE rb;
165
+
166
+ rb_scan_args(argc, argv, "01", &rb);
167
+ if (!NIL_P(rb)) {
168
+ StringValue(rb);
169
+ rb_u_string_set_rb(self, rb);
170
+ }
171
+
172
+ return Qnil;
173
+ }
174
+
175
+ static VALUE
176
+ rb_u_string_initialize_copy(VALUE self, VALUE rboriginal)
177
+ {
178
+ struct rb_u_string *string = RVAL2USTRING(self);
179
+ const struct rb_u_string *original = RVAL2USTRING(rboriginal);
180
+
181
+ if (string == original)
182
+ return self;
183
+
184
+ string->rb = original->rb;
185
+ string->c = original->c;
186
+ string->length = original->length;
187
+
188
+ OBJ_INFECT(self, rboriginal);
189
+
190
+ return self;
191
+ }
192
+
193
+ VALUE
194
+ rb_u_string_dup(VALUE self)
195
+ {
196
+ const struct rb_u_string *string = RVAL2USTRING(self);
197
+
198
+ VALUE result = rb_u_string_create(string->rb, string->c, string->length);
199
+ OBJ_INFECT(result, self);
200
+ return result;
201
+ }
202
+
203
+ #include <errno.h>
204
+
205
+ static VALUE
206
+ rb_u_string_recode(VALUE self, VALUE codeset)
207
+ {
208
+ const struct rb_u_string *string = RVAL2USTRING(self);
209
+ const char *cs = StringValuePtr(codeset);
210
+ errno = 0;
211
+ size_t n = u_recode(NULL, 0, USTRING_STR(string), USTRING_LENGTH(string), cs);
212
+ if (errno != 0)
213
+ rb_u_raise_errno(errno, "can’t recode");
214
+ char *recoded = ALLOC_N(char, n + 1);
215
+ u_recode(recoded, n + 1, USTRING_STR(string), USTRING_LENGTH(string), cs);
216
+ return rb_str_new(recoded, n);
217
+ }
218
+
219
+ /* Document-class: U::String
220
+ *
221
+ * A U::String is a sequence of zero or more Unicode characters encoded as
222
+ * UTF-8. Its interface is an extension of that of Ruby’s built-in String
223
+ * class that provides better Unicode support, as it handles things such as
224
+ * casing, width, collation, and various other Unicode properties that Ruby’s
225
+ * built-in String class simply doesn’t bother itself with. It also provides
226
+ * “backwards compatibility” with Ruby 1.8.7 so that you can use Unicode
227
+ * without upgrading to Ruby 2.0 (which you probably should do, though).
228
+ *
229
+ * It differs from Ruby’s built-in String class in one other very important way
230
+ * in that it doesn’t provide any way to change an existing object. That is, a
231
+ * U::String is a value object.
232
+ *
233
+ * A U::String is most easily created from a String by calling {String#u}.
234
+ * Most U::String methods that return a stringy result will return a U::String,
235
+ * so you only have to do that once. You can get back a String by calling
236
+ * {U::String#to_str}.
237
+ *
238
+ * Validation of a U::String’s content isn’t performed until any access to it
239
+ * is made, at which time an ArgumentError will be raised if it isn’t valid.
240
+ *
241
+ * U::String has a lot of methods defined upon it, so let’s break them up into
242
+ * categories to get a proper overview of what’s possible to do with one.
243
+ * Let’s begin with the interrogators. There are three kinds of interrogators,
244
+ * validity-checking ones, property-checking ones, and content-matching ones.
245
+ *
246
+ * The validity-checking interrogator is {#valid_encoding?}, which makes sure
247
+ * that the UTF-8 sequence itself is valid.
248
+ *
249
+ * The property-checking interrogators are {#alnum?}, {#alpha?},
250
+ * {#ascii_only?}, {#assigned?}, {#case_ignorable?}, {#cased?}, {#cntrl?},
251
+ * {#defined?}, {#digit?}, {#graph?}, {#newline?}, {#print?}, {#punct?},
252
+ * {#soft_dotted?}, {#space?}, {#title?}, {#valid?}, {#wide?}, {#wide_cjk?},
253
+ * {#xdigit?}, and {#zero_width?}. These interrogators check the corresponding
254
+ * Unicode property of each character in the U::String and if all characters
255
+ * have this property, they’ll return true.
256
+ *
257
+ * Very close relatives to the property-checking interrogators are {#folded?},
258
+ * {#lower?}, and {#upper?}, which check whether a string has been cased in a
259
+ * given way, and {#normalized?}, which checks whether the receiver has been
260
+ * normalized, optionally to a specific normalization form.
261
+ *
262
+ * The content-matching interrogators are {#==}, {#===}, {#=~}, {#match},
263
+ * {#empty?}, {#end_with?}, {#eql?}, {#include?}, {#index}, {#rindex}, and
264
+ * {#start_with?}. These interrogators check that a substring of the U::String
265
+ * matches another string or Regexp and either return a Boolean result, an
266
+ * index into the U::String where the match begins, or MatchData for full
267
+ * matching information.
268
+ *
269
+ * Related to the content-matching interrogators are {#<=>}, {#casecmp}, and
270
+ * {#collation_key}, all of which compare a U::String against another for
271
+ * ordering.
272
+ *
273
+ * Related to the property-checking interrogators are
274
+ * {#canonical_combining_class}, {#general_category}, {#grapheme_break},
275
+ * {#line_break}, {#script}, and {#word_break}, which return the value of the
276
+ * Unicode property in question, the general category being the one often
277
+ * interrogated.
278
+ *
279
+ * There are a couple of other “interrogators” in {#bytesize}, {#length},
280
+ * {#size}, {#width} that return integer properties of the U::String as a
281
+ * whole, where #length and #width are probably the most useful.
282
+ *
283
+ * Beyond interrogators there are quite a few methods for iterating over the
284
+ * content of a U::String, each viewing it in its own way: {#each_byte},
285
+ * {#each_char}, {#each_codepoint}, {#each_grapheme_cluster}, {#each_line}, and
286
+ * {#each_word}. They all have respective methods ({#bytes}, {#chars},
287
+ * {#codepoints}, {#grapheme_clusters}, {#lines}, {#words}) that return an
288
+ * Array instead of yielding each result.
289
+ *
290
+ * Quite a few methods are devoted to extracting a substring of a U::String,
291
+ * namely {#[]}, {#slice}, {#byteslice}, {#chomp}, {#chop}, {#chr}, {#getbyte},
292
+ * {#lstrip}, {#ord}, {#rstrip}, {#strip}.
293
+ *
294
+ * There are a few methods for case-shifting: {#downcase}, {#foldcase},
295
+ * {#titlecase}, and {#upcase}. Then there’s {#mirror}, {#normalize}, and
296
+ * {#reverse} that alter the string in other ways.
297
+ *
298
+ * The methods {#center}, {#ljust}, and {#rjust} pad a U::String to make it a
299
+ * certain number of cells wide.
300
+ *
301
+ * Then there’s a couple of methods that are more related in the arguments they
302
+ * take than in function: {#count}, {#delete}, {#squeeze}, {#tr}, and {#tr_s}.
303
+ * These methods all take specifications of character/code point ranges that
304
+ * should be counted, deleted, squeezed, and translated (plus squeezed).
305
+ *
306
+ * Deconstructing a U::String can be done with {#partition} and {#rpartition},
307
+ * which splits it around a divider, {#scan}, which extracts matches to a
308
+ * pattern, {#split}, which splits it on a divider.
309
+ *
310
+ * Substitution of all matches to a pattern can be made with {#gsub} and of the
311
+ * first match to a pattern with {#sub}.
312
+ *
313
+ * Creating larger U::Strings from smaller ones is done with {#+}, which
314
+ * concatenates two of them, and {#*}, which concatenates a U::String to itself
315
+ * a number of times.
316
+ *
317
+ * A U::String can also be used as a specification as to how to format a number
318
+ * of values via {#%} (and its alias {#format}) into a new U::String, much like
319
+ * snprintf(3) in C.
320
+ *
321
+ * The content of a U::String can be {#dump}ed and {#inspect}ed to make it
322
+ * reader-friendly, but also debugger-friendly.
323
+ *
324
+ * Finally, a U::String has a few methods to turn its content into other
325
+ * values: {#hash}, which turns it into a hash value to be used for hashing,
326
+ * {#hex}, {#oct}, {#to_i}, which turn it into an Integer, {#to_str}, {#to_s},
327
+ * {#b}, which turn it into a String, and {#to_sym} (and its alias {#intern}),
328
+ * which turns it into a Symbol.
329
+ *
330
+ * Note that some methods defined on String are missing. #Capitalize doesn’t
331
+ * exist, as capitalization isn’t a Unicode concept. #Sum doesn’t exist, as a
332
+ * U::String generally doesn’t contain content that you need a checksum of.
333
+ * \#Crypt doesn’t exist for similar reasons. #Swapcase isn’t useful on a
334
+ * String and it certainly isn’t useful in a Unicode context. As a U::String
335
+ * doesn’t contain arbitrary data, #unpack is left to String. #Next/#succ
336
+ * would perhaps be implementable, but haven’t been implemented, as a satisfactory
337
+ * implementation hasn’t been thought of. */
338
+ void
339
+ Init_u_string(VALUE mU)
340
+ {
341
+ rb_cUString = rb_define_class_under(mU, "String", rb_cData);
342
+
343
+ rb_include_module(rb_cUString, rb_mComparable);
344
+
345
+ rb_define_alloc_func(rb_cUString, rb_u_string_alloc);
346
+ rb_define_private_method(rb_cUString, "initialize", rb_u_string_initialize, -1);
347
+ rb_define_private_method(rb_cUString, "initialize_copy", rb_u_string_initialize_copy, 1);
348
+
349
+ rb_define_method(rb_cUString, "valid_encoding?", rb_u_string_valid_encoding, 0); /* in ext/u/rb_u_string_valid_encoding.c */
350
+
351
+ rb_define_method(rb_cUString, "alnum?", rb_u_string_alnum, 0); /* in ext/u/rb_u_string_alnum.c */
352
+ rb_define_method(rb_cUString, "alpha?", rb_u_string_alpha, 0); /* in ext/u/rb_u_string_alpha.c */
353
+ rb_define_method(rb_cUString, "ascii_only?", rb_u_string_ascii_only, 0); /* in ext/u/rb_u_string_ascii_only.c */
354
+ rb_define_method(rb_cUString, "assigned?", rb_u_string_assigned, 0); /* in ext/u/rb_u_string_assigned.c */
355
+ rb_define_method(rb_cUString, "case_ignorable?", rb_u_string_case_ignorable, 0); /* in ext/u/rb_u_string_case_ignorable.c */
356
+ rb_define_method(rb_cUString, "cased?", rb_u_string_cased, 0); /* in ext/u/rb_u_string_cased.c */
357
+ rb_define_method(rb_cUString, "cntrl?", rb_u_string_cntrl, 0); /* in ext/u/rb_u_string_cntrl.c */
358
+ rb_define_method(rb_cUString, "defined?", rb_u_string_defined, 0); /* in ext/u/rb_u_string_defined.c */
359
+ rb_define_method(rb_cUString, "digit?", rb_u_string_digit, 0); /* in ext/u/rb_u_string_digit.c */
360
+ rb_define_method(rb_cUString, "folded?", rb_u_string_folded, -1); /* in ext/u/rb_u_string_folded.c */
361
+ rb_define_method(rb_cUString, "graph?", rb_u_string_graph, 0); /* in ext/u/rb_u_string_graph.c */
362
+ rb_define_method(rb_cUString, "lower?", rb_u_string_lower, -1); /* in ext/u/rb_u_string_lower.c */
363
+ rb_define_method(rb_cUString, "newline?", rb_u_string_newline, 0); /* in ext/u/rb_u_string_newline.c */
364
+ rb_define_method(rb_cUString, "print?", rb_u_string_print, 0); /* in ext/u/rb_u_string_print.c */
365
+ rb_define_method(rb_cUString, "punct?", rb_u_string_punct, 0); /* in ext/u/rb_u_string_punct.c */
366
+ rb_define_method(rb_cUString, "soft_dotted?", rb_u_string_soft_dotted, 0); /* in ext/u/rb_u_string_soft_dotted.c */
367
+ rb_define_method(rb_cUString, "space?", rb_u_string_space, 0); /* in ext/u/rb_u_string_space.c */
368
+ rb_define_method(rb_cUString, "title?", rb_u_string_title, 0); /* in ext/u/rb_u_string_title.c */
369
+ rb_define_method(rb_cUString, "upper?", rb_u_string_upper, -1); /* in ext/u/rb_u_string_upper.c */
370
+ rb_define_method(rb_cUString, "valid?", rb_u_string_valid, 0); /* in ext/u/rb_u_string_valid.c */
371
+ rb_define_method(rb_cUString, "wide?", rb_u_string_wide, 0); /* in ext/u/rb_u_string_wide.c */
372
+ rb_define_method(rb_cUString, "wide_cjk?", rb_u_string_wide_cjk, 0); /* in ext/u/rb_u_string_wide_cjk.c */
373
+ rb_define_method(rb_cUString, "xdigit?", rb_u_string_xdigit, 0); /* in ext/u/rb_u_string_xdigit.c */
374
+ rb_define_method(rb_cUString, "zero_width?", rb_u_string_zero_width, 0); /* in ext/u/rb_u_string_zero_width.c */
375
+
376
+ rb_define_method(rb_cUString, "normalized?", rb_u_string_normalized, -1); /* in ext/u/rb_u_string_normalized.c */
377
+
378
+ rb_define_method(rb_cUString, "==", rb_u_string_equal, 1); /* in ext/u/rb_u_string_equal.c */
379
+ rb_define_alias(rb_cUString, "===", "==");
380
+ rb_define_method(rb_cUString, "=~", rb_u_string_match, 1); /* in ext/u/rb_u_string_match.c */
381
+ rb_define_method(rb_cUString, "match", rb_u_string_match_m, -1); /* in ext/u/rb_u_string_match.c */
382
+ rb_define_method(rb_cUString, "empty?", rb_u_string_empty, 0); /* in ext/u/rb_u_string_empty.c */
383
+ rb_define_method(rb_cUString, "end_with?", rb_u_string_end_with, -1); /* in ext/u/rb_u_string_end_with.c */
384
+ rb_define_method(rb_cUString, "eql?", rb_u_string_eql, 1); /* in ext/u/rb_u_string_eql.c */
385
+ rb_define_method(rb_cUString, "include?", rb_u_string_include, 1); /* in ext/u/rb_u_string_include.c */
386
+ rb_define_method(rb_cUString, "index", rb_u_string_index_m, -1); /* in ext/u/rb_u_string_index.c */
387
+ rb_define_method(rb_cUString, "rindex", rb_u_string_rindex_m, -1); /* in ext/u/rb_u_string_rindex.c */
388
+ rb_define_method(rb_cUString, "start_with?", rb_u_string_start_with, -1); /* in ext/u/rb_u_string_start_with.c */
389
+
390
+ rb_define_method(rb_cUString, "<=>", rb_u_string_collate, -1); /* in ext/u/rb_u_string_collate.c */
391
+ rb_define_method(rb_cUString, "casecmp", rb_u_string_casecmp, -1); /* in ext/u/rb_u_string_casecmp.c */
392
+ rb_define_method(rb_cUString, "collation_key", rb_u_string_collation_key, -1); /* in ext/u/rb_u_string_collation_key.c */
393
+
394
+ rb_define_method(rb_cUString, "canonical_combining_class", rb_u_string_canonical_combining_class, 0); /* in ext/u/rb_u_string_canonical_combining_class.c */
395
+ rb_define_method(rb_cUString, "general_category", rb_u_string_general_category, 0); /* in ext/u/rb_u_string_general_category.c */
396
+ rb_define_method(rb_cUString, "grapheme_break", rb_u_string_grapheme_break, 0); /* in ext/u/rb_u_string_grapheme_break.c */
397
+ rb_define_method(rb_cUString, "line_break", rb_u_string_line_break, 0); /* in ext/u/rb_u_string_line_break.c */
398
+ rb_define_method(rb_cUString, "script", rb_u_string_script, 0); /* in ext/u/rb_u_string_script.c */
399
+ rb_define_method(rb_cUString, "word_break", rb_u_string_word_break, 0); /* in ext/u/rb_u_string_word_break.c */
400
+
401
+ rb_define_method(rb_cUString, "bytesize", rb_u_string_bytesize, 0); /* in ext/u/rb_u_string_bytesize.c */
402
+ rb_define_method(rb_cUString, "length", rb_u_string_length, 0); /* in ext/u/rb_u_string_length.c */
403
+ rb_define_alias(rb_cUString, "size", "length");
404
+ rb_define_method(rb_cUString, "width", rb_u_string_width, 0); /* in ext/u/rb_u_string_width.c */
405
+
406
+ rb_define_method(rb_cUString, "each_byte", rb_u_string_each_byte, 0); /* in ext/u/rb_u_string_each_byte.c */
407
+ rb_define_method(rb_cUString, "bytes", rb_u_string_bytes, 0); /* in ext/u/rb_u_string_each_byte.c */
408
+ rb_define_method(rb_cUString, "each_char", rb_u_string_each_char, 0); /* in ext/u/rb_u_string_each_char.c */
409
+ rb_define_method(rb_cUString, "chars", rb_u_string_chars, 0); /* in ext/u/rb_u_string_each_char.c */
410
+ rb_define_method(rb_cUString, "each_codepoint", rb_u_string_each_codepoint, 0); /* in ext/u/rb_u_string_each_codepoint.c */
411
+ rb_define_method(rb_cUString, "codepoints", rb_u_string_codepoints, 0); /* in ext/u/rb_u_string_each_codepoint.c */
412
+ rb_define_method(rb_cUString, "each_grapheme_cluster", rb_u_string_each_grapheme_cluster, 0); /* in ext/u/rb_u_string_each_grapheme_cluster.c */
413
+ rb_define_alias(rb_cUString, "grapheme_clusters", "each_grapheme_cluster");
414
+ rb_define_method(rb_cUString, "each_line", rb_u_string_each_line, -1); /* in ext/u/rb_u_string_each_line.c */
415
+ rb_define_method(rb_cUString, "lines", rb_u_string_lines, -1); /* in ext/u/rb_u_string_each_line.c */
416
+ rb_define_method(rb_cUString, "each_word", rb_u_string_each_word, 0); /* in ext/u/rb_u_string_each_word.c */
417
+ rb_define_alias(rb_cUString, "words", "each_word");
418
+
419
+ rb_define_method(rb_cUString, "[]", rb_u_string_aref_m, -1); /* in ext/u/rb_u_string_aref.c */
420
+ rb_define_alias(rb_cUString, "slice", "[]");
421
+ rb_define_method(rb_cUString, "byteslice", rb_u_string_byteslice_m, -1); /* in ext/u/rb_u_string_byteslice.c */
422
+ rb_define_method(rb_cUString, "chomp", rb_u_string_chomp, -1); /* in ext/u/rb_u_string_chomp.c */
423
+ rb_define_method(rb_cUString, "chop", rb_u_string_chop, 0); /* in ext/u/rb_u_string_chop.c */
424
+ rb_define_method(rb_cUString, "chr", rb_u_string_chr, 0); /* in ext/u/rb_u_string_chr.c */
425
+ rb_define_method(rb_cUString, "getbyte", rb_u_string_getbyte, 1); /* in ext/u/rb_u_string_getbyte.c */
426
+ rb_define_method(rb_cUString, "lstrip", rb_u_string_lstrip, 0); /* in ext/u/rb_u_string_lstrip.c */
427
+ rb_define_method(rb_cUString, "ord", rb_u_string_ord, 0); /* in ext/u/rb_u_string_ord.c */
428
+ rb_define_method(rb_cUString, "rstrip", rb_u_string_rstrip, 0); /* in ext/u/rb_u_string_rstrip.c */
429
+ rb_define_method(rb_cUString, "strip", rb_u_string_strip, 0); /* in ext/u/rb_u_string_strip.c */
430
+
431
+ rb_define_method(rb_cUString, "downcase", rb_u_string_downcase, -1); /* in ext/u/rb_u_string_downcase.c */
432
+ rb_define_method(rb_cUString, "foldcase", rb_u_string_foldcase, -1); /* in ext/u/rb_u_string_foldcase.c */
433
+ rb_define_method(rb_cUString, "titlecase", rb_u_string_titlecase, -1); /* in ext/u/rb_u_string_titlecase.c */
434
+ rb_define_method(rb_cUString, "upcase", rb_u_string_upcase, -1); /* in ext/u/rb_u_string_upcase.c */
435
+
436
+ rb_define_method(rb_cUString, "mirror", rb_u_string_mirror, 0); /* in ext/u/rb_u_string_mirror.c */
437
+ rb_define_method(rb_cUString, "normalize", rb_u_string_normalize, -1); /* in ext/u/rb_u_string_normalize.c */
438
+ rb_define_method(rb_cUString, "reverse", rb_u_string_reverse, 0); /* in ext/u/rb_u_string_reverse.c */
439
+
440
+ rb_define_method(rb_cUString, "center", rb_u_string_center, -1); /* in ext/u/rb_u_string_justify.c */
441
+ rb_define_method(rb_cUString, "ljust", rb_u_string_ljust, -1); /* in ext/u/rb_u_string_justify.c */
442
+ rb_define_method(rb_cUString, "rjust", rb_u_string_rjust, -1); /* in ext/u/rb_u_string_justify.c */
443
+
444
+ rb_define_method(rb_cUString, "count", rb_u_string_count, -1); /* in ext/u/rb_u_string_count.c */
445
+ rb_define_method(rb_cUString, "delete", rb_u_string_delete, -1); /* in ext/u/rb_u_string_delete.c */
446
+ rb_define_method(rb_cUString, "squeeze", rb_u_string_squeeze, -1); /* in ext/u/rb_u_string_squeeze.c */
447
+ rb_define_method(rb_cUString, "tr", rb_u_string_tr, 2); /* in ext/u/rb_u_string_tr.c */
448
+ rb_define_method(rb_cUString, "tr_s", rb_u_string_tr_s, 2); /* in ext/u/rb_u_string_tr.c */
449
+
450
+ rb_define_method(rb_cUString, "partition", rb_u_string_partition, 1); /* in ext/u/rb_u_string_partition.c */
451
+ rb_define_method(rb_cUString, "rpartition", rb_u_string_rpartition, 1); /* in ext/u/rb_u_string_rpartition.c */
452
+ rb_define_method(rb_cUString, "scan", rb_u_string_scan, 1); /* in ext/u/rb_u_string_scan.c */
453
+ rb_define_method(rb_cUString, "split", rb_u_string_split_m, -1); /* in ext/u/rb_u_string_split.c */
454
+
455
+ rb_define_method(rb_cUString, "gsub", rb_u_string_gsub, -1); /* in ext/u/rb_u_string_gsub.c */
456
+ rb_define_method(rb_cUString, "sub", rb_u_string_sub, -1); /* in ext/u/rb_u_string_sub.c */
457
+
458
+ rb_define_method(rb_cUString, "+", rb_u_string_plus, 1); /* in ext/u/rb_u_string_plus.c */
459
+ rb_define_method(rb_cUString, "*", rb_u_string_times, 1); /* in ext/u/rb_u_string_times.c */
460
+
461
+ rb_define_method(rb_cUString, "%", rb_u_string_format_m, 1); /* in ext/u/rb_u_string_format.c */
462
+ rb_define_alias(rb_cUString, "format", "%");
463
+
464
+ rb_define_method(rb_cUString, "dump", rb_u_string_dump, 0); /* in ext/u/rb_u_string_dump.c */
465
+ rb_define_method(rb_cUString, "inspect", rb_u_string_inspect, 0); /* in ext/u/rb_u_string_inspect.c */
466
+
467
+ rb_define_method(rb_cUString, "hash", rb_u_string_hash, 0); /* in ext/u/rb_u_string_hash.c */
468
+ rb_define_method(rb_cUString, "hex", rb_u_string_hex, 0); /* in ext/u/rb_u_string_hex.c */
469
+ rb_define_method(rb_cUString, "oct", rb_u_string_oct, 0); /* in ext/u/rb_u_string_oct.c */
470
+ rb_define_method(rb_cUString, "to_i", rb_u_string_to_i, -1); /* in ext/u/rb_u_string_to_i.c */
471
+ rb_define_method(rb_cUString, "to_str", rb_u_string_to_str, 0); /* in ext/u/rb_u_string_to_str.c */
472
+ rb_define_alias(rb_cUString, "to_s", "to_str");
473
+ rb_define_method(rb_cUString, "b", rb_u_string_b, 0); /* in ext/u/rb_u_string_b.c */
474
+ rb_define_method(rb_cUString, "to_sym", rb_u_string_to_sym, 0); /* in ext/u/rb_u_string_to_sym.c */
475
+ rb_define_alias(rb_cUString, "intern", "to_sym");
476
+
477
+ rb_define_method(rb_cUString, "recode", rb_u_string_recode, 1);
478
+ }