icu4r 0.1.3.2006.01.26 → 0.1.4.2006.05.29

Sign up to get free protection for your applications and to get access to all the features.
data/ustring.c CHANGED
@@ -26,6 +26,8 @@ extern VALUE icu_from_rstr(int argc, VALUE * argv, VALUE str);
26
26
  VALUE rb_cUResourceBundle;
27
27
  VALUE rb_cULocale;
28
28
  VALUE rb_cUCalendar;
29
+ VALUE rb_cUConverter;
30
+ VALUE rb_cUCollator;
29
31
 
30
32
  #include "uregex.h"
31
33
 
@@ -42,10 +44,10 @@ free_ustr(str)
42
44
  str->ptr = 0;
43
45
  free(str);
44
46
  }
45
- inline void icu_check_frozen(VALUE str)
47
+ inline void icu_check_frozen(int check_busy, VALUE str)
46
48
  {
47
49
  rb_check_frozen(str);
48
- if(USTRING(str)->busy) rb_raise(rb_eRuntimeError, "String is busy. Can't modify");
50
+ if(check_busy && USTRING(str)->busy > 0 ) rb_raise(rb_eRuntimeError, "String is busy. Can't modify");
49
51
  }
50
52
  #define START_BUF_LEN 16
51
53
  /**
@@ -85,7 +87,6 @@ icu_ustr_alloc(klass)
85
87
  {
86
88
  return icu_ustr_alloc_and_wrap(NULL, 0, 0, ICU_COPY);
87
89
  }
88
-
89
90
  void ustr_capa_resize(ICUString * str, long new_capa)
90
91
  {
91
92
  if (new_capa != str->capa) {
@@ -263,7 +264,7 @@ icu_ustr_replace(str, str2)
263
264
  {
264
265
  if (str == str2)
265
266
  return str;
266
- icu_check_frozen(str);
267
+ icu_check_frozen(1, str);
267
268
  Check_Class(str2, rb_cUString);
268
269
  ustr_splice_units(USTRING(str), 0, ICU_LEN(str), ICU_PTR(str2), ICU_LEN(str2));
269
270
  OBJ_INFECT(str, str2);
@@ -284,7 +285,7 @@ VALUE
284
285
  icu_ustr_clear(str)
285
286
  VALUE str;
286
287
  {
287
- icu_check_frozen(str);
288
+ icu_check_frozen(1, str);
288
289
  icu_ustr_resize(str, 0);
289
290
  return str;
290
291
  }
@@ -475,7 +476,7 @@ icu_ustr_concat(str1, str2)
475
476
  VALUE str1,
476
477
  str2;
477
478
  {
478
- icu_check_frozen(str1);
479
+ icu_check_frozen(1, str1);
479
480
  Check_Class(str2, rb_cUString);
480
481
  if (ICU_LEN(str2) > 0) {
481
482
  ustr_splice_units(USTRING(str1), ICU_LEN(str1), 0, ICU_PTR(str2), ICU_LEN(str2));
@@ -546,7 +547,7 @@ icu_ustr_upcase_bang(argc, argv, str)
546
547
  long len ;
547
548
  VALUE loc;
548
549
  char * locale = NULL;
549
- icu_check_frozen(str);
550
+ icu_check_frozen(1, str);
550
551
  buf = ALLOC_N(UChar, ICU_LEN(str) + 1);
551
552
  if (rb_scan_args(argc, argv, "01", &loc) == 1) {
552
553
  if( loc != Qnil) {
@@ -614,7 +615,7 @@ icu_ustr_downcase_bang(argc, argv, str)
614
615
  VALUE loc;
615
616
  char * locale = NULL;
616
617
  buf = ALLOC_N(UChar, ICU_LEN(str) + 1);
617
- icu_check_frozen(str);
618
+ icu_check_frozen(1, str);
618
619
  if (rb_scan_args(argc, argv, "01", &loc) == 1) {
619
620
  if( loc != Qnil) {
620
621
  Check_Type(loc, T_STRING);
@@ -874,7 +875,7 @@ icu_ustr_lstrip_bang(str)
874
875
  int32_t i,
875
876
  n,
876
877
  c;
877
- icu_check_frozen(str);
878
+ icu_check_frozen(1, str);
878
879
  s = ICU_PTR(str);
879
880
  n = ICU_LEN(str);
880
881
  if (!s || n == 0)
@@ -941,7 +942,7 @@ icu_ustr_rstrip_bang(str)
941
942
  n,
942
943
  c;
943
944
 
944
- icu_check_frozen(str);
945
+ icu_check_frozen(1, str);
945
946
  s = ICU_PTR(str);
946
947
  n = ICU_LEN(str);
947
948
 
@@ -1038,14 +1039,14 @@ icu_ustr_normalize(str, mode)
1038
1039
  int32_t mode;
1039
1040
  {
1040
1041
  UErrorCode error = U_ZERO_ERROR;
1041
- long capa = ICU_LEN(str);
1042
+ long capa = ICU_LEN(str)+20;
1042
1043
  UChar *buf;
1043
1044
  long needed;
1044
1045
  VALUE ret;
1045
1046
  if (UNORM_YES == unorm_quickCheck(ICU_PTR(str), ICU_LEN(str), mode, &error))
1046
1047
  return icu_ustr_dup(str);
1047
1048
 
1048
- buf = ALLOC_N(UChar, capa + 20);
1049
+ buf = ALLOC_N(UChar, capa );
1049
1050
  do {
1050
1051
  error = 0;
1051
1052
  needed =
@@ -1115,6 +1116,13 @@ icu_ustr_normalize_C(str)
1115
1116
  {
1116
1117
  return icu_ustr_normalize(str, UNORM_NFC);
1117
1118
  }
1119
+ VALUE my_ubrk_close(UBreakIterator ** boundary, VALUE errorinfo)
1120
+ {
1121
+ ubrk_close(*boundary);
1122
+ *boundary = NULL;
1123
+ rb_raise(rb_eRuntimeError, "Unhandled exception: %s", rb_obj_classname(errorinfo));
1124
+ return Qnil;
1125
+ }
1118
1126
 
1119
1127
  /* UBRK_CHARACTER, UBRK_WORD, UBRK_LINE, UBRK_SENTENCE */
1120
1128
  VALUE
@@ -1128,6 +1136,7 @@ icu_ustr_each_mode(argc, argv, str, mode)
1128
1136
  UBreakIterator *boundary;
1129
1137
  int32_t end, start;
1130
1138
  VALUE loc ;
1139
+ VALUE temp;
1131
1140
  char *locale = "";
1132
1141
  if( rb_scan_args(argc, argv, "01", &loc) == 1) {
1133
1142
  Check_Type(loc, T_STRING);
@@ -1139,12 +1148,12 @@ icu_ustr_each_mode(argc, argv, str, mode)
1139
1148
  if (U_FAILURE(error))
1140
1149
  rb_raise(rb_eArgError, "Error %s", u_errorName(error));
1141
1150
  start = ubrk_first(boundary);
1142
- USTRING(str)->busy = 1;
1143
- for (end = ubrk_next(boundary); end != UBRK_DONE;
1144
- start = end, end = ubrk_next(boundary)) {
1145
- rb_yield(icu_ustr_new(ICU_PTR(str) + start, end - start));
1151
+ ++(USTRING(str)->busy);
1152
+ for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = ubrk_next(boundary)) {
1153
+ temp = icu_ustr_new(ICU_PTR(str) + start, end - start);
1154
+ rb_rescue(rb_yield, temp, my_ubrk_close, &boundary);
1146
1155
  }
1147
- USTRING(str)->busy = 0;
1156
+ --(USTRING(str)->busy);
1148
1157
  ubrk_close(boundary);
1149
1158
  return str;
1150
1159
  }
@@ -1491,11 +1500,11 @@ icu_ustr_scan(str, pat)
1491
1500
  }
1492
1501
  return ary;
1493
1502
  }
1494
- USTRING(str)->busy = 1;
1503
+ ++(USTRING(str)->busy);
1495
1504
  while (!NIL_P(result = ustr_scan_once(str, pat, &start))) {
1496
1505
  rb_yield(result);
1497
1506
  }
1498
- USTRING(str)->busy = 0;
1507
+ --(USTRING(str)->busy);
1499
1508
  return str;
1500
1509
  }
1501
1510
  /**
@@ -1621,7 +1630,15 @@ icu_ustr_chars_m(argc, argv, str)
1621
1630
  * string is returned as the only entry in an array). If negative, there is no
1622
1631
  * limit to the number of fields returned, and trailing null fields are not
1623
1632
  * suppressed.
1624
- *
1633
+ *
1634
+ * NOTE: there's a difference in ICU regexp split and Ruby Regexp actions:
1635
+ * "a,b,c,,".split(/,/, -1) # => ["a", "b", "c", "", ""]
1636
+ * "a,b,c,,".u.split(ure(","), -1) # => ["a", "b", "c", ""]
1637
+ * it seems to be by design, in icu/source/i18n/uregex.cpp uregex_split():
1638
+ * if (nextOutputStringStart == inputLen) {
1639
+ * // The delimiter was at the end of the string. We're done.
1640
+ * break;
1641
+ * }
1625
1642
  */
1626
1643
 
1627
1644
  VALUE
@@ -1631,7 +1648,7 @@ icu_ustr_split_m(argc, argv, str)
1631
1648
  VALUE str;
1632
1649
  {
1633
1650
  VALUE spat;
1634
- VALUE limit;
1651
+ VALUE limit = Qnil;
1635
1652
  int lim = 0;
1636
1653
  VALUE result;
1637
1654
 
@@ -2021,7 +2038,7 @@ icu_ustr_insert(str, idx, str2)
2021
2038
  str2;
2022
2039
  {
2023
2040
  long pos = NUM2LONG(idx);
2024
- icu_check_frozen(str);
2041
+ icu_check_frozen(1, str);
2025
2042
 
2026
2043
  if (pos == -1) {
2027
2044
  pos = NUM2LONG(icu_ustr_length(str));
@@ -2169,7 +2186,7 @@ icu_ustr_aset_m(argc, argv, str)
2169
2186
  VALUE *argv;
2170
2187
  VALUE str;
2171
2188
  {
2172
- icu_check_frozen(str);
2189
+ icu_check_frozen(1, str);
2173
2190
  if (argc == 3) {
2174
2191
  if (CLASS_OF(argv[0]) == rb_cURegexp) {
2175
2192
  icu_ustr_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
@@ -2217,7 +2234,7 @@ icu_ustr_slice_bang(argc, argv, str)
2217
2234
  VALUE result;
2218
2235
  VALUE buf[3];
2219
2236
  int i;
2220
- icu_check_frozen(str);
2237
+ icu_check_frozen(1, str);
2221
2238
  if (argc < 1 || 2 < argc) {
2222
2239
  rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)",
2223
2240
  argc);
@@ -2271,8 +2288,8 @@ ustr_gsub(argc, argv, str, bang, once)
2271
2288
  return icu_ustr_dup(str);
2272
2289
  }
2273
2290
  end = 0;
2274
- icu_check_frozen(str);
2275
- USTRING(str)->busy = 1;
2291
+ // icu_check_frozen(1, str);
2292
+ ++(USTRING(str)->busy);
2276
2293
  buf = icu_ustr_new(0, 0);
2277
2294
  pat = icu_reg_clone(pat);
2278
2295
  if(rb_block_given_p()) iter = 1;
@@ -2301,7 +2318,7 @@ ustr_gsub(argc, argv, str, bang, once)
2301
2318
  }
2302
2319
  while (icu_reg_find_next(pat) && !once);
2303
2320
  icu_ustr_concat(buf, icu_reg_get_tail(pat, end));
2304
- USTRING(str)->busy = 0;
2321
+ --(USTRING(str)->busy);
2305
2322
  if (bang) {
2306
2323
  icu_ustr_replace(str, buf);
2307
2324
  return str;
@@ -2325,7 +2342,7 @@ icu_ustr_gsub_bang(argc, argv, str)
2325
2342
  VALUE *argv;
2326
2343
  VALUE str;
2327
2344
  {
2328
- icu_check_frozen(str);
2345
+ icu_check_frozen(1, str);
2329
2346
  return ustr_gsub(argc, argv, str, 1, 0);
2330
2347
  }
2331
2348
 
@@ -2363,27 +2380,7 @@ icu_ustr_gsub(argc, argv, str)
2363
2380
 
2364
2381
 
2365
2382
  /*-------------*/
2366
- /* parsing */
2367
- extern VALUE icu_date_parse(UChar * str, int32_t str_len, char * locale, UChar * val, int32_t len);
2368
2383
 
2369
- /**
2370
- * call-seq:
2371
- * str.parse_date( locale, value)
2372
- *
2373
- * Parses given value, using +str+ as format pattern with respect to +locale+.
2374
- *
2375
- * "HH:mm:ss E dd/MM/yyyy".u.parse_date("en", "20:15:01 Fri 13/01/2006".u)) # => Time.local(2006,"jan",13,20,15,1)
2376
- *
2377
- */
2378
-
2379
- VALUE
2380
- icu_ustr_parse_date( str, locale, val)
2381
- VALUE str, locale, val;
2382
- {
2383
- Check_Type(locale, T_STRING);
2384
- Check_Class(val, rb_cUString);
2385
- return icu_date_parse(ICU_PTR(str), ICU_LEN(str), RSTRING(locale)->ptr, ICU_PTR(val), ICU_LEN(val));
2386
- }
2387
2384
 
2388
2385
  /**
2389
2386
  * call-seq:
@@ -2926,6 +2923,7 @@ mirroring ICU class hierarchy.
2926
2923
  /* comparisons */
2927
2924
  rb_define_method(rb_cUString, "<=>", icu_ustr_cmp_m, 1);
2928
2925
  rb_define_method(rb_cUString, "==", icu_ustr_equal, 1);
2926
+ rb_define_method(rb_cUString, "eql?", icu_ustr_equal, 1);
2929
2927
  rb_define_method(rb_cUString, "casecmp", icu_ustr_casecmp, 1);
2930
2928
  rb_define_singleton_method(rb_cUString, "strcoll", icu_ustr_coll, -1);
2931
2929
 
@@ -3019,7 +3017,6 @@ mirroring ICU class hierarchy.
3019
3017
  rb_define_alias( rb_cUString, "fmt", "format");
3020
3018
 
3021
3019
  /* parsing */
3022
- rb_define_method(rb_cUString, "parse_date", icu_ustr_parse_date, 2);
3023
3020
  rb_define_method(rb_cUString, "to_f", icu_ustr_parse_double, -1);
3024
3021
 
3025
3022
  /* transliteration */
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: icu4r
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.3.2006.01.26
7
- date: 2006-01-26 00:00:00 +02:00
6
+ version: 0.1.4.2006.05.29
7
+ date: 2006-05-29 00:00:00 +03:00
8
8
  summary: Ruby extension for Unicode support using ICU
9
9
  require_paths:
10
10
  - .
@@ -44,6 +44,8 @@ files:
44
44
  - tools/doc.sh
45
45
  - tools/km.rb
46
46
  - test/test_calendar.rb
47
+ - test/test_converter.rb
48
+ - test/test_collator.rb
47
49
  - test/test_ustring.rb
48
50
  - calendar.c
49
51
  - fmt.cpp
@@ -54,6 +56,8 @@ files:
54
56
  - uregex.c
55
57
  - uregex.h
56
58
  - ustring.c
59
+ - collator.c
60
+ - converter.c
57
61
  - README
58
62
  - MIT-LICENSE
59
63
  test_files: []