icu4r 0.1.3.2006.01.26 → 0.1.4.2006.05.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -3
- data/calendar.c +100 -40
- data/collator.c +233 -0
- data/converter.c +322 -0
- data/extconf.rb +2 -0
- data/fmt.cpp +54 -48
- data/icu4r.c +4 -0
- data/icu_common.h +1 -1
- data/test/test_calendar.rb +18 -4
- data/test/test_collator.rb +33 -0
- data/test/test_converter.rb +72 -0
- data/test/test_ustring.rb +139 -12
- data/tools/doc.sh +1 -1
- data/ubundle.c +19 -5
- data/uregex.c +49 -25
- data/ustring.c +46 -49
- metadata +6 -2
data/README
CHANGED
|
@@ -29,11 +29,14 @@ and provides following classes and functionality:
|
|
|
29
29
|
|
|
30
30
|
* UCalendar - date manipulation and timezone info.
|
|
31
31
|
|
|
32
|
+
* UConverter - codepage conversions API
|
|
33
|
+
|
|
34
|
+
* UCollator - locale-sensitive string comparison
|
|
35
|
+
|
|
32
36
|
== Install and usage
|
|
33
37
|
|
|
34
38
|
> ruby extconf.rb
|
|
35
|
-
> make
|
|
36
|
-
> ruby test/test_ustring.rb
|
|
39
|
+
> make && make check
|
|
37
40
|
> make install
|
|
38
41
|
|
|
39
42
|
Now, in your scripts just require 'icu4r'.
|
|
@@ -71,7 +74,7 @@ To build and use ICU4R you will need GCC and ICU v3.4 libraries[2].
|
|
|
71
74
|
gory details, but in short: locale dependent notion of character can be presented using
|
|
72
75
|
more than one codepoint - base letter and combining (accents) (also possible more than one!), and
|
|
73
76
|
each codepoint can require more than one codeunit to store (for UTF8 codeunit size is 8bit, though
|
|
74
|
-
some codepoints require up to
|
|
77
|
+
some codepoints require up to 4 bytes). So, UString has normalization and locale dependent break
|
|
75
78
|
iterators.
|
|
76
79
|
|
|
77
80
|
6. Currently UString doesn't include Enumerable module.
|
data/calendar.c
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
extern VALUE rb_cUString;
|
|
6
6
|
extern VALUE icu_ustr_new(UChar * ptr, long len);
|
|
7
7
|
extern VALUE icu_ustr_new_set(UChar * ptr, long len, long capa);
|
|
8
|
-
static VALUE s_calendar_fields;
|
|
8
|
+
static VALUE s_calendar_fields, s_calendar_formats;
|
|
9
9
|
extern VALUE rb_cUCalendar;
|
|
10
10
|
#define UCALENDAR(obj) ((UCalendar *)DATA_PTR(obj))
|
|
11
11
|
/**
|
|
@@ -79,7 +79,7 @@ VALUE icu4r_cal_country_tz (VALUE obj, VALUE ctry)
|
|
|
79
79
|
* call-seq:
|
|
80
80
|
* UCalendar.default_tz => ustring
|
|
81
81
|
*
|
|
82
|
-
* Returns the default time zone
|
|
82
|
+
* Returns the default time zone name as UString.
|
|
83
83
|
*
|
|
84
84
|
* UCalendar.default_tz # "EET"
|
|
85
85
|
*
|
|
@@ -155,7 +155,10 @@ VALUE icu4r_cal_now(VALUE obj){
|
|
|
155
155
|
void icu4r_cal_free(UCalendar * cal){
|
|
156
156
|
ucal_close(cal);
|
|
157
157
|
}
|
|
158
|
-
|
|
158
|
+
static VALUE icu4r_cal_alloc(VALUE klass)
|
|
159
|
+
{
|
|
160
|
+
return Data_Wrap_Struct(klass, 0, icu4r_cal_free, 0);
|
|
161
|
+
}
|
|
159
162
|
/**
|
|
160
163
|
* call-seq:
|
|
161
164
|
* UCalendar.new(zone_id = nil, locale = nil, traditional = false)
|
|
@@ -173,7 +176,6 @@ VALUE icu4r_cal_init (int argc, VALUE * argv, VALUE self)
|
|
|
173
176
|
UCalendarType c_type = UCAL_GREGORIAN;
|
|
174
177
|
int32_t n, zone_len =0 , locale_len =0;
|
|
175
178
|
UCalendar * calendar;
|
|
176
|
-
VALUE ret;
|
|
177
179
|
UErrorCode status = U_ZERO_ERROR;
|
|
178
180
|
n = rb_scan_args(argc, argv, "03", &zone, &loc, &cal_type);
|
|
179
181
|
if( n >= 1) {
|
|
@@ -193,8 +195,8 @@ VALUE icu4r_cal_init (int argc, VALUE * argv, VALUE self)
|
|
|
193
195
|
}
|
|
194
196
|
calendar = ucal_open(zone_id, zone_len, locale, c_type, &status);
|
|
195
197
|
ICU_RAISE(status);
|
|
196
|
-
|
|
197
|
-
return
|
|
198
|
+
DATA_PTR(self) = calendar;
|
|
199
|
+
return self;
|
|
198
200
|
}
|
|
199
201
|
|
|
200
202
|
int icu4r_get_cal_field_int(VALUE field)
|
|
@@ -391,9 +393,9 @@ VALUE icu4r_cal_in_daylight(VALUE obj)
|
|
|
391
393
|
|
|
392
394
|
/**
|
|
393
395
|
* call-seq:
|
|
394
|
-
* calendar.time_zone
|
|
396
|
+
* calendar.time_zone(locale = nil)
|
|
395
397
|
*
|
|
396
|
-
* Returns the TimeZone used
|
|
398
|
+
* Returns the TimeZone name used in this UCalendar. Name is returned in requested locale or default, if not set.
|
|
397
399
|
*/
|
|
398
400
|
VALUE icu4r_cal_get_tz (int argc, VALUE * argv, VALUE obj)
|
|
399
401
|
{
|
|
@@ -418,10 +420,25 @@ VALUE icu4r_cal_get_tz (int argc, VALUE * argv, VALUE obj)
|
|
|
418
420
|
return Qnil;
|
|
419
421
|
|
|
420
422
|
}
|
|
423
|
+
int icu4r_get_cal_format_int(VALUE field)
|
|
424
|
+
{
|
|
425
|
+
VALUE field_const;
|
|
426
|
+
field_const = rb_hash_aref(s_calendar_formats, field);
|
|
427
|
+
if(field_const == Qnil) {
|
|
428
|
+
rb_warn("no such format %s , using default", RSTRING(rb_obj_as_string(field))->ptr);
|
|
429
|
+
return UDAT_DEFAULT;
|
|
430
|
+
}
|
|
431
|
+
return NUM2INT(field_const);
|
|
432
|
+
}
|
|
421
433
|
/** call-seq:
|
|
422
434
|
* calendar.format(pattern = nil , locale = nil)
|
|
423
435
|
*
|
|
424
|
-
* Formats this calendar time using given pattern and locale. Returns UString or nil on failure
|
|
436
|
+
* Formats this calendar time using given pattern and locale. Returns UString or nil on failure.
|
|
437
|
+
* Valid value types for pattern are:
|
|
438
|
+
* nil - full format for date and time
|
|
439
|
+
* UString - specification of format, as defined in docs/FORMATTING
|
|
440
|
+
* Symbol - one of :short, :medium, :long, :full, :default, :none, sets format for both date and time
|
|
441
|
+
* Hash - {:time => aSymbol, :date => aSymbol} - sets separate formats for date and time, valid symbols see above
|
|
425
442
|
*/
|
|
426
443
|
VALUE icu4r_cal_format(int argc, VALUE * argv, VALUE obj)
|
|
427
444
|
{
|
|
@@ -432,9 +449,7 @@ VALUE icu4r_cal_format(int argc, VALUE * argv, VALUE obj)
|
|
|
432
449
|
long capa = 0, pattern_len = 0;
|
|
433
450
|
char *locale = NULL;
|
|
434
451
|
VALUE loc, pat, ret = Qnil;
|
|
435
|
-
int n ;
|
|
436
|
-
|
|
437
|
-
|
|
452
|
+
int n , def_d_format = UDAT_FULL, def_t_format = UDAT_FULL;
|
|
438
453
|
|
|
439
454
|
n = rb_scan_args(argc, argv, "02", &pat, &loc);
|
|
440
455
|
if( n == 2) {
|
|
@@ -442,12 +457,23 @@ VALUE icu4r_cal_format(int argc, VALUE * argv, VALUE obj)
|
|
|
442
457
|
locale = RSTRING(loc)->ptr;
|
|
443
458
|
}
|
|
444
459
|
if (n >= 1 && pat != Qnil) {
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
460
|
+
switch(TYPE(pat)) {
|
|
461
|
+
case T_SYMBOL:
|
|
462
|
+
def_d_format = def_t_format = icu4r_get_cal_format_int(pat);
|
|
463
|
+
break;
|
|
464
|
+
case T_HASH:
|
|
465
|
+
def_d_format = icu4r_get_cal_format_int(rb_hash_aref(pat, ID2SYM(rb_intern("date"))));
|
|
466
|
+
def_t_format = icu4r_get_cal_format_int(rb_hash_aref(pat, ID2SYM(rb_intern("time"))));
|
|
467
|
+
break;
|
|
468
|
+
default:
|
|
469
|
+
Check_Class(pat, rb_cUString);
|
|
470
|
+
pattern = ICU_PTR(pat);
|
|
471
|
+
pattern_len = ICU_LEN(pat);
|
|
472
|
+
break;
|
|
473
|
+
}
|
|
448
474
|
}
|
|
449
475
|
|
|
450
|
-
format = udat_open(
|
|
476
|
+
format = udat_open(def_t_format, def_d_format, locale, NULL, 0, NULL, 0, &status);
|
|
451
477
|
if( pattern ) {
|
|
452
478
|
udat_applyPattern(format, 0, pattern, pattern_len);
|
|
453
479
|
}
|
|
@@ -490,36 +516,60 @@ extern VALUE icu4r_cal_clone(VALUE obj);
|
|
|
490
516
|
extern VALUE icu4r_cal_equal(VALUE obj, VALUE other);
|
|
491
517
|
|
|
492
518
|
/**
|
|
493
|
-
*
|
|
494
|
-
*
|
|
495
|
-
*
|
|
496
|
-
*
|
|
519
|
+
* call-seq:
|
|
520
|
+
* cal <=> other_cal => -1, 0, +1
|
|
521
|
+
*
|
|
522
|
+
* Comparison---Returns -1 if <i>cal</i> is earlier than, 0 if
|
|
523
|
+
* <i>cal</i> is equal to, and +1 if <i>cal</i> is later than
|
|
524
|
+
* <i>other_cal</i>.
|
|
497
525
|
*
|
|
498
|
-
*
|
|
526
|
+
* The calendars' millisecond values are compared.
|
|
499
527
|
*/
|
|
500
|
-
|
|
528
|
+
|
|
529
|
+
VALUE icu4r_cal_cmp (VALUE c1, VALUE c2)
|
|
530
|
+
{
|
|
531
|
+
UErrorCode status = U_ZERO_ERROR;
|
|
532
|
+
double millis1, millis2;
|
|
533
|
+
Check_Class(c1, rb_cUCalendar);
|
|
534
|
+
Check_Class(c2, rb_cUCalendar);
|
|
535
|
+
millis1 = ucal_getMillis(UCALENDAR(c1), &status);
|
|
536
|
+
millis2 = ucal_getMillis(UCALENDAR(c2), &status);
|
|
537
|
+
ICU_RAISE(status);
|
|
538
|
+
if(millis1 < millis2) return INT2FIX(-1);
|
|
539
|
+
if(millis1 > millis2) return INT2FIX(1);
|
|
540
|
+
return INT2FIX(0);
|
|
541
|
+
}
|
|
542
|
+
/* parsing */
|
|
543
|
+
extern UCalendar * icu_date_parse(UChar * str, int32_t str_len, char * locale, UChar * val, int32_t len);
|
|
544
|
+
|
|
501
545
|
/**
|
|
502
|
-
* Document-method: <
|
|
503
|
-
*
|
|
504
546
|
* call-seq:
|
|
505
|
-
*
|
|
547
|
+
* UCalendar.parse( pattern, locale, value)
|
|
548
|
+
*
|
|
549
|
+
* Parses given value, using format pattern with respect to +locale+.
|
|
506
550
|
*
|
|
507
|
-
*
|
|
508
|
-
*/
|
|
509
|
-
extern VALUE icu4r_cal_before(VALUE obj, VALUE other);
|
|
510
|
-
/**
|
|
511
|
-
* Document-method: ==
|
|
551
|
+
* UCalendar.parse("HH:mm:ss E dd/MM/yyyy".u, "en", "20:15:01 Fri 13/01/2006".u) # => Time.local(2006,"jan",13,20,15,1)
|
|
512
552
|
*
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
553
|
+
*/
|
|
554
|
+
|
|
555
|
+
VALUE
|
|
556
|
+
icu4r_cal_parse( obj, str, locale, val)
|
|
557
|
+
VALUE obj, str, locale, val;
|
|
558
|
+
{
|
|
559
|
+
UCalendar * cal;
|
|
560
|
+
VALUE ret;
|
|
561
|
+
Check_Type(locale, T_STRING);
|
|
562
|
+
Check_Class(val, rb_cUString);
|
|
563
|
+
cal = icu_date_parse(ICU_PTR(str), ICU_LEN(str), RSTRING(locale)->ptr, ICU_PTR(val), ICU_LEN(val));
|
|
564
|
+
ret = Data_Wrap_Struct(obj, 0, icu4r_cal_free, cal);
|
|
565
|
+
return ret;
|
|
566
|
+
}
|
|
519
567
|
|
|
520
568
|
void initialize_calendar(void) {
|
|
521
569
|
|
|
522
570
|
rb_cUCalendar = rb_define_class("UCalendar", rb_cObject);
|
|
571
|
+
rb_define_alloc_func(rb_cUCalendar, icu4r_cal_alloc);
|
|
572
|
+
|
|
523
573
|
s_calendar_fields = rb_hash_new();
|
|
524
574
|
/* Valid symbols to use as field reference in UCalendar#[], UCalendar#[]=, UCalendar#add are:
|
|
525
575
|
:era , :year , :month , :week_of_year , :week_of_month , :date , :day_of_year , :day_of_week, :day_of_week_in_month,
|
|
@@ -544,6 +594,15 @@ rb_hash_aset(s_calendar_fields, ID2SYM(rb_intern("millisecond")), INT2NUM(UCAL_M
|
|
|
544
594
|
rb_hash_aset(s_calendar_fields, ID2SYM(rb_intern("zone_offset")), INT2NUM(UCAL_ZONE_OFFSET));
|
|
545
595
|
rb_hash_aset(s_calendar_fields, ID2SYM(rb_intern("dst_offset")), INT2NUM(UCAL_DST_OFFSET));
|
|
546
596
|
|
|
597
|
+
s_calendar_formats = rb_hash_new();
|
|
598
|
+
rb_define_const(rb_cUCalendar, "UCALENDAR_FORMATS", s_calendar_formats);
|
|
599
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("full")), INT2NUM(UDAT_FULL));
|
|
600
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("long")), INT2NUM(UDAT_LONG));
|
|
601
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("medium")), INT2NUM(UDAT_MEDIUM));
|
|
602
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("short")), INT2NUM(UDAT_SHORT));
|
|
603
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("default")), INT2NUM(UDAT_DEFAULT));
|
|
604
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("none")), INT2NUM(UDAT_NONE));
|
|
605
|
+
|
|
547
606
|
|
|
548
607
|
rb_define_singleton_method(rb_cUCalendar, "now", icu4r_cal_now, 0);
|
|
549
608
|
|
|
@@ -552,8 +611,9 @@ rb_define_singleton_method(rb_cUCalendar, "default_tz", icu4r_cal_get_default_t
|
|
|
552
611
|
rb_define_singleton_method(rb_cUCalendar, "time_zones", icu4r_cal_all_tz, 0);
|
|
553
612
|
rb_define_singleton_method(rb_cUCalendar, "tz_for_country", icu4r_cal_country_tz, 1);
|
|
554
613
|
rb_define_singleton_method(rb_cUCalendar, "dst_savings", icu4r_cal_dst_savings, 1);
|
|
614
|
+
rb_define_singleton_method(rb_cUCalendar, "parse", icu4r_cal_parse, 3);
|
|
555
615
|
|
|
556
|
-
|
|
616
|
+
rb_define_method(rb_cUCalendar, "initialize", icu4r_cal_init, -1);
|
|
557
617
|
rb_define_method(rb_cUCalendar, "add", icu4r_cal_add, 2);
|
|
558
618
|
rb_define_method(rb_cUCalendar, "roll", icu4r_cal_roll, 2);
|
|
559
619
|
rb_define_method(rb_cUCalendar, "[]", icu4r_cal_aref, 1);
|
|
@@ -569,8 +629,8 @@ rb_define_method(rb_cUCalendar, "format", icu4r_cal_format,-1);
|
|
|
569
629
|
|
|
570
630
|
rb_define_method(rb_cUCalendar, "clone", icu4r_cal_clone,0);
|
|
571
631
|
rb_define_method(rb_cUCalendar, "eql?", icu4r_cal_equal,1);
|
|
572
|
-
rb_define_method(rb_cUCalendar, "
|
|
573
|
-
|
|
574
|
-
|
|
632
|
+
rb_define_method(rb_cUCalendar, "<=>", icu4r_cal_cmp,1);
|
|
633
|
+
|
|
634
|
+
rb_include_module(rb_cUCalendar, rb_mComparable);
|
|
575
635
|
|
|
576
636
|
}
|
data/collator.c
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
#include "icu_common.h"
|
|
2
|
+
extern VALUE rb_cUString;
|
|
3
|
+
extern VALUE rb_cUCollator;
|
|
4
|
+
extern int icu_collator_cmp (UCollator * collator, VALUE str1, VALUE str2) ;
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Document-class: UCollator
|
|
8
|
+
*
|
|
9
|
+
* API for UCollator performs locale-sensitive string comparison. You use this service to build searching and
|
|
10
|
+
* sorting routines for natural language text.
|
|
11
|
+
*
|
|
12
|
+
* Attributes that collation service understands:
|
|
13
|
+
*
|
|
14
|
+
* UCOL_FRENCH_COLLATION Attribute for direction of secondary weights - used in French. UCOL_ON, UCOL_OFF
|
|
15
|
+
*
|
|
16
|
+
* UCOL_ALTERNATE_HANDLING Attribute for handling variable elements. UCOL_NON_IGNORABLE (default), UCOL_SHIFTED
|
|
17
|
+
*
|
|
18
|
+
* UCOL_CASE_FIRST Controls the ordering of upper and lower case letters.
|
|
19
|
+
* UCOL_OFF (default), UCOL_UPPER_FIRST, UCOL_LOWER_FIRST
|
|
20
|
+
*
|
|
21
|
+
* UCOL_CASE_LEVEL Controls whether an extra case level (positioned before the third level) is
|
|
22
|
+
* generated or not. UCOL_OFF (default), UCOL_ON
|
|
23
|
+
*
|
|
24
|
+
* UCOL_NORMALIZATION_MODE Controls whether the normalization check and necessary normalizations are performed.
|
|
25
|
+
* When set to UCOL_ON, an incremental check is performed to see whether the input data
|
|
26
|
+
* is in the FCD form. If the data is not in the FCD form, incremental NFD normalization
|
|
27
|
+
* is performed.
|
|
28
|
+
*
|
|
29
|
+
* UCOL_DECOMPOSITION_MODE An alias for UCOL_NORMALIZATION_MODE attribute
|
|
30
|
+
*
|
|
31
|
+
* UCOL_STRENGTH The strength attribute.
|
|
32
|
+
* Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL.
|
|
33
|
+
* The usual strength for most locales (except Japanese) is tertiary.
|
|
34
|
+
*
|
|
35
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE when turned on, this attribute positions Hiragana before all non-ignorables on
|
|
36
|
+
* quaternary level. This is a sneaky way to produce JIS sort order.
|
|
37
|
+
* UCOL_NUMERIC_COLLATION when turned on, this attribute generates a collation key for the numeric value of
|
|
38
|
+
* substrings of digits. This is a way to get '100' to sort AFTER '2'.
|
|
39
|
+
*
|
|
40
|
+
* Attribute values:
|
|
41
|
+
*
|
|
42
|
+
* UCOL_DEFAULT accepted by most attributes
|
|
43
|
+
* UCOL_PRIMARY Primary collation strength
|
|
44
|
+
* UCOL_SECONDARY Secondary collation strength
|
|
45
|
+
* UCOL_TERTIARY Tertiary collation strength
|
|
46
|
+
* UCOL_DEFAULT_STRENGTH Default collation strength
|
|
47
|
+
* UCOL_QUATERNARY Quaternary collation strength
|
|
48
|
+
* UCOL_IDENTICAL Identical collation strength
|
|
49
|
+
* UCOL_OFF Turn the feature off - works for
|
|
50
|
+
* UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
|
51
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
|
52
|
+
*
|
|
53
|
+
* UCOL_ON Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
|
54
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
|
55
|
+
*
|
|
56
|
+
* UCOL_SHIFTED Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted
|
|
57
|
+
* UCOL_NON_IGNORABLE Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable
|
|
58
|
+
* UCOL_LOWER_FIRST Valid for UCOL_CASE_FIRST - lower case sorts before upper case
|
|
59
|
+
* UCOL_UPPER_FIRST upper case sorts before lower case
|
|
60
|
+
**/
|
|
61
|
+
|
|
62
|
+
#define UCOLLATOR(obj) ((UCollator *)DATA_PTR(obj))
|
|
63
|
+
|
|
64
|
+
void icu4r_col_free(UCollator * col)
|
|
65
|
+
{
|
|
66
|
+
ucol_close(col);
|
|
67
|
+
}
|
|
68
|
+
static VALUE icu4r_col_alloc(VALUE klass)
|
|
69
|
+
{
|
|
70
|
+
return Data_Wrap_Struct(klass, 0, icu4r_col_free, 0);
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* call-seq:
|
|
74
|
+
* col = UCollator.new(locale = nil)
|
|
75
|
+
*
|
|
76
|
+
* Open a UCollator for comparing strings for the given locale containing the required collation rules.
|
|
77
|
+
* Special values for locales can be passed in - if +nil+ is passed for the locale, the default locale
|
|
78
|
+
* collation rules will be used. If empty string ("") or "root" are passed, UCA rules will be used.
|
|
79
|
+
*/
|
|
80
|
+
VALUE icu4r_col_init(int argc, VALUE * argv, VALUE self)
|
|
81
|
+
{
|
|
82
|
+
UCollator * col;
|
|
83
|
+
UErrorCode status = U_ZERO_ERROR;
|
|
84
|
+
VALUE loc;
|
|
85
|
+
char * locale = NULL;
|
|
86
|
+
if( rb_scan_args(argc, argv, "01", &loc))
|
|
87
|
+
{
|
|
88
|
+
Check_Type(loc, T_STRING);
|
|
89
|
+
locale = RSTRING(loc)->ptr;
|
|
90
|
+
}
|
|
91
|
+
col = ucol_open(locale, &status);
|
|
92
|
+
ICU_RAISE(status);
|
|
93
|
+
DATA_PTR(self)=col;
|
|
94
|
+
return self;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* call-seq:
|
|
99
|
+
* collator.strength
|
|
100
|
+
*
|
|
101
|
+
* Get the collation strength used in a UCollator. The strength influences how strings are compared.
|
|
102
|
+
**/
|
|
103
|
+
VALUE icu4r_col_get_strength(VALUE self)
|
|
104
|
+
{
|
|
105
|
+
return INT2NUM(ucol_getStrength(UCOLLATOR(self)));
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* call-seq:
|
|
110
|
+
* collator.strength = new_strength
|
|
111
|
+
*
|
|
112
|
+
* Sets the collation strength used in a UCollator. The strength influences how strings are compared.
|
|
113
|
+
**/
|
|
114
|
+
VALUE icu4r_col_set_strength(VALUE self, VALUE obj)
|
|
115
|
+
{
|
|
116
|
+
Check_Type(obj, T_FIXNUM);
|
|
117
|
+
ucol_setStrength(UCOLLATOR(self), FIX2INT(obj));
|
|
118
|
+
return Qnil;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* call-seq:
|
|
123
|
+
* collator.get_attr(attribute)
|
|
124
|
+
* collator[attribute]
|
|
125
|
+
*
|
|
126
|
+
* Universal attribute getter. See above for valid attributes and their values
|
|
127
|
+
**/
|
|
128
|
+
VALUE icu4r_col_get_attr(VALUE self, VALUE obj)
|
|
129
|
+
{
|
|
130
|
+
UErrorCode status = U_ZERO_ERROR;
|
|
131
|
+
UColAttributeValue val;
|
|
132
|
+
Check_Type(obj, T_FIXNUM);
|
|
133
|
+
val = ucol_getAttribute(UCOLLATOR(self), FIX2INT(obj), &status);
|
|
134
|
+
ICU_RAISE(status);
|
|
135
|
+
return INT2FIX(val);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* call-seq:
|
|
140
|
+
* collator.set_attr(attribute, value)
|
|
141
|
+
* collator[attribute]=value
|
|
142
|
+
*
|
|
143
|
+
* Universal attribute setter. See above for valid attributes and their values
|
|
144
|
+
**/
|
|
145
|
+
VALUE icu4r_col_set_attr(VALUE self, VALUE obj, VALUE new_val)
|
|
146
|
+
{
|
|
147
|
+
UErrorCode status = U_ZERO_ERROR;
|
|
148
|
+
Check_Type(obj, T_FIXNUM);
|
|
149
|
+
Check_Type(new_val, T_FIXNUM);
|
|
150
|
+
ucol_setAttribute(UCOLLATOR(self), FIX2INT(obj), FIX2INT(new_val), &status);
|
|
151
|
+
ICU_RAISE(status);
|
|
152
|
+
return Qnil;
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* call-seq:
|
|
156
|
+
* collator.strcoll(ustr1, ustr2)
|
|
157
|
+
*
|
|
158
|
+
* Compare two UString's. The strings will be compared using the options already specified.
|
|
159
|
+
**/
|
|
160
|
+
VALUE icu4r_col_strcoll(VALUE self, VALUE str1, VALUE str2)
|
|
161
|
+
{
|
|
162
|
+
Check_Class(str1, rb_cUString);
|
|
163
|
+
Check_Class(str2, rb_cUString);
|
|
164
|
+
return INT2FIX(icu_collator_cmp(UCOLLATOR(self), str1, str2));
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* call-seq:
|
|
168
|
+
* collator.sort_key(an_ustring) -> String
|
|
169
|
+
*
|
|
170
|
+
* Get a sort key for a string from a UCollator. Sort keys may be compared using strcmp.
|
|
171
|
+
**/
|
|
172
|
+
VALUE icu4r_col_sort_key(VALUE self, VALUE str)
|
|
173
|
+
{
|
|
174
|
+
int32_t needed , capa ;
|
|
175
|
+
char * buffer ;
|
|
176
|
+
VALUE ret;
|
|
177
|
+
Check_Class(str, rb_cUString);
|
|
178
|
+
capa = ICU_LEN(str);
|
|
179
|
+
buffer = ALLOC_N(char, capa);
|
|
180
|
+
needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, capa);
|
|
181
|
+
if(needed > capa){
|
|
182
|
+
REALLOC_N(buffer,char, needed);
|
|
183
|
+
needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, needed);
|
|
184
|
+
}
|
|
185
|
+
ret = rb_str_new(buffer, needed);
|
|
186
|
+
free(buffer);
|
|
187
|
+
return ret;
|
|
188
|
+
}
|
|
189
|
+
void initialize_collator()
|
|
190
|
+
{
|
|
191
|
+
rb_cUCollator = rb_define_class("UCollator", rb_cObject);
|
|
192
|
+
rb_define_alloc_func(rb_cUCollator, icu4r_col_alloc);
|
|
193
|
+
|
|
194
|
+
rb_define_method(rb_cUCollator, "initialize", icu4r_col_init, -1);
|
|
195
|
+
rb_define_method(rb_cUCollator, "strength", icu4r_col_get_strength, 0);
|
|
196
|
+
rb_define_method(rb_cUCollator, "strength=", icu4r_col_set_strength, 1);
|
|
197
|
+
rb_define_method(rb_cUCollator, "get_attr", icu4r_col_get_attr, 1);
|
|
198
|
+
rb_define_alias(rb_cUCollator, "[]", "get_attr");
|
|
199
|
+
rb_define_method(rb_cUCollator, "set_attr", icu4r_col_set_attr, 2);
|
|
200
|
+
rb_define_alias(rb_cUCollator, "[]=", "set_attr");
|
|
201
|
+
rb_define_method(rb_cUCollator, "strcoll", icu4r_col_strcoll, 2);
|
|
202
|
+
rb_define_method(rb_cUCollator, "sort_key",icu4r_col_sort_key, 1);
|
|
203
|
+
|
|
204
|
+
/* attributes */
|
|
205
|
+
rb_define_const(rb_cUCollator, "UCOL_FRENCH_COLLATION", INT2FIX(UCOL_FRENCH_COLLATION));
|
|
206
|
+
rb_define_const(rb_cUCollator, "UCOL_ALTERNATE_HANDLING", INT2FIX(UCOL_ALTERNATE_HANDLING));
|
|
207
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_FIRST", INT2FIX(UCOL_CASE_FIRST));
|
|
208
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_LEVEL", INT2FIX(UCOL_CASE_LEVEL));
|
|
209
|
+
rb_define_const(rb_cUCollator, "UCOL_NORMALIZATION_MODE", INT2FIX(UCOL_NORMALIZATION_MODE));
|
|
210
|
+
rb_define_const(rb_cUCollator, "UCOL_DECOMPOSITION_MODE", INT2FIX(UCOL_DECOMPOSITION_MODE));
|
|
211
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH", INT2FIX(UCOL_STRENGTH));
|
|
212
|
+
rb_define_const(rb_cUCollator, "UCOL_HIRAGANA_QUATERNARY_MODE", INT2FIX(UCOL_HIRAGANA_QUATERNARY_MODE));
|
|
213
|
+
rb_define_const(rb_cUCollator, "UCOL_NUMERIC_COLLATION", INT2FIX(UCOL_NUMERIC_COLLATION));
|
|
214
|
+
rb_define_const(rb_cUCollator, "UCOL_ATTRIBUTE_COUNT", INT2FIX(UCOL_ATTRIBUTE_COUNT));
|
|
215
|
+
|
|
216
|
+
/* attribute values */
|
|
217
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT", INT2FIX(UCOL_DEFAULT));
|
|
218
|
+
rb_define_const(rb_cUCollator, "UCOL_PRIMARY", INT2FIX(UCOL_PRIMARY));
|
|
219
|
+
rb_define_const(rb_cUCollator, "UCOL_SECONDARY", INT2FIX(UCOL_SECONDARY));
|
|
220
|
+
rb_define_const(rb_cUCollator, "UCOL_TERTIARY", INT2FIX(UCOL_TERTIARY));
|
|
221
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT_STRENGTH", INT2FIX(UCOL_DEFAULT_STRENGTH));
|
|
222
|
+
rb_define_const(rb_cUCollator, "UCOL_CE_STRENGTH_LIMIT", INT2FIX(UCOL_CE_STRENGTH_LIMIT));
|
|
223
|
+
rb_define_const(rb_cUCollator, "UCOL_QUATERNARY", INT2FIX(UCOL_QUATERNARY));
|
|
224
|
+
rb_define_const(rb_cUCollator, "UCOL_IDENTICAL", INT2FIX(UCOL_IDENTICAL));
|
|
225
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH_LIMIT", INT2FIX(UCOL_STRENGTH_LIMIT));
|
|
226
|
+
rb_define_const(rb_cUCollator, "UCOL_OFF", INT2FIX(UCOL_OFF));
|
|
227
|
+
rb_define_const(rb_cUCollator, "UCOL_ON", INT2FIX(UCOL_ON));
|
|
228
|
+
rb_define_const(rb_cUCollator, "UCOL_SHIFTED", INT2FIX(UCOL_SHIFTED));
|
|
229
|
+
rb_define_const(rb_cUCollator, "UCOL_NON_IGNORABLE", INT2FIX(UCOL_NON_IGNORABLE));
|
|
230
|
+
rb_define_const(rb_cUCollator, "UCOL_LOWER_FIRST", INT2FIX(UCOL_LOWER_FIRST));
|
|
231
|
+
rb_define_const(rb_cUCollator, "UCOL_UPPER_FIRST", INT2FIX(UCOL_UPPER_FIRST));
|
|
232
|
+
|
|
233
|
+
}
|