icu4r 0.1.3.2006.01.26 → 0.1.4.2006.05.29
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -3
- data/calendar.c +100 -40
- data/collator.c +233 -0
- data/converter.c +322 -0
- data/extconf.rb +2 -0
- data/fmt.cpp +54 -48
- data/icu4r.c +4 -0
- data/icu_common.h +1 -1
- data/test/test_calendar.rb +18 -4
- data/test/test_collator.rb +33 -0
- data/test/test_converter.rb +72 -0
- data/test/test_ustring.rb +139 -12
- data/tools/doc.sh +1 -1
- data/ubundle.c +19 -5
- data/uregex.c +49 -25
- data/ustring.c +46 -49
- metadata +6 -2
data/README
CHANGED
@@ -29,11 +29,14 @@ and provides following classes and functionality:
|
|
29
29
|
|
30
30
|
* UCalendar - date manipulation and timezone info.
|
31
31
|
|
32
|
+
* UConverter - codepage conversions API
|
33
|
+
|
34
|
+
* UCollator - locale-sensitive string comparison
|
35
|
+
|
32
36
|
== Install and usage
|
33
37
|
|
34
38
|
> ruby extconf.rb
|
35
|
-
> make
|
36
|
-
> ruby test/test_ustring.rb
|
39
|
+
> make && make check
|
37
40
|
> make install
|
38
41
|
|
39
42
|
Now, in your scripts just require 'icu4r'.
|
@@ -71,7 +74,7 @@ To build and use ICU4R you will need GCC and ICU v3.4 libraries[2].
|
|
71
74
|
gory details, but in short: locale dependent notion of character can be presented using
|
72
75
|
more than one codepoint - base letter and combining (accents) (also possible more than one!), and
|
73
76
|
each codepoint can require more than one codeunit to store (for UTF8 codeunit size is 8bit, though
|
74
|
-
some codepoints require up to
|
77
|
+
some codepoints require up to 4 bytes). So, UString has normalization and locale-dependent break
|
75
78
|
iterators.
|
76
79
|
|
77
80
|
6. Currently UString doesn't include Enumerable module.
|
data/calendar.c
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
extern VALUE rb_cUString;
|
6
6
|
extern VALUE icu_ustr_new(UChar * ptr, long len);
|
7
7
|
extern VALUE icu_ustr_new_set(UChar * ptr, long len, long capa);
|
8
|
-
static VALUE s_calendar_fields;
|
8
|
+
static VALUE s_calendar_fields, s_calendar_formats;
|
9
9
|
extern VALUE rb_cUCalendar;
|
10
10
|
#define UCALENDAR(obj) ((UCalendar *)DATA_PTR(obj))
|
11
11
|
/**
|
@@ -79,7 +79,7 @@ VALUE icu4r_cal_country_tz (VALUE obj, VALUE ctry)
|
|
79
79
|
* call-seq:
|
80
80
|
* UCalendar.default_tz => ustring
|
81
81
|
*
|
82
|
-
* Returns the default time zone
|
82
|
+
* Returns the default time zone name as UString.
|
83
83
|
*
|
84
84
|
* UCalendar.default_tz # "EET"
|
85
85
|
*
|
@@ -155,7 +155,10 @@ VALUE icu4r_cal_now(VALUE obj){
|
|
155
155
|
/* Free function registered with Data_Wrap_Struct for UCalendar objects:
 * closes the wrapped ICU calendar handle. */
void icu4r_cal_free(UCalendar * cal)
{
    ucal_close(cal);
}
|
158
|
-
|
158
|
+
/* Allocator for UCalendar: wraps a null calendar pointer together with
 * icu4r_cal_free; the real handle is stored into DATA_PTR later. */
static VALUE icu4r_cal_alloc(VALUE klass)
{
    UCalendar * unopened = 0;

    return Data_Wrap_Struct(klass, 0, icu4r_cal_free, unopened);
}
|
159
162
|
/**
|
160
163
|
* call-seq:
|
161
164
|
* UCalendar.new(zone_id = nil, locale = nil, traditional = false)
|
@@ -173,7 +176,6 @@ VALUE icu4r_cal_init (int argc, VALUE * argv, VALUE self)
|
|
173
176
|
UCalendarType c_type = UCAL_GREGORIAN;
|
174
177
|
int32_t n, zone_len =0 , locale_len =0;
|
175
178
|
UCalendar * calendar;
|
176
|
-
VALUE ret;
|
177
179
|
UErrorCode status = U_ZERO_ERROR;
|
178
180
|
n = rb_scan_args(argc, argv, "03", &zone, &loc, &cal_type);
|
179
181
|
if( n >= 1) {
|
@@ -193,8 +195,8 @@ VALUE icu4r_cal_init (int argc, VALUE * argv, VALUE self)
|
|
193
195
|
}
|
194
196
|
calendar = ucal_open(zone_id, zone_len, locale, c_type, &status);
|
195
197
|
ICU_RAISE(status);
|
196
|
-
|
197
|
-
return
|
198
|
+
DATA_PTR(self) = calendar;
|
199
|
+
return self;
|
198
200
|
}
|
199
201
|
|
200
202
|
int icu4r_get_cal_field_int(VALUE field)
|
@@ -391,9 +393,9 @@ VALUE icu4r_cal_in_daylight(VALUE obj)
|
|
391
393
|
|
392
394
|
/**
|
393
395
|
* call-seq:
|
394
|
-
* calendar.time_zone
|
396
|
+
* calendar.time_zone(locale = nil)
|
395
397
|
*
|
396
|
-
* Returns the TimeZone used
|
398
|
+
* Returns the name of the TimeZone used by this UCalendar. The name is returned in the requested locale, or in the default locale if none is given.
|
397
399
|
*/
|
398
400
|
VALUE icu4r_cal_get_tz (int argc, VALUE * argv, VALUE obj)
|
399
401
|
{
|
@@ -418,10 +420,25 @@ VALUE icu4r_cal_get_tz (int argc, VALUE * argv, VALUE obj)
|
|
418
420
|
return Qnil;
|
419
421
|
|
420
422
|
}
|
423
|
+
/*
 * Looks up +field+ in the s_calendar_formats hash and returns the stored
 * value as a C int. When the key is absent, a Ruby warning is emitted and
 * UDAT_DEFAULT is returned instead.
 */
int icu4r_get_cal_format_int(VALUE field)
{
    VALUE style = rb_hash_aref(s_calendar_formats, field);

    if (style != Qnil) {
        return NUM2INT(style);
    }

    /* Unknown key: report it and fall back to the default style. */
    rb_warn("no such format %s , using default", RSTRING(rb_obj_as_string(field))->ptr);
    return UDAT_DEFAULT;
}
|
421
433
|
/** call-seq:
|
422
434
|
* calendar.format(pattern = nil , locale = nil)
|
423
435
|
*
|
424
|
-
* Formats this calendar time using given pattern and locale. Returns UString or nil on failure
|
436
|
+
* Formats this calendar time using given pattern and locale. Returns UString or nil on failure.
|
437
|
+
* Valid value types for pattern are:
|
438
|
+
* nil - full format for date and time
|
439
|
+
* UString - specification of format, as defined in docs/FORMATTING
|
440
|
+
* Symbol - one of :short, :medium, :long, :full, :default, :none; sets the format for both date and time
|
441
|
+
* Hash - {:time => aSymbol, :date => aSymbol} - sets separate formats for date and time, valid symbols see above
|
425
442
|
*/
|
426
443
|
VALUE icu4r_cal_format(int argc, VALUE * argv, VALUE obj)
|
427
444
|
{
|
@@ -432,9 +449,7 @@ VALUE icu4r_cal_format(int argc, VALUE * argv, VALUE obj)
|
|
432
449
|
long capa = 0, pattern_len = 0;
|
433
450
|
char *locale = NULL;
|
434
451
|
VALUE loc, pat, ret = Qnil;
|
435
|
-
int n ;
|
436
|
-
|
437
|
-
|
452
|
+
int n , def_d_format = UDAT_FULL, def_t_format = UDAT_FULL;
|
438
453
|
|
439
454
|
n = rb_scan_args(argc, argv, "02", &pat, &loc);
|
440
455
|
if( n == 2) {
|
@@ -442,12 +457,23 @@ VALUE icu4r_cal_format(int argc, VALUE * argv, VALUE obj)
|
|
442
457
|
locale = RSTRING(loc)->ptr;
|
443
458
|
}
|
444
459
|
if (n >= 1 && pat != Qnil) {
|
445
|
-
|
446
|
-
|
447
|
-
|
460
|
+
switch(TYPE(pat)) {
|
461
|
+
case T_SYMBOL:
|
462
|
+
def_d_format = def_t_format = icu4r_get_cal_format_int(pat);
|
463
|
+
break;
|
464
|
+
case T_HASH:
|
465
|
+
def_d_format = icu4r_get_cal_format_int(rb_hash_aref(pat, ID2SYM(rb_intern("date"))));
|
466
|
+
def_t_format = icu4r_get_cal_format_int(rb_hash_aref(pat, ID2SYM(rb_intern("time"))));
|
467
|
+
break;
|
468
|
+
default:
|
469
|
+
Check_Class(pat, rb_cUString);
|
470
|
+
pattern = ICU_PTR(pat);
|
471
|
+
pattern_len = ICU_LEN(pat);
|
472
|
+
break;
|
473
|
+
}
|
448
474
|
}
|
449
475
|
|
450
|
-
format = udat_open(
|
476
|
+
format = udat_open(def_t_format, def_d_format, locale, NULL, 0, NULL, 0, &status);
|
451
477
|
if( pattern ) {
|
452
478
|
udat_applyPattern(format, 0, pattern, pattern_len);
|
453
479
|
}
|
@@ -490,36 +516,60 @@ extern VALUE icu4r_cal_clone(VALUE obj);
|
|
490
516
|
extern VALUE icu4r_cal_equal(VALUE obj, VALUE other);
|
491
517
|
|
492
518
|
/**
|
493
|
-
*
|
494
|
-
*
|
495
|
-
*
|
496
|
-
*
|
519
|
+
* call-seq:
|
520
|
+
* cal <=> other_cal => -1, 0, +1
|
521
|
+
*
|
522
|
+
* Comparison---Returns -1 if <i>cal</i> is before, 0 if
|
523
|
+
* <i>cal</i> is equal to, and +1 if <i>cal</i> is after
|
524
|
+
* <i>other_cal</i>.
|
497
525
|
*
|
498
|
-
*
|
526
|
+
* Value of calendar's milliseconds are compared.
|
499
527
|
*/
|
500
|
-
|
528
|
+
|
529
|
+
VALUE icu4r_cal_cmp (VALUE c1, VALUE c2)
{
    UErrorCode status = U_ZERO_ERROR;
    double lhs, rhs;
    int order;

    /* Both operands must be UCalendar instances before DATA_PTR is read. */
    Check_Class(c1, rb_cUCalendar);
    Check_Class(c2, rb_cUCalendar);

    lhs = ucal_getMillis(UCALENDAR(c1), &status);
    rhs = ucal_getMillis(UCALENDAR(c2), &status);
    ICU_RAISE(status);

    /* -1 when c1 is earlier, +1 when later, 0 otherwise. */
    order = (lhs > rhs) - (lhs < rhs);
    return INT2FIX(order);
}
|
542
|
+
/* parsing */
|
543
|
+
extern UCalendar * icu_date_parse(UChar * str, int32_t str_len, char * locale, UChar * val, int32_t len);
|
544
|
+
|
501
545
|
/**
|
502
|
-
* Document-method: <
|
503
|
-
*
|
504
546
|
* call-seq:
|
505
|
-
*
|
547
|
+
* UCalendar.parse( pattern, locale, value)
|
548
|
+
*
|
549
|
+
* Parses given value, using format pattern with respect to +locale+.
|
506
550
|
*
|
507
|
-
*
|
508
|
-
*/
|
509
|
-
extern VALUE icu4r_cal_before(VALUE obj, VALUE other);
|
510
|
-
/**
|
511
|
-
* Document-method: ==
|
551
|
+
* UCalendar.parse("HH:mm:ss E dd/MM/yyyy".u, "en", "20:15:01 Fri 13/01/2006".u) # => Time.local(2006,"jan",13,20,15,1)
|
512
552
|
*
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
553
|
+
*/
|
554
|
+
|
555
|
+
VALUE
|
556
|
+
icu4r_cal_parse( obj, str, locale, val)
|
557
|
+
VALUE obj, str, locale, val;
|
558
|
+
{
|
559
|
+
UCalendar * cal;
|
560
|
+
VALUE ret;
|
561
|
+
Check_Type(locale, T_STRING);
|
562
|
+
Check_Class(val, rb_cUString);
|
563
|
+
cal = icu_date_parse(ICU_PTR(str), ICU_LEN(str), RSTRING(locale)->ptr, ICU_PTR(val), ICU_LEN(val));
|
564
|
+
ret = Data_Wrap_Struct(obj, 0, icu4r_cal_free, cal);
|
565
|
+
return ret;
|
566
|
+
}
|
519
567
|
|
520
568
|
void initialize_calendar(void) {
|
521
569
|
|
522
570
|
rb_cUCalendar = rb_define_class("UCalendar", rb_cObject);
|
571
|
+
rb_define_alloc_func(rb_cUCalendar, icu4r_cal_alloc);
|
572
|
+
|
523
573
|
s_calendar_fields = rb_hash_new();
|
524
574
|
/* Valid symbols to use as field reference in UCalendar#[], UCalendar#[]=, UCalendar#add are:
|
525
575
|
:era , :year , :month , :week_of_year , :week_of_month , :date , :day_of_year , :day_of_week, :day_of_week_in_month,
|
@@ -544,6 +594,15 @@ rb_hash_aset(s_calendar_fields, ID2SYM(rb_intern("millisecond")), INT2NUM(UCAL_M
|
|
544
594
|
rb_hash_aset(s_calendar_fields, ID2SYM(rb_intern("zone_offset")), INT2NUM(UCAL_ZONE_OFFSET));
|
545
595
|
rb_hash_aset(s_calendar_fields, ID2SYM(rb_intern("dst_offset")), INT2NUM(UCAL_DST_OFFSET));
|
546
596
|
|
597
|
+
s_calendar_formats = rb_hash_new();
|
598
|
+
rb_define_const(rb_cUCalendar, "UCALENDAR_FORMATS", s_calendar_formats);
|
599
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("full")), INT2NUM(UDAT_FULL));
|
600
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("long")), INT2NUM(UDAT_LONG));
|
601
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("medium")), INT2NUM(UDAT_MEDIUM));
|
602
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("short")), INT2NUM(UDAT_SHORT));
|
603
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("default")), INT2NUM(UDAT_DEFAULT));
|
604
|
+
rb_hash_aset(s_calendar_formats, ID2SYM(rb_intern("none")), INT2NUM(UDAT_NONE));
|
605
|
+
|
547
606
|
|
548
607
|
rb_define_singleton_method(rb_cUCalendar, "now", icu4r_cal_now, 0);
|
549
608
|
|
@@ -552,8 +611,9 @@ rb_define_singleton_method(rb_cUCalendar, "default_tz", icu4r_cal_get_default_t
|
|
552
611
|
rb_define_singleton_method(rb_cUCalendar, "time_zones", icu4r_cal_all_tz, 0);
|
553
612
|
rb_define_singleton_method(rb_cUCalendar, "tz_for_country", icu4r_cal_country_tz, 1);
|
554
613
|
rb_define_singleton_method(rb_cUCalendar, "dst_savings", icu4r_cal_dst_savings, 1);
|
614
|
+
rb_define_singleton_method(rb_cUCalendar, "parse", icu4r_cal_parse, 3);
|
555
615
|
|
556
|
-
|
616
|
+
rb_define_method(rb_cUCalendar, "initialize", icu4r_cal_init, -1);
|
557
617
|
rb_define_method(rb_cUCalendar, "add", icu4r_cal_add, 2);
|
558
618
|
rb_define_method(rb_cUCalendar, "roll", icu4r_cal_roll, 2);
|
559
619
|
rb_define_method(rb_cUCalendar, "[]", icu4r_cal_aref, 1);
|
@@ -569,8 +629,8 @@ rb_define_method(rb_cUCalendar, "format", icu4r_cal_format,-1);
|
|
569
629
|
|
570
630
|
rb_define_method(rb_cUCalendar, "clone", icu4r_cal_clone,0);
|
571
631
|
rb_define_method(rb_cUCalendar, "eql?", icu4r_cal_equal,1);
|
572
|
-
rb_define_method(rb_cUCalendar, "
|
573
|
-
|
574
|
-
|
632
|
+
rb_define_method(rb_cUCalendar, "<=>", icu4r_cal_cmp,1);
|
633
|
+
|
634
|
+
rb_include_module(rb_cUCalendar, rb_mComparable);
|
575
635
|
|
576
636
|
}
|
data/collator.c
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
#include "icu_common.h"
|
2
|
+
extern VALUE rb_cUString;
|
3
|
+
extern VALUE rb_cUCollator;
|
4
|
+
extern int icu_collator_cmp (UCollator * collator, VALUE str1, VALUE str2) ;
|
5
|
+
|
6
|
+
/**
|
7
|
+
* Document-class: UCollator
|
8
|
+
*
|
9
|
+
* API for UCollator performs locale-sensitive string comparison. You use this service to build searching and
|
10
|
+
* sorting routines for natural language text.
|
11
|
+
*
|
12
|
+
* Attributes that collation service understands:
|
13
|
+
*
|
14
|
+
* UCOL_FRENCH_COLLATION Attribute for direction of secondary weights - used in French. UCOL_ON, UCOL_OFF
|
15
|
+
*
|
16
|
+
* UCOL_ALTERNATE_HANDLING Attribute for handling variable elements. UCOL_NON_IGNORABLE (default), UCOL_SHIFTED
|
17
|
+
*
|
18
|
+
* UCOL_CASE_FIRST Controls the ordering of upper and lower case letters.
|
19
|
+
* UCOL_OFF (default), UCOL_UPPER_FIRST, UCOL_LOWER_FIRST
|
20
|
+
*
|
21
|
+
* UCOL_CASE_LEVEL Controls whether an extra case level (positioned before the third level) is
|
22
|
+
* generated or not. UCOL_OFF (default), UCOL_ON
|
23
|
+
*
|
24
|
+
* UCOL_NORMALIZATION_MODE Controls whether the normalization check and necessary normalizations are performed.
|
25
|
+
* When set to UCOL_ON, an incremental check is performed to see whether the input data
|
26
|
+
* is in the FCD form. If the data is not in the FCD form, incremental NFD normalization
|
27
|
+
* is performed.
|
28
|
+
*
|
29
|
+
* UCOL_DECOMPOSITION_MODE An alias for UCOL_NORMALIZATION_MODE attribute
|
30
|
+
*
|
31
|
+
* UCOL_STRENGTH The strength attribute.
|
32
|
+
* Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL.
|
33
|
+
* The usual strength for most locales (except Japanese) is tertiary.
|
34
|
+
*
|
35
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE when turned on, this attribute positions Hiragana before all non-ignorables on
|
36
|
+
* quaternary level. This is a sneaky way to produce JIS sort order
|
37
|
+
* UCOL_NUMERIC_COLLATION when turned on, this attribute generates a collation key for the numeric value of
|
38
|
+
* substrings of digits. This is a way to get '100' to sort AFTER '2'.
|
39
|
+
*
|
40
|
+
* Attribute values:
|
41
|
+
*
|
42
|
+
* UCOL_DEFAULT accepted by most attributes
|
43
|
+
* UCOL_PRIMARY Primary collation strength
|
44
|
+
* UCOL_SECONDARY Secondary collation strength
|
45
|
+
* UCOL_TERTIARY Tertiary collation strength
|
46
|
+
* UCOL_DEFAULT_STRENGTH Default collation strength
|
47
|
+
* UCOL_QUATERNARY Quaternary collation strength
|
48
|
+
* UCOL_IDENTICAL Identical collation strength
|
49
|
+
* UCOL_OFF Turn the feature off - works for
|
50
|
+
* UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
51
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
52
|
+
*
|
53
|
+
* UCOL_ON Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL,
|
54
|
+
* UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE
|
55
|
+
*
|
56
|
+
* UCOL_SHIFTED Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted
|
57
|
+
* UCOL_NON_IGNORABLE Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable
|
58
|
+
* UCOL_LOWER_FIRST Valid for UCOL_CASE_FIRST - lower case sorts before upper case
|
59
|
+
* UCOL_UPPER_FIRST upper case sorts before lower case
|
60
|
+
**/
|
61
|
+
|
62
|
+
#define UCOLLATOR(obj) ((UCollator *)DATA_PTR(obj))
|
63
|
+
|
64
|
+
/* Free function registered with Data_Wrap_Struct for UCollator objects:
 * closes the wrapped ICU collator handle. */
void icu4r_col_free(UCollator * col)
{
    ucol_close(col);
}
|
68
|
+
/* Allocator for UCollator: wraps a null collator pointer together with
 * icu4r_col_free; the real handle is stored into DATA_PTR later. */
static VALUE icu4r_col_alloc(VALUE klass)
{
    UCollator * unopened = 0;

    return Data_Wrap_Struct(klass, 0, icu4r_col_free, unopened);
}
|
72
|
+
/**
|
73
|
+
* call-seq:
|
74
|
+
* col = UCollator.new(locale = nil)
|
75
|
+
*
|
76
|
+
* Open a UCollator for comparing strings for the given locale containing the required collation rules.
|
77
|
+
* Special values for locales can be passed in - if +nil+ is passed for the locale, the default locale
|
78
|
+
* collation rules will be used. If empty string ("") or "root" are passed, UCA rules will be used.
|
79
|
+
*/
|
80
|
+
VALUE icu4r_col_init(int argc, VALUE * argv, VALUE self)
|
81
|
+
{
|
82
|
+
UCollator * col;
|
83
|
+
UErrorCode status = U_ZERO_ERROR;
|
84
|
+
VALUE loc;
|
85
|
+
char * locale = NULL;
|
86
|
+
if( rb_scan_args(argc, argv, "01", &loc))
|
87
|
+
{
|
88
|
+
Check_Type(loc, T_STRING);
|
89
|
+
locale = RSTRING(loc)->ptr;
|
90
|
+
}
|
91
|
+
col = ucol_open(locale, &status);
|
92
|
+
ICU_RAISE(status);
|
93
|
+
DATA_PTR(self)=col;
|
94
|
+
return self;
|
95
|
+
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* call-seq:
|
99
|
+
* collator.strength
|
100
|
+
*
|
101
|
+
* Get the collation strength used in a UCollator. The strength influences how strings are compared.
|
102
|
+
**/
|
103
|
+
VALUE icu4r_col_get_strength(VALUE self)
|
104
|
+
{
|
105
|
+
return INT2NUM(ucol_getStrength(UCOLLATOR(self)));
|
106
|
+
}
|
107
|
+
|
108
|
+
/**
|
109
|
+
* call-seq:
|
110
|
+
* collator.strength = new_strength
|
111
|
+
*
|
112
|
+
* Sets the collation strength used in a UCollator. The strength influences how strings are compared.
|
113
|
+
**/
|
114
|
+
VALUE icu4r_col_set_strength(VALUE self, VALUE obj)
|
115
|
+
{
|
116
|
+
Check_Type(obj, T_FIXNUM);
|
117
|
+
ucol_setStrength(UCOLLATOR(self), FIX2INT(obj));
|
118
|
+
return Qnil;
|
119
|
+
}
|
120
|
+
|
121
|
+
/**
|
122
|
+
* call-seq:
|
123
|
+
* collator.get_attr(attribute)
|
124
|
+
* collator[attribute]
|
125
|
+
*
|
126
|
+
* Universal attribute setter. See above for valid attributes and their values
|
127
|
+
**/
|
128
|
+
VALUE icu4r_col_get_attr(VALUE self, VALUE obj)
|
129
|
+
{
|
130
|
+
UErrorCode status = U_ZERO_ERROR;
|
131
|
+
UColAttributeValue val;
|
132
|
+
Check_Type(obj, T_FIXNUM);
|
133
|
+
val = ucol_getAttribute(UCOLLATOR(self), FIX2INT(obj), &status);
|
134
|
+
ICU_RAISE(status);
|
135
|
+
return INT2FIX(val);
|
136
|
+
}
|
137
|
+
|
138
|
+
/**
|
139
|
+
* call-seq:
|
140
|
+
* collator.set_attr(attribute, value)
|
141
|
+
* collator[attribute]=value
|
142
|
+
*
|
143
|
+
* Universal attribute setter. See above for valid attributes and their values
|
144
|
+
**/
|
145
|
+
VALUE icu4r_col_set_attr(VALUE self, VALUE obj, VALUE new_val)
|
146
|
+
{
|
147
|
+
UErrorCode status = U_ZERO_ERROR;
|
148
|
+
Check_Type(obj, T_FIXNUM);
|
149
|
+
Check_Type(new_val, T_FIXNUM);
|
150
|
+
ucol_setAttribute(UCOLLATOR(self), FIX2INT(obj), FIX2INT(new_val), &status);
|
151
|
+
ICU_RAISE(status);
|
152
|
+
return Qnil;
|
153
|
+
}
|
154
|
+
/**
|
155
|
+
* call-seq:
|
156
|
+
* collator.strcoll(ustr1, ustr2)
|
157
|
+
*
|
158
|
+
* Compare two UString's. The strings will be compared using the options already specified.
|
159
|
+
**/
|
160
|
+
VALUE icu4r_col_strcoll(VALUE self, VALUE str1, VALUE str2)
|
161
|
+
{
|
162
|
+
Check_Class(str1, rb_cUString);
|
163
|
+
Check_Class(str2, rb_cUString);
|
164
|
+
return INT2FIX(icu_collator_cmp(UCOLLATOR(self), str1, str2));
|
165
|
+
}
|
166
|
+
/**
|
167
|
+
* call-seq:
|
168
|
+
* collator.sort_key(an_ustring) -> String
|
169
|
+
*
|
170
|
+
* Get a sort key for a string from a UCollator. Sort keys may be compared using strcmp.
|
171
|
+
**/
|
172
|
+
VALUE icu4r_col_sort_key(VALUE self, VALUE str)
|
173
|
+
{
|
174
|
+
int32_t needed , capa ;
|
175
|
+
char * buffer ;
|
176
|
+
VALUE ret;
|
177
|
+
Check_Class(str, rb_cUString);
|
178
|
+
capa = ICU_LEN(str);
|
179
|
+
buffer = ALLOC_N(char, capa);
|
180
|
+
needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, capa);
|
181
|
+
if(needed > capa){
|
182
|
+
REALLOC_N(buffer,char, needed);
|
183
|
+
needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, needed);
|
184
|
+
}
|
185
|
+
ret = rb_str_new(buffer, needed);
|
186
|
+
free(buffer);
|
187
|
+
return ret;
|
188
|
+
}
|
189
|
+
void initialize_collator()
|
190
|
+
{
|
191
|
+
rb_cUCollator = rb_define_class("UCollator", rb_cObject);
|
192
|
+
rb_define_alloc_func(rb_cUCollator, icu4r_col_alloc);
|
193
|
+
|
194
|
+
rb_define_method(rb_cUCollator, "initialize", icu4r_col_init, -1);
|
195
|
+
rb_define_method(rb_cUCollator, "strength", icu4r_col_get_strength, 0);
|
196
|
+
rb_define_method(rb_cUCollator, "strength=", icu4r_col_set_strength, 1);
|
197
|
+
rb_define_method(rb_cUCollator, "get_attr", icu4r_col_get_attr, 1);
|
198
|
+
rb_define_alias(rb_cUCollator, "[]", "get_attr");
|
199
|
+
rb_define_method(rb_cUCollator, "set_attr", icu4r_col_set_attr, 2);
|
200
|
+
rb_define_alias(rb_cUCollator, "[]=", "set_attr");
|
201
|
+
rb_define_method(rb_cUCollator, "strcoll", icu4r_col_strcoll, 2);
|
202
|
+
rb_define_method(rb_cUCollator, "sort_key",icu4r_col_sort_key, 1);
|
203
|
+
|
204
|
+
/* attributes */
|
205
|
+
rb_define_const(rb_cUCollator, "UCOL_FRENCH_COLLATION", INT2FIX(UCOL_FRENCH_COLLATION));
|
206
|
+
rb_define_const(rb_cUCollator, "UCOL_ALTERNATE_HANDLING", INT2FIX(UCOL_ALTERNATE_HANDLING));
|
207
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_FIRST", INT2FIX(UCOL_CASE_FIRST));
|
208
|
+
rb_define_const(rb_cUCollator, "UCOL_CASE_LEVEL", INT2FIX(UCOL_CASE_LEVEL));
|
209
|
+
rb_define_const(rb_cUCollator, "UCOL_NORMALIZATION_MODE", INT2FIX(UCOL_NORMALIZATION_MODE));
|
210
|
+
rb_define_const(rb_cUCollator, "UCOL_DECOMPOSITION_MODE", INT2FIX(UCOL_DECOMPOSITION_MODE));
|
211
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH", INT2FIX(UCOL_STRENGTH));
|
212
|
+
rb_define_const(rb_cUCollator, "UCOL_HIRAGANA_QUATERNARY_MODE", INT2FIX(UCOL_HIRAGANA_QUATERNARY_MODE));
|
213
|
+
rb_define_const(rb_cUCollator, "UCOL_NUMERIC_COLLATION", INT2FIX(UCOL_NUMERIC_COLLATION));
|
214
|
+
rb_define_const(rb_cUCollator, "UCOL_ATTRIBUTE_COUNT", INT2FIX(UCOL_ATTRIBUTE_COUNT));
|
215
|
+
|
216
|
+
/* attribute values */
|
217
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT", INT2FIX(UCOL_DEFAULT));
|
218
|
+
rb_define_const(rb_cUCollator, "UCOL_PRIMARY", INT2FIX(UCOL_PRIMARY));
|
219
|
+
rb_define_const(rb_cUCollator, "UCOL_SECONDARY", INT2FIX(UCOL_SECONDARY));
|
220
|
+
rb_define_const(rb_cUCollator, "UCOL_TERTIARY", INT2FIX(UCOL_TERTIARY));
|
221
|
+
rb_define_const(rb_cUCollator, "UCOL_DEFAULT_STRENGTH", INT2FIX(UCOL_DEFAULT_STRENGTH));
|
222
|
+
rb_define_const(rb_cUCollator, "UCOL_CE_STRENGTH_LIMIT", INT2FIX(UCOL_CE_STRENGTH_LIMIT));
|
223
|
+
rb_define_const(rb_cUCollator, "UCOL_QUATERNARY", INT2FIX(UCOL_QUATERNARY));
|
224
|
+
rb_define_const(rb_cUCollator, "UCOL_IDENTICAL", INT2FIX(UCOL_IDENTICAL));
|
225
|
+
rb_define_const(rb_cUCollator, "UCOL_STRENGTH_LIMIT", INT2FIX(UCOL_STRENGTH_LIMIT));
|
226
|
+
rb_define_const(rb_cUCollator, "UCOL_OFF", INT2FIX(UCOL_OFF));
|
227
|
+
rb_define_const(rb_cUCollator, "UCOL_ON", INT2FIX(UCOL_ON));
|
228
|
+
rb_define_const(rb_cUCollator, "UCOL_SHIFTED", INT2FIX(UCOL_SHIFTED));
|
229
|
+
rb_define_const(rb_cUCollator, "UCOL_NON_IGNORABLE", INT2FIX(UCOL_NON_IGNORABLE));
|
230
|
+
rb_define_const(rb_cUCollator, "UCOL_LOWER_FIRST", INT2FIX(UCOL_LOWER_FIRST));
|
231
|
+
rb_define_const(rb_cUCollator, "UCOL_UPPER_FIRST", INT2FIX(UCOL_UPPER_FIRST));
|
232
|
+
|
233
|
+
}
|