icu4x 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,132 @@
1
+ use crate::helpers;
2
+ use icu::locale::fallback::LocaleFallbacker;
3
+ use icu_provider_adapters::fallback::LocaleFallbackProvider;
4
+ use icu_provider_blob::BlobDataProvider;
5
+ use magnus::{
6
+ Error, RClass, RHash, RModule, Ruby, Symbol, TryConvert, Value, function, prelude::*,
7
+ value::ReprValue,
8
+ };
9
+ use std::fs;
10
+ use std::path::PathBuf;
11
+
12
+ /// Ruby wrapper for ICU4X DataProvider with locale fallback support
13
+ ///
14
+ /// This provider loads data from a blob file and provides locale fallback
15
+ /// automatically. When data for a specific locale is not found, it will
16
+ /// fallback through the locale hierarchy (e.g., ja-JP -> ja -> und).
17
+ ///
18
+ /// # Safety
19
+ /// We manually implement Send because:
20
+ /// 1. Ruby's GVL (Global VM Lock) ensures only one thread executes Ruby code at a time
21
+ /// 2. The provider is only accessed through Ruby method calls
22
+ /// 3. We never share the provider across threads in Rust code
23
+ #[magnus::wrap(class = "ICU4X::DataProvider", free_immediately, size)]
24
+ pub struct DataProvider {
25
+ pub(crate) inner: LocaleFallbackProvider<BlobDataProvider>,
26
+ }
27
+
28
+ // SAFETY: Ruby's GVL protects access to this type. The provider is only
29
+ // accessed through Ruby method calls, which are serialized by the GVL.
30
+ unsafe impl Send for DataProvider {}
31
+
32
+ impl DataProvider {
33
+ /// Create a DataProvider from a blob file with locale fallback support
34
+ ///
35
+ /// # Arguments
36
+ /// * `path` - A Pathname object pointing to the blob file
37
+ /// * `priority` - Fallback priority (:language or :region), defaults to :language
38
+ ///
39
+ /// # Returns
40
+ /// A new DataProvider instance with locale fallback enabled
41
+ fn from_blob(ruby: &Ruby, args: &[Value]) -> Result<Self, Error> {
42
+ // Parse arguments: (path, **kwargs)
43
+ // args[0] = path (required, Pathname)
44
+ // args[1] = kwargs hash (optional, from Ruby keyword arguments)
45
+ if args.is_empty() {
46
+ return Err(Error::new(
47
+ ruby.exception_arg_error(),
48
+ "wrong number of arguments (given 0, expected 1..2)",
49
+ ));
50
+ }
51
+
52
+ let path = args[0];
53
+
54
+ // Get the Pathname class
55
+ let pathname_class: RClass = ruby.eval("Pathname")?;
56
+
57
+ // Check if the argument is a Pathname instance
58
+ if !path.is_kind_of(pathname_class) {
59
+ let path_class = path.class();
60
+ // SAFETY: We have a valid Ruby Value from the method call
61
+ let class_name = unsafe { path_class.name() }.into_owned();
62
+ return Err(Error::new(
63
+ ruby.exception_type_error(),
64
+ format!("expected Pathname, got {}", class_name),
65
+ ));
66
+ }
67
+
68
+ // Get optional kwargs
69
+ let kwargs: Option<RHash> = if args.len() > 1 {
70
+ Some(TryConvert::try_convert(args[1])?)
71
+ } else {
72
+ None
73
+ };
74
+
75
+ // Extract priority option (default: :language)
76
+ let priority_value: Option<Symbol> = match kwargs {
77
+ Some(hash) => hash.lookup::<_, Option<Symbol>>(ruby.to_symbol("priority"))?,
78
+ None => None,
79
+ };
80
+
81
+ let priority_sym = priority_value.unwrap_or_else(|| ruby.to_symbol("language"));
82
+ let language_sym = ruby.to_symbol("language");
83
+ let region_sym = ruby.to_symbol("region");
84
+
85
+ // Validate priority value
86
+ if !priority_sym.equal(language_sym)? && !priority_sym.equal(region_sym)? {
87
+ return Err(Error::new(
88
+ ruby.exception_arg_error(),
89
+ "priority must be :language or :region",
90
+ ));
91
+ }
92
+
93
+ // Get the path as a string by calling to_s
94
+ let path_str: String = path.funcall("to_s", ())?;
95
+ let path_buf = PathBuf::from(&path_str);
96
+
97
+ // Read the file contents
98
+ let blob_data = fs::read(&path_buf).map_err(|e| {
99
+ Error::new(
100
+ ruby.exception_io_error(),
101
+ format!("Failed to read blob file '{}': {}", path_str, e),
102
+ )
103
+ })?;
104
+
105
+ // Leak the blob data to create a 'static reference
106
+ let static_blob: &'static [u8] = Box::leak(blob_data.into_boxed_slice());
107
+
108
+ // Create the BlobDataProvider from static blob
109
+ let blob_provider =
110
+ BlobDataProvider::try_new_from_static_blob(static_blob).map_err(|e| {
111
+ // Get the DataError exception class
112
+ let data_error_class = helpers::get_exception_class(ruby, "ICU4X::DataError");
113
+ Error::new(data_error_class, format!("Failed to create data provider: {}", e))
114
+ })?;
115
+
116
+ // Create the LocaleFallbacker with compiled data
117
+ // Use static_to_owned() to get an owned LocaleFallbacker
118
+ let fallbacker = LocaleFallbacker::new().static_to_owned();
119
+
120
+ // Create the LocaleFallbackProvider
121
+ // Note: LocaleFallbackConfig is applied when iterating, not at construction
122
+ let inner = LocaleFallbackProvider::new(blob_provider, fallbacker);
123
+
124
+ Ok(Self { inner })
125
+ }
126
+ }
127
+
128
+ pub fn init(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
129
+ let class = module.define_class("DataProvider", ruby.class_object())?;
130
+ class.define_singleton_method("from_blob", function!(DataProvider::from_blob, -1))?;
131
+ Ok(())
132
+ }
@@ -0,0 +1,401 @@
1
+ use crate::data_provider::DataProvider;
2
+ use crate::helpers;
3
+ use icu::calendar::preferences::CalendarAlgorithm;
4
+ use icu::calendar::{AnyCalendarKind, Date, Gregorian};
5
+ use icu::datetime::fieldsets::enums::{
6
+ CompositeDateTimeFieldSet, DateAndTimeFieldSet, DateFieldSet, TimeFieldSet,
7
+ };
8
+ use icu::datetime::fieldsets::{self};
9
+ use icu::datetime::input::DateTime;
10
+ use icu::datetime::{DateTimeFormatter, DateTimeFormatterPreferences};
11
+ use icu::time::Time;
12
+ use icu::time::zone::IanaParser;
13
+ use icu_provider::buf::AsDeserializingBufferProvider;
14
+ use icu4x_macros::RubySymbol;
15
+ use jiff::Timestamp;
16
+ use jiff::tz::TimeZone;
17
+ use magnus::{
18
+ Error, RHash, RModule, Ruby, TryConvert, Value, function, method, prelude::*,
19
+ };
20
+
21
+ /// Date style option
22
+ #[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
23
+ enum DateStyle {
24
+ Full,
25
+ Long,
26
+ Medium,
27
+ Short,
28
+ }
29
+
30
+ /// Time style option
31
+ #[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
32
+ enum TimeStyle {
33
+ Full,
34
+ Long,
35
+ Medium,
36
+ Short,
37
+ }
38
+
39
+ /// Calendar option
40
+ #[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
41
+ enum Calendar {
42
+ Gregory,
43
+ Japanese,
44
+ Buddhist,
45
+ Chinese,
46
+ Hebrew,
47
+ Islamic,
48
+ Persian,
49
+ Indian,
50
+ Ethiopian,
51
+ Coptic,
52
+ Roc,
53
+ Dangi,
54
+ }
55
+
56
+ impl Calendar {
57
+ fn to_calendar_algorithm(self) -> CalendarAlgorithm {
58
+ match self {
59
+ Calendar::Gregory => CalendarAlgorithm::Gregory,
60
+ Calendar::Japanese => CalendarAlgorithm::Japanese,
61
+ Calendar::Buddhist => CalendarAlgorithm::Buddhist,
62
+ Calendar::Chinese => CalendarAlgorithm::Chinese,
63
+ Calendar::Hebrew => CalendarAlgorithm::Hebrew,
64
+ Calendar::Islamic => CalendarAlgorithm::Hijri(None),
65
+ Calendar::Persian => CalendarAlgorithm::Persian,
66
+ Calendar::Indian => CalendarAlgorithm::Indian,
67
+ Calendar::Ethiopian => CalendarAlgorithm::Ethiopic,
68
+ Calendar::Coptic => CalendarAlgorithm::Coptic,
69
+ Calendar::Roc => CalendarAlgorithm::Roc,
70
+ Calendar::Dangi => CalendarAlgorithm::Dangi,
71
+ }
72
+ }
73
+
74
+ fn from_any_calendar_kind(kind: AnyCalendarKind) -> Self {
75
+ match kind {
76
+ AnyCalendarKind::Buddhist => Calendar::Buddhist,
77
+ AnyCalendarKind::Chinese => Calendar::Chinese,
78
+ AnyCalendarKind::Coptic => Calendar::Coptic,
79
+ AnyCalendarKind::Dangi => Calendar::Dangi,
80
+ AnyCalendarKind::Ethiopian | AnyCalendarKind::EthiopianAmeteAlem => Calendar::Ethiopian,
81
+ AnyCalendarKind::Gregorian => Calendar::Gregory,
82
+ AnyCalendarKind::Hebrew => Calendar::Hebrew,
83
+ AnyCalendarKind::Indian => Calendar::Indian,
84
+ AnyCalendarKind::HijriTabularTypeIIFriday
85
+ | AnyCalendarKind::HijriSimulatedMecca
86
+ | AnyCalendarKind::HijriTabularTypeIIThursday
87
+ | AnyCalendarKind::HijriUmmAlQura => Calendar::Islamic,
88
+ AnyCalendarKind::Iso => Calendar::Gregory,
89
+ AnyCalendarKind::Japanese | AnyCalendarKind::JapaneseExtended => Calendar::Japanese,
90
+ AnyCalendarKind::Persian => Calendar::Persian,
91
+ AnyCalendarKind::Roc => Calendar::Roc,
92
+ _ => Calendar::Gregory,
93
+ }
94
+ }
95
+ }
96
+
97
+ /// Ruby wrapper for ICU4X datetime formatters
98
+ #[magnus::wrap(class = "ICU4X::DateTimeFormat", free_immediately, size)]
99
+ pub struct DateTimeFormat {
100
+ inner: DateTimeFormatter<CompositeDateTimeFieldSet>,
101
+ locale_str: String,
102
+ date_style: Option<DateStyle>,
103
+ time_style: Option<TimeStyle>,
104
+ time_zone: Option<String>,
105
+ jiff_timezone: Option<TimeZone>,
106
+ calendar: Calendar,
107
+ }
108
+
109
+ // SAFETY: Ruby's GVL protects access to this type.
110
+ unsafe impl Send for DateTimeFormat {}
111
+
112
+ impl DateTimeFormat {
113
+ /// Create a new DateTimeFormat instance
114
+ ///
115
+ /// # Arguments
116
+ /// * `locale` - A Locale instance
117
+ /// * `provider:` - A DataProvider instance
118
+ /// * `date_style:` - :full, :long, :medium, or :short
119
+ /// * `time_style:` - :full, :long, :medium, or :short
120
+ /// * `time_zone:` - IANA timezone name (e.g., "Asia/Tokyo")
121
+ /// * `calendar:` - :gregory, :japanese, :buddhist, :chinese, :hebrew, :islamic,
122
+ /// :persian, :indian, :ethiopian, :coptic, :roc, :dangi
123
+ fn new(ruby: &Ruby, args: &[Value]) -> Result<Self, Error> {
124
+ // Parse arguments: (locale, **kwargs)
125
+ let (icu_locale, locale_str) = helpers::extract_locale(ruby, args)?;
126
+
127
+ // Get kwargs (optional)
128
+ let kwargs: RHash = if args.len() > 1 {
129
+ TryConvert::try_convert(args[1])?
130
+ } else {
131
+ ruby.hash_new()
132
+ };
133
+
134
+ // Resolve provider: use explicit or fall back to default
135
+ let resolved_provider = helpers::resolve_provider(ruby, &kwargs)?;
136
+
137
+ // Extract date_style option
138
+ let date_style =
139
+ helpers::extract_symbol(ruby, &kwargs, "date_style", DateStyle::from_ruby_symbol)?;
140
+
141
+ // Extract time_style option
142
+ let time_style =
143
+ helpers::extract_symbol(ruby, &kwargs, "time_style", TimeStyle::from_ruby_symbol)?;
144
+
145
+ // At least one of date_style or time_style must be specified
146
+ if date_style.is_none() && time_style.is_none() {
147
+ return Err(Error::new(
148
+ ruby.exception_arg_error(),
149
+ "at least one of date_style or time_style must be specified",
150
+ ));
151
+ }
152
+
153
+ // Extract time_zone option and parse it
154
+ let time_zone: Option<String> =
155
+ kwargs.lookup::<_, Option<String>>(ruby.to_symbol("time_zone"))?;
156
+
157
+ // Parse and validate the timezone if specified
158
+ let jiff_timezone = if let Some(ref tz_str) = time_zone {
159
+ // First validate with ICU4X's IanaParser
160
+ let parser = IanaParser::new();
161
+ let icu_tz = parser.parse(tz_str);
162
+ if icu_tz.is_unknown() {
163
+ return Err(Error::new(
164
+ ruby.exception_arg_error(),
165
+ format!("invalid IANA timezone: {}", tz_str),
166
+ ));
167
+ }
168
+ // Then create jiff TimeZone for offset calculation
169
+ let jiff_tz = TimeZone::get(tz_str).map_err(|e| {
170
+ Error::new(
171
+ ruby.exception_arg_error(),
172
+ format!("invalid IANA timezone: {} ({})", tz_str, e),
173
+ )
174
+ })?;
175
+ Some(jiff_tz)
176
+ } else {
177
+ None
178
+ };
179
+
180
+ // Extract calendar option
181
+ let calendar =
182
+ helpers::extract_symbol(ruby, &kwargs, "calendar", Calendar::from_ruby_symbol)?;
183
+
184
+ // Get the error exception class
185
+ let error_class = helpers::get_exception_class(ruby, "ICU4X::Error");
186
+
187
+ // Get the DataProvider
188
+ let dp: &DataProvider = TryConvert::try_convert(resolved_provider).map_err(|_| {
189
+ Error::new(
190
+ ruby.exception_type_error(),
191
+ "provider must be a DataProvider",
192
+ )
193
+ })?;
194
+
195
+ // Create field set based on date_style and time_style
196
+ let field_set = Self::create_field_set(date_style, time_style);
197
+
198
+ // Create formatter with calendar preference
199
+ let mut prefs: DateTimeFormatterPreferences = (&icu_locale).into();
200
+ if let Some(cal) = calendar {
201
+ prefs.calendar_algorithm = Some(cal.to_calendar_algorithm());
202
+ }
203
+
204
+ let formatter =
205
+ DateTimeFormatter::try_new_unstable(&dp.inner.as_deserializing(), prefs, field_set)
206
+ .map_err(|e| {
207
+ Error::new(
208
+ error_class,
209
+ format!("Failed to create DateTimeFormat: {}", e),
210
+ )
211
+ })?;
212
+
213
+ // Get the resolved calendar from the formatter
214
+ let resolved_calendar = Calendar::from_any_calendar_kind(formatter.calendar().kind());
215
+
216
+ Ok(Self {
217
+ inner: formatter,
218
+ locale_str,
219
+ date_style,
220
+ time_style,
221
+ time_zone,
222
+ jiff_timezone,
223
+ calendar: resolved_calendar,
224
+ })
225
+ }
226
+
227
+ /// Create field set based on date_style and time_style
228
+ fn create_field_set(
229
+ date_style: Option<DateStyle>,
230
+ time_style: Option<TimeStyle>,
231
+ ) -> CompositeDateTimeFieldSet {
232
+ match (date_style, time_style) {
233
+ (Some(ds), Some(ts)) => {
234
+ // Both date and time
235
+ let ymdt = match (ds, ts) {
236
+ (DateStyle::Full, _) | (DateStyle::Long, _) => fieldsets::YMDT::long(),
237
+ (DateStyle::Medium, _) => fieldsets::YMDT::medium(),
238
+ (DateStyle::Short, _) => fieldsets::YMDT::short(),
239
+ };
240
+ CompositeDateTimeFieldSet::DateTime(DateAndTimeFieldSet::YMDT(ymdt))
241
+ }
242
+ (Some(ds), None) => {
243
+ // Date only
244
+ let ymd = match ds {
245
+ DateStyle::Full | DateStyle::Long => fieldsets::YMD::long(),
246
+ DateStyle::Medium => fieldsets::YMD::medium(),
247
+ DateStyle::Short => fieldsets::YMD::short(),
248
+ };
249
+ CompositeDateTimeFieldSet::Date(DateFieldSet::YMD(ymd))
250
+ }
251
+ (None, Some(ts)) => {
252
+ // Time only
253
+ let t = match ts {
254
+ TimeStyle::Full | TimeStyle::Long => fieldsets::T::long(),
255
+ TimeStyle::Medium => fieldsets::T::medium(),
256
+ TimeStyle::Short => fieldsets::T::short(),
257
+ };
258
+ CompositeDateTimeFieldSet::Time(TimeFieldSet::T(t))
259
+ }
260
+ (None, None) => {
261
+ // Should not happen due to validation
262
+ unreachable!("at least one of date_style or time_style must be specified")
263
+ }
264
+ }
265
+ }
266
+
267
+ /// Format a Ruby Time object
268
+ ///
269
+ /// # Arguments
270
+ /// * `time` - A Ruby Time object
271
+ ///
272
+ /// # Returns
273
+ /// A formatted string
274
+ fn format(&self, time: Value) -> Result<String, Error> {
275
+ let ruby = Ruby::get().expect("Ruby runtime should be available");
276
+
277
+ // Validate that time is a Time object
278
+ let time_class: Value = ruby.eval("Time")?;
279
+ if !time.is_kind_of(magnus::RClass::try_convert(time_class)?) {
280
+ return Err(Error::new(
281
+ ruby.exception_type_error(),
282
+ "argument must be a Time object",
283
+ ));
284
+ }
285
+
286
+ // Convert Ruby Time to ICU4X DateTime, applying timezone if specified
287
+ let datetime = self.convert_time_to_datetime(&ruby, time)?;
288
+
289
+ // Format the datetime
290
+ let formatted = self.inner.format(&datetime);
291
+ Ok(formatted.to_string())
292
+ }
293
+
294
+ /// Convert Ruby Time to ICU4X DateTime<Gregorian>
295
+ ///
296
+ /// If time_zone is specified, the UTC time is converted to local time in that timezone.
297
+ /// Otherwise, the time is treated as UTC.
298
+ fn convert_time_to_datetime(
299
+ &self,
300
+ ruby: &Ruby,
301
+ time: Value,
302
+ ) -> Result<DateTime<Gregorian>, Error> {
303
+ // Get UTC time from Ruby Time object
304
+ let utc_time: Value = time.funcall("getutc", ())?;
305
+
306
+ let utc_year: i32 = utc_time.funcall("year", ())?;
307
+ let utc_month: i32 = utc_time.funcall("month", ())?;
308
+ let utc_day: i32 = utc_time.funcall("day", ())?;
309
+ let utc_hour: i32 = utc_time.funcall("hour", ())?;
310
+ let utc_min: i32 = utc_time.funcall("min", ())?;
311
+ let utc_sec: i32 = utc_time.funcall("sec", ())?;
312
+
313
+ // Get year, month, day, hour, min, sec in the target timezone
314
+ let (year, month, day, hour, min, sec) = if let Some(ref tz) = self.jiff_timezone {
315
+ // Create a jiff Timestamp from UTC components
316
+ let timestamp = Timestamp::from_second(utc_time.funcall::<_, _, i64>("to_i", ())?)
317
+ .map_err(|e| {
318
+ Error::new(
319
+ ruby.exception_arg_error(),
320
+ format!("Invalid timestamp: {}", e),
321
+ )
322
+ })?;
323
+
324
+ // Convert to local time in the target timezone
325
+ let zoned = timestamp.to_zoned(tz.clone());
326
+ let dt = zoned.datetime();
327
+
328
+ (
329
+ dt.year() as i32,
330
+ dt.month() as i32,
331
+ dt.day() as i32,
332
+ dt.hour() as i32,
333
+ dt.minute() as i32,
334
+ dt.second() as i32,
335
+ )
336
+ } else {
337
+ // No timezone specified, use UTC
338
+ (utc_year, utc_month, utc_day, utc_hour, utc_min, utc_sec)
339
+ };
340
+
341
+ // Create ISO date and convert to Gregorian
342
+ let iso_date = Date::try_new_iso(year, month as u8, day as u8)
343
+ .map_err(|e| Error::new(ruby.exception_arg_error(), format!("Invalid date: {}", e)))?;
344
+ let gregorian_date = iso_date.to_calendar(Gregorian);
345
+
346
+ // Create time
347
+ let time_of_day = Time::try_new(hour as u8, min as u8, sec as u8, 0)
348
+ .map_err(|e| Error::new(ruby.exception_arg_error(), format!("Invalid time: {}", e)))?;
349
+
350
+ Ok(DateTime {
351
+ date: gregorian_date,
352
+ time: time_of_day,
353
+ })
354
+ }
355
+
356
+ /// Get the resolved options
357
+ ///
358
+ /// # Returns
359
+ /// A hash with :locale, :calendar, :date_style, :time_style, and optionally :time_zone
360
+ fn resolved_options(&self) -> Result<RHash, Error> {
361
+ let ruby = Ruby::get().expect("Ruby runtime should be available");
362
+ let hash = ruby.hash_new();
363
+
364
+ hash.aset(ruby.to_symbol("locale"), self.locale_str.as_str())?;
365
+ hash.aset(
366
+ ruby.to_symbol("calendar"),
367
+ ruby.to_symbol(self.calendar.to_symbol_name()),
368
+ )?;
369
+
370
+ if let Some(ds) = self.date_style {
371
+ hash.aset(
372
+ ruby.to_symbol("date_style"),
373
+ ruby.to_symbol(ds.to_symbol_name()),
374
+ )?;
375
+ }
376
+
377
+ if let Some(ts) = self.time_style {
378
+ hash.aset(
379
+ ruby.to_symbol("time_style"),
380
+ ruby.to_symbol(ts.to_symbol_name()),
381
+ )?;
382
+ }
383
+
384
+ if let Some(ref tz) = self.time_zone {
385
+ hash.aset(ruby.to_symbol("time_zone"), tz.as_str())?;
386
+ }
387
+
388
+ Ok(hash)
389
+ }
390
+ }
391
+
392
+ pub fn init(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
393
+ let class = module.define_class("DateTimeFormat", ruby.class_object())?;
394
+ class.define_singleton_method("new", function!(DateTimeFormat::new, -1))?;
395
+ class.define_method("format", method!(DateTimeFormat::format, 1))?;
396
+ class.define_method(
397
+ "resolved_options",
398
+ method!(DateTimeFormat::resolved_options, 0),
399
+ )?;
400
+ Ok(())
401
+ }