icu4x 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 07dd6b700780e96f64b0f5925d1a7340a44b70c77112f863f018494c8f55db77
4
+ data.tar.gz: 614f4b010e2ce82a1e477122e322b8ecf258708e61db00efb02d06bc1f5d74f1
5
+ SHA512:
6
+ metadata.gz: 162056347baf80f0d262fba295d8dd1a2efad853b1dc267ee060c5537ff28a25c42efbee884f1694eac8536561da495eac1eb3b67b9419e3cd3a274b2a328e92
7
+ data.tar.gz: a43251139b2778e922d6977875179aea1f59b25ac1c7a8e01f5d98cafdc047714ea901ae9926250f2d96259f6a0fa29263d0dbc970ad7632abce27617de66fbb
data/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.5.0] - 2026-01-01
4
+
5
+ ### Added
6
+
7
+ - **Locale**: BCP 47 (`parse`) and POSIX (`parse_posix`) locale parsing
8
+ - **DateTimeFormat**: Date/time formatting with timezone and calendar support
9
+ - **NumberFormat**: Decimal, currency, and percent formatting with configurable options
10
+ - **PluralRules**: Cardinal and ordinal plural category selection
11
+ - **Collator**: Locale-sensitive string comparison with sensitivity and numeric options
12
+ - **ListFormat**: Conjunction, disjunction, and unit list formatting
13
+ - **RelativeTimeFormat**: Relative time formatting for all time units
14
+ - **DisplayNames**: Localized names for languages, regions, scripts, and locales
15
+ - **Segmenter**: Text segmentation by grapheme, word, sentence, or line
16
+ - **DataProvider**: Blob data loading with automatic locale fallback
17
+ - **DataGenerator**: CLDR data export with locale and marker filtering
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 OZAWA Sakuro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,118 @@
1
+ # ICU4X
2
+
3
+ Ruby bindings for [ICU4X](https://github.com/unicode-org/icu4x), providing internationalization functionality.
4
+
5
+ ## Overview
6
+
7
+ ICU4X is a Unicode library that provides locale-aware formatting and processing. This gem wraps ICU4X via Rust extensions, offering an API similar to JavaScript's Intl.
8
+
9
+ No locale data is bundled with the gem. Users generate only the data they need, keeping applications lean.
10
+
11
+ ## Features
12
+
13
+ - **Locale** - BCP 47 and POSIX locale identifier parsing
14
+ - **DateTimeFormat** - Locale-aware date/time formatting with timezone and calendar support
15
+ - **NumberFormat** - Number, currency, and percent formatting
16
+ - **PluralRules** - CLDR plural category selection (cardinal/ordinal)
17
+ - **Collator** - Locale-sensitive string comparison and sorting
18
+ - **ListFormat** - Locale-aware list formatting (and, or, unit)
19
+ - **RelativeTimeFormat** - Relative time formatting (e.g., "3 days ago")
20
+ - **DisplayNames** - Localized names for languages, regions, scripts, and locales
21
+ - **Segmenter** - Text segmentation (grapheme, word, sentence, line)
22
+ - **DataProvider** - Locale data loading with automatic fallback
23
+ - **DataGenerator** - Locale data generation from CLDR
24
+
25
+ ## Requirements
26
+
27
+ - Ruby 3.2+
28
+ - Rust toolchain (for building the native extension)
29
+
30
+ ## Setup
31
+
32
+ Add to your Gemfile:
33
+
34
+ ```ruby
35
+ gem "icu4x"
36
+ ```
37
+
38
+ Then generate locale data for your application:
39
+
40
+ ```ruby
41
+ require "icu4x"
42
+
43
+ ICU4X::DataGenerator.export(
44
+ locales: %w[en ja],
45
+ markers: :all,
46
+ format: :blob,
47
+ output: Pathname.new("data/i18n.blob")
48
+ )
49
+ ```
50
+
51
+ ## Usage
52
+
53
+ ```ruby
54
+ require "icu4x"
55
+
56
+ # Option 1: Configure default provider (recommended)
57
+ ICU4X.configure do |config|
58
+ config.data_path = Pathname.new("data/i18n.blob")
59
+ end
60
+ # Or set the ICU4X_DATA_PATH environment variable
61
+
62
+ # Option 2: Load provider explicitly
63
+ provider = ICU4X::DataProvider.from_blob(Pathname.new("data/i18n.blob"))
64
+
65
+ # Parse locale (BCP 47 or POSIX format)
66
+ locale = ICU4X::Locale.parse("ja-JP")
67
+ locale = ICU4X::Locale.parse_posix("ja_JP.UTF-8") # POSIX format also supported
68
+
69
+ # Date/time formatting (provider: can be omitted if default is configured)
70
+ dtf = ICU4X::DateTimeFormat.new(locale, provider:, date_style: :long)
71
+ dtf.format(Time.now)
72
+ # => "2025年12月30日"
73
+
74
+ # Number formatting
75
+ nf = ICU4X::NumberFormat.new(locale, provider:, style: :currency, currency: "JPY")
76
+ nf.format(1_234_567)
77
+ # => "¥1,234,567"
78
+
79
+ # Plural rules
80
+ pr = ICU4X::PluralRules.new(ICU4X::Locale.parse("en"), provider:)
81
+ pr.select(1) # => :one
82
+ pr.select(2) # => :other
83
+
84
+ # Collation (sorting)
85
+ collator = ICU4X::Collator.new(locale, provider:)
86
+ %w[メロン アップル なし].sort { |a, b| collator.compare(a, b) }
87
+ # => ["アップル", "なし", "メロン"]
88
+
89
+ # List formatting
90
+ lf = ICU4X::ListFormat.new(locale, provider:, type: :conjunction)
91
+ lf.format(%w[Apple Banana Cherry])
92
+ # => "Apple、Banana、Cherry"
93
+
94
+ # Relative time formatting
95
+ rtf = ICU4X::RelativeTimeFormat.new(locale, provider:)
96
+ rtf.format(-3, :day)
97
+ # => "3日前"
98
+
99
+ # Display names
100
+ dn = ICU4X::DisplayNames.new(locale, provider:, type: :language)
101
+ dn.of("en")
102
+ # => "英語"
103
+
104
+ # Text segmentation
105
+ segmenter = ICU4X::Segmenter.new(granularity: :word, provider:)
106
+ segmenter.segment("Hello, world!").map(&:text)
107
+ # => ["Hello", ",", " ", "world", "!"]
108
+ ```
109
+
110
+ See [doc/](doc/) for detailed documentation.
111
+
112
+ ## Contributing
113
+
114
+ Bug reports and pull requests are welcome on GitHub at https://github.com/sakuro/icu4x.
115
+
116
+ ## License
117
+
118
+ MIT License. See [LICENSE](LICENSE.txt) for details.
@@ -0,0 +1,23 @@
1
+ [package]
2
+ name = "icu4x"
3
+ version = "0.1.0"
4
+ edition = "2024"
5
+ publish = false
6
+
7
+ [lib]
8
+ crate-type = ["cdylib"]
9
+
10
+ [dependencies]
11
+ magnus = "0.8"
12
+ icu_locale = "2.1"
13
+ icu_provider = "2.1"
14
+ icu_provider_blob = { version = "2.1", features = ["alloc", "export"] }
15
+ icu_provider_source = { version = "2.1", features = ["networking", "experimental"] }
16
+ icu_provider_export = "2.1"
17
+ icu_provider_registry = "2.1"
18
+ icu_provider_adapters = "2.1"
19
+ icu = { version = "2.1", features = ["experimental"] }
20
+ fixed_decimal = "0.7"
21
+ tinystr = "0.8"
22
+ jiff = "0.2"
23
+ icu4x_macros = { path = "../icu4x_macros" }
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rb_sys/mkmf"
5
+
6
+ create_rust_makefile("icu4x/icu4x")
@@ -0,0 +1,207 @@
1
+ use crate::data_provider::DataProvider;
2
+ use crate::helpers;
3
+ use icu::collator::Collator as IcuCollator;
4
+ use icu::collator::CollatorPreferences;
5
+ use icu::collator::options::{CaseLevel, CollatorOptions, Strength};
6
+ use icu::collator::preferences::{CollationCaseFirst, CollationNumericOrdering};
7
+ use icu_provider::buf::AsDeserializingBufferProvider;
8
+ use icu4x_macros::RubySymbol;
9
+ use magnus::{
10
+ Error, RHash, RModule, Ruby, TryConvert, Value, function, method, prelude::*,
11
+ };
12
+ use std::cmp::Ordering;
13
+
14
+ /// Sensitivity level for collation
15
+ #[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
16
+ enum Sensitivity {
17
+ Base,
18
+ Accent,
19
+ Case,
20
+ Variant,
21
+ }
22
+
23
+ /// Case first option
24
+ #[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
25
+ enum CaseFirstOption {
26
+ Upper,
27
+ Lower,
28
+ }
29
+
30
+ impl CaseFirstOption {
31
+ fn to_icu_case_first(self) -> CollationCaseFirst {
32
+ match self {
33
+ CaseFirstOption::Upper => CollationCaseFirst::Upper,
34
+ CaseFirstOption::Lower => CollationCaseFirst::Lower,
35
+ }
36
+ }
37
+ }
38
+
39
+ /// Ruby wrapper for ICU4X Collator
40
+ #[magnus::wrap(class = "ICU4X::Collator", free_immediately, size)]
41
+ pub struct Collator {
42
+ inner: IcuCollator,
43
+ locale_str: String,
44
+ sensitivity: Sensitivity,
45
+ numeric: bool,
46
+ case_first: Option<CaseFirstOption>,
47
+ }
48
+
49
+ // SAFETY: Ruby's GVL protects access to this type.
50
+ unsafe impl Send for Collator {}
51
+
52
+ impl Collator {
53
+ /// Create a new Collator instance
54
+ ///
55
+ /// # Arguments
56
+ /// * `locale` - A Locale instance
57
+ /// * `provider:` - A DataProvider instance
58
+ /// * `sensitivity:` - :base, :accent, :case, or :variant (default)
59
+ /// * `numeric:` - Whether to use numeric sorting (default: false)
60
+ /// * `case_first:` - :upper, :lower, or nil (default)
61
+ fn new(ruby: &Ruby, args: &[Value]) -> Result<Self, Error> {
62
+ // Parse arguments: (locale, **kwargs)
63
+ let (icu_locale, locale_str) = helpers::extract_locale(ruby, args)?;
64
+
65
+ // Get kwargs (optional)
66
+ let kwargs: RHash = if args.len() > 1 {
67
+ TryConvert::try_convert(args[1])?
68
+ } else {
69
+ ruby.hash_new()
70
+ };
71
+
72
+ // Resolve provider: use explicit or fall back to default
73
+ let resolved_provider = helpers::resolve_provider(ruby, &kwargs)?;
74
+
75
+ // Extract sensitivity option (default: :variant)
76
+ let sensitivity =
77
+ helpers::extract_symbol(ruby, &kwargs, "sensitivity", Sensitivity::from_ruby_symbol)?
78
+ .unwrap_or(Sensitivity::Variant);
79
+
80
+ // Extract numeric option (default: false)
81
+ let numeric: bool = kwargs
82
+ .lookup::<_, Option<bool>>(ruby.to_symbol("numeric"))?
83
+ .unwrap_or(false);
84
+
85
+ // Extract case_first option (default: nil)
86
+ let case_first = helpers::extract_symbol(
87
+ ruby,
88
+ &kwargs,
89
+ "case_first",
90
+ CaseFirstOption::from_ruby_symbol,
91
+ )?;
92
+
93
+ // Get the error exception class
94
+ let error_class = helpers::get_exception_class(ruby, "ICU4X::Error");
95
+
96
+ // Get the DataProvider
97
+ let dp: &DataProvider = TryConvert::try_convert(resolved_provider).map_err(|_| {
98
+ Error::new(
99
+ ruby.exception_type_error(),
100
+ "provider must be a DataProvider",
101
+ )
102
+ })?;
103
+
104
+ // Build collator options (strength and case_level)
105
+ let mut options = CollatorOptions::default();
106
+
107
+ // Set strength based on sensitivity
108
+ options.strength = Some(match sensitivity {
109
+ Sensitivity::Base => Strength::Primary,
110
+ Sensitivity::Accent => Strength::Secondary,
111
+ Sensitivity::Case => Strength::Primary,
112
+ Sensitivity::Variant => Strength::Tertiary,
113
+ });
114
+
115
+ // Set case_level for case sensitivity
116
+ if matches!(sensitivity, Sensitivity::Case) {
117
+ options.case_level = Some(CaseLevel::On);
118
+ }
119
+
120
+ // Build preferences (numeric and case_first)
121
+ let mut prefs: CollatorPreferences = (&icu_locale).into();
122
+
123
+ if numeric {
124
+ prefs.numeric_ordering = Some(CollationNumericOrdering::True);
125
+ }
126
+
127
+ if let Some(cf) = case_first {
128
+ prefs.case_first = Some(cf.to_icu_case_first());
129
+ }
130
+
131
+ // Create collator
132
+ let collator = IcuCollator::try_new_unstable(&dp.inner.as_deserializing(), prefs, options)
133
+ .map_err(|e| Error::new(error_class, format!("Failed to create Collator: {}", e)))?;
134
+
135
+ Ok(Self {
136
+ inner: collator,
137
+ locale_str,
138
+ sensitivity,
139
+ numeric,
140
+ case_first,
141
+ })
142
+ }
143
+
144
+ /// Compare two strings
145
+ ///
146
+ /// # Arguments
147
+ /// * `a` - First string
148
+ /// * `b` - Second string
149
+ ///
150
+ /// # Returns
151
+ /// -1 if a < b, 0 if a == b, 1 if a > b
152
+ fn compare(&self, a: Value, b: Value) -> Result<i32, Error> {
153
+ let ruby = Ruby::get().expect("Ruby runtime should be available");
154
+
155
+ let str_a: String = TryConvert::try_convert(a).map_err(|_| {
156
+ Error::new(
157
+ ruby.exception_type_error(),
158
+ "first argument must be a String",
159
+ )
160
+ })?;
161
+
162
+ let str_b: String = TryConvert::try_convert(b).map_err(|_| {
163
+ Error::new(
164
+ ruby.exception_type_error(),
165
+ "second argument must be a String",
166
+ )
167
+ })?;
168
+
169
+ let result = match self.inner.as_borrowed().compare(&str_a, &str_b) {
170
+ Ordering::Less => -1,
171
+ Ordering::Equal => 0,
172
+ Ordering::Greater => 1,
173
+ };
174
+
175
+ Ok(result)
176
+ }
177
+
178
+ /// Get the resolved options
179
+ ///
180
+ /// # Returns
181
+ /// A hash with :locale, :sensitivity, :numeric, and optionally :case_first
182
+ fn resolved_options(&self) -> Result<RHash, Error> {
183
+ let ruby = Ruby::get().expect("Ruby runtime should be available");
184
+ let hash = ruby.hash_new();
185
+ hash.aset(ruby.to_symbol("locale"), self.locale_str.as_str())?;
186
+ hash.aset(
187
+ ruby.to_symbol("sensitivity"),
188
+ ruby.to_symbol(self.sensitivity.to_symbol_name()),
189
+ )?;
190
+ hash.aset(ruby.to_symbol("numeric"), self.numeric)?;
191
+ if let Some(cf) = self.case_first {
192
+ hash.aset(
193
+ ruby.to_symbol("case_first"),
194
+ ruby.to_symbol(cf.to_symbol_name()),
195
+ )?;
196
+ }
197
+ Ok(hash)
198
+ }
199
+ }
200
+
201
+ pub fn init(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
202
+ let class = module.define_class("Collator", ruby.class_object())?;
203
+ class.define_singleton_method("new", function!(Collator::new, -1))?;
204
+ class.define_method("compare", method!(Collator::compare, 2))?;
205
+ class.define_method("resolved_options", method!(Collator::resolved_options, 0))?;
206
+ Ok(())
207
+ }
@@ -0,0 +1,244 @@
1
+ use crate::helpers;
2
+ use icu_provider::DataMarkerInfo;
3
+ use icu_provider_blob::export::BlobExporter;
4
+ use icu_provider_export::prelude::*;
5
+ use icu_provider_source::SourceDataProvider;
6
+ use magnus::{
7
+ Error, RArray, RClass, RHash, RModule, Ruby, Symbol, Value, function, prelude::*,
8
+ value::ReprValue,
9
+ };
10
+ use std::collections::HashMap;
11
+ use std::fs::File;
12
+ use std::io::BufWriter;
13
+ use std::path::PathBuf;
14
+ use std::sync::OnceLock;
15
+
16
+ /// Returns a static lookup table mapping marker names to DataMarkerInfo
17
+ fn marker_lookup() -> &'static HashMap<&'static str, DataMarkerInfo> {
18
+ static LOOKUP: OnceLock<HashMap<&'static str, DataMarkerInfo>> = OnceLock::new();
19
+ LOOKUP.get_or_init(|| {
20
+ let mut map = HashMap::new();
21
+ macro_rules! cb {
22
+ ($($marker_ty:ty:$marker:ident,)+ #[experimental] $($emarker_ty:ty:$emarker:ident,)+) => {
23
+ $(
24
+ // Add both the full type name and the short marker name
25
+ map.insert(stringify!($marker_ty), <$marker_ty>::INFO);
26
+ map.insert(stringify!($marker), <$marker_ty>::INFO);
27
+ )+
28
+ // Also include experimental markers
29
+ $(
30
+ map.insert(stringify!($emarker_ty), <$emarker_ty>::INFO);
31
+ map.insert(stringify!($emarker), <$emarker_ty>::INFO);
32
+ )+
33
+ };
34
+ }
35
+ icu_provider_registry::registry!(cb);
36
+ map
37
+ })
38
+ }
39
+
40
+ /// Ruby wrapper for ICU4X data generation functionality
41
+ pub struct DataGenerator;
42
+
43
+ impl DataGenerator {
44
+ /// Export ICU4X data to a blob file
45
+ ///
46
+ /// # Arguments
47
+ /// * `locales` - Array of locale strings (e.g., ["ja", "en"])
48
+ /// * `markers` - :all or Array of marker symbols (e.g., [:datetime, :number, :plurals])
49
+ /// * `format` - :blob (only blob format is supported)
50
+ /// * `output` - Pathname for the output file
51
+ fn export(ruby: &Ruby, kwargs: RHash) -> Result<(), Error> {
52
+ // Extract locales
53
+ let locales_value: RArray = kwargs
54
+ .fetch::<_, RArray>(ruby.to_symbol("locales"))
55
+ .map_err(|_| {
56
+ Error::new(
57
+ ruby.exception_arg_error(),
58
+ "missing required keyword argument: locales",
59
+ )
60
+ })?;
61
+
62
+ let mut locale_families: Vec<DataLocaleFamily> = Vec::new();
63
+ let mut has_und = false;
64
+ for i in 0..locales_value.len() {
65
+ let locale_str: String = locales_value.entry(i as isize)?;
66
+ if locale_str == "und" {
67
+ has_und = true;
68
+ }
69
+ let family = DataLocaleFamily::with_descendants(locale_str.parse().map_err(|e| {
70
+ Error::new(
71
+ ruby.exception_arg_error(),
72
+ format!("Invalid locale '{}': {}", locale_str, e),
73
+ )
74
+ })?);
75
+ locale_families.push(family);
76
+ }
77
+
78
+ // Warn if 'und' locale is not included
79
+ if !has_und {
80
+ let kernel: Value = ruby.eval("Kernel")?;
81
+ let _: Value = kernel.funcall(
82
+ "warn",
83
+ ("ICU4X::DataGenerator.export: 'und' locale not included. Fallback may fail for unlisted locales.",),
84
+ )?;
85
+ }
86
+
87
+ // Extract markers
88
+ let markers_value: Value = kwargs
89
+ .fetch::<_, Value>(ruby.to_symbol("markers"))
90
+ .map_err(|_| {
91
+ Error::new(
92
+ ruby.exception_arg_error(),
93
+ "missing required keyword argument: markers",
94
+ )
95
+ })?;
96
+
97
+ // Parse markers: either :all or an array of marker name strings
98
+ let selected_markers: Option<Vec<DataMarkerInfo>> = {
99
+ let all_symbol = ruby.to_symbol("all");
100
+ if markers_value.eql(all_symbol)? {
101
+ // :all - use all available markers (None means don't filter)
102
+ None
103
+ } else if let Ok(markers_array) = RArray::try_convert(markers_value) {
104
+ // Array of marker name strings
105
+ let lookup = marker_lookup();
106
+ let mut result = Vec::new();
107
+ for i in 0..markers_array.len() {
108
+ let marker_name: String = markers_array.entry(i as isize)?;
109
+ match lookup.get(marker_name.as_str()) {
110
+ Some(&info) => result.push(info),
111
+ None => {
112
+ return Err(Error::new(
113
+ ruby.exception_arg_error(),
114
+ format!(
115
+ "unknown marker: '{}'. Use DataGenerator.available_markers to see valid names.",
116
+ marker_name
117
+ ),
118
+ ));
119
+ }
120
+ }
121
+ }
122
+ Some(result)
123
+ } else {
124
+ return Err(Error::new(
125
+ ruby.exception_arg_error(),
126
+ "markers must be :all or an Array of marker name strings",
127
+ ));
128
+ }
129
+ };
130
+
131
+ // Extract format
132
+ let format_value: Symbol = kwargs
133
+ .fetch::<_, Symbol>(ruby.to_symbol("format"))
134
+ .map_err(|_| {
135
+ Error::new(
136
+ ruby.exception_arg_error(),
137
+ "missing required keyword argument: format",
138
+ )
139
+ })?;
140
+
141
+ let blob_symbol = ruby.to_symbol("blob");
142
+ if !format_value.eql(blob_symbol)? {
143
+ return Err(Error::new(
144
+ ruby.exception_arg_error(),
145
+ "only :blob format is currently supported",
146
+ ));
147
+ }
148
+
149
+ // Extract output path (must be Pathname)
150
+ let output_value: Value =
151
+ kwargs
152
+ .fetch::<_, Value>(ruby.to_symbol("output"))
153
+ .map_err(|_| {
154
+ Error::new(
155
+ ruby.exception_arg_error(),
156
+ "missing required keyword argument: output",
157
+ )
158
+ })?;
159
+
160
+ let pathname_class: RClass = ruby.eval("Pathname")?;
161
+ if !output_value.is_kind_of(pathname_class) {
162
+ let path_class = output_value.class();
163
+ let class_name = unsafe { path_class.name() }.into_owned();
164
+ return Err(Error::new(
165
+ ruby.exception_type_error(),
166
+ format!("output must be a Pathname, got {}", class_name),
167
+ ));
168
+ }
169
+
170
+ let output_str: String = output_value.funcall("to_s", ())?;
171
+ let output_path = PathBuf::from(&output_str);
172
+
173
+ // Create parent directories if needed
174
+ if let Some(parent) = output_path.parent() {
175
+ std::fs::create_dir_all(parent).map_err(|e| {
176
+ Error::new(
177
+ ruby.exception_io_error(),
178
+ format!("Failed to create output directory: {}", e),
179
+ )
180
+ })?;
181
+ }
182
+
183
+ // Create the source data provider (downloads CLDR data)
184
+ let source_provider = SourceDataProvider::new();
185
+
186
+ // Create the blob exporter
187
+ let file = File::create(&output_path).map_err(|e| {
188
+ Error::new(
189
+ ruby.exception_io_error(),
190
+ format!("Failed to create output file '{}': {}", output_str, e),
191
+ )
192
+ })?;
193
+ let sink = BufWriter::new(file);
194
+
195
+ // Create the export driver
196
+ let driver = ExportDriver::new(
197
+ locale_families,
198
+ DeduplicationStrategy::Maximal.into(),
199
+ LocaleFallbacker::new_without_data(),
200
+ );
201
+
202
+ // Apply marker filter if specific markers were requested
203
+ let driver = match selected_markers {
204
+ Some(markers) => driver.with_markers(markers),
205
+ None => driver, // :all - export all markers
206
+ };
207
+
208
+ let exporter = BlobExporter::new_with_sink(Box::new(sink));
209
+
210
+ driver.export(&source_provider, exporter).map_err(|e| {
211
+ let error_class = helpers::get_exception_class(ruby, "ICU4X::DataGeneratorError");
212
+ Error::new(error_class, format!("Data export failed: {}", e))
213
+ })?;
214
+
215
+ Ok(())
216
+ }
217
+
218
+ /// Returns an array of available marker names
219
+ fn available_markers(ruby: &Ruby) -> Result<RArray, Error> {
220
+ let lookup = marker_lookup();
221
+ let array = ruby.ary_new();
222
+ // Collect unique marker names (short names only, not full type paths)
223
+ let mut names: Vec<&str> = lookup
224
+ .keys()
225
+ .filter(|k| !k.contains("::"))
226
+ .copied()
227
+ .collect();
228
+ names.sort();
229
+ for name in names {
230
+ array.push(ruby.str_new(name))?;
231
+ }
232
+ Ok(array)
233
+ }
234
+ }
235
+
236
+ pub fn init(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
237
+ let class = module.define_class("DataGenerator", ruby.class_object())?;
238
+ class.define_singleton_method("export", function!(DataGenerator::export, 1))?;
239
+ class.define_singleton_method(
240
+ "available_markers",
241
+ function!(DataGenerator::available_markers, 0),
242
+ )?;
243
+ Ok(())
244
+ }