icu4x 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +17 -0
- data/LICENSE.txt +21 -0
- data/README.md +118 -0
- data/ext/icu4x/Cargo.toml +23 -0
- data/ext/icu4x/extconf.rb +6 -0
- data/ext/icu4x/src/collator.rs +207 -0
- data/ext/icu4x/src/data_generator.rs +244 -0
- data/ext/icu4x/src/data_provider.rs +132 -0
- data/ext/icu4x/src/datetime_format.rs +401 -0
- data/ext/icu4x/src/display_names.rs +301 -0
- data/ext/icu4x/src/helpers.rs +96 -0
- data/ext/icu4x/src/lib.rs +33 -0
- data/ext/icu4x/src/list_format.rs +166 -0
- data/ext/icu4x/src/locale.rs +178 -0
- data/ext/icu4x/src/number_format.rs +379 -0
- data/ext/icu4x/src/plural_rules.rs +185 -0
- data/ext/icu4x/src/relative_time_format.rs +260 -0
- data/ext/icu4x/src/segmenter.rs +331 -0
- data/lib/icu4x/version.rb +6 -0
- data/lib/icu4x/yard_docs.rb +701 -0
- data/lib/icu4x.rb +91 -0
- data/sig/icu4x.rbs +209 -0
- metadata +98 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 07dd6b700780e96f64b0f5925d1a7340a44b70c77112f863f018494c8f55db77
|
|
4
|
+
data.tar.gz: 614f4b010e2ce82a1e477122e322b8ecf258708e61db00efb02d06bc1f5d74f1
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 162056347baf80f0d262fba295d8dd1a2efad853b1dc267ee060c5537ff28a25c42efbee884f1694eac8536561da495eac1eb3b67b9419e3cd3a274b2a328e92
|
|
7
|
+
data.tar.gz: a43251139b2778e922d6977875179aea1f59b25ac1c7a8e01f5d98cafdc047714ea901ae9926250f2d96259f6a0fa29263d0dbc970ad7632abce27617de66fbb
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
## [Unreleased]
|
|
2
|
+
|
|
3
|
+
## [0.5.0] - 2026-01-01
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- **Locale**: BCP 47 (`parse`) and POSIX (`parse_posix`) locale parsing
|
|
8
|
+
- **DateTimeFormat**: Date/time formatting with timezone and calendar support
|
|
9
|
+
- **NumberFormat**: Decimal, currency, and percent formatting with configurable options
|
|
10
|
+
- **PluralRules**: Cardinal and ordinal plural category selection
|
|
11
|
+
- **Collator**: Locale-sensitive string comparison with sensitivity and numeric options
|
|
12
|
+
- **ListFormat**: Conjunction, disjunction, and unit list formatting
|
|
13
|
+
- **RelativeTimeFormat**: Relative time formatting for all time units
|
|
14
|
+
- **DisplayNames**: Localized names for languages, regions, scripts, and locales
|
|
15
|
+
- **Segmenter**: Text segmentation by grapheme, word, sentence, or line
|
|
16
|
+
- **DataProvider**: Blob data loading with automatic locale fallback
|
|
17
|
+
- **DataGenerator**: CLDR data export with locale and marker filtering
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 OZAWA Sakuro
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# ICU4X
|
|
2
|
+
|
|
3
|
+
Ruby bindings for [ICU4X](https://github.com/unicode-org/icu4x), providing internationalization functionality.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
ICU4X is a Unicode library that provides locale-aware formatting and processing. This gem wraps ICU4X via Rust extensions, offering an API similar to JavaScript's Intl.
|
|
8
|
+
|
|
9
|
+
No locale data is bundled with the gem. Users generate only the data they need, keeping applications lean.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **Locale** - BCP 47 and POSIX locale identifier parsing
|
|
14
|
+
- **DateTimeFormat** - Locale-aware date/time formatting with timezone and calendar support
|
|
15
|
+
- **NumberFormat** - Number, currency, and percent formatting
|
|
16
|
+
- **PluralRules** - CLDR plural category selection (cardinal/ordinal)
|
|
17
|
+
- **Collator** - Locale-sensitive string comparison and sorting
|
|
18
|
+
- **ListFormat** - Locale-aware list formatting (and, or, unit)
|
|
19
|
+
- **RelativeTimeFormat** - Relative time formatting (e.g., "3 days ago")
|
|
20
|
+
- **DisplayNames** - Localized names for languages, regions, scripts, and locales
|
|
21
|
+
- **Segmenter** - Text segmentation (grapheme, word, sentence, line)
|
|
22
|
+
- **DataProvider** - Locale data loading with automatic fallback
|
|
23
|
+
- **DataGenerator** - Locale data generation from CLDR
|
|
24
|
+
|
|
25
|
+
## Requirements
|
|
26
|
+
|
|
27
|
+
- Ruby 3.2+
|
|
28
|
+
- Rust toolchain (for building the native extension)
|
|
29
|
+
|
|
30
|
+
## Setup
|
|
31
|
+
|
|
32
|
+
Add to your Gemfile:
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
gem "icu4x"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Then generate locale data for your application:
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
require "icu4x"
|
|
42
|
+
|
|
43
|
+
ICU4X::DataGenerator.export(
|
|
44
|
+
locales: %w[en ja],
|
|
45
|
+
markers: :all,
|
|
46
|
+
format: :blob,
|
|
47
|
+
output: Pathname.new("data/i18n.blob")
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
require "icu4x"
|
|
55
|
+
|
|
56
|
+
# Option 1: Configure default provider (recommended)
|
|
57
|
+
ICU4X.configure do |config|
|
|
58
|
+
config.data_path = Pathname.new("data/i18n.blob")
|
|
59
|
+
end
|
|
60
|
+
# Or set the ICU4X_DATA_PATH environment variable
|
|
61
|
+
|
|
62
|
+
# Option 2: Load provider explicitly
|
|
63
|
+
provider = ICU4X::DataProvider.from_blob(Pathname.new("data/i18n.blob"))
|
|
64
|
+
|
|
65
|
+
# Parse locale (BCP 47 or POSIX format)
|
|
66
|
+
locale = ICU4X::Locale.parse("ja-JP")
|
|
67
|
+
locale = ICU4X::Locale.parse_posix("ja_JP.UTF-8") # POSIX format also supported
|
|
68
|
+
|
|
69
|
+
# Date/time formatting (provider: can be omitted if default is configured)
|
|
70
|
+
dtf = ICU4X::DateTimeFormat.new(locale, provider:, date_style: :long)
|
|
71
|
+
dtf.format(Time.now)
|
|
72
|
+
# => "2025年12月30日"
|
|
73
|
+
|
|
74
|
+
# Number formatting
|
|
75
|
+
nf = ICU4X::NumberFormat.new(locale, provider:, style: :currency, currency: "JPY")
|
|
76
|
+
nf.format(1_234_567)
|
|
77
|
+
# => "¥1,234,567"
|
|
78
|
+
|
|
79
|
+
# Plural rules
|
|
80
|
+
pr = ICU4X::PluralRules.new(ICU4X::Locale.parse("en"), provider:)
|
|
81
|
+
pr.select(1) # => :one
|
|
82
|
+
pr.select(2) # => :other
|
|
83
|
+
|
|
84
|
+
# Collation (sorting)
|
|
85
|
+
collator = ICU4X::Collator.new(locale, provider:)
|
|
86
|
+
%w[メロン アップル なし].sort { |a, b| collator.compare(a, b) }
|
|
87
|
+
# => ["アップル", "なし", "メロン"]
|
|
88
|
+
|
|
89
|
+
# List formatting
|
|
90
|
+
lf = ICU4X::ListFormat.new(locale, provider:, type: :conjunction)
|
|
91
|
+
lf.format(%w[Apple Banana Cherry])
|
|
92
|
+
# => "Apple、Banana、Cherry"
|
|
93
|
+
|
|
94
|
+
# Relative time formatting
|
|
95
|
+
rtf = ICU4X::RelativeTimeFormat.new(locale, provider:)
|
|
96
|
+
rtf.format(-3, :day)
|
|
97
|
+
# => "3日前"
|
|
98
|
+
|
|
99
|
+
# Display names
|
|
100
|
+
dn = ICU4X::DisplayNames.new(locale, provider:, type: :language)
|
|
101
|
+
dn.of("en")
|
|
102
|
+
# => "英語"
|
|
103
|
+
|
|
104
|
+
# Text segmentation
|
|
105
|
+
segmenter = ICU4X::Segmenter.new(granularity: :word, provider:)
|
|
106
|
+
segmenter.segment("Hello, world!").map(&:text)
|
|
107
|
+
# => ["Hello", ",", " ", "world", "!"]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
See [doc/](doc/) for detailed documentation.
|
|
111
|
+
|
|
112
|
+
## Contributing
|
|
113
|
+
|
|
114
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/sakuro/icu4x.
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT License. See [LICENSE](LICENSE.txt) for details.
|
data/ext/icu4x/Cargo.toml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "icu4x"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2024"
|
|
5
|
+
publish = false
|
|
6
|
+
|
|
7
|
+
[lib]
|
|
8
|
+
crate-type = ["cdylib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
magnus = "0.8"
|
|
12
|
+
icu_locale = "2.1"
|
|
13
|
+
icu_provider = "2.1"
|
|
14
|
+
icu_provider_blob = { version = "2.1", features = ["alloc", "export"] }
|
|
15
|
+
icu_provider_source = { version = "2.1", features = ["networking", "experimental"] }
|
|
16
|
+
icu_provider_export = "2.1"
|
|
17
|
+
icu_provider_registry = "2.1"
|
|
18
|
+
icu_provider_adapters = "2.1"
|
|
19
|
+
icu = { version = "2.1", features = ["experimental"] }
|
|
20
|
+
fixed_decimal = "0.7"
|
|
21
|
+
tinystr = "0.8"
|
|
22
|
+
jiff = "0.2"
|
|
23
|
+
icu4x_macros = { path = "../icu4x_macros" }
|
data/ext/icu4x/src/collator.rs
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
use crate::data_provider::DataProvider;
|
|
2
|
+
use crate::helpers;
|
|
3
|
+
use icu::collator::Collator as IcuCollator;
|
|
4
|
+
use icu::collator::CollatorPreferences;
|
|
5
|
+
use icu::collator::options::{CaseLevel, CollatorOptions, Strength};
|
|
6
|
+
use icu::collator::preferences::{CollationCaseFirst, CollationNumericOrdering};
|
|
7
|
+
use icu_provider::buf::AsDeserializingBufferProvider;
|
|
8
|
+
use icu4x_macros::RubySymbol;
|
|
9
|
+
use magnus::{
|
|
10
|
+
Error, RHash, RModule, Ruby, TryConvert, Value, function, method, prelude::*,
|
|
11
|
+
};
|
|
12
|
+
use std::cmp::Ordering;
|
|
13
|
+
|
|
14
|
+
/// Sensitivity level for collation
|
|
15
|
+
#[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
|
|
16
|
+
enum Sensitivity {
|
|
17
|
+
Base,
|
|
18
|
+
Accent,
|
|
19
|
+
Case,
|
|
20
|
+
Variant,
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/// Case first option
|
|
24
|
+
#[derive(Clone, Copy, PartialEq, Eq, RubySymbol)]
|
|
25
|
+
enum CaseFirstOption {
|
|
26
|
+
Upper,
|
|
27
|
+
Lower,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
impl CaseFirstOption {
|
|
31
|
+
fn to_icu_case_first(self) -> CollationCaseFirst {
|
|
32
|
+
match self {
|
|
33
|
+
CaseFirstOption::Upper => CollationCaseFirst::Upper,
|
|
34
|
+
CaseFirstOption::Lower => CollationCaseFirst::Lower,
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/// Ruby wrapper for ICU4X Collator
|
|
40
|
+
#[magnus::wrap(class = "ICU4X::Collator", free_immediately, size)]
|
|
41
|
+
pub struct Collator {
|
|
42
|
+
inner: IcuCollator,
|
|
43
|
+
locale_str: String,
|
|
44
|
+
sensitivity: Sensitivity,
|
|
45
|
+
numeric: bool,
|
|
46
|
+
case_first: Option<CaseFirstOption>,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// SAFETY: Ruby's GVL protects access to this type.
|
|
50
|
+
unsafe impl Send for Collator {}
|
|
51
|
+
|
|
52
|
+
impl Collator {
|
|
53
|
+
/// Create a new Collator instance
|
|
54
|
+
///
|
|
55
|
+
/// # Arguments
|
|
56
|
+
/// * `locale` - A Locale instance
|
|
57
|
+
/// * `provider:` - A DataProvider instance
|
|
58
|
+
/// * `sensitivity:` - :base, :accent, :case, or :variant (default)
|
|
59
|
+
/// * `numeric:` - Whether to use numeric sorting (default: false)
|
|
60
|
+
/// * `case_first:` - :upper, :lower, or nil (default)
|
|
61
|
+
fn new(ruby: &Ruby, args: &[Value]) -> Result<Self, Error> {
|
|
62
|
+
// Parse arguments: (locale, **kwargs)
|
|
63
|
+
let (icu_locale, locale_str) = helpers::extract_locale(ruby, args)?;
|
|
64
|
+
|
|
65
|
+
// Get kwargs (optional)
|
|
66
|
+
let kwargs: RHash = if args.len() > 1 {
|
|
67
|
+
TryConvert::try_convert(args[1])?
|
|
68
|
+
} else {
|
|
69
|
+
ruby.hash_new()
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
// Resolve provider: use explicit or fall back to default
|
|
73
|
+
let resolved_provider = helpers::resolve_provider(ruby, &kwargs)?;
|
|
74
|
+
|
|
75
|
+
// Extract sensitivity option (default: :variant)
|
|
76
|
+
let sensitivity =
|
|
77
|
+
helpers::extract_symbol(ruby, &kwargs, "sensitivity", Sensitivity::from_ruby_symbol)?
|
|
78
|
+
.unwrap_or(Sensitivity::Variant);
|
|
79
|
+
|
|
80
|
+
// Extract numeric option (default: false)
|
|
81
|
+
let numeric: bool = kwargs
|
|
82
|
+
.lookup::<_, Option<bool>>(ruby.to_symbol("numeric"))?
|
|
83
|
+
.unwrap_or(false);
|
|
84
|
+
|
|
85
|
+
// Extract case_first option (default: nil)
|
|
86
|
+
let case_first = helpers::extract_symbol(
|
|
87
|
+
ruby,
|
|
88
|
+
&kwargs,
|
|
89
|
+
"case_first",
|
|
90
|
+
CaseFirstOption::from_ruby_symbol,
|
|
91
|
+
)?;
|
|
92
|
+
|
|
93
|
+
// Get the error exception class
|
|
94
|
+
let error_class = helpers::get_exception_class(ruby, "ICU4X::Error");
|
|
95
|
+
|
|
96
|
+
// Get the DataProvider
|
|
97
|
+
let dp: &DataProvider = TryConvert::try_convert(resolved_provider).map_err(|_| {
|
|
98
|
+
Error::new(
|
|
99
|
+
ruby.exception_type_error(),
|
|
100
|
+
"provider must be a DataProvider",
|
|
101
|
+
)
|
|
102
|
+
})?;
|
|
103
|
+
|
|
104
|
+
// Build collator options (strength and case_level)
|
|
105
|
+
let mut options = CollatorOptions::default();
|
|
106
|
+
|
|
107
|
+
// Set strength based on sensitivity
|
|
108
|
+
options.strength = Some(match sensitivity {
|
|
109
|
+
Sensitivity::Base => Strength::Primary,
|
|
110
|
+
Sensitivity::Accent => Strength::Secondary,
|
|
111
|
+
Sensitivity::Case => Strength::Primary,
|
|
112
|
+
Sensitivity::Variant => Strength::Tertiary,
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// Set case_level for case sensitivity
|
|
116
|
+
if matches!(sensitivity, Sensitivity::Case) {
|
|
117
|
+
options.case_level = Some(CaseLevel::On);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Build preferences (numeric and case_first)
|
|
121
|
+
let mut prefs: CollatorPreferences = (&icu_locale).into();
|
|
122
|
+
|
|
123
|
+
if numeric {
|
|
124
|
+
prefs.numeric_ordering = Some(CollationNumericOrdering::True);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if let Some(cf) = case_first {
|
|
128
|
+
prefs.case_first = Some(cf.to_icu_case_first());
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Create collator
|
|
132
|
+
let collator = IcuCollator::try_new_unstable(&dp.inner.as_deserializing(), prefs, options)
|
|
133
|
+
.map_err(|e| Error::new(error_class, format!("Failed to create Collator: {}", e)))?;
|
|
134
|
+
|
|
135
|
+
Ok(Self {
|
|
136
|
+
inner: collator,
|
|
137
|
+
locale_str,
|
|
138
|
+
sensitivity,
|
|
139
|
+
numeric,
|
|
140
|
+
case_first,
|
|
141
|
+
})
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/// Compare two strings
|
|
145
|
+
///
|
|
146
|
+
/// # Arguments
|
|
147
|
+
/// * `a` - First string
|
|
148
|
+
/// * `b` - Second string
|
|
149
|
+
///
|
|
150
|
+
/// # Returns
|
|
151
|
+
/// -1 if a < b, 0 if a == b, 1 if a > b
|
|
152
|
+
fn compare(&self, a: Value, b: Value) -> Result<i32, Error> {
|
|
153
|
+
let ruby = Ruby::get().expect("Ruby runtime should be available");
|
|
154
|
+
|
|
155
|
+
let str_a: String = TryConvert::try_convert(a).map_err(|_| {
|
|
156
|
+
Error::new(
|
|
157
|
+
ruby.exception_type_error(),
|
|
158
|
+
"first argument must be a String",
|
|
159
|
+
)
|
|
160
|
+
})?;
|
|
161
|
+
|
|
162
|
+
let str_b: String = TryConvert::try_convert(b).map_err(|_| {
|
|
163
|
+
Error::new(
|
|
164
|
+
ruby.exception_type_error(),
|
|
165
|
+
"second argument must be a String",
|
|
166
|
+
)
|
|
167
|
+
})?;
|
|
168
|
+
|
|
169
|
+
let result = match self.inner.as_borrowed().compare(&str_a, &str_b) {
|
|
170
|
+
Ordering::Less => -1,
|
|
171
|
+
Ordering::Equal => 0,
|
|
172
|
+
Ordering::Greater => 1,
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
Ok(result)
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/// Get the resolved options
|
|
179
|
+
///
|
|
180
|
+
/// # Returns
|
|
181
|
+
/// A hash with :locale, :sensitivity, :numeric, and optionally :case_first
|
|
182
|
+
fn resolved_options(&self) -> Result<RHash, Error> {
|
|
183
|
+
let ruby = Ruby::get().expect("Ruby runtime should be available");
|
|
184
|
+
let hash = ruby.hash_new();
|
|
185
|
+
hash.aset(ruby.to_symbol("locale"), self.locale_str.as_str())?;
|
|
186
|
+
hash.aset(
|
|
187
|
+
ruby.to_symbol("sensitivity"),
|
|
188
|
+
ruby.to_symbol(self.sensitivity.to_symbol_name()),
|
|
189
|
+
)?;
|
|
190
|
+
hash.aset(ruby.to_symbol("numeric"), self.numeric)?;
|
|
191
|
+
if let Some(cf) = self.case_first {
|
|
192
|
+
hash.aset(
|
|
193
|
+
ruby.to_symbol("case_first"),
|
|
194
|
+
ruby.to_symbol(cf.to_symbol_name()),
|
|
195
|
+
)?;
|
|
196
|
+
}
|
|
197
|
+
Ok(hash)
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
pub fn init(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
202
|
+
let class = module.define_class("Collator", ruby.class_object())?;
|
|
203
|
+
class.define_singleton_method("new", function!(Collator::new, -1))?;
|
|
204
|
+
class.define_method("compare", method!(Collator::compare, 2))?;
|
|
205
|
+
class.define_method("resolved_options", method!(Collator::resolved_options, 0))?;
|
|
206
|
+
Ok(())
|
|
207
|
+
}
|
data/ext/icu4x/src/data_generator.rs
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
use crate::helpers;
|
|
2
|
+
use icu_provider::DataMarkerInfo;
|
|
3
|
+
use icu_provider_blob::export::BlobExporter;
|
|
4
|
+
use icu_provider_export::prelude::*;
|
|
5
|
+
use icu_provider_source::SourceDataProvider;
|
|
6
|
+
use magnus::{
|
|
7
|
+
Error, RArray, RClass, RHash, RModule, Ruby, Symbol, Value, function, prelude::*,
|
|
8
|
+
value::ReprValue,
|
|
9
|
+
};
|
|
10
|
+
use std::collections::HashMap;
|
|
11
|
+
use std::fs::File;
|
|
12
|
+
use std::io::BufWriter;
|
|
13
|
+
use std::path::PathBuf;
|
|
14
|
+
use std::sync::OnceLock;
|
|
15
|
+
|
|
16
|
+
/// Returns a static lookup table mapping marker names to DataMarkerInfo
|
|
17
|
+
fn marker_lookup() -> &'static HashMap<&'static str, DataMarkerInfo> {
|
|
18
|
+
static LOOKUP: OnceLock<HashMap<&'static str, DataMarkerInfo>> = OnceLock::new();
|
|
19
|
+
LOOKUP.get_or_init(|| {
|
|
20
|
+
let mut map = HashMap::new();
|
|
21
|
+
macro_rules! cb {
|
|
22
|
+
($($marker_ty:ty:$marker:ident,)+ #[experimental] $($emarker_ty:ty:$emarker:ident,)+) => {
|
|
23
|
+
$(
|
|
24
|
+
// Add both the full type name and the short marker name
|
|
25
|
+
map.insert(stringify!($marker_ty), <$marker_ty>::INFO);
|
|
26
|
+
map.insert(stringify!($marker), <$marker_ty>::INFO);
|
|
27
|
+
)+
|
|
28
|
+
// Also include experimental markers
|
|
29
|
+
$(
|
|
30
|
+
map.insert(stringify!($emarker_ty), <$emarker_ty>::INFO);
|
|
31
|
+
map.insert(stringify!($emarker), <$emarker_ty>::INFO);
|
|
32
|
+
)+
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
icu_provider_registry::registry!(cb);
|
|
36
|
+
map
|
|
37
|
+
})
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/// Ruby wrapper for ICU4X data generation functionality
|
|
41
|
+
pub struct DataGenerator;
|
|
42
|
+
|
|
43
|
+
impl DataGenerator {
|
|
44
|
+
/// Export ICU4X data to a blob file
|
|
45
|
+
///
|
|
46
|
+
/// # Arguments
|
|
47
|
+
/// * `locales` - Array of locale strings (e.g., ["ja", "en"])
|
|
48
|
+
/// * `markers` - :all or Array of marker name strings (see `available_markers` for valid names)
|
|
49
|
+
/// * `format` - :blob (only blob format is supported)
|
|
50
|
+
/// * `output` - Pathname for the output file
|
|
51
|
+
fn export(ruby: &Ruby, kwargs: RHash) -> Result<(), Error> {
|
|
52
|
+
// Extract locales
|
|
53
|
+
let locales_value: RArray = kwargs
|
|
54
|
+
.fetch::<_, RArray>(ruby.to_symbol("locales"))
|
|
55
|
+
.map_err(|_| {
|
|
56
|
+
Error::new(
|
|
57
|
+
ruby.exception_arg_error(),
|
|
58
|
+
"missing required keyword argument: locales",
|
|
59
|
+
)
|
|
60
|
+
})?;
|
|
61
|
+
|
|
62
|
+
let mut locale_families: Vec<DataLocaleFamily> = Vec::new();
|
|
63
|
+
let mut has_und = false;
|
|
64
|
+
for i in 0..locales_value.len() {
|
|
65
|
+
let locale_str: String = locales_value.entry(i as isize)?;
|
|
66
|
+
if locale_str == "und" {
|
|
67
|
+
has_und = true;
|
|
68
|
+
}
|
|
69
|
+
let family = DataLocaleFamily::with_descendants(locale_str.parse().map_err(|e| {
|
|
70
|
+
Error::new(
|
|
71
|
+
ruby.exception_arg_error(),
|
|
72
|
+
format!("Invalid locale '{}': {}", locale_str, e),
|
|
73
|
+
)
|
|
74
|
+
})?);
|
|
75
|
+
locale_families.push(family);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Warn if 'und' locale is not included
|
|
79
|
+
if !has_und {
|
|
80
|
+
let kernel: Value = ruby.eval("Kernel")?;
|
|
81
|
+
let _: Value = kernel.funcall(
|
|
82
|
+
"warn",
|
|
83
|
+
("ICU4X::DataGenerator.export: 'und' locale not included. Fallback may fail for unlisted locales.",),
|
|
84
|
+
)?;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Extract markers
|
|
88
|
+
let markers_value: Value = kwargs
|
|
89
|
+
.fetch::<_, Value>(ruby.to_symbol("markers"))
|
|
90
|
+
.map_err(|_| {
|
|
91
|
+
Error::new(
|
|
92
|
+
ruby.exception_arg_error(),
|
|
93
|
+
"missing required keyword argument: markers",
|
|
94
|
+
)
|
|
95
|
+
})?;
|
|
96
|
+
|
|
97
|
+
// Parse markers: either :all or an array of marker name strings
|
|
98
|
+
let selected_markers: Option<Vec<DataMarkerInfo>> = {
|
|
99
|
+
let all_symbol = ruby.to_symbol("all");
|
|
100
|
+
if markers_value.eql(all_symbol)? {
|
|
101
|
+
// :all - use all available markers (None means don't filter)
|
|
102
|
+
None
|
|
103
|
+
} else if let Ok(markers_array) = RArray::try_convert(markers_value) {
|
|
104
|
+
// Array of marker name strings
|
|
105
|
+
let lookup = marker_lookup();
|
|
106
|
+
let mut result = Vec::new();
|
|
107
|
+
for i in 0..markers_array.len() {
|
|
108
|
+
let marker_name: String = markers_array.entry(i as isize)?;
|
|
109
|
+
match lookup.get(marker_name.as_str()) {
|
|
110
|
+
Some(&info) => result.push(info),
|
|
111
|
+
None => {
|
|
112
|
+
return Err(Error::new(
|
|
113
|
+
ruby.exception_arg_error(),
|
|
114
|
+
format!(
|
|
115
|
+
"unknown marker: '{}'. Use DataGenerator.available_markers to see valid names.",
|
|
116
|
+
marker_name
|
|
117
|
+
),
|
|
118
|
+
));
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
Some(result)
|
|
123
|
+
} else {
|
|
124
|
+
return Err(Error::new(
|
|
125
|
+
ruby.exception_arg_error(),
|
|
126
|
+
"markers must be :all or an Array of marker name strings",
|
|
127
|
+
));
|
|
128
|
+
}
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
// Extract format
|
|
132
|
+
let format_value: Symbol = kwargs
|
|
133
|
+
.fetch::<_, Symbol>(ruby.to_symbol("format"))
|
|
134
|
+
.map_err(|_| {
|
|
135
|
+
Error::new(
|
|
136
|
+
ruby.exception_arg_error(),
|
|
137
|
+
"missing required keyword argument: format",
|
|
138
|
+
)
|
|
139
|
+
})?;
|
|
140
|
+
|
|
141
|
+
let blob_symbol = ruby.to_symbol("blob");
|
|
142
|
+
if !format_value.eql(blob_symbol)? {
|
|
143
|
+
return Err(Error::new(
|
|
144
|
+
ruby.exception_arg_error(),
|
|
145
|
+
"only :blob format is currently supported",
|
|
146
|
+
));
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Extract output path (must be Pathname)
|
|
150
|
+
let output_value: Value =
|
|
151
|
+
kwargs
|
|
152
|
+
.fetch::<_, Value>(ruby.to_symbol("output"))
|
|
153
|
+
.map_err(|_| {
|
|
154
|
+
Error::new(
|
|
155
|
+
ruby.exception_arg_error(),
|
|
156
|
+
"missing required keyword argument: output",
|
|
157
|
+
)
|
|
158
|
+
})?;
|
|
159
|
+
|
|
160
|
+
let pathname_class: RClass = ruby.eval("Pathname")?;
|
|
161
|
+
if !output_value.is_kind_of(pathname_class) {
|
|
162
|
+
let path_class = output_value.class();
|
|
163
|
+
let class_name = unsafe { path_class.name() }.into_owned();
|
|
164
|
+
return Err(Error::new(
|
|
165
|
+
ruby.exception_type_error(),
|
|
166
|
+
format!("output must be a Pathname, got {}", class_name),
|
|
167
|
+
));
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
let output_str: String = output_value.funcall("to_s", ())?;
|
|
171
|
+
let output_path = PathBuf::from(&output_str);
|
|
172
|
+
|
|
173
|
+
// Create parent directories if needed
|
|
174
|
+
if let Some(parent) = output_path.parent() {
|
|
175
|
+
std::fs::create_dir_all(parent).map_err(|e| {
|
|
176
|
+
Error::new(
|
|
177
|
+
ruby.exception_io_error(),
|
|
178
|
+
format!("Failed to create output directory: {}", e),
|
|
179
|
+
)
|
|
180
|
+
})?;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// Create the source data provider (downloads CLDR data)
|
|
184
|
+
let source_provider = SourceDataProvider::new();
|
|
185
|
+
|
|
186
|
+
// Create the blob exporter
|
|
187
|
+
let file = File::create(&output_path).map_err(|e| {
|
|
188
|
+
Error::new(
|
|
189
|
+
ruby.exception_io_error(),
|
|
190
|
+
format!("Failed to create output file '{}': {}", output_str, e),
|
|
191
|
+
)
|
|
192
|
+
})?;
|
|
193
|
+
let sink = BufWriter::new(file);
|
|
194
|
+
|
|
195
|
+
// Create the export driver
|
|
196
|
+
let driver = ExportDriver::new(
|
|
197
|
+
locale_families,
|
|
198
|
+
DeduplicationStrategy::Maximal.into(),
|
|
199
|
+
LocaleFallbacker::new_without_data(),
|
|
200
|
+
);
|
|
201
|
+
|
|
202
|
+
// Apply marker filter if specific markers were requested
|
|
203
|
+
let driver = match selected_markers {
|
|
204
|
+
Some(markers) => driver.with_markers(markers),
|
|
205
|
+
None => driver, // :all - export all markers
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
let exporter = BlobExporter::new_with_sink(Box::new(sink));
|
|
209
|
+
|
|
210
|
+
driver.export(&source_provider, exporter).map_err(|e| {
|
|
211
|
+
let error_class = helpers::get_exception_class(ruby, "ICU4X::DataGeneratorError");
|
|
212
|
+
Error::new(error_class, format!("Data export failed: {}", e))
|
|
213
|
+
})?;
|
|
214
|
+
|
|
215
|
+
Ok(())
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
/// Returns an array of available marker names
|
|
219
|
+
fn available_markers(ruby: &Ruby) -> Result<RArray, Error> {
|
|
220
|
+
let lookup = marker_lookup();
|
|
221
|
+
let array = ruby.ary_new();
|
|
222
|
+
// Collect unique marker names (short names only, not full type paths)
|
|
223
|
+
let mut names: Vec<&str> = lookup
|
|
224
|
+
.keys()
|
|
225
|
+
.filter(|k| !k.contains("::"))
|
|
226
|
+
.copied()
|
|
227
|
+
.collect();
|
|
228
|
+
names.sort();
|
|
229
|
+
for name in names {
|
|
230
|
+
array.push(ruby.str_new(name))?;
|
|
231
|
+
}
|
|
232
|
+
Ok(array)
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
pub fn init(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
237
|
+
let class = module.define_class("DataGenerator", ruby.class_object())?;
|
|
238
|
+
class.define_singleton_method("export", function!(DataGenerator::export, 1))?;
|
|
239
|
+
class.define_singleton_method(
|
|
240
|
+
"available_markers",
|
|
241
|
+
function!(DataGenerator::available_markers, 0),
|
|
242
|
+
)?;
|
|
243
|
+
Ok(())
|
|
244
|
+
}
|