icu4x 0.5.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +17 -0
- data/LICENSE.txt +21 -0
- data/README.md +118 -0
- data/lib/icu4x/3.2/icu4x.bundle +0 -0
- data/lib/icu4x/3.3/icu4x.bundle +0 -0
- data/lib/icu4x/3.4/icu4x.bundle +0 -0
- data/lib/icu4x/version.rb +6 -0
- data/lib/icu4x/yard_docs.rb +701 -0
- data/lib/icu4x.rb +91 -0
- data/sig/icu4x.rbs +209 -0
- metadata +74 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: cb52f24cdb88da4b0342e690870231e811efb3206fba417f85bcf892a4e62239
|
|
4
|
+
data.tar.gz: f5a1c01a2f8c0f9646953a4485553d507f5cee4448edb3c222b1b76c62881caa
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 26866adf4c69be26f1f8ba3506c597fbed81eca090e807562ca908729b25a8a2f2104f9b12ba77b249382ed6d0b4c442ffd0265577f8296ecd9e9c8150634ba6
|
|
7
|
+
data.tar.gz: 9f3f84c6d685b8ba6acd996dc319804c7e646e49ea368435f0297d741ee6f747d0e8b0a8bf67709cb103f32476344e2298304374a9d9f80fc764eb6594901416
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
## [Unreleased]
|
|
2
|
+
|
|
3
|
+
## [0.5.0] - 2026-01-01
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- **Locale**: BCP 47 (`parse`) and POSIX (`parse_posix`) locale parsing
|
|
8
|
+
- **DateTimeFormat**: Date/time formatting with timezone and calendar support
|
|
9
|
+
- **NumberFormat**: Decimal, currency, and percent formatting with configurable options
|
|
10
|
+
- **PluralRules**: Cardinal and ordinal plural category selection
|
|
11
|
+
- **Collator**: Locale-sensitive string comparison with sensitivity and numeric options
|
|
12
|
+
- **ListFormat**: Conjunction, disjunction, and unit list formatting
|
|
13
|
+
- **RelativeTimeFormat**: Relative time formatting for all time units
|
|
14
|
+
- **DisplayNames**: Localized names for languages, regions, scripts, and locales
|
|
15
|
+
- **Segmenter**: Text segmentation by grapheme, word, sentence, or line
|
|
16
|
+
- **DataProvider**: Blob data loading with automatic locale fallback
|
|
17
|
+
- **DataGenerator**: CLDR data export with locale and marker filtering
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 OZAWA Sakuro
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# ICU4X
|
|
2
|
+
|
|
3
|
+
Ruby bindings for [ICU4X](https://github.com/unicode-org/icu4x), providing internationalization functionality.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
ICU4X is a Unicode library that provides locale-aware formatting and processing. This gem wraps ICU4X via Rust extensions, offering an API similar to JavaScript's Intl.
|
|
8
|
+
|
|
9
|
+
No locale data is bundled with the gem. Users generate only the data they need, keeping applications lean.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **Locale** - BCP 47 and POSIX locale identifier parsing
|
|
14
|
+
- **DateTimeFormat** - Locale-aware date/time formatting with timezone and calendar support
|
|
15
|
+
- **NumberFormat** - Number, currency, and percent formatting
|
|
16
|
+
- **PluralRules** - CLDR plural category selection (cardinal/ordinal)
|
|
17
|
+
- **Collator** - Locale-sensitive string comparison and sorting
|
|
18
|
+
- **ListFormat** - Locale-aware list formatting (and, or, unit)
|
|
19
|
+
- **RelativeTimeFormat** - Relative time formatting (e.g., "3 days ago")
|
|
20
|
+
- **DisplayNames** - Localized names for languages, regions, and scripts
|
|
21
|
+
- **Segmenter** - Text segmentation (grapheme, word, sentence, line)
|
|
22
|
+
- **DataProvider** - Locale data loading with automatic fallback
|
|
23
|
+
- **DataGenerator** - Locale data generation from CLDR
|
|
24
|
+
|
|
25
|
+
## Requirements
|
|
26
|
+
|
|
27
|
+
- Ruby 3.2+
|
|
28
|
+
- Rust toolchain (for building the native extension)
|
|
29
|
+
|
|
30
|
+
## Setup
|
|
31
|
+
|
|
32
|
+
Add to your Gemfile:
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
gem "icu4x"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Then generate locale data for your application:
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
require "icu4x"
|
|
42
|
+
|
|
43
|
+
ICU4X::DataGenerator.export(
|
|
44
|
+
locales: %w[en ja],
|
|
45
|
+
markers: :all,
|
|
46
|
+
format: :blob,
|
|
47
|
+
output: Pathname.new("data/i18n.blob")
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
require "icu4x"
|
|
55
|
+
|
|
56
|
+
# Option 1: Configure default provider (recommended)
|
|
57
|
+
ICU4X.configure do |config|
|
|
58
|
+
config.data_path = Pathname.new("data/i18n.blob")
|
|
59
|
+
end
|
|
60
|
+
# Or set the ICU4X_DATA_PATH environment variable
|
|
61
|
+
|
|
62
|
+
# Option 2: Load provider explicitly
|
|
63
|
+
provider = ICU4X::DataProvider.from_blob(Pathname.new("data/i18n.blob"))
|
|
64
|
+
|
|
65
|
+
# Parse locale (BCP 47 or POSIX format)
|
|
66
|
+
locale = ICU4X::Locale.parse("ja-JP")
|
|
67
|
+
locale = ICU4X::Locale.parse_posix("ja_JP.UTF-8") # POSIX format also supported
|
|
68
|
+
|
|
69
|
+
# Date/time formatting (provider: can be omitted if default is configured)
|
|
70
|
+
dtf = ICU4X::DateTimeFormat.new(locale, provider:, date_style: :long)
|
|
71
|
+
dtf.format(Time.now)
|
|
72
|
+
# => "2025年12月30日"
|
|
73
|
+
|
|
74
|
+
# Number formatting
|
|
75
|
+
nf = ICU4X::NumberFormat.new(locale, provider:, style: :currency, currency: "JPY")
|
|
76
|
+
nf.format(1_234_567)
|
|
77
|
+
# => "¥1,234,567"
|
|
78
|
+
|
|
79
|
+
# Plural rules
|
|
80
|
+
pr = ICU4X::PluralRules.new(ICU4X::Locale.parse("en"), provider:)
|
|
81
|
+
pr.select(1) # => :one
|
|
82
|
+
pr.select(2) # => :other
|
|
83
|
+
|
|
84
|
+
# Collation (sorting)
|
|
85
|
+
collator = ICU4X::Collator.new(locale, provider:)
|
|
86
|
+
%w[メロン アップル なし].sort { |a, b| collator.compare(a, b) }
|
|
87
|
+
# => ["アップル", "なし", "メロン"]
|
|
88
|
+
|
|
89
|
+
# List formatting
|
|
90
|
+
lf = ICU4X::ListFormat.new(locale, provider:, type: :conjunction)
|
|
91
|
+
lf.format(%w[Apple Banana Cherry])
|
|
92
|
+
# => "Apple、Banana、Cherry"
|
|
93
|
+
|
|
94
|
+
# Relative time formatting
|
|
95
|
+
rtf = ICU4X::RelativeTimeFormat.new(locale, provider:)
|
|
96
|
+
rtf.format(-3, :day)
|
|
97
|
+
# => "3日前"
|
|
98
|
+
|
|
99
|
+
# Display names
|
|
100
|
+
dn = ICU4X::DisplayNames.new(locale, provider:, type: :language)
|
|
101
|
+
dn.of("en")
|
|
102
|
+
# => "英語"
|
|
103
|
+
|
|
104
|
+
# Text segmentation
|
|
105
|
+
segmenter = ICU4X::Segmenter.new(granularity: :word, provider:)
|
|
106
|
+
segmenter.segment("Hello, world!").map(&:text)
|
|
107
|
+
# => ["Hello", ",", " ", "world", "!"]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
See [doc/](doc/) for detailed documentation.
|
|
111
|
+
|
|
112
|
+
## Contributing
|
|
113
|
+
|
|
114
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/sakuro/icu4x.
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT License. See [LICENSE](LICENSE.txt) for details.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,701 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# YARD documentation for ICU4X Ruby API.
|
|
4
|
+
# This file contains only @!parse directives for documentation purposes.
|
|
5
|
+
# No actual code is evaluated.
|
|
6
|
+
|
|
7
|
+
# @!parse
|
|
8
|
+
# # ICU4X provides internationalization (i18n) functionality for Ruby applications.
|
|
9
|
+
# #
|
|
10
|
+
# # This library wraps ICU4X, a Unicode internationalization library written in Rust,
|
|
11
|
+
# # providing locale-aware formatting, parsing, and text processing capabilities.
|
|
12
|
+
# #
|
|
13
|
+
# # @example Configure data path
|
|
14
|
+
# # ICU4X.configure do |config|
|
|
15
|
+
# # config.data_path = Pathname.new("data.postcard")
|
|
16
|
+
# # end
|
|
17
|
+
# #
|
|
18
|
+
# # @example Using environment variable
|
|
19
|
+
# # # Set the ICU4X_DATA_PATH environment variable
|
|
20
|
+
# # ENV["ICU4X_DATA_PATH"] = "/path/to/data.postcard"
|
|
21
|
+
# #
|
|
22
|
+
# # @see https://github.com/unicode-org/icu4x ICU4X project
|
|
23
|
+
# #
|
|
24
|
+
# module ICU4X
|
|
25
|
+
# # The version of the icu4x gem.
|
|
26
|
+
# VERSION = "0.0.0"
|
|
27
|
+
#
|
|
28
|
+
# # Returns the default data provider, lazily loaded from configuration.
|
|
29
|
+
# #
|
|
30
|
+
# # The provider is created from +config.data_path+ or the +ICU4X_DATA_PATH+
|
|
31
|
+
# # environment variable. Once created, the provider is cached.
|
|
32
|
+
# #
|
|
33
|
+
# # @return [DataProvider, nil] the default provider, or nil if not configured
|
|
34
|
+
# #
|
|
35
|
+
# # @example
|
|
36
|
+
# # ICU4X.configure { |c| c.data_path = Pathname.new("data.postcard") }
|
|
37
|
+
# # provider = ICU4X.default_provider
|
|
38
|
+
# #
|
|
39
|
+
# def self.default_provider; end
|
|
40
|
+
#
|
|
41
|
+
# # Resets the cached default data provider.
|
|
42
|
+
# #
|
|
43
|
+
# # After calling this method, the next call to {.default_provider} will
|
|
44
|
+
# # create a new provider from the current configuration.
|
|
45
|
+
# #
|
|
46
|
+
# # @return [void]
|
|
47
|
+
# #
|
|
48
|
+
# def self.reset_default_provider!; end
|
|
49
|
+
#
|
|
50
|
+
# # Base error class for all ICU4X errors.
|
|
51
|
+
# #
|
|
52
|
+
# class Error < StandardError; end
|
|
53
|
+
#
|
|
54
|
+
# # Raised when a locale identifier is invalid or cannot be parsed.
|
|
55
|
+
# #
|
|
56
|
+
# # @example
|
|
57
|
+
# # ICU4X::Locale.parse("invalid!!!") # raises LocaleError
|
|
58
|
+
# #
|
|
59
|
+
# class LocaleError < Error; end
|
|
60
|
+
#
|
|
61
|
+
# # Raised when data loading or provider operations fail.
|
|
62
|
+
# #
|
|
63
|
+
# # @example
|
|
64
|
+
# # ICU4X::DataProvider.from_blob(Pathname.new("nonexistent.postcard")) # raises DataError
|
|
65
|
+
# #
|
|
66
|
+
# class DataError < Error; end
|
|
67
|
+
#
|
|
68
|
+
# # Raised when data generation fails.
|
|
69
|
+
# #
|
|
70
|
+
# class DataGeneratorError < Error; end
|
|
71
|
+
#
|
|
72
|
+
# # Provides locale data for ICU4X formatters and other components.
|
|
73
|
+
# #
|
|
74
|
+
# # DataProvider loads precompiled locale data from a binary blob file.
|
|
75
|
+
# # The blob can be generated using {DataGenerator.export}.
|
|
76
|
+
# #
|
|
77
|
+
# # @example Load a data provider
|
|
78
|
+
# # provider = ICU4X::DataProvider.from_blob(Pathname.new("data.postcard"))
|
|
79
|
+
# #
|
|
80
|
+
# # @example Use with a formatter
|
|
81
|
+
# # provider = ICU4X::DataProvider.from_blob(Pathname.new("data.postcard"))
|
|
82
|
+
# # formatter = ICU4X::NumberFormat.new(locale, provider: provider)
|
|
83
|
+
# #
|
|
84
|
+
# class DataProvider
|
|
85
|
+
# # Creates a DataProvider from a binary blob file.
|
|
86
|
+
# #
|
|
87
|
+
# # @param path [Pathname] path to the .postcard blob file
|
|
88
|
+
# # @param priority [Symbol] locale fallback priority, either +:language+ or +:region+
|
|
89
|
+
# # @return [DataProvider] a new data provider instance
|
|
90
|
+
# # @raise [DataError] if the file cannot be read or is invalid
|
|
91
|
+
# #
|
|
92
|
+
# # @example
|
|
93
|
+
# # provider = ICU4X::DataProvider.from_blob(Pathname.new("data.postcard"))
|
|
94
|
+
# # provider = ICU4X::DataProvider.from_blob(Pathname.new("data.postcard"), priority: :region)
|
|
95
|
+
# #
|
|
96
|
+
# def self.from_blob(path, priority: :language); end
|
|
97
|
+
# end
|
|
98
|
+
#
|
|
99
|
+
# # Generates locale data blobs for use with {DataProvider}.
|
|
100
|
+
# #
|
|
101
|
+
# # DataGenerator exports Unicode CLDR data into a compact binary format
|
|
102
|
+
# # that can be loaded at runtime by {DataProvider}.
|
|
103
|
+
# #
|
|
104
|
+
# # @example Generate data for specific locales
|
|
105
|
+
# # ICU4X::DataGenerator.export(
|
|
106
|
+
# # locales: ["en", "ja", "de"],
|
|
107
|
+
# # markers: :all,
|
|
108
|
+
# # format: :blob,
|
|
109
|
+
# # output: Pathname.new("data.postcard")
|
|
110
|
+
# # )
|
|
111
|
+
# #
|
|
112
|
+
# class DataGenerator
|
|
113
|
+
# # Exports locale data to a file.
|
|
114
|
+
# #
|
|
115
|
+
# # @param locales [Array<String>] list of locale identifiers to include
|
|
116
|
+
# # @param markers [Symbol, Array<String>] data markers to include;
|
|
117
|
+
# # use +:all+ for all markers, or specify individual marker names
|
|
118
|
+
# # @param format [Symbol] output format, currently only +:blob+ is supported
|
|
119
|
+
# # @param output [Pathname] path to write the output file
|
|
120
|
+
# # @return [void]
|
|
121
|
+
# # @raise [DataGeneratorError] if export fails
|
|
122
|
+
# #
|
|
123
|
+
# # @example Export all data for Japanese and English
|
|
124
|
+
# # ICU4X::DataGenerator.export(
|
|
125
|
+
# # locales: ["ja", "en"],
|
|
126
|
+
# # markers: :all,
|
|
127
|
+
# # format: :blob,
|
|
128
|
+
# # output: Pathname.new("i18n_data.postcard")
|
|
129
|
+
# # )
|
|
130
|
+
# #
|
|
131
|
+
# # @see .available_markers
|
|
132
|
+
# #
|
|
133
|
+
# def self.export(locales:, markers:, format:, output:); end
|
|
134
|
+
#
|
|
135
|
+
# # Returns a list of all available data marker names.
|
|
136
|
+
# #
|
|
137
|
+
# # @return [Array<String>] list of marker names
|
|
138
|
+
# #
|
|
139
|
+
# # @example
|
|
140
|
+
# # markers = ICU4X::DataGenerator.available_markers
|
|
141
|
+
# # #=> ["datetime/gregory/datelengths@1", "decimal/symbols@1", ...]
|
|
142
|
+
# #
|
|
143
|
+
# def self.available_markers; end
|
|
144
|
+
# end
|
|
145
|
+
#
|
|
146
|
+
# # Represents a Unicode Locale Identifier (BCP 47).
|
|
147
|
+
# #
|
|
148
|
+
# # Locale provides parsing and access to locale components such as
|
|
149
|
+
# # language, script, region, and extensions.
|
|
150
|
+
# #
|
|
151
|
+
# # @example Parse a locale identifier
|
|
152
|
+
# # locale = ICU4X::Locale.parse("ja-JP")
|
|
153
|
+
# # locale.language #=> "ja"
|
|
154
|
+
# # locale.region #=> "JP"
|
|
155
|
+
# #
|
|
156
|
+
# # @example Parse a POSIX locale
|
|
157
|
+
# # locale = ICU4X::Locale.parse_posix("ja_JP.UTF-8")
|
|
158
|
+
# # locale.language #=> "ja"
|
|
159
|
+
# #
|
|
160
|
+
# # @see https://unicode.org/reports/tr35/ Unicode Locale Data Markup Language (LDML)
|
|
161
|
+
# #
|
|
162
|
+
# class Locale
|
|
163
|
+
# # Parses a BCP 47 locale identifier string.
|
|
164
|
+
# #
|
|
165
|
+
# # @param locale_str [String] a BCP 47 locale identifier (e.g., "en-US", "ja-JP-u-ca-japanese")
|
|
166
|
+
# # @return [Locale] a new Locale instance
|
|
167
|
+
# # @raise [LocaleError] if the identifier is invalid
|
|
168
|
+
# #
|
|
169
|
+
# # @example
|
|
170
|
+
# # locale = ICU4X::Locale.parse("en-US")
|
|
171
|
+
# # locale = ICU4X::Locale.parse("ja-JP-u-ca-japanese")
|
|
172
|
+
# #
|
|
173
|
+
# def self.parse(locale_str); end
|
|
174
|
+
#
|
|
175
|
+
# # Parses a POSIX locale string.
|
|
176
|
+
# #
|
|
177
|
+
# # Converts POSIX-style locale identifiers (e.g., "ja_JP.UTF-8") to
|
|
178
|
+
# # Unicode locale format.
|
|
179
|
+
# #
|
|
180
|
+
# # @param posix_str [String] a POSIX locale string
|
|
181
|
+
# # @return [Locale] a new Locale instance
|
|
182
|
+
# # @raise [LocaleError] if the identifier is invalid
|
|
183
|
+
# #
|
|
184
|
+
# # @example
|
|
185
|
+
# # locale = ICU4X::Locale.parse_posix("ja_JP.UTF-8")
|
|
186
|
+
# # locale.language #=> "ja"
|
|
187
|
+
# # locale.region #=> "JP"
|
|
188
|
+
# #
|
|
189
|
+
# def self.parse_posix(posix_str); end
|
|
190
|
+
#
|
|
191
|
+
# # Returns the language subtag.
|
|
192
|
+
# #
|
|
193
|
+
# # @return [String, nil] the language subtag (e.g., "en", "ja"), or nil if not set
|
|
194
|
+
# #
|
|
195
|
+
# def language; end
|
|
196
|
+
#
|
|
197
|
+
# # Returns the script subtag.
|
|
198
|
+
# #
|
|
199
|
+
# # @return [String, nil] the script subtag (e.g., "Latn", "Jpan"), or nil if not set
|
|
200
|
+
# #
|
|
201
|
+
# # @example
|
|
202
|
+
# # locale = ICU4X::Locale.parse("zh-Hant-TW")
|
|
203
|
+
# # locale.script #=> "Hant"
|
|
204
|
+
# #
|
|
205
|
+
# def script; end
|
|
206
|
+
#
|
|
207
|
+
# # Returns the region subtag.
|
|
208
|
+
# #
|
|
209
|
+
# # @return [String, nil] the region subtag (e.g., "US", "JP"), or nil if not set
|
|
210
|
+
# #
|
|
211
|
+
# def region; end
|
|
212
|
+
#
|
|
213
|
+
# # Returns the locale extensions.
|
|
214
|
+
# #
|
|
215
|
+
# # @return [Hash] a hash containing extension data with keys:
|
|
216
|
+
# # - +:unicode+ [Hash<String, String>] Unicode extension key-value pairs
|
|
217
|
+
# # - +:transform+ [String, nil] Transform extension string
|
|
218
|
+
# # - +:private+ [Array<String>] Private use extensions
|
|
219
|
+
# #
|
|
220
|
+
# # @example
|
|
221
|
+
# # locale = ICU4X::Locale.parse("ja-JP-u-ca-japanese")
|
|
222
|
+
# # locale.extensions[:unicode] #=> {"ca" => "japanese"}
|
|
223
|
+
# #
|
|
224
|
+
# def extensions; end
|
|
225
|
+
#
|
|
226
|
+
# # Returns the string representation of the locale.
|
|
227
|
+
# #
|
|
228
|
+
# # @return [String] the normalized BCP 47 locale identifier
|
|
229
|
+
# #
|
|
230
|
+
# def to_s; end
|
|
231
|
+
#
|
|
232
|
+
# # Returns a human-readable representation for debugging.
|
|
233
|
+
# #
|
|
234
|
+
# # @return [String] debug representation
|
|
235
|
+
# #
|
|
236
|
+
# def inspect; end
|
|
237
|
+
#
|
|
238
|
+
# # Compares two locales for equality.
|
|
239
|
+
# #
|
|
240
|
+
# # @param other [Locale] the locale to compare with
|
|
241
|
+
# # @return [Boolean] true if the locales are equal
|
|
242
|
+
# #
|
|
243
|
+
# def ==(other); end
|
|
244
|
+
#
|
|
245
|
+
# # Compares two locales for equality (used by Hash).
|
|
246
|
+
# #
|
|
247
|
+
# # @param other [Locale] the locale to compare with
|
|
248
|
+
# # @return [Boolean] true if the locales are equal
|
|
249
|
+
# #
|
|
250
|
+
# def eql?(other); end
|
|
251
|
+
#
|
|
252
|
+
# # Returns the hash code for this locale.
|
|
253
|
+
# #
|
|
254
|
+
# # @return [Integer] hash code
|
|
255
|
+
# #
|
|
256
|
+
# def hash; end
|
|
257
|
+
# end
|
|
258
|
+
#
|
|
259
|
+
# # Provides locale-aware plural rules for cardinal and ordinal numbers.
|
|
260
|
+
# #
|
|
261
|
+
# # PluralRules determines the appropriate plural category for a given number
|
|
262
|
+
# # based on the locale's pluralization rules.
|
|
263
|
+
# #
|
|
264
|
+
# # @example Cardinal plurals
|
|
265
|
+
# # rules = ICU4X::PluralRules.new(locale)
|
|
266
|
+
# # rules.select(1) #=> :one
|
|
267
|
+
# # rules.select(2) #=> :other
|
|
268
|
+
# #
|
|
269
|
+
# # @example Ordinal plurals
|
|
270
|
+
# # rules = ICU4X::PluralRules.new(locale, type: :ordinal)
|
|
271
|
+
# # rules.select(1) #=> :one (1st)
|
|
272
|
+
# # rules.select(2) #=> :two (2nd)
|
|
273
|
+
# # rules.select(3) #=> :few (3rd)
|
|
274
|
+
# #
|
|
275
|
+
# # @see https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
|
|
276
|
+
# #
|
|
277
|
+
# class PluralRules
|
|
278
|
+
# # Creates a new PluralRules instance.
|
|
279
|
+
# #
|
|
280
|
+
# # @param locale [Locale] the locale for plural rules
|
|
281
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
282
|
+
# # @param type [Symbol] plural rule type, either +:cardinal+ or +:ordinal+
|
|
283
|
+
# # @return [PluralRules] a new instance
|
|
284
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
285
|
+
# #
|
|
286
|
+
# # @example
|
|
287
|
+
# # rules = ICU4X::PluralRules.new(locale)
|
|
288
|
+
# # rules = ICU4X::PluralRules.new(locale, type: :ordinal)
|
|
289
|
+
# #
|
|
290
|
+
# def initialize(locale, provider: nil, type: :cardinal); end
|
|
291
|
+
#
|
|
292
|
+
# # Selects the plural category for a number.
|
|
293
|
+
# #
|
|
294
|
+
# # @param number [Integer, Float] the number to categorize
|
|
295
|
+
# # @return [Symbol] one of +:zero+, +:one+, +:two+, +:few+, +:many+, or +:other+
|
|
296
|
+
# #
|
|
297
|
+
# # @example
|
|
298
|
+
# # rules.select(0) #=> :other (in English)
|
|
299
|
+
# # rules.select(1) #=> :one
|
|
300
|
+
# # rules.select(2) #=> :other
|
|
301
|
+
# # rules.select(1.5) #=> :other
|
|
302
|
+
# #
|
|
303
|
+
# def select(number); end
|
|
304
|
+
#
|
|
305
|
+
# # Returns all plural categories available for this locale.
|
|
306
|
+
# #
|
|
307
|
+
# # @return [Array<Symbol>] array of category symbols
|
|
308
|
+
# #
|
|
309
|
+
# # @example
|
|
310
|
+
# # rules.categories #=> [:one, :other]
|
|
311
|
+
# #
|
|
312
|
+
# def categories; end
|
|
313
|
+
#
|
|
314
|
+
# # Returns the resolved options for this instance.
|
|
315
|
+
# #
|
|
316
|
+
# # @return [Hash] options hash with keys:
|
|
317
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
318
|
+
# # - +:type+ [Symbol] the plural rule type (+:cardinal+ or +:ordinal+)
|
|
319
|
+
# #
|
|
320
|
+
# def resolved_options; end
|
|
321
|
+
# end
|
|
322
|
+
#
|
|
323
|
+
# # Formats numbers according to locale-specific conventions.
|
|
324
|
+
# #
|
|
325
|
+
# # NumberFormat supports decimal, percent, and currency formatting
|
|
326
|
+
# # with configurable grouping, fraction digits, and rounding.
|
|
327
|
+
# #
|
|
328
|
+
# # @example Decimal formatting
|
|
329
|
+
# # formatter = ICU4X::NumberFormat.new(locale)
|
|
330
|
+
# # formatter.format(1234.56) #=> "1,234.56" (in en-US)
|
|
331
|
+
# #
|
|
332
|
+
# # @example Currency formatting
|
|
333
|
+
# # formatter = ICU4X::NumberFormat.new(locale, style: :currency, currency: "JPY")
|
|
334
|
+
# # formatter.format(1234) #=> "¥1,234" (in ja-JP)
|
|
335
|
+
# #
|
|
336
|
+
# # @example Percent formatting
|
|
337
|
+
# # formatter = ICU4X::NumberFormat.new(locale, style: :percent)
|
|
338
|
+
# # formatter.format(0.42) #=> "42%"
|
|
339
|
+
# #
|
|
340
|
+
# class NumberFormat
|
|
341
|
+
# # Creates a new NumberFormat instance.
|
|
342
|
+
# #
|
|
343
|
+
# # @param locale [Locale] the locale for formatting
|
|
344
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
345
|
+
# # @param style [Symbol] format style: +:decimal+, +:percent+, or +:currency+
|
|
346
|
+
# # @param currency [String, nil] ISO 4217 currency code (required for +:currency+ style)
|
|
347
|
+
# # @param use_grouping [Boolean] whether to use grouping separators
|
|
348
|
+
# # @param minimum_integer_digits [Integer, nil] minimum number of integer digits
|
|
349
|
+
# # @param minimum_fraction_digits [Integer, nil] minimum number of fraction digits
|
|
350
|
+
# # @param maximum_fraction_digits [Integer, nil] maximum number of fraction digits
|
|
351
|
+
# # @param rounding_mode [Symbol, nil] rounding mode for excess digits
|
|
352
|
+
# # @return [NumberFormat] a new instance
|
|
353
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
354
|
+
# #
|
|
355
|
+
# # @example
|
|
356
|
+
# # formatter = ICU4X::NumberFormat.new(locale, minimum_fraction_digits: 2)
|
|
357
|
+
# #
|
|
358
|
+
# def initialize(locale, provider: nil, style: :decimal, currency: nil,
|
|
359
|
+
# use_grouping: true, minimum_integer_digits: nil,
|
|
360
|
+
# minimum_fraction_digits: nil, maximum_fraction_digits: nil,
|
|
361
|
+
# rounding_mode: nil); end
|
|
362
|
+
#
|
|
363
|
+
# # Formats a number according to the configured options.
|
|
364
|
+
# #
|
|
365
|
+
# # @param number [Integer, Float, BigDecimal] the number to format
|
|
366
|
+
# # @return [String] the formatted number string
|
|
367
|
+
# #
|
|
368
|
+
# # @example
|
|
369
|
+
# # formatter.format(1234567.89) #=> "1,234,567.89"
|
|
370
|
+
# #
|
|
371
|
+
# def format(number); end
|
|
372
|
+
#
|
|
373
|
+
# # Returns the resolved options for this instance.
|
|
374
|
+
# #
|
|
375
|
+
# # @return [Hash] options hash with keys:
|
|
376
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
377
|
+
# # - +:style+ [Symbol] the format style
|
|
378
|
+
# # - +:use_grouping+ [Boolean] whether grouping is enabled
|
|
379
|
+
# # - +:currency+ [String] currency code (if applicable)
|
|
380
|
+
# # - +:minimum_integer_digits+ [Integer] minimum integer digits
|
|
381
|
+
# # - +:minimum_fraction_digits+ [Integer] minimum fraction digits
|
|
382
|
+
# # - +:maximum_fraction_digits+ [Integer] maximum fraction digits
|
|
383
|
+
# # - +:rounding_mode+ [Symbol] the rounding mode
|
|
384
|
+
# #
|
|
385
|
+
# def resolved_options; end
|
|
386
|
+
# end
|
|
387
|
+
#
|
|
388
|
+
# # Formats dates and times according to locale-specific conventions.
|
|
389
|
+
# #
|
|
390
|
+
# # DateTimeFormat supports various date and time styles and calendar systems.
|
|
391
|
+
# #
|
|
392
|
+
# # @example Format a date
|
|
393
|
+
# # formatter = ICU4X::DateTimeFormat.new(locale, date_style: :long)
|
|
394
|
+
# # formatter.format(Time.now) #=> "January 1, 2026"
|
|
395
|
+
# #
|
|
396
|
+
# # @example Format date and time
|
|
397
|
+
# # formatter = ICU4X::DateTimeFormat.new(locale, date_style: :short, time_style: :short)
|
|
398
|
+
# # formatter.format(Time.now) #=> "1/1/26, 12:00 PM"
|
|
399
|
+
# #
|
|
400
|
+
# # @example Use Japanese calendar
|
|
401
|
+
# # formatter = ICU4X::DateTimeFormat.new(locale, date_style: :long, calendar: :japanese)
|
|
402
|
+
# # formatter.format(Time.now) #=> "令和8年1月1日"
|
|
403
|
+
# #
|
|
404
|
+
# class DateTimeFormat
|
|
405
|
+
# # Creates a new DateTimeFormat instance.
|
|
406
|
+
# #
|
|
407
|
+
# # @param locale [Locale] the locale for formatting
|
|
408
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
409
|
+
# # @param date_style [Symbol, nil] date format style: +:full+, +:long+, +:medium+, or +:short+
|
|
410
|
+
# # @param time_style [Symbol, nil] time format style: +:full+, +:long+, +:medium+, or +:short+
|
|
411
|
+
# # @param time_zone [String, nil] IANA time zone identifier (e.g., "America/New_York")
|
|
412
|
+
# # @param calendar [Symbol] calendar system to use
|
|
413
|
+
# # @return [DateTimeFormat] a new instance
|
|
414
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
415
|
+
# #
|
|
416
|
+
# # @example
|
|
417
|
+
# # formatter = ICU4X::DateTimeFormat.new(locale, date_style: :long, time_style: :short)
|
|
418
|
+
# #
|
|
419
|
+
# def initialize(locale, provider: nil, date_style: nil, time_style: nil,
|
|
420
|
+
# time_zone: nil, calendar: :gregory); end
|
|
421
|
+
#
|
|
422
|
+
# # Formats a time value according to the configured options.
|
|
423
|
+
# #
|
|
424
|
+
# # @param time [Time] the time to format
|
|
425
|
+
# # @return [String] the formatted date/time string
|
|
426
|
+
# #
|
|
427
|
+
# def format(time); end
|
|
428
|
+
#
|
|
429
|
+
# # Returns the resolved options for this instance.
|
|
430
|
+
# #
|
|
431
|
+
# # @return [Hash] options hash with keys:
|
|
432
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
433
|
+
# # - +:calendar+ [Symbol] the calendar system
|
|
434
|
+
# # - +:date_style+ [Symbol] the date style (if set)
|
|
435
|
+
# # - +:time_style+ [Symbol] the time style (if set)
|
|
436
|
+
# # - +:time_zone+ [String] the time zone (if set)
|
|
437
|
+
# #
|
|
438
|
+
# def resolved_options; end
|
|
439
|
+
# end
|
|
440
|
+
#
|
|
441
|
+
# # Formats relative time expressions (e.g., "3 days ago", "in 2 hours").
|
|
442
|
+
# #
|
|
443
|
+
# # @example
|
|
444
|
+
# # formatter = ICU4X::RelativeTimeFormat.new(locale)
|
|
445
|
+
# # formatter.format(-1, :day) #=> "1 day ago"
|
|
446
|
+
# # formatter.format(2, :hour) #=> "in 2 hours"
|
|
447
|
+
# #
|
|
448
|
+
# # @example With auto numeric
|
|
449
|
+
# # formatter = ICU4X::RelativeTimeFormat.new(locale, numeric: :auto)
|
|
450
|
+
# # formatter.format(-1, :day) #=> "yesterday"
|
|
451
|
+
# # formatter.format(0, :day) #=> "today"
|
|
452
|
+
# #
|
|
453
|
+
# class RelativeTimeFormat
|
|
454
|
+
# # Creates a new RelativeTimeFormat instance.
|
|
455
|
+
# #
|
|
456
|
+
# # @param locale [Locale] the locale for formatting
|
|
457
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
458
|
+
# # @param style [Symbol] format style: +:long+, +:short+, or +:narrow+
|
|
459
|
+
# # @param numeric [Symbol] numeric display: +:always+ or +:auto+
|
|
460
|
+
# # @return [RelativeTimeFormat] a new instance
|
|
461
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
462
|
+
# #
|
|
463
|
+
# def initialize(locale, provider: nil, style: :long, numeric: :always); end
|
|
464
|
+
#
|
|
465
|
+
# # Formats a relative time value.
|
|
466
|
+
# #
|
|
467
|
+
# # @param value [Integer] the relative time value (negative for past, positive for future)
|
|
468
|
+
# # @param unit [Symbol] time unit: +:second+, +:minute+, +:hour+, +:day+,
|
|
469
|
+
# # +:week+, +:month+, +:quarter+, or +:year+
|
|
470
|
+
# # @return [String] the formatted relative time string
|
|
471
|
+
# #
|
|
472
|
+
# # @example
|
|
473
|
+
# # formatter.format(-3, :day) #=> "3 days ago"
|
|
474
|
+
# # formatter.format(1, :week) #=> "in 1 week"
|
|
475
|
+
# #
|
|
476
|
+
# def format(value, unit); end
|
|
477
|
+
#
|
|
478
|
+
# # Returns the resolved options for this instance.
|
|
479
|
+
# #
|
|
480
|
+
# # @return [Hash] options hash with keys:
|
|
481
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
482
|
+
# # - +:style+ [Symbol] the format style
|
|
483
|
+
# # - +:numeric+ [Symbol] the numeric display mode
|
|
484
|
+
# #
|
|
485
|
+
# def resolved_options; end
|
|
486
|
+
# end
|
|
487
|
+
#
|
|
488
|
+
# # Formats lists of items according to locale-specific conventions.
|
|
489
|
+
# #
|
|
490
|
+
# # @example Conjunction (and)
|
|
491
|
+
# # formatter = ICU4X::ListFormat.new(locale, type: :conjunction)
|
|
492
|
+
# # formatter.format(["A", "B", "C"]) #=> "A, B, and C"
|
|
493
|
+
# #
|
|
494
|
+
# # @example Disjunction (or)
|
|
495
|
+
# # formatter = ICU4X::ListFormat.new(locale, type: :disjunction)
|
|
496
|
+
# # formatter.format(["A", "B", "C"]) #=> "A, B, or C"
|
|
497
|
+
# #
|
|
498
|
+
# # @example Unit list
|
|
499
|
+
# # formatter = ICU4X::ListFormat.new(locale, type: :unit)
|
|
500
|
+
# # formatter.format(["5 lb", "12 oz"]) #=> "5 lb, 12 oz"
|
|
501
|
+
# #
|
|
502
|
+
# class ListFormat
|
|
503
|
+
# # Creates a new ListFormat instance.
|
|
504
|
+
# #
|
|
505
|
+
# # @param locale [Locale] the locale for formatting
|
|
506
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
507
|
+
# # @param type [Symbol] list type: +:conjunction+, +:disjunction+, or +:unit+
|
|
508
|
+
# # @param style [Symbol] format style: +:long+, +:short+, or +:narrow+
|
|
509
|
+
# # @return [ListFormat] a new instance
|
|
510
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
511
|
+
# #
|
|
512
|
+
# def initialize(locale, provider: nil, type: :conjunction, style: :long); end
|
|
513
|
+
#
|
|
514
|
+
# # Formats a list of strings.
|
|
515
|
+
# #
|
|
516
|
+
# # @param list [Array<String>] the list items to format
|
|
517
|
+
# # @return [String] the formatted list string
|
|
518
|
+
# #
|
|
519
|
+
# def format(list); end
|
|
520
|
+
#
|
|
521
|
+
# # Returns the resolved options for this instance.
|
|
522
|
+
# #
|
|
523
|
+
# # @return [Hash] options hash with keys:
|
|
524
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
525
|
+
# # - +:type+ [Symbol] the list type
|
|
526
|
+
# # - +:style+ [Symbol] the format style
|
|
527
|
+
# #
|
|
528
|
+
# def resolved_options; end
|
|
529
|
+
# end
|
|
530
|
+
#
|
|
531
|
+
# # Compares strings according to locale-specific collation rules.
|
|
532
|
+
# #
|
|
533
|
+
# # Collator provides locale-aware string comparison for sorting and searching.
|
|
534
|
+
# #
|
|
535
|
+
# # @example Basic comparison
|
|
536
|
+
# # collator = ICU4X::Collator.new(locale)
|
|
537
|
+
# # collator.compare("a", "b") #=> -1
|
|
538
|
+
# # collator.compare("b", "a") #=> 1
|
|
539
|
+
# # collator.compare("a", "a") #=> 0
|
|
540
|
+
# #
|
|
541
|
+
# # @example Sorting with collator
|
|
542
|
+
# # collator = ICU4X::Collator.new(locale)
|
|
543
|
+
# # words.sort { |a, b| collator.compare(a, b) }
|
|
544
|
+
# #
|
|
545
|
+
# # @example Numeric sorting
|
|
546
|
+
# # collator = ICU4X::Collator.new(locale, numeric: true)
|
|
547
|
+
# # collator.compare("file2", "file10") #=> -1 (2 < 10)
|
|
548
|
+
# #
|
|
549
|
+
# class Collator
|
|
550
|
+
# # Creates a new Collator instance.
|
|
551
|
+
# #
|
|
552
|
+
# # @param locale [Locale] the locale for collation rules
|
|
553
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
554
|
+
# # @param sensitivity [Symbol] comparison sensitivity:
|
|
555
|
+
# # +:base+, +:accent+, +:case+, or +:variant+
|
|
556
|
+
# # @param numeric [Boolean] whether to compare numeric strings as numbers
|
|
557
|
+
# # @param case_first [Symbol, nil] which case to sort first: +:upper+ or +:lower+
|
|
558
|
+
# # @return [Collator] a new instance
|
|
559
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
560
|
+
# #
|
|
561
|
+
# def initialize(locale, provider: nil, sensitivity: :variant,
|
|
562
|
+
# numeric: false, case_first: nil); end
|
|
563
|
+
#
|
|
564
|
+
# # Compares two strings.
|
|
565
|
+
# #
|
|
566
|
+
# # @param a [String] first string
|
|
567
|
+
# # @param b [String] second string
|
|
568
|
+
# # @return [Integer] -1 if a < b, 0 if a == b, 1 if a > b
|
|
569
|
+
# #
|
|
570
|
+
# def compare(a, b); end
|
|
571
|
+
#
|
|
572
|
+
# # Returns the resolved options for this instance.
|
|
573
|
+
# #
|
|
574
|
+
# # @return [Hash] options hash with keys:
|
|
575
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
576
|
+
# # - +:sensitivity+ [Symbol] the comparison sensitivity
|
|
577
|
+
# # - +:numeric+ [Boolean] whether numeric sorting is enabled
|
|
578
|
+
# # - +:case_first+ [Symbol] which case sorts first (if set)
|
|
579
|
+
# #
|
|
580
|
+
# def resolved_options; end
|
|
581
|
+
# end
|
|
582
|
+
#
|
|
583
|
+
# # Provides localized display names for languages, regions, and scripts.
|
|
584
|
+
# #
|
|
585
|
+
# # @example Language names
|
|
586
|
+
# # names = ICU4X::DisplayNames.new(locale, type: :language)
|
|
587
|
+
# # names.of("ja") #=> "Japanese"
|
|
588
|
+
# # names.of("en") #=> "English"
|
|
589
|
+
# #
|
|
590
|
+
# # @example Region names
|
|
591
|
+
# # names = ICU4X::DisplayNames.new(locale, type: :region)
|
|
592
|
+
# # names.of("JP") #=> "Japan"
|
|
593
|
+
# # names.of("US") #=> "United States"
|
|
594
|
+
# #
|
|
595
|
+
# class DisplayNames
|
|
596
|
+
# # Creates a new DisplayNames instance.
|
|
597
|
+
# #
|
|
598
|
+
# # @param locale [Locale] the locale for display names
|
|
599
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
600
|
+
# # @param type [Symbol] display name type: +:language+, +:region+, +:script+, or +:locale+
|
|
601
|
+
# # @param style [Symbol] display style: +:long+, +:short+, or +:narrow+
|
|
602
|
+
# # @param fallback [Symbol] fallback behavior: +:code+ or +:none+
|
|
603
|
+
# # @return [DisplayNames] a new instance
|
|
604
|
+
# # @raise [DataError] if data for the locale is unavailable
|
|
605
|
+
# #
|
|
606
|
+
# def initialize(locale, provider: nil, type:, style: :long, fallback: :code); end
|
|
607
|
+
#
|
|
608
|
+
# # Returns the display name for a code.
|
|
609
|
+
# #
|
|
610
|
+
# # @param code [String] the code to look up (language, region, script, or locale)
|
|
611
|
+
# # @return [String, nil] the localized display name, or nil if not found
|
|
612
|
+
# # (when fallback is +:none+)
|
|
613
|
+
# #
|
|
614
|
+
# # @example
|
|
615
|
+
# # names.of("ja") #=> "Japanese"
|
|
616
|
+
# # names.of("XX") #=> "XX" (with fallback: :code)
|
|
617
|
+
# # names.of("XX") #=> nil (with fallback: :none)
|
|
618
|
+
# #
|
|
619
|
+
# def of(code); end
|
|
620
|
+
#
|
|
621
|
+
# # Returns the resolved options for this instance.
|
|
622
|
+
# #
|
|
623
|
+
# # @return [Hash] options hash with keys:
|
|
624
|
+
# # - +:locale+ [String] the resolved locale identifier
|
|
625
|
+
# # - +:type+ [Symbol] the display name type
|
|
626
|
+
# # - +:style+ [Symbol] the display style
|
|
627
|
+
# # - +:fallback+ [Symbol] the fallback behavior
|
|
628
|
+
# #
|
|
629
|
+
# def resolved_options; end
|
|
630
|
+
# end
|
|
631
|
+
#
|
|
632
|
+
# # Segments text into graphemes, words, sentences, or lines.
|
|
633
|
+
# #
|
|
634
|
+
# # Segmenter provides Unicode-compliant text segmentation according to
|
|
635
|
+
# # UAX #29 (Text Segmentation) and UAX #14 (Line Breaking).
|
|
636
|
+
# #
|
|
637
|
+
# # @example Word segmentation
|
|
638
|
+
# # segmenter = ICU4X::Segmenter.new(granularity: :word)
|
|
639
|
+
# # segments = segmenter.segment("Hello, world!")
|
|
640
|
+
# # segments.map(&:segment) #=> ["Hello", ",", " ", "world", "!"]
|
|
641
|
+
# #
|
|
642
|
+
# # @example Grapheme segmentation
|
|
643
|
+
# # segmenter = ICU4X::Segmenter.new(granularity: :grapheme)
|
|
644
|
+
# # segments = segmenter.segment("👨\u200D👩\u200D👧")
|
|
645
|
+
# # segments.size #=> 1 (family emoji is one grapheme)
|
|
646
|
+
# #
|
|
647
|
+
# class Segmenter
|
|
648
|
+
# # Represents a segment of text.
|
|
649
|
+
# #
|
|
650
|
+
# class Segment
|
|
651
|
+
# # @return [String] the segment text
|
|
652
|
+
# attr_reader :segment
|
|
653
|
+
#
|
|
654
|
+
# # @return [Integer] the byte index of this segment in the original string
|
|
655
|
+
# attr_reader :index
|
|
656
|
+
#
|
|
657
|
+
# # Returns whether this segment is word-like.
|
|
658
|
+
# #
|
|
659
|
+
# # Only meaningful for word segmentation; returns nil for other granularities.
|
|
660
|
+
# #
|
|
661
|
+
# # @return [Boolean, nil] true if the segment is a word (not punctuation/whitespace),
|
|
662
|
+
# # nil for non-word segmentation
|
|
663
|
+
# #
|
|
664
|
+
# def word_like?; end
|
|
665
|
+
# end
|
|
666
|
+
#
|
|
667
|
+
# # Creates a new Segmenter instance.
|
|
668
|
+
# #
|
|
669
|
+
# # @param granularity [Symbol] segmentation granularity:
|
|
670
|
+
# # +:grapheme+, +:word+, +:sentence+, or +:line+
|
|
671
|
+
# # @param provider [DataProvider, nil] data provider (uses default if nil)
|
|
672
|
+
# # @return [Segmenter] a new instance
|
|
673
|
+
# # @raise [DataError] if data is unavailable
|
|
674
|
+
# #
|
|
675
|
+
# # @example
|
|
676
|
+
# # segmenter = ICU4X::Segmenter.new(granularity: :word)
|
|
677
|
+
# # segmenter = ICU4X::Segmenter.new(granularity: :sentence)
|
|
678
|
+
# #
|
|
679
|
+
# def initialize(granularity:, provider: nil); end
|
|
680
|
+
#
|
|
681
|
+
# # Segments text into an array of segments.
|
|
682
|
+
# #
|
|
683
|
+
# # @param text [String] the text to segment
|
|
684
|
+
# # @return [Array<Segment>] array of segment objects
|
|
685
|
+
# #
|
|
686
|
+
# # @example
|
|
687
|
+
# # segments = segmenter.segment("Hello world")
|
|
688
|
+
# # segments.each do |seg|
|
|
689
|
+
# # puts "#{seg.index}: #{seg.segment.inspect}"
|
|
690
|
+
# # end
|
|
691
|
+
# #
|
|
692
|
+
# def segment(text); end
|
|
693
|
+
#
|
|
694
|
+
# # Returns the resolved options for this instance.
|
|
695
|
+
# #
|
|
696
|
+
# # @return [Hash] options hash with keys:
|
|
697
|
+
# # - +:granularity+ [Symbol] the segmentation granularity
|
|
698
|
+
# #
|
|
699
|
+
# def resolved_options; end
|
|
700
|
+
# end
|
|
701
|
+
# end
|
data/lib/icu4x.rb
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "dry-configurable"
|
|
4
|
+
require "pathname"
|
|
5
|
+
|
|
6
|
+
require_relative "icu4x/icu4x" # Native extension
|
|
7
|
+
require_relative "icu4x/version"
|
|
8
|
+
|
|
9
|
+
# ICU4X provides Ruby bindings for ICU4X, a Unicode library.
|
|
10
|
+
#
|
|
11
|
+
# This module serves as the namespace for the gem's functionality.
|
|
12
|
+
module ICU4X
|
|
13
|
+
extend Dry::Configurable
|
|
14
|
+
|
|
15
|
+
setting :data_path, default: nil, constructor: ->(v) { v.nil? ? nil : Pathname(v) }
|
|
16
|
+
|
|
17
|
+
@default_provider_mutex = Mutex.new
|
|
18
|
+
|
|
19
|
+
# Returns the default data provider, building it on first use.
#
# The blob path is taken from +config.data_path+ when that is set, otherwise
# from the +ICU4X_DATA_PATH+ environment variable. An empty +ICU4X_DATA_PATH+
# is treated as unset, so it yields +nil+ instead of an attempt to load "".
# While no path is available nothing is memoized, so a later call picks up
# configuration made in the meantime.
#
# @return [DataProvider, nil] The default provider, or nil if not configured
def self.default_provider
  @default_provider_mutex.synchronize do
    @default_provider ||= begin
      env_path = ENV["ICU4X_DATA_PATH"]
      # An exported-but-empty variable must not be turned into Pathname("").
      env_path = nil if env_path&.empty?
      path = config.data_path || env_path&.then {|p| Pathname(p) }
      path && DataProvider.from_blob(path)
    end
  end
end
|
|
29
|
+
|
|
30
|
+
# Drops the memoized default provider while holding the provider mutex.
# Useful for testing.
# @return [void]
def self.reset_default_provider!
  @default_provider_mutex.synchronize { @default_provider = nil }
end
|
|
37
|
+
|
|
38
|
+
# Root of the ICU4X error hierarchy; every error below inherits from it.
class Error < StandardError
end

# Raised when locale parsing fails.
class LocaleError < Error
end

# Raised when data loading fails.
class DataError < Error
end

# Raised when data generation fails.
class DataGeneratorError < Error
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Segment data class for Segmenter
module ICU4X
  class Segmenter
    # Represents a segment of text.
    #
    # The +word_like+ member is exposed only through the +word_like?+
    # predicate; the plain reader is made private.
    #
    # @!attribute [r] segment
    #   @return [String] The segment string
    # @!attribute [r] index
    #   @return [Integer] Byte offset in original text
    # @!method word_like?
    #   Whether this segment is word-like.
    #   @return [Boolean] true if word-like (letters, numbers, CJK ideographs)
    #   @return [nil] for non-word granularity
    Segment = Data.define(:segment, :index, :word_like) do
      alias_method :word_like?, :word_like
      private :word_like
    end
  end
end
|
|
76
|
+
|
|
77
|
+
# Enhance the native Locale class
module ICU4X
  # Represents a BCP 47 locale identifier.
  class Locale
    # @return [String] Human-readable representation
    def inspect = "#<ICU4X::Locale:#{self}>"

    # Derived from the string form, so locales with the same string form
    # hash alike.
    # @return [Integer] Hash code for use as Hash key
    def hash = to_s.hash

    # Hash-key equality. Because #hash is derived from #to_s, a String key
    # with the same text shares this object's hash value, and a Hash may then
    # call eql? with it. Anything that is not a Locale is rejected before
    # delegating to ==.
    #
    # @param other [Object] Object to compare
    # @return [Boolean] True if other is a Locale equal to this one
    def eql?(other) = other.is_a?(Locale) && self == other
  end
end
|
data/sig/icu4x.rbs
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
module ICU4X
|
|
2
|
+
extend Dry::Configurable
|
|
3
|
+
|
|
4
|
+
VERSION: String
|
|
5
|
+
|
|
6
|
+
def self.default_provider: () -> DataProvider?
|
|
7
|
+
def self.reset_default_provider!: () -> void
|
|
8
|
+
|
|
9
|
+
class Error < StandardError
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
class LocaleError < Error
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
class DataError < Error
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
class DataGeneratorError < Error
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
class DataProvider
|
|
22
|
+
def self.from_blob: (Pathname path, ?priority: :language | :region) -> DataProvider
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
class DataGenerator
|
|
26
|
+
def self.export: (locales: Array[String], markers: Symbol | Array[String], format: Symbol, output: Pathname) -> void
|
|
27
|
+
def self.available_markers: () -> Array[String]
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
class Locale
|
|
31
|
+
def self.parse: (String locale_str) -> Locale
|
|
32
|
+
def self.parse_posix: (String posix_str) -> Locale
|
|
33
|
+
|
|
34
|
+
def language: () -> String?
|
|
35
|
+
def script: () -> String?
|
|
36
|
+
def region: () -> String?
|
|
37
|
+
def extensions: () -> { unicode: Hash[String, String], transform: String?, private: Array[String] }
|
|
38
|
+
def to_s: () -> String
|
|
39
|
+
def inspect: () -> String
|
|
40
|
+
# Object#== takes any object, so the parameter is left untyped rather than narrowed to Locale.
def ==: (untyped other) -> bool
|
|
41
|
+
# Hash lookups may call eql? with any key object, so the parameter is untyped.
def eql?: (untyped other) -> bool
|
|
42
|
+
def hash: () -> Integer
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
type plural_category = :zero | :one | :two | :few | :many | :other
|
|
46
|
+
type plural_rule_type = :cardinal | :ordinal
|
|
47
|
+
|
|
48
|
+
class PluralRules
|
|
49
|
+
def self.new: (Locale locale, ?provider: DataProvider, ?type: plural_rule_type) -> PluralRules
|
|
50
|
+
|
|
51
|
+
def select: (Integer | Float number) -> plural_category
|
|
52
|
+
def categories: () -> Array[plural_category]
|
|
53
|
+
def resolved_options: () -> { locale: String, type: plural_rule_type }
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
type number_format_style = :decimal | :percent | :currency
|
|
57
|
+
type rounding_mode = :ceil | :floor | :expand | :trunc | :half_ceil | :half_floor | :half_expand | :half_trunc | :half_even
|
|
58
|
+
|
|
59
|
+
type date_style = :full | :long | :medium | :short
|
|
60
|
+
type time_style = :full | :long | :medium | :short
|
|
61
|
+
type datetime_calendar = :gregory | :japanese | :buddhist | :chinese | :hebrew | :islamic | :persian | :indian | :ethiopian | :coptic | :roc | :dangi
|
|
62
|
+
|
|
63
|
+
class NumberFormat
|
|
64
|
+
def self.new: (
|
|
65
|
+
Locale locale,
|
|
66
|
+
?provider: DataProvider,
|
|
67
|
+
?style: number_format_style,
|
|
68
|
+
?currency: String,
|
|
69
|
+
?use_grouping: bool,
|
|
70
|
+
?minimum_integer_digits: Integer,
|
|
71
|
+
?minimum_fraction_digits: Integer,
|
|
72
|
+
?maximum_fraction_digits: Integer,
|
|
73
|
+
?rounding_mode: rounding_mode
|
|
74
|
+
) -> NumberFormat
|
|
75
|
+
|
|
76
|
+
def format: (Integer | Float | BigDecimal number) -> String
|
|
77
|
+
def resolved_options: () -> {
|
|
78
|
+
locale: String,
|
|
79
|
+
style: number_format_style,
|
|
80
|
+
use_grouping: bool,
|
|
81
|
+
?currency: String,
|
|
82
|
+
?minimum_integer_digits: Integer,
|
|
83
|
+
?minimum_fraction_digits: Integer,
|
|
84
|
+
?maximum_fraction_digits: Integer,
|
|
85
|
+
?rounding_mode: rounding_mode
|
|
86
|
+
}
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
class DateTimeFormat
|
|
90
|
+
def self.new: (
|
|
91
|
+
Locale locale,
|
|
92
|
+
?provider: DataProvider,
|
|
93
|
+
?date_style: date_style,
|
|
94
|
+
?time_style: time_style,
|
|
95
|
+
?time_zone: String,
|
|
96
|
+
?calendar: datetime_calendar
|
|
97
|
+
) -> DateTimeFormat
|
|
98
|
+
|
|
99
|
+
def format: (Time time) -> String
|
|
100
|
+
def resolved_options: () -> {
|
|
101
|
+
locale: String,
|
|
102
|
+
calendar: datetime_calendar,
|
|
103
|
+
?date_style: date_style,
|
|
104
|
+
?time_style: time_style,
|
|
105
|
+
?time_zone: String
|
|
106
|
+
}
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
type relative_time_format_style = :long | :short | :narrow
|
|
110
|
+
type relative_time_format_numeric = :always | :auto
|
|
111
|
+
type relative_time_unit = :second | :minute | :hour | :day | :week | :month | :quarter | :year
|
|
112
|
+
|
|
113
|
+
class RelativeTimeFormat
|
|
114
|
+
def self.new: (
|
|
115
|
+
Locale locale,
|
|
116
|
+
?provider: DataProvider,
|
|
117
|
+
?style: relative_time_format_style,
|
|
118
|
+
?numeric: relative_time_format_numeric
|
|
119
|
+
) -> RelativeTimeFormat
|
|
120
|
+
|
|
121
|
+
def format: (Integer value, relative_time_unit unit) -> String
|
|
122
|
+
def resolved_options: () -> {
|
|
123
|
+
locale: String,
|
|
124
|
+
style: relative_time_format_style,
|
|
125
|
+
numeric: relative_time_format_numeric
|
|
126
|
+
}
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
type list_format_type = :conjunction | :disjunction | :unit
|
|
130
|
+
type list_format_style = :long | :short | :narrow
|
|
131
|
+
|
|
132
|
+
type collator_sensitivity = :base | :accent | :case | :variant
|
|
133
|
+
type collator_case_first = :upper | :lower
|
|
134
|
+
|
|
135
|
+
class ListFormat
|
|
136
|
+
# +provider+ may be passed as nil explicitly; the default provider is then used.
def self.new: (
  Locale locale,
  ?provider: DataProvider?,
  ?type: list_format_type,
  ?style: list_format_style
) -> ListFormat
|
|
142
|
+
|
|
143
|
+
def format: (Array[String] list) -> String
|
|
144
|
+
def resolved_options: () -> {
|
|
145
|
+
locale: String,
|
|
146
|
+
type: list_format_type,
|
|
147
|
+
style: list_format_style
|
|
148
|
+
}
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
class Collator
|
|
152
|
+
# +provider+ and +case_first+ both accept an explicit nil, which is their default value.
def self.new: (
  Locale locale,
  ?provider: DataProvider?,
  ?sensitivity: collator_sensitivity,
  ?numeric: bool,
  ?case_first: collator_case_first?
) -> Collator
|
|
159
|
+
|
|
160
|
+
def compare: (String a, String b) -> Integer
|
|
161
|
+
def resolved_options: () -> {
|
|
162
|
+
locale: String,
|
|
163
|
+
sensitivity: collator_sensitivity,
|
|
164
|
+
numeric: bool,
|
|
165
|
+
?case_first: collator_case_first
|
|
166
|
+
}
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
type display_names_type = :language | :region | :script | :locale
|
|
170
|
+
type display_names_style = :long | :short | :narrow
|
|
171
|
+
type display_names_fallback = :code | :none
|
|
172
|
+
|
|
173
|
+
class DisplayNames
|
|
174
|
+
# +provider+ may be passed as nil explicitly; the default provider is then used.
# +type+ is the only required keyword.
def self.new: (
  Locale locale,
  ?provider: DataProvider?,
  type: display_names_type,
  ?style: display_names_style,
  ?fallback: display_names_fallback
) -> DisplayNames
|
|
181
|
+
|
|
182
|
+
def of: (String code) -> String?
|
|
183
|
+
def resolved_options: () -> {
|
|
184
|
+
locale: String,
|
|
185
|
+
type: display_names_type,
|
|
186
|
+
style: display_names_style,
|
|
187
|
+
fallback: display_names_fallback
|
|
188
|
+
}
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
type segmenter_granularity = :grapheme | :word | :sentence | :line
|
|
192
|
+
|
|
193
|
+
class Segmenter
|
|
194
|
+
class Segment
|
|
195
|
+
attr_reader segment: String
|
|
196
|
+
attr_reader index: Integer
|
|
197
|
+
|
|
198
|
+
def word_like?: () -> bool?
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# +provider+ may be passed as nil explicitly; the default provider is then used.
def self.new: (
  granularity: segmenter_granularity,
  ?provider: DataProvider?
) -> Segmenter
|
|
205
|
+
|
|
206
|
+
def segment: (String text) -> Array[Segment]
|
|
207
|
+
def resolved_options: () -> { granularity: segmenter_granularity }
|
|
208
|
+
end
|
|
209
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: icu4x
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.5.0
|
|
5
|
+
platform: x86_64-darwin
|
|
6
|
+
authors:
|
|
7
|
+
- OZAWA Sakuro
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-01-01 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: dry-configurable
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.3'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.3'
|
|
27
|
+
description: icu4x
|
|
28
|
+
email:
|
|
29
|
+
- 10973+sakuro@users.noreply.github.com
|
|
30
|
+
executables: []
|
|
31
|
+
extensions: []
|
|
32
|
+
extra_rdoc_files: []
|
|
33
|
+
files:
|
|
34
|
+
- CHANGELOG.md
|
|
35
|
+
- LICENSE.txt
|
|
36
|
+
- README.md
|
|
37
|
+
- lib/icu4x.rb
|
|
38
|
+
- lib/icu4x/3.2/icu4x.bundle
|
|
39
|
+
- lib/icu4x/3.3/icu4x.bundle
|
|
40
|
+
- lib/icu4x/3.4/icu4x.bundle
|
|
41
|
+
- lib/icu4x/version.rb
|
|
42
|
+
- lib/icu4x/yard_docs.rb
|
|
43
|
+
- sig/icu4x.rbs
|
|
44
|
+
homepage: https://github.com/sakuro/icu4x
|
|
45
|
+
licenses:
|
|
46
|
+
- MIT
|
|
47
|
+
metadata:
|
|
48
|
+
homepage_uri: https://github.com/sakuro/icu4x
|
|
49
|
+
source_code_uri: https://github.com/sakuro/icu4x.git
|
|
50
|
+
changelog_uri: https://github.com/sakuro/icu4x/blob/main/CHANGELOG.md
|
|
51
|
+
rubygems_mfa_required: 'true'
|
|
52
|
+
post_install_message:
|
|
53
|
+
rdoc_options: []
|
|
54
|
+
require_paths:
|
|
55
|
+
- lib
|
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '3.2'
|
|
61
|
+
- - "<"
|
|
62
|
+
- !ruby/object:Gem::Version
|
|
63
|
+
version: 3.5.dev
|
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
requirements: []
|
|
70
|
+
rubygems_version: 3.5.23
|
|
71
|
+
signing_key:
|
|
72
|
+
specification_version: 4
|
|
73
|
+
summary: icu4x
|
|
74
|
+
test_files: []
|