icu4r_19 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +87 -0
- data/MIT-LICENSE +20 -0
- data/README +156 -0
- data/Rakefile +32 -0
- data/calendar.c +636 -0
- data/collator.c +233 -0
- data/converter.c +322 -0
- data/docs/FORMATTING +131 -0
- data/docs/UNICODE_REGEXPS +204 -0
- data/extconf.rb +17 -0
- data/fmt.cpp +156 -0
- data/icu4r.c +18 -0
- data/icu_common.h +45 -0
- data/lib/dummy +0 -0
- data/samples/demo_each.rb +23 -0
- data/samples/demo_locales.rb +16 -0
- data/samples/demo_regexp.rb +11 -0
- data/samples/resbundle/appmsg/root.res +0 -0
- data/samples/resbundle/appmsg/ru.res +0 -0
- data/samples/resbundle/demo_bundle.rb +4 -0
- data/samples/resbundle/mkres.sh +4 -0
- data/samples/resbundle/root.txt +10 -0
- data/samples/resbundle/ru.txt +4 -0
- data/test/test_calendar.rb +123 -0
- data/test/test_collator.rb +33 -0
- data/test/test_converter.rb +72 -0
- data/test/test_ustring.rb +508 -0
- data/tools/doc.sh +2 -0
- data/tools/km.rb +425 -0
- data/ubundle.c +223 -0
- data/ucore_ext.c +168 -0
- data/uregex.c +697 -0
- data/uregex.h +27 -0
- data/ustring.c +3039 -0
- metadata +164 -0
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: icu4r_19
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '1.0'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Nikolai Lugovoi
|
9
|
+
- Perry Smith
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2011-10-16 00:00:00.000000000Z
|
14
|
+
dependencies: []
|
15
|
+
description: ! "== ICU4R - ICU Unicode bindings for Ruby\n\nICU4R is an attempt to
|
16
|
+
provide better Unicode support for Ruby,\nwhere it lacks for a long time.\n\nCurrent
|
17
|
+
code is mostly rewritten string.c from Ruby 1.8.3.\n\nICU4R is Ruby C-extension
|
18
|
+
binding for ICU library[1] \nand provides following classes and functionality:\n\n*
|
19
|
+
UString:\n - String-like class with internal UTF16 storage;\n - UCA rules
|
20
|
+
for UString comparisons (<=>, casecmp);\n - encoding(codepage) conversion;\n
|
21
|
+
\ - Unicode normalization;\n - transliteration, also rule-based;\n\n Bunch
|
22
|
+
of locale-sensitive functions:\n - upcase/downcase;\n - string collation;\n
|
23
|
+
\ - string search;\n - iterators over text line/word/char/sentence breaks;\n
|
24
|
+
\ - message formatting (number/currency/string/time);\n - date and number parsing.\n\n*
|
25
|
+
URegexp - unicode regular expressions.\n\n* UResourceBundle - access to resource
|
26
|
+
bundles, including ICU locale data.\n\n* UCalendar - date manipulation and timezone
|
27
|
+
info.\n\n* UConverter - codepage conversions API\n\n* UCollator - locale-sensitive
|
28
|
+
string comparison\n\n== Install and usage\n\n > ruby extconf.rb\n > make &&
|
29
|
+
make check\n > make install\n\nNow, in your scripts just require 'icu4r'.\n\nTo
|
30
|
+
create RDoc, run \n > sh tools/doc.sh\n\n== Requirements\n\nTo build and use ICU4R
|
31
|
+
you will need GCC and ICU v3.4 libraries[2].\n\n== Differences from Ruby String
|
32
|
+
and Regexp classes\n\n=== UString vs String\n\n1. UString substring/index methods
|
33
|
+
use UTF16 codeunit indexes, not code points.\n\n2. UString supports most methods
|
34
|
+
from String class. Missing methods are:\n capitalize, capitalize!, swapcase,
|
35
|
+
swapcase!\n %, center, ljust, rjust\n chomp, chomp!, chop, chop!\n
|
36
|
+
\ count, delete, delete!, squeeze, squeeze!, tr, tr!, tr_s, tr_s!\n crypt,
|
37
|
+
intern, sum, unpack\n dump, each_byte, each_line\n hex, oct, to_i,
|
38
|
+
to_sym\n reverse, reverse!\n succ, succ!, next, next!, upto\n \n3.
|
39
|
+
Instead of String#% method, UString#format is provided. See FORMATTING for short
|
40
|
+
reference.\n\n4. UStrings can be created via String.to_u(encoding='utf8') or global
|
41
|
+
u(str,[encoding='utf8'])\n calls. Note that +encoding+ parameter must be value
|
42
|
+
of String class. \n\n5. There's difference between character grapheme, codepoint
|
43
|
+
and codeunit. See UNICODE reports for\n gory details, but in short: locale dependent
|
44
|
+
notion of character can be presented using \n more than one codepoint - base letter
|
45
|
+
and combining (accents) (also possible more than one!), and\n each codepoint can
|
46
|
+
require more than one codeunit to store (for UTF8 codeunit size is 8bit, though\n
|
47
|
+
\ some codepoints require up to 4bytes). So, UString has normalization and locale
|
48
|
+
dependent break\n iterators.\n\t\n6. Currently UString doesn't include Enumerable
|
49
|
+
module.\n\n7. UString index/[] methods which accept URegexp, throw exception if
|
50
|
+
Regexp passed.\n\n8. UString#<=>, UString#casecmp use UCA rules.\n\n=== URegexp\n\nUString
|
51
|
+
uses ICU regexp library. Pattern syntax is described in [./docs/UNICODE_REGEXPS]
|
52
|
+
and ICU docs.\n\nThere are some differences between processing in Ruby Regexp and
|
53
|
+
URegexp:\n\n1. When UString#sub, UString#gsub are called with block, special vars
|
54
|
+
($~, $&, $1, ...) aren't\n set, as their values are processed through deep ruby
|
55
|
+
core code. Instead, block receives UMatch object,\n which is essentially immutable
|
56
|
+
array of matching groups:\n \"test\".u.gsub(ure(\"(e)(.)\")) do |match| \n
|
57
|
+
\ puts match[0] # => 'es' <--> $&\n puts match[1] # => 'e'
|
58
|
+
\ <--> $1\n puts match[2] # => 's' <--> $2\n end\n\n2. In URegexp
|
59
|
+
search pattern backreferences are in form \\n (\\1, \\2, ...), \n in replacement
|
60
|
+
string - in form $1, $2, ...\n\n NOTE: URegexp considers char to be a digit NOT
|
61
|
+
ONLY ASCII (0x0030-0x0039), but \n any Unicode char, which has property Decimal
|
62
|
+
digit number (Nd), e.g.:\n a = [?$, 0x1D7D9].pack(\"U*\").u * 2\n puts
|
63
|
+
a.inspect_names\n <U000024>DOLLAR SIGN\n <U01D7D9>MATHEMATICAL DOUBLE-STRUCK
|
64
|
+
DIGIT ONE\n <U000024>DOLLAR SIGN\n <U01D7D9>MATHEMATICAL DOUBLE-STRUCK
|
65
|
+
DIGIT ONE\n puts \"abracadabra\".u.gsub(/(b)/.U, a)\n abbracadabbra\n
|
66
|
+
\ \n\n3. One can create URegexp using global Kernel#ure function, Regexp#U, Regexp#to_u,
|
67
|
+
or\n from UString using URegexp.new, e.g:\n /pattern/.U =~ \"string\".u\n\n4.
|
68
|
+
There are differences about Regexp and URegexp multiline matching options:\n t
|
69
|
+
= \"text\\ntest\"\n # ^,$ handling : URegexp multiline <-> Ruby default\n t.u
|
70
|
+
=~ ure('^\\w+$', URegexp::MULTILINE)\n => #<UMatch:0xf6f7de04 @ranges=[0..3],
|
71
|
+
@cg=[\\u0074\\u0065\\u0078\\u0074]>\n t =~ /^\\w+$/\n => 0\n # .
|
72
|
+
matches \\n : URegexp DOTALL <-> /m\n t.u =~ ure('.+test', URegexp::DOTALL)\n
|
73
|
+
\ => #<UMatch:0xf6fa4d88 ...\n t.u =~ /.+test/m\n\n5. UMatch.range(idx)
|
74
|
+
returns range for capturing group idx. This range is in codeunits.\n\n=== References\n\n1.
|
75
|
+
ICU Official Homepage http://ibm.com/software/globalization/icu/ \n2. ICU downloads
|
76
|
+
\ http://ibm.com/software/globalization/icu/downloads.jsp\n3. ICU Home Page http://icu.sf.net
|
77
|
+
\n4. Unicode Home Page http://www.unicode.org\n\n==== BUGS, DOCS, TO DO\n\nThe code
|
78
|
+
is slow and inefficient yet, is still highly experimental, \nso can have many security
|
79
|
+
and memory leaks, bugs, inconsistent \ndocumentation, incomplete test suite. Use
|
80
|
+
it at your own risk.\n\nBug reports and feature requests are welcome :)\n\n=== Copying\n\nThis
|
81
|
+
extension module is copyrighted free software by Nikolai Lugovoi.\n\nYou can redistribute
|
82
|
+
it and/or modify it under the terms of MIT License.\n\nNikolai Lugovoi <meadow.nnick@gmail.com>\n\n"
|
83
|
+
email: pedz@easesoftware.com
|
84
|
+
executables: []
|
85
|
+
extensions:
|
86
|
+
- extconf.rb
|
87
|
+
extra_rdoc_files:
|
88
|
+
- README
|
89
|
+
- docs/FORMATTING
|
90
|
+
- docs/UNICODE_REGEXPS
|
91
|
+
- MIT-LICENSE
|
92
|
+
- calendar.c
|
93
|
+
- collator.c
|
94
|
+
- converter.c
|
95
|
+
- icu4r.c
|
96
|
+
- ubundle.c
|
97
|
+
- ucore_ext.c
|
98
|
+
- uregex.c
|
99
|
+
- ustring.c
|
100
|
+
files:
|
101
|
+
- calendar.c
|
102
|
+
- ChangeLog
|
103
|
+
- collator.c
|
104
|
+
- converter.c
|
105
|
+
- docs/FORMATTING
|
106
|
+
- docs/UNICODE_REGEXPS
|
107
|
+
- extconf.rb
|
108
|
+
- fmt.cpp
|
109
|
+
- icu4r.c
|
110
|
+
- icu_common.h
|
111
|
+
- lib/dummy
|
112
|
+
- MIT-LICENSE
|
113
|
+
- Rakefile
|
114
|
+
- README
|
115
|
+
- samples/demo_each.rb
|
116
|
+
- samples/demo_locales.rb
|
117
|
+
- samples/demo_regexp.rb
|
118
|
+
- samples/resbundle/appmsg/root.res
|
119
|
+
- samples/resbundle/appmsg/ru.res
|
120
|
+
- samples/resbundle/demo_bundle.rb
|
121
|
+
- samples/resbundle/mkres.sh
|
122
|
+
- samples/resbundle/root.txt
|
123
|
+
- samples/resbundle/ru.txt
|
124
|
+
- test/test_calendar.rb
|
125
|
+
- test/test_collator.rb
|
126
|
+
- test/test_converter.rb
|
127
|
+
- test/test_ustring.rb
|
128
|
+
- tools/doc.sh
|
129
|
+
- tools/km.rb
|
130
|
+
- ubundle.c
|
131
|
+
- ucore_ext.c
|
132
|
+
- uregex.c
|
133
|
+
- uregex.h
|
134
|
+
- ustring.c
|
135
|
+
homepage: https://github.com/pedz/icu4r_19
|
136
|
+
licenses: []
|
137
|
+
post_install_message:
|
138
|
+
rdoc_options:
|
139
|
+
- -c
|
140
|
+
- utf-8
|
141
|
+
- -x
|
142
|
+
- dummy
|
143
|
+
require_paths:
|
144
|
+
- lib
|
145
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
146
|
+
none: false
|
147
|
+
requirements:
|
148
|
+
- - ! '>='
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '1.9'
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
|
+
none: false
|
153
|
+
requirements:
|
154
|
+
- - ! '>='
|
155
|
+
- !ruby/object:Gem::Version
|
156
|
+
version: '0'
|
157
|
+
requirements:
|
158
|
+
- ICU libraries v 4.6.1
|
159
|
+
rubyforge_project:
|
160
|
+
rubygems_version: 1.8.11
|
161
|
+
signing_key:
|
162
|
+
specification_version: 3
|
163
|
+
summary: Ruby extension for Unicode support using ICU - 1.9.2 compatible
|
164
|
+
test_files: []
|