rjb 1.5.9 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/extconf.h +1 -1
- data/ext/riconv.c +142 -11
- data/ext/rjb.c +1 -1
- data/test/test.rb +20 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d1fbaad6b7b8881cdf6ca256139b8447590c578430e1775255fbc457e44f4aa
|
4
|
+
data.tar.gz: 5a3e4e5142eb5db86af1b9419f72e5128bb19975926a70c94e078e8553d6ccec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b4f7b723db8f2ce2415814e6689d26c4f38b221d75903767a9c0434cb0a1da0ba7c6b53fb58fdf7ba8a8861beea02c7cb80079b787f6702d5f03132b6fc09ea1
|
7
|
+
data.tar.gz: b1724e95cd003cc140c45f42916340bd90c7ce115933a5e83d1a31d3bf91504e2bea714600f7a430e1939083d8e1dbf29fac273eb26d1c4245ec2407cfb39abb
|
data/ext/extconf.h
CHANGED
data/ext/riconv.c
CHANGED
@@ -167,6 +167,126 @@ static void check_kcode()
|
|
167
167
|
objIconvR2J = objIconvJ2R = Qnil;
|
168
168
|
}
|
169
169
|
}
|
170
|
+
#else
|
171
|
+
VALUE cEncoding = Qnil;
|
172
|
+
VALUE encoding_utf8 = Qnil;
|
173
|
+
static void init_encoding_vars()
|
174
|
+
{
|
175
|
+
cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
|
176
|
+
encoding_utf8 = rb_const_get(cEncoding, rb_intern("UTF_8"));
|
177
|
+
}
|
178
|
+
/*
 * Report whether a NUL-terminated UTF-8 byte string contains a 4-byte
 * sequence (lead byte 0xF0..0xFF), i.e. a character that needs a surrogate
 * pair once it is re-encoded for Java.
 *
 * p: NUL-terminated byte string, scanned sequence by sequence.
 * Returns 1 when such a lead byte is found, 0 otherwise.
 */
static int contains_surrogate_pair(const unsigned char* p)
{
    while (*p)
    {
        int width;
        switch (*p & 0xf0)
        {
        case 0xf0:
            return 1;
        case 0xe0:
            width = 3;
            break;
        default:
            width = (*p & 0x80) ? 2 : 1;
            break;
        }
        /*
         * Step over the sequence one byte at a time so that a truncated
         * sequence can never carry the scan past the terminating NUL.
         */
        while (width-- > 0 && *p)
        {
            p++;
        }
    }
    return 0;
}
|
195
|
+
/*
 * Report whether a NUL-terminated byte string contains a CESU-8 encoded
 * surrogate: lead byte 0xED followed by a second byte in 0xA0..0xBF
 * (code units U+D800..U+DFFF).
 *
 * A 0xED lead followed by 0x80..0x9F is an ordinary 3-byte character
 * (U+D000..U+D7FF) and is not reported.
 *
 * p: NUL-terminated byte string.
 * Returns 1 when an encoded surrogate is found, 0 otherwise.
 */
static int contains_auxchar(const unsigned char* p)
{
    while (*p)
    {
        int width;
        /* p[1] is at worst the terminator, so reading it stays in bounds. */
        if (p[0] == 0xed && (p[1] & 0xe0) == 0xa0)
        {
#if defined(DEBUG)
            /* p[1] is non-zero here, so p[2] is still inside the string. */
            printf("find %02x %02x %02x\n", p[0], p[1], p[2]);
#endif
            return 1;
        }
        switch (*p & 0xe0)
        {
        case 0xe0:
            width = 3;
            break;
        case 0xc0:
            width = 2;
            break;
        default:
            width = 1;
            break;
        }
        /* Never step over the terminating NUL on a truncated sequence. */
        while (width-- > 0 && *p)
        {
            p++;
        }
    }
    return 0;
}
|
218
|
+
|
219
|
+
static VALUE encode_to_cesu8(const unsigned char* p)
|
220
|
+
{
|
221
|
+
size_t len = strlen(p);
|
222
|
+
char* newstr = ALLOCA_N(char, len + (len + 1) / 2);
|
223
|
+
char* dest = newstr;
|
224
|
+
int sval, i;
|
225
|
+
while (*p)
|
226
|
+
{
|
227
|
+
switch (*p & 0xf0)
|
228
|
+
{
|
229
|
+
case 0xf0:
|
230
|
+
sval = *p++ & 7;
|
231
|
+
for (i = 0; i < 3; i++)
|
232
|
+
{
|
233
|
+
sval <<= 6;
|
234
|
+
sval |= (*p++ & 0x3f);
|
235
|
+
}
|
236
|
+
*dest++ = '\xed';
|
237
|
+
*dest++ = 0xa0 | (((sval >> 16) - 1) & 0x0f);
|
238
|
+
*dest++ = 0x80 | ((sval >> 10) & 0x3f);
|
239
|
+
*dest++ = '\xed';
|
240
|
+
*dest++ = 0xb0 | ((sval >> 6) & 0x0f);
|
241
|
+
*dest++ = 0x80 | (sval & 0x3f);
|
242
|
+
break;
|
243
|
+
case 0xe0:
|
244
|
+
*dest++ = *p++;
|
245
|
+
case 0xc0:
|
246
|
+
case 0xc1:
|
247
|
+
*dest++ = *p++;
|
248
|
+
default:
|
249
|
+
*dest++ = *p++;
|
250
|
+
}
|
251
|
+
}
|
252
|
+
return rb_str_new(newstr, dest - newstr);
|
253
|
+
}
|
254
|
+
static VALUE encode_to_utf8(const unsigned char* p)
|
255
|
+
{
|
256
|
+
size_t len = strlen(p);
|
257
|
+
char* newstr = ALLOCA_N(char, len);
|
258
|
+
char* dest = newstr;
|
259
|
+
int sval, i;
|
260
|
+
while (*p)
|
261
|
+
{
|
262
|
+
if (*p == 0xed)
|
263
|
+
{
|
264
|
+
char v = *(p + 1);
|
265
|
+
char w = *(p + 2);
|
266
|
+
char y = *(p + 4);
|
267
|
+
char z = *(p + 5);
|
268
|
+
p += 6;
|
269
|
+
sval = 0x10000 + ((v & 0x0f) << 16) + ((w & 0x3f) << 10) + ((y & 0x0f) << 6) + (z & 0x3f);
|
270
|
+
sval = (((v + 1) & 0x0f) << 16) + ((w & 0x3f) << 10) + ((y & 0x0f) << 6) + (z & 0x3f);
|
271
|
+
*dest++ = 0xf0 | ((sval >> 18));
|
272
|
+
*dest++ = 0x80 | ((sval >> 12) & 0x3f);
|
273
|
+
*dest++ = 0x80 | ((sval >> 6) & 0x3f);
|
274
|
+
*dest++ = 0x80 | (sval & 0x3f);
|
275
|
+
continue;
|
276
|
+
}
|
277
|
+
switch (*p & 0xe0)
|
278
|
+
{
|
279
|
+
case 0xe0:
|
280
|
+
*dest++ = *p++;
|
281
|
+
case 0xc0:
|
282
|
+
case 0xc1:
|
283
|
+
*dest++ = *p++;
|
284
|
+
default:
|
285
|
+
*dest++ = *p++;
|
286
|
+
}
|
287
|
+
}
|
288
|
+
return rb_str_new(newstr, dest - newstr);
|
289
|
+
}
|
170
290
|
#endif
|
171
291
|
|
172
292
|
#if defined(DEBUG)
|
@@ -177,6 +297,8 @@ static void debug_out(VALUE v)
|
|
177
297
|
strlen(p), p);
|
178
298
|
fflush(stdout);
|
179
299
|
}
|
300
|
+
#else
|
301
|
+
#define debug_out(n)
|
180
302
|
#endif
|
181
303
|
|
182
304
|
VALUE exticonv_local_to_utf8(VALUE local_string)
|
@@ -192,23 +314,24 @@ VALUE exticonv_local_to_utf8(VALUE local_string)
|
|
192
314
|
return local_string;
|
193
315
|
}
|
194
316
|
#else
|
195
|
-
VALUE
|
196
|
-
|
317
|
+
VALUE encoding;
|
318
|
+
if (NIL_P(cEncoding))
|
319
|
+
{
|
320
|
+
init_encoding_vars();
|
321
|
+
}
|
197
322
|
encoding = rb_funcall(local_string, rb_intern("encoding"), 0);
|
198
|
-
|
199
|
-
if (encoding != utf8)
|
323
|
+
if (encoding != encoding_utf8)
|
200
324
|
{
|
201
|
-
VALUE ret = rb_funcall(local_string, rb_intern("encode"), 2,
|
202
|
-
#if defined(DEBUG)
|
325
|
+
VALUE ret = rb_funcall(local_string, rb_intern("encode"), 2, encoding_utf8, encoding);
|
203
326
|
debug_out(local_string);
|
204
327
|
debug_out(ret);
|
205
|
-
|
206
|
-
return ret;
|
328
|
+
local_string = ret;
|
207
329
|
}
|
208
|
-
|
330
|
+
if (contains_surrogate_pair(StringValuePtr(local_string)))
|
209
331
|
{
|
210
|
-
|
332
|
+
local_string = encode_to_cesu8(StringValuePtr(local_string));
|
211
333
|
}
|
334
|
+
return local_string;
|
212
335
|
#endif
|
213
336
|
}
|
214
337
|
|
@@ -225,6 +348,14 @@ VALUE exticonv_utf8_to_local(VALUE utf8_string)
|
|
225
348
|
return utf8_string;
|
226
349
|
}
|
227
350
|
#else
|
228
|
-
|
351
|
+
if (NIL_P(cEncoding))
|
352
|
+
{
|
353
|
+
init_encoding_vars();
|
354
|
+
}
|
355
|
+
if (contains_auxchar(StringValuePtr(utf8_string)))
|
356
|
+
{
|
357
|
+
utf8_string = encode_to_utf8(StringValuePtr(utf8_string));
|
358
|
+
}
|
359
|
+
return rb_funcall(utf8_string, rb_intern("force_encoding"), 1, encoding_utf8);
|
229
360
|
#endif
|
230
361
|
}
|
data/ext/rjb.c
CHANGED
data/test/test.rb
CHANGED
@@ -216,17 +216,28 @@ class TestRjb < Test::Unit::TestCase
|
|
216
216
|
end
|
217
217
|
|
218
218
|
def test_combination_charcters
|
219
|
-
teststr = "\xc7\x96\xc3\xbc\xcc\x84\x75\xcc\x88\xcc\x84
|
219
|
+
teststr = "\xc7\x96\xc3\xbc\xcc\x84\x75\xcc\x88\xcc\x84𪚲\xe3\x81\x8b\xe3\x82\x9a"
|
220
220
|
test = import('jp.co.infoseek.hp.arton.rjb.Test').new
|
221
221
|
s = test.getUmlaut()
|
222
222
|
if Object::const_defined?(:Encoding) #>=1.9
|
223
|
-
|
224
|
-
|
223
|
+
=begin
|
224
|
+
n = [teststr.bytes.length, s.bytes.length].max
|
225
|
+
puts "org:#{teststr.bytes.length}, ret:#{s.bytes.length}"
|
226
|
+
0.upto(n - 1) do |i|
|
227
|
+
b0 = teststr.getbyte(i)
|
228
|
+
b0 = 0 unless b0
|
229
|
+
b1 = s.getbyte(i)
|
230
|
+
b1 = 0 unless b1
|
231
|
+
puts sprintf("%02X - %02X\n", b0, b1)
|
232
|
+
end
|
233
|
+
=end
|
234
|
+
assert_equal(teststr.bytes.length, s.bytes.length)
|
235
|
+
assert_equal(teststr, s)
|
225
236
|
else
|
226
237
|
default_kcode = $KCODE
|
227
238
|
begin
|
228
239
|
$KCODE = "utf8"
|
229
|
-
assert_equal(
|
240
|
+
assert_equal(teststr, s)
|
230
241
|
ensure
|
231
242
|
$KCODE = default_kcode
|
232
243
|
end
|
@@ -947,5 +958,10 @@ class TestRjb < Test::Unit::TestCase
|
|
947
958
|
end
|
948
959
|
end
|
949
960
|
end
|
961
|
+
|
962
|
+
# A string whose first character lies outside the Basic Multilingual Plane
# must come back unchanged after a round trip through a Java String.
def test_java_utf8
  expected = '𠮷野家'
  java_string = @jString.new(expected)
  assert_equal(expected, java_string.toString)
end
|
950
966
|
end
|
951
967
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rjb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.9
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- arton
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'RJB is a bridge program that connect between Ruby and Java with Java
|
14
14
|
Native Interface.
|