rjb 1.5.9 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. checksums.yaml +4 -4
  2. data/ext/extconf.h +1 -1
  3. data/ext/riconv.c +142 -11
  4. data/ext/rjb.c +1 -1
  5. data/test/test.rb +20 -4
  6. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b8eddc0f7bf9505a5f48e1098dd18f0133253986978599c40e5c39ce6cc48b90
4
- data.tar.gz: 880d819371d1cb1c738c2db0c0395873d86391ecef8b056c7597aacf930598ce
3
+ metadata.gz: 7d1fbaad6b7b8881cdf6ca256139b8447590c578430e1775255fbc457e44f4aa
4
+ data.tar.gz: 5a3e4e5142eb5db86af1b9419f72e5128bb19975926a70c94e078e8553d6ccec
5
5
  SHA512:
6
- metadata.gz: ad9b557b9ff3d9091663df6041b63672ba96a2e192f23deba2dbe6dfe94edf09f36176e8894b2e71c8194f68e6f4e23b1e67c9a7064c52c5dea5ce0723be9e14
7
- data.tar.gz: 7da7f62f72373a83ef662ee62cd37c7fdac28a52c6fc059af337970a7249103858442c304b139945118371c2893ac6057404f619016d4cff758a5aabaffd80f2
6
+ metadata.gz: b4f7b723db8f2ce2415814e6689d26c4f38b221d75903767a9c0434cb0a1da0ba7c6b53fb58fdf7ba8a8861beea02c7cb80079b787f6702d5f03132b6fc09ea1
7
+ data.tar.gz: b1724e95cd003cc140c45f42916340bd90c7ce115933a5e83d1a31d3bf91504e2bea714600f7a430e1939083d8e1dbf29fac273eb26d1c4245ec2407cfb39abb
@@ -4,5 +4,5 @@
4
4
  #define HAVE_NL_LANGINFO 1
5
5
  #define HAVE_SETLOCALE 1
6
6
  #define HAVE_GETENV 1
7
- #define RJB_RUBY_VERSION_CODE 261
7
+ #define RJB_RUBY_VERSION_CODE 270
8
8
  #endif
@@ -167,6 +167,126 @@ static void check_kcode()
167
167
  objIconvR2J = objIconvJ2R = Qnil;
168
168
  }
169
169
  }
170
+ #else
171
+ VALUE cEncoding = Qnil;
172
+ VALUE encoding_utf8 = Qnil;
173
+ static void init_encoding_vars()
174
+ {
175
+ cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
176
+ encoding_utf8 = rb_const_get(cEncoding, rb_intern("UTF_8"));
177
+ }
178
/*
 * Reports whether the NUL-terminated string `p` contains a byte that starts
 * a 4-byte UTF-8 sequence (high nibble 0xF), i.e. a character that the
 * caller re-encodes as a surrogate pair via encode_to_cesu8().
 *
 * The string is walked sequence by sequence using the width implied by each
 * lead byte. The walk stops at the terminator even when a multi-byte
 * sequence is truncated, so malformed input is never read out of bounds.
 *
 * Returns 1 when such a byte is found, 0 otherwise (including for "").
 */
static int contains_surrogate_pair(const unsigned char* p)
{
    while (*p)
    {
        int width;
        switch (*p & 0xf0)
        {
        case 0xf0:
            return 1;
        case 0xe0:
            width = 3;
            break;
        default:
            width = (*p & 0x80) ? 2 : 1;
            break;
        }
        /* Skip the sequence, but never step over the terminating NUL. */
        while (width-- > 0 && *p)
        {
            p++;
        }
    }
    return 0;
}
195
/*
 * Reports whether the NUL-terminated string `p` contains a surrogate pair
 * written as two 3-byte sequences (ED A0..AF xx, ED B0..BF xx), which is the
 * 6-byte form that encode_to_utf8() collapses into one 4-byte sequence.
 *
 * A bare 0xED lead byte is not enough to decide: 0xED also starts ordinary
 * characters in U+D000..U+D7FF (second byte 0x80..0x9F), so all six bytes of
 * the pair are checked. The checks short-circuit on the first mismatch,
 * which also keeps them from reading past the terminator.
 *
 * Returns 1 when a complete pair is found, 0 otherwise.
 */
static int contains_auxchar(const unsigned char* p)
{
    while (*p)
    {
        int width;
        if (p[0] == 0xed && (p[1] & 0xf0) == 0xa0 && (p[2] & 0xc0) == 0x80
            && p[3] == 0xed && (p[4] & 0xf0) == 0xb0 && (p[5] & 0xc0) == 0x80)
        {
#if defined(DEBUG)
            printf("find %02x %02x %02x %02x %02x %02x\n", *p, *(p + 1), *(p + 2), *(p + 3), *(p + 4), *(p + 5));
#endif
            return 1;
        }
        switch (*p & 0xe0)
        {
        case 0xe0:
            width = 3;
            break;
        case 0xc0:
            width = 2;
            break;
        default:
            width = 1;
            break;
        }
        /* Skip the sequence, but never step over the terminating NUL. */
        while (width-- > 0 && *p)
        {
            p++;
        }
    }
    return 0;
}
218
+
219
+ static VALUE encode_to_cesu8(const unsigned char* p)
220
+ {
221
+ size_t len = strlen(p);
222
+ char* newstr = ALLOCA_N(char, len + (len + 1) / 2);
223
+ char* dest = newstr;
224
+ int sval, i;
225
+ while (*p)
226
+ {
227
+ switch (*p & 0xf0)
228
+ {
229
+ case 0xf0:
230
+ sval = *p++ & 7;
231
+ for (i = 0; i < 3; i++)
232
+ {
233
+ sval <<= 6;
234
+ sval |= (*p++ & 0x3f);
235
+ }
236
+ *dest++ = '\xed';
237
+ *dest++ = 0xa0 | (((sval >> 16) - 1) & 0x0f);
238
+ *dest++ = 0x80 | ((sval >> 10) & 0x3f);
239
+ *dest++ = '\xed';
240
+ *dest++ = 0xb0 | ((sval >> 6) & 0x0f);
241
+ *dest++ = 0x80 | (sval & 0x3f);
242
+ break;
243
+ case 0xe0:
244
+ *dest++ = *p++;
245
+ case 0xc0:
246
+ case 0xc1:
247
+ *dest++ = *p++;
248
+ default:
249
+ *dest++ = *p++;
250
+ }
251
+ }
252
+ return rb_str_new(newstr, dest - newstr);
253
+ }
254
+ static VALUE encode_to_utf8(const unsigned char* p)
255
+ {
256
+ size_t len = strlen(p);
257
+ char* newstr = ALLOCA_N(char, len);
258
+ char* dest = newstr;
259
+ int sval, i;
260
+ while (*p)
261
+ {
262
+ if (*p == 0xed)
263
+ {
264
+ char v = *(p + 1);
265
+ char w = *(p + 2);
266
+ char y = *(p + 4);
267
+ char z = *(p + 5);
268
+ p += 6;
269
+ sval = 0x10000 + ((v & 0x0f) << 16) + ((w & 0x3f) << 10) + ((y & 0x0f) << 6) + (z & 0x3f);
270
+ sval = (((v + 1) & 0x0f) << 16) + ((w & 0x3f) << 10) + ((y & 0x0f) << 6) + (z & 0x3f);
271
+ *dest++ = 0xf0 | ((sval >> 18));
272
+ *dest++ = 0x80 | ((sval >> 12) & 0x3f);
273
+ *dest++ = 0x80 | ((sval >> 6) & 0x3f);
274
+ *dest++ = 0x80 | (sval & 0x3f);
275
+ continue;
276
+ }
277
+ switch (*p & 0xe0)
278
+ {
279
+ case 0xe0:
280
+ *dest++ = *p++;
281
+ case 0xc0:
282
+ case 0xc1:
283
+ *dest++ = *p++;
284
+ default:
285
+ *dest++ = *p++;
286
+ }
287
+ }
288
+ return rb_str_new(newstr, dest - newstr);
289
+ }
170
290
  #endif
171
291
 
172
292
  #if defined(DEBUG)
@@ -177,6 +297,8 @@ static void debug_out(VALUE v)
177
297
  strlen(p), p);
178
298
  fflush(stdout);
179
299
  }
300
+ #else
301
+ #define debug_out(n)
180
302
  #endif
181
303
 
182
304
  VALUE exticonv_local_to_utf8(VALUE local_string)
@@ -192,23 +314,24 @@ VALUE exticonv_local_to_utf8(VALUE local_string)
192
314
  return local_string;
193
315
  }
194
316
  #else
195
- VALUE cEncoding, encoding, utf8;
196
- cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
317
+ VALUE encoding;
318
+ if (NIL_P(cEncoding))
319
+ {
320
+ init_encoding_vars();
321
+ }
197
322
  encoding = rb_funcall(local_string, rb_intern("encoding"), 0);
198
- utf8 = rb_const_get(cEncoding, rb_intern("UTF_8"));
199
- if (encoding != utf8)
323
+ if (encoding != encoding_utf8)
200
324
  {
201
- VALUE ret = rb_funcall(local_string, rb_intern("encode"), 2, utf8, encoding);
202
- #if defined(DEBUG)
325
+ VALUE ret = rb_funcall(local_string, rb_intern("encode"), 2, encoding_utf8, encoding);
203
326
  debug_out(local_string);
204
327
  debug_out(ret);
205
- #endif
206
- return ret;
328
+ local_string = ret;
207
329
  }
208
- else
330
+ if (contains_surrogate_pair(StringValuePtr(local_string)))
209
331
  {
210
- return local_string;
332
+ local_string = encode_to_cesu8(StringValuePtr(local_string));
211
333
  }
334
+ return local_string;
212
335
  #endif
213
336
  }
214
337
 
@@ -225,6 +348,14 @@ VALUE exticonv_utf8_to_local(VALUE utf8_string)
225
348
  return utf8_string;
226
349
  }
227
350
  #else
228
- return rb_funcall(utf8_string, rb_intern("force_encoding"), 1, rb_const_get(rb_cEncoding, rb_intern("UTF_8")));
351
+ if (NIL_P(cEncoding))
352
+ {
353
+ init_encoding_vars();
354
+ }
355
+ if (contains_auxchar(StringValuePtr(utf8_string)))
356
+ {
357
+ utf8_string = encode_to_utf8(StringValuePtr(utf8_string));
358
+ }
359
+ return rb_funcall(utf8_string, rb_intern("force_encoding"), 1, encoding_utf8);
229
360
  #endif
230
361
  }
data/ext/rjb.c CHANGED
@@ -14,7 +14,7 @@
14
14
  *
15
15
  */
16
16
 
17
- #define RJB_VERSION "1.5.9"
17
+ #define RJB_VERSION "1.6.0"
18
18
 
19
19
  #include "ruby.h"
20
20
  #include "extconf.h"
@@ -216,17 +216,28 @@ class TestRjb < Test::Unit::TestCase
216
216
  end
217
217
 
218
218
  def test_combination_charcters
219
- teststr = "\xc7\x96\xc3\xbc\xcc\x84\x75\xcc\x88\xcc\x84\xed\xa1\xa9\xed\xba\xb2\xe3\x81\x8b\xe3\x82\x9a"
219
+ teststr = "\xc7\x96\xc3\xbc\xcc\x84\x75\xcc\x88\xcc\x84𪚲\xe3\x81\x8b\xe3\x82\x9a"
220
220
  test = import('jp.co.infoseek.hp.arton.rjb.Test').new
221
221
  s = test.getUmlaut()
222
222
  if Object::const_defined?(:Encoding) #>=1.9
223
- teststr = teststr.force_encoding(Encoding::UTF_8)
224
- assert_equal(s, teststr)
223
+ =begin
224
+ n = [teststr.bytes.length, s.bytes.length].max
225
+ puts "org:#{teststr.bytes.length}, ret:#{s.bytes.length}"
226
+ 0.upto(n - 1) do |i|
227
+ b0 = teststr.getbyte(i)
228
+ b0 = 0 unless b0
229
+ b1 = s.getbyte(i)
230
+ b1 = 0 unless b1
231
+ puts sprintf("%02X - %02X\n", b0, b1)
232
+ end
233
+ =end
234
+ assert_equal(teststr.bytes.length, s.bytes.length)
235
+ assert_equal(teststr, s)
225
236
  else
226
237
  default_kcode = $KCODE
227
238
  begin
228
239
  $KCODE = "utf8"
229
- assert_equal(s, teststr)
240
+ assert_equal(teststr, s)
230
241
  ensure
231
242
  $KCODE = default_kcode
232
243
  end
@@ -947,5 +958,10 @@ class TestRjb < Test::Unit::TestCase
947
958
  end
948
959
  end
949
960
  end
961
+
962
# Passes a string whose first character (U+20BB7) lies outside the BMP to the
# Java String constructor and expects toString to return the same text.
def test_java_utf8
  java_string = @jString.new('𠮷野家')
  assert_equal('𠮷野家', java_string.toString)
end
950
966
  end
951
967
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rjb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.9
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - arton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-17 00:00:00.000000000 Z
11
+ date: 2019-07-10 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'RJB is a bridge program that connect between Ruby and Java with Java
14
14
  Native Interface.