rjb 1.5.9 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +4 -4
  2. data/ext/extconf.h +1 -1
  3. data/ext/riconv.c +142 -11
  4. data/ext/rjb.c +1 -1
  5. data/test/test.rb +20 -4
  6. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b8eddc0f7bf9505a5f48e1098dd18f0133253986978599c40e5c39ce6cc48b90
4
- data.tar.gz: 880d819371d1cb1c738c2db0c0395873d86391ecef8b056c7597aacf930598ce
3
+ metadata.gz: 7d1fbaad6b7b8881cdf6ca256139b8447590c578430e1775255fbc457e44f4aa
4
+ data.tar.gz: 5a3e4e5142eb5db86af1b9419f72e5128bb19975926a70c94e078e8553d6ccec
5
5
  SHA512:
6
- metadata.gz: ad9b557b9ff3d9091663df6041b63672ba96a2e192f23deba2dbe6dfe94edf09f36176e8894b2e71c8194f68e6f4e23b1e67c9a7064c52c5dea5ce0723be9e14
7
- data.tar.gz: 7da7f62f72373a83ef662ee62cd37c7fdac28a52c6fc059af337970a7249103858442c304b139945118371c2893ac6057404f619016d4cff758a5aabaffd80f2
6
+ metadata.gz: b4f7b723db8f2ce2415814e6689d26c4f38b221d75903767a9c0434cb0a1da0ba7c6b53fb58fdf7ba8a8861beea02c7cb80079b787f6702d5f03132b6fc09ea1
7
+ data.tar.gz: b1724e95cd003cc140c45f42916340bd90c7ce115933a5e83d1a31d3bf91504e2bea714600f7a430e1939083d8e1dbf29fac273eb26d1c4245ec2407cfb39abb
@@ -4,5 +4,5 @@
4
4
  #define HAVE_NL_LANGINFO 1
5
5
  #define HAVE_SETLOCALE 1
6
6
  #define HAVE_GETENV 1
7
- #define RJB_RUBY_VERSION_CODE 261
7
+ #define RJB_RUBY_VERSION_CODE 270
8
8
  #endif
@@ -167,6 +167,126 @@ static void check_kcode()
167
167
  objIconvR2J = objIconvJ2R = Qnil;
168
168
  }
169
169
  }
170
+ #else
171
+ VALUE cEncoding = Qnil;
172
+ VALUE encoding_utf8 = Qnil;
173
+ static void init_encoding_vars()
174
+ {
175
+ cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
176
+ encoding_utf8 = rb_const_get(cEncoding, rb_intern("UTF_8"));
177
+ }
178
+ static int contains_surrogate_pair(const unsigned char* p)
179
+ {
180
+ while (*p)
181
+ {
182
+ switch (*p & 0xf0)
183
+ {
184
+ case 0xf0:
185
+ return 1;
186
+ case 0xe0:
187
+ p += 3;
188
+ break;
189
+ default:
190
+ p += (*p & 0x80) ? 2 : 1;
191
+ }
192
+ }
193
+ return 0;
194
+ }
195
+ static int contains_auxchar(const unsigned char* p)
196
+ {
197
+ while (*p)
198
+ {
199
+ if (*p == 0xed)
200
+ {
201
+ #if defined(DEBUG)
202
+ printf("find %02x %02x %02x %02x %02x %02x\n", *p, *(p + 1), *(p + 2), *(p + 3), *(p + 4), *(p + 5));
203
+ #endif
204
+ return 1;
205
+ }
206
+ switch (*p & 0xe0)
207
+ {
208
+ case 0xe0:
209
+ p++;
210
+ case 0xc0:
211
+ p++;
212
+ default:
213
+ p++;
214
+ }
215
+ }
216
+ return 0;
217
+ }
218
+
219
+ static VALUE encode_to_cesu8(const unsigned char* p)
220
+ {
221
+ size_t len = strlen(p);
222
+ char* newstr = ALLOCA_N(char, len + (len + 1) / 2);
223
+ char* dest = newstr;
224
+ int sval, i;
225
+ while (*p)
226
+ {
227
+ switch (*p & 0xf0)
228
+ {
229
+ case 0xf0:
230
+ sval = *p++ & 7;
231
+ for (i = 0; i < 3; i++)
232
+ {
233
+ sval <<= 6;
234
+ sval |= (*p++ & 0x3f);
235
+ }
236
+ *dest++ = '\xed';
237
+ *dest++ = 0xa0 | (((sval >> 16) - 1) & 0x0f);
238
+ *dest++ = 0x80 | ((sval >> 10) & 0x3f);
239
+ *dest++ = '\xed';
240
+ *dest++ = 0xb0 | ((sval >> 6) & 0x0f);
241
+ *dest++ = 0x80 | (sval & 0x3f);
242
+ break;
243
+ case 0xe0:
244
+ *dest++ = *p++;
245
+ case 0xc0:
246
+ case 0xc1:
247
+ *dest++ = *p++;
248
+ default:
249
+ *dest++ = *p++;
250
+ }
251
+ }
252
+ return rb_str_new(newstr, dest - newstr);
253
+ }
254
+ static VALUE encode_to_utf8(const unsigned char* p)
255
+ {
256
+ size_t len = strlen(p);
257
+ char* newstr = ALLOCA_N(char, len);
258
+ char* dest = newstr;
259
+ int sval, i;
260
+ while (*p)
261
+ {
262
+ if (*p == 0xed)
263
+ {
264
+ char v = *(p + 1);
265
+ char w = *(p + 2);
266
+ char y = *(p + 4);
267
+ char z = *(p + 5);
268
+ p += 6;
269
+ sval = 0x10000 + ((v & 0x0f) << 16) + ((w & 0x3f) << 10) + ((y & 0x0f) << 6) + (z & 0x3f);
270
+ sval = (((v + 1) & 0x0f) << 16) + ((w & 0x3f) << 10) + ((y & 0x0f) << 6) + (z & 0x3f);
271
+ *dest++ = 0xf0 | ((sval >> 18));
272
+ *dest++ = 0x80 | ((sval >> 12) & 0x3f);
273
+ *dest++ = 0x80 | ((sval >> 6) & 0x3f);
274
+ *dest++ = 0x80 | (sval & 0x3f);
275
+ continue;
276
+ }
277
+ switch (*p & 0xe0)
278
+ {
279
+ case 0xe0:
280
+ *dest++ = *p++;
281
+ case 0xc0:
282
+ case 0xc1:
283
+ *dest++ = *p++;
284
+ default:
285
+ *dest++ = *p++;
286
+ }
287
+ }
288
+ return rb_str_new(newstr, dest - newstr);
289
+ }
170
290
  #endif
171
291
 
172
292
  #if defined(DEBUG)
@@ -177,6 +297,8 @@ static void debug_out(VALUE v)
177
297
  strlen(p), p);
178
298
  fflush(stdout);
179
299
  }
300
+ #else
301
+ #define debug_out(n)
180
302
  #endif
181
303
 
182
304
  VALUE exticonv_local_to_utf8(VALUE local_string)
@@ -192,23 +314,24 @@ VALUE exticonv_local_to_utf8(VALUE local_string)
192
314
  return local_string;
193
315
  }
194
316
  #else
195
- VALUE cEncoding, encoding, utf8;
196
- cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
317
+ VALUE encoding;
318
+ if (NIL_P(cEncoding))
319
+ {
320
+ init_encoding_vars();
321
+ }
197
322
  encoding = rb_funcall(local_string, rb_intern("encoding"), 0);
198
- utf8 = rb_const_get(cEncoding, rb_intern("UTF_8"));
199
- if (encoding != utf8)
323
+ if (encoding != encoding_utf8)
200
324
  {
201
- VALUE ret = rb_funcall(local_string, rb_intern("encode"), 2, utf8, encoding);
202
- #if defined(DEBUG)
325
+ VALUE ret = rb_funcall(local_string, rb_intern("encode"), 2, encoding_utf8, encoding);
203
326
  debug_out(local_string);
204
327
  debug_out(ret);
205
- #endif
206
- return ret;
328
+ local_string = ret;
207
329
  }
208
- else
330
+ if (contains_surrogate_pair(StringValuePtr(local_string)))
209
331
  {
210
- return local_string;
332
+ local_string = encode_to_cesu8(StringValuePtr(local_string));
211
333
  }
334
+ return local_string;
212
335
  #endif
213
336
  }
214
337
 
@@ -225,6 +348,14 @@ VALUE exticonv_utf8_to_local(VALUE utf8_string)
225
348
  return utf8_string;
226
349
  }
227
350
  #else
228
- return rb_funcall(utf8_string, rb_intern("force_encoding"), 1, rb_const_get(rb_cEncoding, rb_intern("UTF_8")));
351
+ if (NIL_P(cEncoding))
352
+ {
353
+ init_encoding_vars();
354
+ }
355
+ if (contains_auxchar(StringValuePtr(utf8_string)))
356
+ {
357
+ utf8_string = encode_to_utf8(StringValuePtr(utf8_string));
358
+ }
359
+ return rb_funcall(utf8_string, rb_intern("force_encoding"), 1, encoding_utf8);
229
360
  #endif
230
361
  }
data/ext/rjb.c CHANGED
@@ -14,7 +14,7 @@
14
14
  *
15
15
  */
16
16
 
17
- #define RJB_VERSION "1.5.9"
17
+ #define RJB_VERSION "1.6.0"
18
18
 
19
19
  #include "ruby.h"
20
20
  #include "extconf.h"
@@ -216,17 +216,28 @@ class TestRjb < Test::Unit::TestCase
216
216
  end
217
217
 
218
218
  def test_combination_charcters
219
- teststr = "\xc7\x96\xc3\xbc\xcc\x84\x75\xcc\x88\xcc\x84\xed\xa1\xa9\xed\xba\xb2\xe3\x81\x8b\xe3\x82\x9a"
219
+ teststr = "\xc7\x96\xc3\xbc\xcc\x84\x75\xcc\x88\xcc\x84𪚲\xe3\x81\x8b\xe3\x82\x9a"
220
220
  test = import('jp.co.infoseek.hp.arton.rjb.Test').new
221
221
  s = test.getUmlaut()
222
222
  if Object::const_defined?(:Encoding) #>=1.9
223
- teststr = teststr.force_encoding(Encoding::UTF_8)
224
- assert_equal(s, teststr)
223
+ =begin
224
+ n = [teststr.bytes.length, s.bytes.length].max
225
+ puts "org:#{teststr.bytes.length}, ret:#{s.bytes.length}"
226
+ 0.upto(n - 1) do |i|
227
+ b0 = teststr.getbyte(i)
228
+ b0 = 0 unless b0
229
+ b1 = s.getbyte(i)
230
+ b1 = 0 unless b1
231
+ puts sprintf("%02X - %02X\n", b0, b1)
232
+ end
233
+ =end
234
+ assert_equal(teststr.bytes.length, s.bytes.length)
235
+ assert_equal(teststr, s)
225
236
  else
226
237
  default_kcode = $KCODE
227
238
  begin
228
239
  $KCODE = "utf8"
229
- assert_equal(s, teststr)
240
+ assert_equal(teststr, s)
230
241
  ensure
231
242
  $KCODE = default_kcode
232
243
  end
@@ -947,5 +958,10 @@ class TestRjb < Test::Unit::TestCase
947
958
  end
948
959
  end
949
960
  end
961
+
962
+ def test_java_utf8
963
+ y = @jString.new('𠮷野家')
964
+ assert_equal '𠮷野家', y.toString
965
+ end
950
966
  end
951
967
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rjb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.9
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - arton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-17 00:00:00.000000000 Z
11
+ date: 2019-07-10 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'RJB is a bridge program that connect between Ruby and Java with Java
14
14
  Native Interface.