webruby 0.2.5 → 0.2.7

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (82):
  1. checksums.yaml +4 -4
  2. data/lib/webruby.rb +0 -4
  3. data/lib/webruby/app.rb +4 -0
  4. data/lib/webruby/rake/files.rake +2 -1
  5. data/lib/webruby/rake/mruby.rake +1 -1
  6. data/modules/emscripten/AUTHORS +1 -0
  7. data/modules/emscripten/ChangeLog +34 -1
  8. data/modules/emscripten/cmake/Platform/Emscripten.cmake +30 -9
  9. data/modules/emscripten/emcc +61 -28
  10. data/modules/emscripten/emrun +15 -11
  11. data/modules/emscripten/emscripten.py +3 -0
  12. data/modules/emscripten/src/closure-externs.js +110 -0
  13. data/modules/emscripten/src/intertyper.js +1 -1
  14. data/modules/emscripten/src/jsifier.js +7 -21
  15. data/modules/emscripten/src/library.js +2 -1
  16. data/modules/emscripten/src/library_browser.js +16 -5
  17. data/modules/emscripten/src/library_fs.js +3 -1
  18. data/modules/emscripten/src/library_gl.js +691 -591
  19. data/modules/emscripten/src/library_glut.js +2 -0
  20. data/modules/emscripten/src/library_sdl.js +29 -5
  21. data/modules/emscripten/src/library_uuid.js +140 -0
  22. data/modules/emscripten/src/modules.js +1 -1
  23. data/modules/emscripten/src/parseTools.js +29 -19
  24. data/modules/emscripten/src/postamble.js +3 -4
  25. data/modules/emscripten/src/preamble.js +17 -1
  26. data/modules/emscripten/src/relooper/Relooper.cpp +8 -8
  27. data/modules/emscripten/src/relooper/Relooper.h +5 -5
  28. data/modules/emscripten/src/relooper/test.txt +2 -2
  29. data/modules/emscripten/src/runtime.js +1 -1
  30. data/modules/emscripten/src/settings.js +3 -0
  31. data/modules/emscripten/src/struct_info.json +12 -0
  32. data/modules/emscripten/system/include/uuid/uuid.h +35 -0
  33. data/modules/emscripten/tools/js-optimizer.js +191 -142
  34. data/modules/emscripten/tools/js_optimizer.py +3 -29
  35. data/modules/emscripten/tools/shared.py +43 -6
  36. data/modules/mruby/include/mruby/value.h +3 -2
  37. data/modules/mruby/mrbgems/mruby-bin-mirb/tools/mirb/mirb.c +5 -9
  38. data/modules/mruby/mrbgems/mruby-bin-mruby/tools/mruby/mruby.c +3 -5
  39. data/modules/mruby/mrbgems/mruby-hash-ext/src/hash-ext.c +0 -3
  40. data/modules/mruby/mrbgems/mruby-numeric-ext/src/numeric_ext.c +0 -1
  41. data/modules/mruby/mrbgems/mruby-random/src/mt19937ar.c +0 -1
  42. data/modules/mruby/mrbgems/mruby-range-ext/src/range.c +2 -6
  43. data/modules/mruby/mrbgems/mruby-sprintf/src/sprintf.c +0 -4
  44. data/modules/mruby/mrbgems/mruby-string-ext/mrblib/string.rb +22 -0
  45. data/modules/mruby/mrbgems/mruby-string-ext/src/string.c +2 -2
  46. data/modules/mruby/mrbgems/mruby-string-ext/test/string.rb +21 -2
  47. data/modules/mruby/mrbgems/mruby-string-utf8/mrbgem.rake +4 -0
  48. data/modules/mruby/mrbgems/mruby-string-utf8/src/string.c +297 -0
  49. data/modules/mruby/mrbgems/mruby-string-utf8/test/string.rb +27 -0
  50. data/modules/mruby/mrbgems/mruby-struct/src/struct.c +0 -1
  51. data/modules/mruby/mrblib/init_mrblib.c +0 -3
  52. data/modules/mruby/src/array.c +22 -8
  53. data/modules/mruby/src/backtrace.c +12 -9
  54. data/modules/mruby/src/class.c +3 -3
  55. data/modules/mruby/src/codegen.c +17 -5
  56. data/modules/mruby/src/dump.c +5 -6
  57. data/modules/mruby/src/error.c +0 -2
  58. data/modules/mruby/src/etc.c +0 -2
  59. data/modules/mruby/src/gc.c +4 -8
  60. data/modules/mruby/src/load.c +1 -6
  61. data/modules/mruby/src/numeric.c +0 -6
  62. data/modules/mruby/src/object.c +3 -5
  63. data/modules/mruby/src/parse.y +37 -38
  64. data/modules/mruby/src/proc.c +8 -1
  65. data/modules/mruby/src/range.c +3 -7
  66. data/modules/mruby/src/state.c +0 -1
  67. data/modules/mruby/src/string.c +2 -17
  68. data/modules/mruby/src/symbol.c +0 -1
  69. data/modules/mruby/src/variable.c +3 -22
  70. data/modules/mruby/src/vm.c +9 -8
  71. data/modules/mruby/tasks/mrbgem_spec.rake +13 -5
  72. data/modules/mruby/tasks/mrbgems_test.rake +3 -3
  73. data/modules/mruby/tasks/mruby_build_commands.rake +2 -2
  74. data/modules/mruby/tasks/mruby_build_gem.rake +3 -3
  75. data/modules/mruby/test/init_mrbtest.c +0 -3
  76. data/modules/mruby/test/t/array.rb +12 -1
  77. data/modules/mruby/test/t/class.rb +67 -0
  78. data/modules/mruby/test/t/exception.rb +12 -0
  79. data/modules/mruby/test/t/kernel.rb +75 -1
  80. data/modules/mruby/test/t/syntax.rb +115 -0
  81. data/scripts/gen_require.rb +12 -1
  82. metadata +8 -2
@@ -24,41 +24,16 @@ import_sig = re.compile('var ([_\w$]+) *=[^;]+;')
24
24
 
25
25
  class Minifier:
26
26
  '''
27
- asm.js minification support. We calculate possible names and minification of
27
+ asm.js minification support. We calculate minification of
28
28
  globals here, then pass that into the parallel js-optimizer.js runners which
29
29
  during registerize perform minification of locals.
30
30
  '''
31
31
 
32
- def __init__(self, js, js_engine, MAX_NAMES):
32
+ def __init__(self, js, js_engine):
33
33
  self.js = js
34
34
  self.js_engine = js_engine
35
- MAX_NAMES = min(MAX_NAMES, 120000)
36
-
37
- # Create list of valid short names
38
-
39
- INVALID_2 = set(['do', 'if', 'in'])
40
- INVALID_3 = set(['for', 'new', 'try', 'var', 'env', 'let'])
41
-
42
- self.names = []
43
- init_possibles = string.ascii_letters + '_$'
44
- later_possibles = init_possibles + string.digits
45
- for a in init_possibles:
46
- if len(self.names) >= MAX_NAMES: break
47
- self.names.append(a)
48
- for a in init_possibles:
49
- for b in later_possibles:
50
- if len(self.names) >= MAX_NAMES: break
51
- curr = a + b
52
- if curr not in INVALID_2: self.names.append(curr)
53
- for a in init_possibles:
54
- for b in later_possibles:
55
- for c in later_possibles:
56
- if len(self.names) >= MAX_NAMES: break
57
- curr = a + b + c
58
- if curr not in INVALID_3: self.names.append(curr)
59
35
 
60
36
  def minify_shell(self, shell, minify_whitespace, source_map=False):
61
- #print >> sys.stderr, "MINIFY SHELL 1111111111", shell, "\n222222222222222"
62
37
  # Run through js-optimizer.js to find and minify the global symbols
63
38
  # We send it the globals, which it parses at the proper time. JS decides how
64
39
  # to minify all global names, we receive a dictionary back, which is then
@@ -91,7 +66,6 @@ class Minifier:
91
66
 
92
67
  def serialize(self):
93
68
  return {
94
- 'names': self.names,
95
69
  'globals': self.globs
96
70
  }
97
71
 
@@ -187,7 +161,7 @@ EMSCRIPTEN_FUNCS();
187
161
  js = js[start_funcs + len(start_funcs_marker):end_funcs]
188
162
 
189
163
  # we assume there is a maximum of one new name per line
190
- minifier = Minifier(js, js_engine, js.count('\n') + asm_shell.count('\n'))
164
+ minifier = Minifier(js, js_engine)
191
165
  asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();');
192
166
  asm_shell_post = asm_shell_post.replace('});', '})');
193
167
  pre += asm_shell_pre + '\n' + start_funcs_marker
@@ -272,9 +272,17 @@ if EM_POPEN_WORKAROUND and os.name == 'nt':
272
272
 
273
273
  EXPECTED_LLVM_VERSION = (3,2)
274
274
 
275
+ actual_clang_version = None
276
+
277
+ def get_clang_version():
278
+ global actual_clang_version
279
+ if actual_clang_version is None:
280
+ actual_clang_version = Popen([CLANG, '-v'], stderr=PIPE).communicate()[1].split('\n')[0].split(' ')[2]
281
+ return actual_clang_version
282
+
275
283
  def check_clang_version():
276
- expected = 'clang version ' + '.'.join(map(str, EXPECTED_LLVM_VERSION))
277
- actual = Popen([CLANG, '-v'], stderr=PIPE).communicate()[1].split('\n')[0]
284
+ expected = '.'.join(map(str, EXPECTED_LLVM_VERSION))
285
+ actual = get_clang_version()
278
286
  if expected in actual:
279
287
  return True
280
288
  logging.warning('LLVM version appears incorrect (seeing "%s", expected "%s")' % (actual, expected))
@@ -286,6 +294,21 @@ def check_llvm_version():
286
294
  except Exception, e:
287
295
  logging.warning('Could not verify LLVM version: %s' % str(e))
288
296
 
297
+ def check_fastcomp():
298
+ try:
299
+ llc_version_info = Popen([LLVM_COMPILER, '--version'], stdout=PIPE).communicate()[0]
300
+ pre, targets = llc_version_info.split('Registered Targets:')
301
+ if 'js' not in targets or 'JavaScript (asm.js, emscripten) backend' not in targets:
302
+ logging.critical('fastcomp in use, but LLVM has not been built with the JavaScript backend as a target, llc reports:')
303
+ print >> sys.stderr, '==========================================================================='
304
+ print >> sys.stderr, llc_version_info,
305
+ print >> sys.stderr, '==========================================================================='
306
+ return False
307
+ return True
308
+ except Exception, e:
309
+ logging.warning('cound not check fastcomp: %s' % str(e))
310
+ return True
311
+
289
312
  EXPECTED_NODE_VERSION = (0,8,0)
290
313
 
291
314
  def check_node_version():
@@ -322,10 +345,10 @@ def find_temp_directory():
322
345
  # we re-check sanity when the settings are changed)
323
346
  # We also re-check sanity and clear the cache when the version changes
324
347
 
325
- EMSCRIPTEN_VERSION = '1.8.0'
348
+ EMSCRIPTEN_VERSION = '1.8.6'
326
349
 
327
350
  def generate_sanity():
328
- return EMSCRIPTEN_VERSION + '|' + get_llvm_target() + '|' + LLVM_ROOT
351
+ return EMSCRIPTEN_VERSION + '|' + get_llvm_target() + '|' + LLVM_ROOT + '|' + get_clang_version()
329
352
 
330
353
  def check_sanity(force=False):
331
354
  try:
@@ -353,9 +376,11 @@ def check_sanity(force=False):
353
376
  Cache.erase()
354
377
  force = False # the check actually failed, so definitely write out the sanity file, to avoid others later seeing failures too
355
378
 
356
- # some warning, not fatal checks - do them even if EM_IGNORE_SANITY is on
379
+ # some warning, mostly not fatal checks - do them even if EM_IGNORE_SANITY is on
357
380
  check_llvm_version()
358
381
  check_node_version()
382
+ if os.environ.get('EMCC_FAST_COMPILER') == '1':
383
+ fastcomp_ok = check_fastcomp()
359
384
 
360
385
  if os.environ.get('EM_IGNORE_SANITY'):
361
386
  logging.info('EM_IGNORE_SANITY set, ignoring sanity checks')
@@ -377,6 +402,11 @@ def check_sanity(force=False):
377
402
  logging.critical('Cannot find %s, check the paths in %s' % (cmd, EM_CONFIG))
378
403
  sys.exit(1)
379
404
 
405
+ if os.environ.get('EMCC_FAST_COMPILER') == '1':
406
+ if not fastcomp_ok:
407
+ logging.critical('failing sanity checks due to previous fastcomp failure')
408
+ sys.exit(1)
409
+
380
410
  try:
381
411
  subprocess.call([JAVA, '-version'], stdout=PIPE, stderr=PIPE)
382
412
  except:
@@ -1141,6 +1171,8 @@ class Building:
1141
1171
  if type(opts) is int:
1142
1172
  opts = Building.pick_llvm_opts(opts)
1143
1173
  #opts += ['-debug-pass=Arguments']
1174
+ if get_clang_version() == '3.4' and not Settings.SIMD:
1175
+ opts += ['-disable-loop-vectorization', '-disable-slp-vectorization'] # llvm 3.4 has these on by default
1144
1176
  logging.debug('emcc: LLVM opts: ' + str(opts))
1145
1177
  target = out or (filename + '.opt.bc')
1146
1178
  output = Popen([LLVM_OPT, filename] + opts + ['-o', target], stdout=PIPE).communicate()[0]
@@ -1382,6 +1414,8 @@ class Building:
1382
1414
  if not os.path.exists(CLOSURE_COMPILER):
1383
1415
  raise Exception('Closure compiler appears to be missing, looked at: ' + str(CLOSURE_COMPILER))
1384
1416
 
1417
+ CLOSURE_EXTERNS = path_from_root('src', 'closure-externs.js')
1418
+
1385
1419
  # Something like this (adjust memory as needed):
1386
1420
  # java -Xmx1024m -jar CLOSURE_COMPILER --compilation_level ADVANCED_OPTIMIZATIONS --variable_map_output_file src.cpp.o.js.vars --js src.cpp.o.js --js_output_file src.cpp.o.cc.js
1387
1421
  args = [JAVA,
@@ -1389,6 +1423,7 @@ class Building:
1389
1423
  '-jar', CLOSURE_COMPILER,
1390
1424
  '--compilation_level', 'ADVANCED_OPTIMIZATIONS',
1391
1425
  '--language_in', 'ECMASCRIPT5',
1426
+ '--externs', CLOSURE_EXTERNS,
1392
1427
  #'--variable_map_output_file', filename + '.vars',
1393
1428
  '--js', filename, '--js_output_file', filename + '.cc.js']
1394
1429
  if pretty: args += ['--formatting', 'PRETTY_PRINT']
@@ -1438,7 +1473,7 @@ class Building:
1438
1473
  @staticmethod
1439
1474
  def ensure_relooper(relooper):
1440
1475
  if os.path.exists(relooper): return
1441
- if os.environ.get('EMCC_FAST_COMPILER'):
1476
+ if os.environ.get('EMCC_FAST_COMPILER') == '1':
1442
1477
  logging.debug('not building relooper to js, using it in c++ backend')
1443
1478
  return
1444
1479
 
@@ -1513,6 +1548,8 @@ class Building:
1513
1548
  text = m.groups(0)[0]
1514
1549
  assert text.count('(') == 1 and text.count(')') == 1, 'must have simple expressions in emscripten_jcache_printf calls, no parens'
1515
1550
  assert text.count('"') == 2, 'must have simple expressions in emscripten_jcache_printf calls, no strings as varargs parameters'
1551
+ if os.environ.get('EMCC_FAST_COMPILER') == '1': # fake it in fastcomp
1552
+ return text.replace('emscripten_jcache_printf', 'printf')
1516
1553
  start = text.index('(')
1517
1554
  end = text.rindex(')')
1518
1555
  args = text[start+1:end].split(',')
@@ -155,7 +155,7 @@ typedef struct mrb_value {
155
155
  #define mrb_tt(o) (((o).value.ttt & 0xfc000)>>14)
156
156
  #define mrb_mktt(tt) (0xfff00000|((tt)<<14))
157
157
  #define mrb_type(o) ((uint32_t)0xfff00000 < (o).value.ttt ? mrb_tt(o) : MRB_TT_FLOAT)
158
- #define mrb_ptr(o) ((void*)((((intptr_t)0x3fffffffffff)&((intptr_t)((o).value.p)))<<2))
158
+ #define mrb_ptr(o) ((void*)((((uintptr_t)0x3fffffffffff)&((uintptr_t)((o).value.p)))<<2))
159
159
  #define mrb_float(o) (o).f
160
160
 
161
161
  #define MRB_SET_VALUE(o, tt, attr, v) do {\
@@ -166,7 +166,7 @@ typedef struct mrb_value {
166
166
  case MRB_TT_UNDEF:\
167
167
  case MRB_TT_FIXNUM:\
168
168
  case MRB_TT_SYMBOL: (o).attr = (v); break;\
169
- default: (o).value.i = 0; (o).value.p = (void*)((intptr_t)(o).value.p | (((intptr_t)(v))>>2)); break;\
169
+ default: (o).value.i = 0; (o).value.p = (void*)((uintptr_t)(o).value.p | (((uintptr_t)(v))>>2)); break;\
170
170
  }\
171
171
  } while (0)
172
172
 
@@ -315,6 +315,7 @@ mrb_float_value(struct mrb_state *mrb, mrb_float f)
315
315
  #define mrb_bool(o) ((o).w != MRB_Qnil && (o).w != MRB_Qfalse)
316
316
 
317
317
  #else
318
+
318
319
  #define mrb_cptr(o) mrb_ptr(o)
319
320
  #define mrb_fixnum_p(o) (mrb_type(o) == MRB_TT_FIXNUM)
320
321
  #define mrb_undef_p(o) (mrb_type(o) == MRB_TT_UNDEF)
@@ -8,21 +8,17 @@
8
8
 
9
9
  #include <stdlib.h>
10
10
  #include <string.h>
11
-
12
- #include <mruby.h>
11
+ #include "mruby.h"
13
12
  #include "mruby/array.h"
14
- #include <mruby/proc.h>
15
- #include <mruby/data.h>
16
- #include <mruby/compile.h>
13
+ #include "mruby/proc.h"
14
+ #include "mruby/compile.h"
15
+ #include "mruby/string.h"
16
+
17
17
  #ifdef ENABLE_READLINE
18
18
  #include <limits.h>
19
19
  #include <readline/readline.h>
20
20
  #include <readline/history.h>
21
- #endif
22
- #include <mruby/string.h>
23
21
 
24
-
25
- #ifdef ENABLE_READLINE
26
22
  static const char *history_file_name = ".mirb_history";
27
23
  char history_path[PATH_MAX];
28
24
  #endif
@@ -1,13 +1,11 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
1
4
  #include "mruby.h"
2
- #include "mruby/proc.h"
3
5
  #include "mruby/array.h"
4
- #include "mruby/string.h"
5
6
  #include "mruby/compile.h"
6
7
  #include "mruby/dump.h"
7
8
  #include "mruby/variable.h"
8
- #include <stdio.h>
9
- #include <stdlib.h>
10
- #include <string.h>
11
9
 
12
10
  #ifndef ENABLE_STDIO
13
11
  static void
@@ -6,11 +6,8 @@
6
6
 
7
7
  #include "mruby.h"
8
8
  #include "mruby/array.h"
9
- #include "mruby/class.h"
10
9
  #include "mruby/hash.h"
11
10
  #include "mruby/khash.h"
12
- #include "mruby/string.h"
13
- #include "mruby/variable.h"
14
11
 
15
12
  /*
16
13
  * call-seq:
@@ -1,6 +1,5 @@
1
1
  #include <limits.h>
2
2
  #include "mruby.h"
3
- #include "mruby/numeric.h"
4
3
 
5
4
  static mrb_value
6
5
  mrb_int_chr(mrb_state *mrb, mrb_value x)
@@ -4,7 +4,6 @@
4
4
  ** See Copyright Notice in mruby.h
5
5
  */
6
6
 
7
- #include <stdio.h>
8
7
  #include "mt19937ar.h"
9
8
 
10
9
  /* Period parameters */
@@ -7,7 +7,7 @@ r_le(mrb_state *mrb, mrb_value a, mrb_value b)
7
7
  mrb_value r = mrb_funcall(mrb, a, "<=>", 1, b); /* compare result */
8
8
  /* output :a < b => -1, a = b => 0, a > b => +1 */
9
9
 
10
- if (mrb_type(r) == MRB_TT_FIXNUM) {
10
+ if (mrb_fixnum_p(r)) {
11
11
  mrb_int c = mrb_fixnum(r);
12
12
  if (c == 0 || c == -1) return TRUE;
13
13
  }
@@ -21,11 +21,7 @@ r_lt(mrb_state *mrb, mrb_value a, mrb_value b)
21
21
  mrb_value r = mrb_funcall(mrb, a, "<=>", 1, b);
22
22
  /* output :a < b => -1, a = b => 0, a > b => +1 */
23
23
 
24
- if (mrb_type(r) == MRB_TT_FIXNUM) {
25
- if (mrb_fixnum(r) == -1) return TRUE;
26
- }
27
-
28
- return FALSE;
24
+ return mrb_fixnum_p(r) && mrb_fixnum(r) == -1;
29
25
  }
30
26
 
31
27
  /*
@@ -15,10 +15,6 @@
15
15
  #include <math.h>
16
16
  #include <ctype.h>
17
17
 
18
- #ifdef HAVE_IEEEFP_H
19
- #include <ieeefp.h>
20
- #endif
21
-
22
18
  #define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */
23
19
  #define BITSPERDIG (sizeof(mrb_int)*CHAR_BIT)
24
20
  #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n)))
@@ -49,4 +49,26 @@ class String
49
49
  def casecmp(str)
50
50
  self.downcase <=> str.downcase
51
51
  end
52
+
53
+ def partition(sep)
54
+ raise TypeError, "type mismatch: #{sep.class} given" unless sep.is_a? String
55
+ n = index(sep)
56
+ unless n.nil?
57
+ m = n + sep.size
58
+ [ slice(0, n), sep, slice(m, size - m) ]
59
+ else
60
+ [ self, "", "" ]
61
+ end
62
+ end
63
+
64
+ def rpartition(sep)
65
+ raise TypeError, "type mismatch: #{sep.class} given" unless sep.is_a? String
66
+ n = rindex(sep)
67
+ unless n.nil?
68
+ m = n + sep.size
69
+ [ slice(0, n), sep, slice(m, size - m) ]
70
+ else
71
+ [ "", "", self ]
72
+ end
73
+ end
52
74
  end
@@ -1,7 +1,7 @@
1
- #include "mruby.h"
2
- #include "mruby/string.h"
3
1
  #include <ctype.h>
4
2
  #include <string.h>
3
+ #include "mruby.h"
4
+ #include "mruby/string.h"
5
5
 
6
6
  static mrb_value
7
7
  mrb_str_getbyte(mrb_state *mrb, mrb_value str)
@@ -20,8 +20,7 @@ assert('String#dump') do
20
20
  end
21
21
 
22
22
  assert('String#strip') do
23
- s = " abc "
24
- s.strip
23
+ s = " abc "
25
24
  "".strip == "" and " \t\r\n\f\v".strip == "" and
26
25
  "\0a\0".strip == "\0a" and
27
26
  "abc".strip == "abc" and
@@ -114,3 +113,23 @@ assert('String#end_with?') do
114
113
  assert_true !"ng".end_with?("ing", "mng")
115
114
  assert_raise TypeError do "hello".end_with?(true) end
116
115
  end
116
+
117
+ assert('String#partition') do
118
+ assert_equal ["a", "x", "axa"], "axaxa".partition("x")
119
+ assert_equal ["aaaaa", "", ""], "aaaaa".partition("x")
120
+ assert_equal ["", "", "aaaaa"], "aaaaa".partition("")
121
+ assert_equal ["", "a", "aaaa"], "aaaaa".partition("a")
122
+ assert_equal ["aaaa", "b", ""], "aaaab".partition("b")
123
+ assert_equal ["", "b", "aaaa"], "baaaa".partition("b")
124
+ assert_equal ["", "", ""], "".partition("a")
125
+ end
126
+
127
+ assert('String#rpartition') do
128
+ assert_equal ["axa", "x", "a"], "axaxa".rpartition("x")
129
+ assert_equal ["", "", "aaaaa"], "aaaaa".rpartition("x")
130
+ assert_equal ["aaaaa", "", ""], "aaaaa".rpartition("")
131
+ assert_equal ["aaaa", "a", ""], "aaaaa".rpartition("a")
132
+ assert_equal ["aaaa", "b", ""], "aaaab".rpartition("b")
133
+ assert_equal ["", "b", "aaaa"], "baaaa".rpartition("b")
134
+ assert_equal ["", "", ""], "".rpartition("a")
135
+ end
@@ -0,0 +1,4 @@
1
+ MRuby::Gem::Specification.new('mruby-string-utf8') do |spec|
2
+ spec.license = 'MIT'
3
+ spec.author = 'mruby developers'
4
+ end
@@ -0,0 +1,297 @@
1
+ #include "mruby.h"
2
+ #include "mruby/string.h"
3
+ #include "mruby/range.h"
4
+ #include <ctype.h>
5
+ #include <string.h>
6
+
7
+ /* TODO: duplicate definition in src/re.h */
8
+ #define REGEXP_CLASS "Regexp"
9
+
10
+ static size_t utf8len_tab[256] =
11
+ {
12
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
13
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
14
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
15
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
16
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
17
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
18
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
19
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
20
+ };
21
+
22
+ static size_t
23
+ utf8len(unsigned char* p)
24
+ {
25
+ size_t len;
26
+ int i;
27
+
28
+ if (*p == 0)
29
+ return 1;
30
+ len = utf8len_tab[*p];
31
+ for (i = 1; i < len; ++i)
32
+ if ((p[i] & 0xc0) != 0x80)
33
+ return 1;
34
+ return len;
35
+ }
36
+
37
+ static size_t
38
+ mrb_utf8_strlen(mrb_value str)
39
+ {
40
+ size_t total = 0;
41
+ unsigned char* p = (unsigned char*) RSTRING_PTR(str);
42
+ unsigned char* e = p + RSTRING_LEN(str);
43
+ while (p<e) {
44
+ p += utf8len(p);
45
+ total++;
46
+ }
47
+ return total;
48
+ }
49
+
50
+ static mrb_value
51
+ mrb_str_size(mrb_state *mrb, mrb_value str)
52
+ {
53
+ size_t size = mrb_utf8_strlen(str);
54
+
55
+ return mrb_fixnum_value(size);
56
+ }
57
+
58
+ #define RSTRING_LEN_UTF8(s) mrb_utf8_strlen(s)
59
+
60
+ static mrb_value
61
+ noregexp(mrb_state *mrb, mrb_value self)
62
+ {
63
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
64
+ return mrb_nil_value();
65
+ }
66
+
67
+ static void
68
+ regexp_check(mrb_state *mrb, mrb_value obj)
69
+ {
70
+ if (!memcmp(mrb_obj_classname(mrb, obj), REGEXP_CLASS, sizeof(REGEXP_CLASS) - 1)) {
71
+ noregexp(mrb, obj);
72
+ }
73
+ }
74
+
75
+ static inline mrb_int
76
+ mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
77
+ {
78
+ const unsigned char *x = xs, *xe = xs + m;
79
+ const unsigned char *y = ys;
80
+ int i, qstable[256];
81
+
82
+ /* Preprocessing */
83
+ for (i = 0; i < 256; ++i)
84
+ qstable[i] = m + 1;
85
+ for (; x < xe; ++x)
86
+ qstable[*x] = xe - x;
87
+ /* Searching */
88
+ for (; y + m <= ys + n; y += *(qstable + y[m])) {
89
+ if (*xs == *y && memcmp(xs, y, m) == 0)
90
+ return y - ys;
91
+ }
92
+ return -1;
93
+ }
94
+ static mrb_int
95
+ mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
96
+ {
97
+ const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
98
+
99
+ if (m > n) return -1;
100
+ else if (m == n) {
101
+ return memcmp(x0, y0, m) == 0 ? 0 : -1;
102
+ }
103
+ else if (m < 1) {
104
+ return 0;
105
+ }
106
+ else if (m == 1) {
107
+ const unsigned char *ys = y, *ye = ys + n;
108
+ for (; y < ye; ++y) {
109
+ if (*x == *y)
110
+ return y - ys;
111
+ }
112
+ return -1;
113
+ }
114
+ return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
115
+ }
116
+
117
+ static mrb_value
118
+ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
119
+ {
120
+ int i;
121
+ unsigned char *p = (unsigned char*) RSTRING_PTR(str), *t;
122
+ unsigned char *e = p + RSTRING_LEN(str);
123
+
124
+
125
+ for (i = 0; i < beg && p<e; i++) {
126
+ p += utf8len(p);
127
+ }
128
+ t = p;
129
+ for (i = 0; i < len && t<e; i++) {
130
+ t += utf8len(t);
131
+ }
132
+ return mrb_str_new(mrb, (const char*)p, (int)(t - p));
133
+ }
134
+
135
+ static mrb_value
136
+ str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
137
+ {
138
+ mrb_value str2;
139
+ int len8 = RSTRING_LEN_UTF8(str);
140
+
141
+ if (len < 0) return mrb_nil_value();
142
+ if (len8 == 0) {
143
+ len = 0;
144
+ }
145
+ else if (beg < 0) {
146
+ beg = len8 + beg;
147
+ }
148
+ if (beg > len8) return mrb_nil_value();
149
+ if (beg < 0) {
150
+ beg += len8;
151
+ if (beg < 0) return mrb_nil_value();
152
+ }
153
+ if (beg + len > len8)
154
+ len = len8 - beg;
155
+ if (len <= 0) {
156
+ len = 0;
157
+ }
158
+ str2 = str_subseq(mrb, str, beg, len);
159
+
160
+ return str2;
161
+ }
162
+
163
+ static mrb_int
164
+ str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
165
+ {
166
+ mrb_int pos;
167
+ char *s, *sptr;
168
+ mrb_int len, slen;
169
+
170
+ len = RSTRING_LEN(str);
171
+ slen = RSTRING_LEN(sub);
172
+ if (offset < 0) {
173
+ offset += len;
174
+ if (offset < 0) return -1;
175
+ }
176
+ if (len - offset < slen) return -1;
177
+ s = RSTRING_PTR(str);
178
+ if (offset) {
179
+ s += offset;
180
+ }
181
+ if (slen == 0) return offset;
182
+ /* need proceed one character at a time */
183
+ sptr = RSTRING_PTR(sub);
184
+ slen = RSTRING_LEN(sub);
185
+ len = RSTRING_LEN(str) - offset;
186
+ pos = mrb_memsearch(sptr, slen, s, len);
187
+ if (pos < 0) return pos;
188
+ return pos + offset;
189
+ }
190
+
191
+ static mrb_value
192
+ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
193
+ {
194
+ mrb_int idx;
195
+
196
+ regexp_check(mrb, indx);
197
+ switch (mrb_type(indx)) {
198
+ case MRB_TT_FIXNUM:
199
+ idx = mrb_fixnum(indx);
200
+
201
+ num_index:
202
+ str = str_substr(mrb, str, idx, 1);
203
+ if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
204
+ return str;
205
+
206
+ case MRB_TT_STRING:
207
+ if (str_index(mrb, str, indx, 0) != -1)
208
+ return mrb_str_dup(mrb, indx);
209
+ return mrb_nil_value();
210
+
211
+ case MRB_TT_RANGE:
212
+ /* check if indx is Range */
213
+ {
214
+ mrb_int beg, len;
215
+ mrb_value tmp;
216
+
217
+ len = RSTRING_LEN_UTF8(str);
218
+ if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
219
+ tmp = str_subseq(mrb, str, beg, len);
220
+ return tmp;
221
+ }
222
+ else {
223
+ return mrb_nil_value();
224
+ }
225
+ }
226
+ default:
227
+ idx = mrb_fixnum(indx);
228
+ goto num_index;
229
+ }
230
+ return mrb_nil_value(); /* not reached */
231
+ }
232
+
233
+ static mrb_value
234
+ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
235
+ {
236
+ mrb_value a1, a2;
237
+ int argc;
238
+
239
+ argc = mrb_get_args(mrb, "o|o", &a1, &a2);
240
+ if (argc == 2) {
241
+ regexp_check(mrb, a1);
242
+ return str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
243
+ }
244
+ if (argc != 1) {
245
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
246
+ }
247
+ return mrb_str_aref(mrb, str, a1);
248
+ }
249
+
250
+ static mrb_value
251
+ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
252
+ {
253
+ int utf8_len = mrb_utf8_strlen(str);
254
+ if (utf8_len > 1) {
255
+ int len = RSTRING_LEN(str);
256
+ char *buf = (char *)mrb_malloc(mrb, len);
257
+ unsigned char* p = (unsigned char*)buf;
258
+ unsigned char* e = (unsigned char*)buf + len;
259
+ unsigned char* r = (unsigned char*)RSTRING_END(str);
260
+
261
+ memcpy(buf, RSTRING_PTR(str), len);
262
+ mrb_str_modify(mrb, mrb_str_ptr(str));
263
+
264
+ while (p<e) {
265
+ int clen = utf8len(p);
266
+ r -= clen;
267
+ memcpy(r, p, clen);
268
+ p += clen;
269
+ }
270
+ mrb_free(mrb, buf);
271
+ }
272
+
273
+ return str;
274
+ }
275
+
276
+ static mrb_value
277
+ mrb_str_reverse(mrb_state *mrb, mrb_value str)
278
+ {
279
+ return mrb_str_reverse_bang(mrb, mrb_str_dup(mrb, str));
280
+ }
281
+
282
+ void
283
+ mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
284
+ {
285
+ struct RClass * s = mrb->string_class;
286
+
287
+ mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE());
288
+ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY());
289
+ mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY());
290
+ mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE());
291
+ mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE());
292
+ }
293
+
294
+ void
295
+ mrb_mruby_string_utf8_gem_final(mrb_state* mrb)
296
+ {
297
+ }