oniguruma 0.9.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,34 @@
1
+ == 1.0.0 / 2007-03-27
2
+ * Added documentation for MatchData.
3
+ * Added ogsub, ogsub!, osub and osub! to ::String.
4
+ * Removed ::String definitions from tests.
5
+ * Now the minimal recommended version of oniglib is 5.5 or higher.
6
+ * Removed ugly #if statements from c code.
7
+ * Do not create @named_captures hash if there are no named groups for regexp -- somewhat improves speed for repetitive calls
8
+ * Fixed usage of named backreferences in gsub with non-ascii names
9
+ * Move ORegexp#=~ to C code, make it work just like Regexp#=~, i.e. set $~. Throw ArgumentError instead of Exception if pattern does not compile
10
+ * Fix implementation of ORegexp#===, so it now does not raise errors in case statement anymore
11
+ (resembles plain Ruby Regexp#=== behaviour)
12
+ * Modified begin, end and offset methods in MatchData to handle named groups and default to group 0.
13
+ * Exception is no longer thrown in oregexp_make_match_data.
14
+ * Removed references to MultiMatchData from documentation
15
+ * Removed class MultiMatchData
16
+ * Fix off by one error in region->num_regs usage
17
+ * Fix dumb bug with zero-width matches that made infinite loops. now consume at least one char in gsub and scan
18
+ * ORegexp API changes:
19
+ * Pass only MatchData to sub/gsub with blocks
20
+ oregexp.sub( str ) {|match_data| ... }
21
+ oregexp.gsub( str ) {|match_data| ... }
22
+ * Add ORegexp#scan instead of match_all
23
+ oregexp.scan(str) {|match_data| ... } # => Array of MatchData, or nil
24
+ * Friendly way to set options
25
+ ORegexp.new( pattern, options_str, encoding, syntax)
26
+ ORegexp.new('\w+', 'imsx', 'koi8r', 'perl')
27
+ * Named backreferences in substitutions
28
+ ORegexp.new('(?<pre>\w+)\d+(?<after>\w+)').sub('abc123def', '\<after>123\<pre>') #=> 'def123abc'
29
+ * couple of bugfixes with region's num_regs
30
+ * some docs for substitution methods added
31
+
1
32
  == 0.9.1 / 2007-03-25
2
33
  * FIX: Buggy resolution of numeric codes for encoding and syntax options (Nikolai Lugovoi)
3
34
  * FIX: Buggy implementation of ORegexp#sub and ORegexp#gsub methods. Now code is all C (Nikolai Lugovoi)
data/README.txt CHANGED
@@ -8,6 +8,7 @@ Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regul
8
8
  * Same interface as the standard Regexp class (easy transition!).
9
9
  * Support for named groups, look-ahead, look-behind, and other
10
10
  cool features!
11
+ * Support for other regexp syntaxes (Perl, Python, Java, etc.)
11
12
 
12
13
  == SYNOPSIS:
13
14
 
@@ -23,7 +24,7 @@ Consult the Syntax.txt[link:files/Syntax_txt.html] page.
23
24
 
24
25
  == REQUIREMENTS:
25
26
 
26
- * Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
27
+ * Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 5.5 or higher
27
28
 
28
29
  == INSTALL:
29
30
 
@@ -43,7 +44,7 @@ sudo gem install -r oniguruma
43
44
 
44
45
  == CREDITS:
45
46
 
46
- * N. Lugovoi. ORegexp.sub and ORegexp.gsub code, plus other patches.
47
+ * N. Lugovoi. ORegexp.sub and ORegexp.gsub code and lots of other stuff.
47
48
  * K. Kosako. For his great library.
48
49
  * A lot of the documentation has been copied from the original Ruby Regex documentation.
49
50
 
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'hoe'
3
3
 
4
4
  class Hoe; def extra_deps; @extra_deps.reject { |x| Array(x).first == 'hoe' }; end end
5
5
 
6
- Hoe.new('oniguruma', '0.9.1') do |p|
6
+ Hoe.new('oniguruma', '1.0.0') do |p|
7
7
  p.rubyforge_name = 'oniguruma'
8
8
  p.author = 'Dizan Vasquez'
9
9
  p.email = 'dix_ans@yahoo.com'
@@ -101,15 +101,11 @@ static int name_callback(
101
101
  regex_t* reg,
102
102
  struct callback_packet* arg
103
103
  ) {
104
- int i, gn, ref;
105
- OnigRegion *region = arg->region;
104
+ int i, gn;
106
105
  VALUE nameHash = arg->hash;
107
106
 
108
107
  for (i = 0; i < ngroup_num; i++) {
109
108
  gn = group_nums[i];
110
- ref = onig_name_to_backref_number(reg, name, name_end, region);
111
- if (ref != gn )
112
- return 1;
113
109
  rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
114
110
  }
115
111
  return 0;
@@ -124,10 +120,6 @@ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
124
120
  rb_iv_set( self, "@options", options );
125
121
  UChar* pat_ptr = RSTRING(pattern_str)->ptr;
126
122
  int pat_len = RSTRING(pattern_str)->len;
127
- if( pat_len == 0 ) {
128
- rb_raise(rb_eArgError, "Empty pattern makes no sense.");
129
- }
130
-
131
123
  VALUE rOptions = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
132
124
  VALUE rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
133
125
  VALUE rSyntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
@@ -142,16 +134,19 @@ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
142
134
  if (r != ONIG_NORMAL) {
143
135
  char s[ONIG_MAX_ERROR_MESSAGE_LEN];
144
136
  onig_error_code_to_str(s, r, &einfo);
145
- rb_raise(rb_eException, "Oniguruma Error: %s", s);
137
+ rb_raise(rb_eArgError, "Oniguruma Error: %s", s);
146
138
  }
147
139
  return self;
148
140
  }
149
141
 
142
+ /* can't include re.h, since it conflicts with oniguruma typedefs */
150
143
  struct RMatch {
151
144
  struct RBasic basic;
152
145
  VALUE str;
153
146
  struct re_registers *regs;
154
147
  };
148
+ #define RMATCH(obj) (R_CAST(RMatch)(obj))
149
+ void rb_match_busy _((VALUE));
155
150
 
156
151
  static VALUE oregexp_make_match_data(ORegexp * oregexp, OnigRegion * region, VALUE string_str) {
157
152
  VALUE rb_cMatch = rb_const_get(rb_cObject, rb_intern("MatchData")) ;
@@ -163,21 +158,22 @@ static VALUE oregexp_make_match_data(ORegexp * oregexp, OnigRegion * region, VAL
163
158
 
164
159
  match->str = rb_str_new4(string_str);
165
160
  match->regs = ALLOC(struct re_registers);
166
- match->regs->allocated = count+1;
161
+ match->regs->allocated = count;
167
162
  match->regs->num_regs = count;
168
- match->regs->beg = ALLOC_N(int, (count+1));
169
- match->regs->end = ALLOC_N(int, (count+1));
163
+ match->regs->beg = ALLOC_N(int, count);
164
+ match->regs->end = ALLOC_N(int, count);
170
165
 
171
- for ( i = 0; i <= count; i++){
166
+ for ( i = 0; i < count; i++){
172
167
  match->regs->beg[i] = region->beg[i];
173
168
  match->regs->end[i] = region->end[i];
174
169
  }
175
170
  rb_cv_set( kORegexp, "@@last_match", (VALUE)match );
176
171
  packet.region = region;
177
- packet.hash = rb_hash_new();
178
- if( onig_foreach_name(oregexp->reg, name_callback, &packet) )
179
- rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
180
- rb_iv_set((VALUE)match, "@named_captures", packet.hash);
172
+ if( onig_number_of_names( oregexp->reg ) > 0 ) {
173
+ packet.hash = rb_hash_new();
174
+ onig_foreach_name(oregexp->reg, name_callback, &packet);
175
+ rb_iv_set((VALUE)match, "@named_captures", packet.hash);
176
+ }
181
177
  return (VALUE)match;
182
178
  }
183
179
 
@@ -201,9 +197,12 @@ static VALUE oregexp_match( VALUE self, VALUE string ) {
201
197
 
202
198
  OnigRegion *region = onig_region_new();
203
199
  int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
200
+ rb_backref_set(Qnil);
204
201
  if (r >= 0) {
205
202
  VALUE matchData = oregexp_make_match_data( oregexp, region, string_str);
206
203
  onig_region_free(region, 1 );
204
+ rb_backref_set(matchData);
205
+ rb_match_busy(matchData);
207
206
  return matchData;
208
207
  } else if (r == ONIG_MISMATCH) {
209
208
  onig_region_free(region, 1 );
@@ -212,7 +211,7 @@ static VALUE oregexp_match( VALUE self, VALUE string ) {
212
211
  onig_region_free(region, 1 );
213
212
  char s[ONIG_MAX_ERROR_MESSAGE_LEN];
214
213
  onig_error_code_to_str(s, r);
215
- rb_raise(rb_eException, "Oniguruma Error: %s", s);
214
+ rb_raise(rb_eArgError, "Oniguruma Error: %s", s);
216
215
  }
217
216
 
218
217
  }
@@ -233,7 +232,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
233
232
  {
234
233
  ORegexp *oregexp;
235
234
  VALUE ret;
236
- int32_t replIdx = 0;
235
+ int32_t replIdx = 0, name_pos, name_start, name_end ;
237
236
  int32_t replacementLength = RSTRING(repl_text)->len;
238
237
  UChar *replacementText = RSTRING(repl_text)->ptr;
239
238
  UChar *replacementEnd = replacementText + (replacementLength-1);
@@ -254,6 +253,10 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
254
253
  while (replIdx < replacementLength) {
255
254
  OnigCodePoint c = ONIGENC_MBC_TO_CODE(enc, replacementText+replIdx, replacementEnd);
256
255
  int c_len =ONIGENC_MBC_ENC_LEN(enc, replacementText+replIdx) ;
256
+ if( c_len == 0 ) {
257
+ rb_warn("Strange, for %d enc_len is 0", c);
258
+ c_len = 1;
259
+ }
257
260
  replIdx += c_len;
258
261
  if ( c != BACKSLASH) {
259
262
  /* Common case, no substitution, no escaping, */
@@ -311,7 +314,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
311
314
  break;
312
315
  case '+': // last matched group
313
316
  replIdx += c_len;
314
- for(groupNum = region->num_regs; groupNum > 0; groupNum --) {
317
+ for(groupNum = region->num_regs-1; groupNum > 0; groupNum --) {
315
318
  g_start = region->beg[ groupNum ];
316
319
  g_end = region->end[ groupNum ];
317
320
  if( g_start != -1 ) {
@@ -320,7 +323,35 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
320
323
  }
321
324
  }
322
325
  break;
323
-
326
+ case '<': // named group references \<name>
327
+ name_pos = replIdx+c_len;
328
+ name_end = name_start = replIdx+c_len;
329
+ while(name_pos < replacementLength) {
330
+ c = ONIGENC_MBC_TO_CODE(enc, replacementText+name_pos, replacementEnd);
331
+ c_len = ONIGENC_MBC_ENC_LEN(enc, replacementText+name_pos) ;
332
+ name_pos += c_len;
333
+ if( c == '>') break;
334
+ if( ONIGENC_IS_CODE_WORD(enc, c) ) {
335
+ name_end += c_len;
336
+ } else {
337
+ break;
338
+ }
339
+ }
340
+ if( c != '>' || name_end == name_start ) {
341
+ // place backslash and '<'
342
+ rb_str_buf_cat(ret, replacementText+(replIdx-p_len), p_len+c_len);
343
+ replIdx += c_len;
344
+ } else {
345
+ // lookup for group and subst for that value
346
+ groupNum = onig_name_to_backref_number( oregexp->reg,
347
+ replacementText+name_start, replacementText+name_end, region);
348
+ if( groupNum >= 0 ) {
349
+ rb_str_buf_cat(ret, matchText+region->beg[groupNum],
350
+ region->end[groupNum]-region->beg[groupNum]);
351
+ }
352
+ replIdx = name_pos;
353
+ }
354
+ break;
324
355
  default:
325
356
  rb_str_buf_cat(ret, replacementText+(replIdx-p_len), p_len+c_len);
326
357
  replIdx += c_len;
@@ -328,7 +359,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
328
359
  }
329
360
  } else {
330
361
  /* Finally, append the capture group data to the destination. */
331
- if( groupNum < region->num_regs && region->beg[groupNum] >= 0 && region->end[groupNum]>= region->beg[groupNum] ) {
362
+ if( groupNum < region->num_regs && region->beg[groupNum] >= 0 ) {
332
363
  rb_str_buf_cat(ret, matchText+region->beg[groupNum], region->end[groupNum]-region->beg[groupNum]);
333
364
  }
334
365
  }
@@ -359,13 +390,15 @@ oregexp_gsub(self, argc, argv, bang, once, region)
359
390
  VALUE repl;
360
391
  long beg,
361
392
  end,
393
+ len,
362
394
  prev_end;
363
395
  int tainted = 0,
364
396
  iter = 0;
365
397
 
366
398
  VALUE buf, curr_repl, block_res;
367
399
  ORegexp *oregexp;
368
-
400
+ OnigEncoding enc;
401
+
369
402
  if (argc == 1 && rb_block_given_p()) {
370
403
  iter = 1;
371
404
  } else if (argc == 2) {
@@ -392,6 +425,7 @@ oregexp_gsub(self, argc, argv, bang, once, region)
392
425
  }
393
426
  end = 0;
394
427
  buf = rb_str_buf_new(str_len);
428
+ enc = onig_get_encoding( oregexp->reg );
395
429
  do {
396
430
  prev_end = end;
397
431
  beg = region->beg[0];
@@ -400,12 +434,8 @@ oregexp_gsub(self, argc, argv, bang, once, region)
400
434
  if ( iter ) {
401
435
  VALUE match_data = oregexp_make_match_data( oregexp, region, string_str );
402
436
  rb_backref_set(match_data);
403
- if( once )
404
- block_res = rb_yield( match_data );
405
- else {
406
- VALUE match_string = rb_str_new( str_ptr+beg, end-beg);
407
- block_res = rb_yield_values(2, match_string, match_data );
408
- }
437
+ rb_match_busy(match_data);
438
+ block_res = rb_yield( match_data );
409
439
  str_mod_check( string_str, str_ptr, str_len);
410
440
  curr_repl = rb_obj_as_string(block_res);
411
441
  } else {
@@ -414,6 +444,17 @@ oregexp_gsub(self, argc, argv, bang, once, region)
414
444
  rb_str_append(buf, curr_repl);
415
445
  if( once ) break;
416
446
  // find next match
447
+ if( end == beg) {
448
+ /*
449
+ * Always consume at least one character of the input string
450
+ * in order to prevent infinite loops.
451
+ */
452
+ if( str_len <= end )
453
+ break;
454
+ len = ONIGENC_MBC_ENC_LEN(enc, str_ptr + end);
455
+ rb_str_buf_cat( buf, str_ptr+end, len);
456
+ end += len;
457
+ }
417
458
  beg=onig_search(oregexp->reg, str_ptr, str_ptr + str_len,
418
459
  str_ptr+end, str_ptr + str_len,
419
460
  region, ONIG_OPTION_NONE);
@@ -456,28 +497,216 @@ static VALUE oregexp_safe_gsub(self, argc, argv, bang, once)
456
497
  gsub_packet call_args = {self, argc, argv, bang, once, region};
457
498
  return rb_ensure( oregexp_packed_gsub, (VALUE)&call_args, oregexp_cleanup_region, (VALUE)region);
458
499
  }
500
+
501
+ /**
502
+ * call-seq:
503
+ * rxp.gsub(str, replacement)
504
+ * rxp.gsub(str) {|match_data| ... }
505
+ *
506
+ * Returns a copy of _str_ with _all_ occurrences of _rxp_ pattern
507
+ * replaced with either _replacement_ or the value of the block.
508
+ *
509
+ * If a string is used as the replacement, the sequences \1, \2,
510
+ * and so on may be used to interpolate successive groups in the match.
511
+ *
512
+ * In the block form, the current MatchData object is passed in as a
513
+ * parameter. The value returned by the block will be substituted for
514
+ * the match on each call.
515
+ *
516
+ **/
459
517
  static VALUE oregexp_m_gsub(int argc, VALUE *argv, VALUE self) {
460
518
  return oregexp_safe_gsub(self, argc, argv, 0, 0);
461
519
  }
520
+
521
+ /**
522
+ * call-seq:
523
+ * rxp.sub(str, replacement)
524
+ * rxp.sub(str) {|match_data| ... }
525
+ *
526
+ * Returns a copy of _str_ with the _first_ occurrence of _rxp_ pattern
527
+ * replaced with either _replacement_ or the value of the block.
528
+ *
529
+ * If a string is used as the replacement, the sequences \1, \2,
530
+ * and so on may be used to interpolate successive groups in the match.
531
+ *
532
+ * In the block form, the current MatchData object is passed in as a
533
+ * parameter. The value returned by the block will be substituted for
534
+ * the match on each call.
535
+ *
536
+ **/
462
537
  static VALUE oregexp_m_sub(int argc, VALUE *argv, VALUE self) {
463
538
  return oregexp_safe_gsub(self, argc, argv, 0, 1);
464
539
  }
465
540
 
541
+ /**
542
+ * call-seq:
543
+ * rxp.gsub!(str, replacement)
544
+ * rxp.gsub!(str) {|match_data| ... }
545
+ *
546
+ * Performs the substitutions of ORegexp#gsub in place, returning
547
+ * _str_, or _nil_ if no substitutions were performed.
548
+ *
549
+ **/
466
550
  static VALUE oregexp_m_gsub_bang(int argc, VALUE *argv, VALUE self) {
467
551
  return oregexp_safe_gsub(self, argc, argv, 1, 0);
468
552
  }
553
+
554
+ /**
555
+ * call-seq:
556
+ * oregexp.sub!(str, replacement)
557
+ * oregexp.sub!(str) {|match_data| ... }
558
+ *
559
+ * Performs the substitutions of ORegexp#sub in place, returning
560
+ * _str_, or _nil_ if no substitutions were performed.
561
+ *
562
+ **/
469
563
  static VALUE oregexp_m_sub_bang(int argc, VALUE *argv, VALUE self) {
470
564
  return oregexp_safe_gsub(self, argc, argv, 1, 1);
471
565
  }
472
566
 
567
+ static VALUE
568
+ oregexp_scan(VALUE self, VALUE str, OnigRegion * region)
569
+ {
570
+ long beg,
571
+ len,
572
+ end;
573
+ int iter = 0;
574
+
575
+ VALUE matches;
576
+ ORegexp *oregexp;
577
+ OnigEncoding enc;
578
+
579
+ if ( rb_block_given_p()) {
580
+ iter = 1;
581
+ }
582
+ Data_Get_Struct( self, ORegexp, oregexp );
583
+
584
+ VALUE string_str = StringValue( str );
585
+ UChar* str_ptr = RSTRING(string_str)->ptr;
586
+ int str_len = RSTRING(string_str)->len;
587
+ beg = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
588
+ if (beg < 0) {
589
+ /* no match */
590
+ return Qnil;
591
+ }
592
+ matches = rb_ary_new();
593
+ enc = onig_get_encoding( oregexp -> reg );
594
+ do {
595
+ VALUE match_data = oregexp_make_match_data( oregexp, region, string_str );
596
+ end = region->end[0];
597
+ rb_ary_push( matches, match_data );
598
+ if ( iter )
599
+ rb_yield( match_data );
600
+ // find next match
601
+ if( end == beg) {
602
+ /*
603
+ * Always consume at least one character of the input string
604
+ * in order to prevent infinite loops.
605
+ */
606
+ if( str_len <= end )
607
+ break;
608
+ len = ONIGENC_MBC_ENC_LEN(enc, str_ptr + end);
609
+ end += len;
610
+ }
611
+
612
+ beg=onig_search(oregexp->reg, str_ptr, str_ptr + str_len,
613
+ str_ptr+end, str_ptr + str_len,
614
+ region, ONIG_OPTION_NONE);
615
+ } while ( beg >= 0);
616
+
617
+ return matches;
618
+ }
619
+
620
+ struct scan_packet {
621
+ VALUE self, str;
622
+ OnigRegion * region;
623
+ };
624
+ static VALUE oregexp_packed_scan( struct scan_packet * args) {
625
+ return oregexp_scan(args->self, args->str, args->region);
626
+ }
627
+ /**
628
+ * call-seq:
629
+ * rxp.scan(str) # => [matchdata1, matchdata2,...] or nil
630
+ * rxp.scan(str) {|match_data| ... } # => [matchdata1, matchdata2,...] or nil
631
+ *
632
+ * Both forms iterate through _str_, matching the pattern. For each match,
633
+ * a MatchData object is generated and passed to the block, and
634
+ * added to the resulting array of MatchData objects.
635
+ *
636
+ * If _str_ does not match pattern, _nil_ is returned.
637
+ *
638
+ **/
639
+ static VALUE oregexp_m_scan(VALUE self, VALUE str) {
640
+ OnigRegion * region = onig_region_new();
641
+ struct scan_packet call_args = {self, str, region};
642
+ return rb_ensure( oregexp_packed_scan, (VALUE)&call_args, oregexp_cleanup_region, (VALUE)region);
643
+ }
644
+
645
+ /**
646
+ * call-seq:
647
+ * rxp === str => true or false
648
+ *
649
+ * Case Equality---Synonym for <code>ORegexp#=~</code> used in case statements.
650
+ *
651
+ * a = "HELLO"
652
+ * case a
653
+ * when ORegexp.new('^[a-z]*$'); print "Lower case\n"
654
+ * when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
655
+ * else; print "Mixed case\n"
656
+ * end
657
+ *
658
+ * <em>produces:</em>
659
+ *
660
+ * Upper case
661
+ *
662
+ **/
663
+
664
+ static VALUE oregexp_m_eqq(VALUE self, VALUE str) {
665
+ VALUE match;
666
+
667
+ if (TYPE(str) != T_STRING) {
668
+ str = rb_check_string_type(str);
669
+ if (NIL_P(str)) {
670
+ return Qfalse;
671
+ }
672
+ }
673
+ StringValue(str);
674
+ match = oregexp_match(self, str);
675
+ if (Qnil == match) {
676
+ return Qfalse;
677
+ }
678
+ return Qtrue;
679
+ }
680
+ /*
681
+ * call-seq:
682
+ * rxp =~ string => int or nil
683
+ *
684
+ * Matches <code>rxp</code> against <code>string</code>, returning the offset of the
685
+ * start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
686
+ * <code>MatchData</code> or <code>nil</code>.
687
+ *
688
+ * ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
689
+ * ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
690
+ **/
691
+ static VALUE oregexp_match_op(VALUE self, VALUE str) {
692
+ VALUE ret = oregexp_match(self, str);
693
+ if(ret == Qnil)
694
+ return Qnil;
695
+ return INT2FIX(RMATCH(ret)->regs->beg[0]);
696
+ }
697
+
473
698
  void Init_oregexp() {
474
699
  mOniguruma = rb_define_module("Oniguruma");
475
700
  VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
476
701
  rb_define_alloc_func(cORegexp, oregexp_allocate);
477
702
  rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
478
703
  rb_define_method( cORegexp, "match", oregexp_match, 1 );
704
+ rb_define_method( cORegexp, "=~", oregexp_match_op, 1 );
479
705
  rb_define_method( cORegexp, "gsub", oregexp_m_gsub, -1 );
480
706
  rb_define_method( cORegexp, "sub", oregexp_m_sub, -1 );
481
707
  rb_define_method( cORegexp, "gsub!", oregexp_m_gsub_bang, -1 );
482
708
  rb_define_method( cORegexp, "sub!", oregexp_m_sub_bang, -1 );
709
+ rb_define_method( cORegexp, "scan", oregexp_m_scan, 1 );
710
+ rb_define_method( cORegexp, "===", oregexp_m_eqq, 1 );
711
+ rb_define_const( mOniguruma, "VERSION", rb_str_new2(onig_version()) );
483
712
  }
@@ -17,6 +17,20 @@ module Oniguruma
17
17
  OPTION_MAXBIT = OPTION_POSIX_REGION
18
18
  OPTION_DEFAULT = OPTION_NONE
19
19
 
20
+ OPTIONS_SHORTCUTS = {
21
+ 'i' => OPTION_IGNORECASE,
22
+ 'x' => OPTION_EXTEND,
23
+ 'm' => OPTION_MULTILINE,
24
+ 's' => OPTION_SINGLELINE,
25
+ 'l' => OPTION_FIND_LONGEST,
26
+ 'E' => OPTION_FIND_NOT_EMPTY,
27
+ 'S' => OPTION_NEGATE_SINGLELINE,
28
+ 'G' => OPTION_DONT_CAPTURE_GROUP,
29
+ 'g' => OPTION_CAPTURE_GROUP,
30
+ 'B' => OPTION_NOTBOL,
31
+ 'E' => OPTION_NOTEOL,
32
+ }
33
+
20
34
  SYNTAX_ASIS = 0
21
35
  SYNTAX_POSIX_BASIC = 1
22
36
  SYNTAX_POSIX_EXTENDED = 2
@@ -117,8 +131,12 @@ module Oniguruma
117
131
  alias old_initialize initialize
118
132
  # :startdoc:
119
133
 
134
+ # call-seq:
135
+ # ORegexp.new( pattern, options_hash )
136
+ # ORegexp.new( pattern, option_str, encoding_str=nil, syntax_str=nil)
137
+ #
120
138
  # Constructs a new regular expression from <i>pattern</i>, which is a
121
- # <code>String</code>. The paramter <i>options</i> is a <code>Hash</code>
139
+ # <code>String</code>. The second parameter <i>options_hash</i> may be a <code>Hash</code>
122
140
  # of the form:
123
141
  #
124
142
  # <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
@@ -135,9 +153,27 @@ module Oniguruma
135
153
  #
136
154
  # #Accept java syntax on SJIS encoding:
137
155
  # r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
156
+ #
157
+ # Second form uses string shortcuts to set options and encoding:
158
+ # r = ORegexp.new('cat', 'i', 'utf8', 'java')
138
159
 
139
- def initialize( pattern, options = {} )
160
+ def initialize( pattern, *args )
140
161
  defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
162
+ if args[0].is_a?(String)
163
+ options = {}
164
+ option_str, encoding_str, syntax_str = *args
165
+ opt = 0
166
+ option_str.each_byte {|x| opt |= (OPTIONS_SHORTCUTS[x.chr] || 0) }
167
+ options[:options] = opt
168
+ if encoding_str && Oniguruma::const_defined?("ENCODING_#{encoding_str.upcase}")
169
+ options[:encoding] = Oniguruma::const_get("ENCODING_#{encoding_str.upcase}")
170
+ end
171
+ if syntax_str && Oniguruma::const_defined?("SYNTAX_#{syntax_str.upcase}")
172
+ options[:syntax] = Oniguruma::const_get("SYNTAX_#{syntax_str.upcase}")
173
+ end
174
+ else
175
+ options = args[0] || {}
176
+ end
141
177
  old_initialize( pattern, defaults.merge( options ).freeze )
142
178
  end
143
179
 
@@ -241,131 +277,203 @@ module Oniguruma
241
277
  end
242
278
 
243
279
  # call-seq:
244
- # rxp =~ string => int or nil
245
- #
246
- # Matches <code>rxp</code> against <code>string</code>, returning the offset of the
247
- # start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
248
- # <code>MatchData</code> or <code>nil</code>.
249
- #
250
- # ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
251
- # ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
252
-
253
- def =~ string
254
- return nil unless string
255
- m = match( string )
256
- return nil unless m
257
- m.begin(0)
258
- end
259
-
260
- # call-seq:
261
- # rxp === str => true or false
280
+ # rxp.source => str
262
281
  #
263
- # Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
264
- #
265
- # a = "HELLO"
266
- # case a
267
- # when ORegexp.new('^[a-z]*$'); print "Lower case\n"
268
- # when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
269
- # else; print "Mixed case\n"
270
- # end
271
- #
272
- # <em>produces:</em>
273
- #
274
- # Upper case
275
-
276
- alias === =~
277
-
282
+ # Returns the original string of the pattern.
283
+ #
284
+ # ORegexp.new( 'ab+c', 'ix' ).source #=> "ab+c"
278
285
  def source
279
286
  @pattern.freeze
280
287
  end
281
288
 
282
- def match_all string
283
- matches = []
284
- positions = []
285
- position = 0
286
- tmp_string = string
287
- while tmp_string != ""
288
- if m = match( tmp_string )
289
- matches << m
290
- positions << position
291
- tmp_string = m.post_match
292
- position += m.end(0)
293
- #if m.end == m.begin
294
- # tmp_string = tmp_string[1..-1]
295
- # position += 1
296
- #end
297
- else
298
- break
299
- end
300
- end
301
- if matches.size > 0
302
- MultiMatchData.new( string, matches, positions )
303
- else
304
- nil
305
- end
306
- end
289
+ alias match_all scan
290
+
307
291
  end
308
292
 
309
- class MultiMatchData
310
- def initialize( string, matches, positions )
311
- @matches = matches
312
- @positions = positions
313
- @string = string
314
- end
315
-
316
- def position index
317
- @positions[index]
318
- end
319
-
320
- def [] ( value1, value2 = nil )
321
- unless value2
322
- @matches[value1]
323
- else
324
- @matches[value1, value2]
325
- end
326
- end
327
-
328
- def begin index
329
- @matches[index].begin(0) + @positions[index]
330
- end
331
-
332
- def end index
333
- @matches[index].end(0) + @positions[index]
334
- end
335
-
336
- def length
337
- @matches.size
338
- end
339
- alias size length
340
-
341
- def offset index
342
- [self.begin(index), self.end(index) ]
343
- end
344
-
345
- def string
346
- @string.freeze
347
- end
348
-
349
- def to_a
350
- @matches
351
- end
352
-
353
- def each
354
- @matches.size.times do |i|
355
- yield @matches[i], @positions[i]
356
- end
357
- end
293
+ end
294
+
295
+ class ::String
296
+ # Calls <code>Oniguruma::ORegexp#gsub</code> on this string.
297
+ def ogsub(*args)
298
+ Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
358
299
  end
359
300
 
301
+ # Calls <code>Oniguruma::ORegexp#gsub!</code> on this string.
302
+ def ogsub!(*args)
303
+ Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
304
+ end
305
+
306
+ # Calls <code>Oniguruma::ORegexp#sub</code> on this string.
307
+ def osub(re, *args)
308
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
309
+ end
310
+
311
+ # Calls <code>Oniguruma::ORegexp#sub!</code> on this string.
312
+ def osub!(re, *args)
313
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
314
+ end
360
315
  end
316
+
361
317
  class ::MatchData
362
- alias old_aref :[]
363
- def [](*idx)
364
- if idx[0].is_a?(Symbol)
365
- k = @named_captures && @named_captures[idx[0]]
366
- k && old_aref(k)
367
- else
368
- old_aref(*idx)
369
- end
370
- end
318
+ # call-seq:
319
+ # mtch.to_index(symbol) => int or nil
320
+ #
321
+ # Returns the group index for the corresponding named group, or
322
+ # <code>nil</code> if the group does not exist.
323
+ #
324
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
325
+ # m.to_index(:begin) #=> 1
326
+ # m.to_index(:unknown) #=> nil
327
+ def to_index symbol
328
+ @named_captures && @named_captures[symbol]
329
+ end
330
+
331
+ alias old_aref :[]
332
+
333
+ # call-seq:
334
+ # mtch[i] => obj
335
+ # mtch[start, length] => array
336
+ # mtch[range] => array
337
+ # mtch[symbol] => obj
338
+ #
339
+ # <code>MatchData</code> acts as an array, and may be
340
+ # accessed using the normal array indexing techniques. <i>mtch</i>[0] is
341
+ # equivalent to the special variable <code>$&</code>, and returns the entire
342
+ # matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
343
+ # of the matched backreferences (portions of the pattern between parentheses).
344
+ #
345
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
346
+ # m[0] #=> "HX1138"
347
+ # m[1, 2] #=> ["H", "X"]
348
+ # m[1..3] #=> ["H", "X", "113"]
349
+ # m[-3, 2] #=> ["X", "113"]
350
+ #
351
+ # If a symbol is used as index, the corresponding named group is returned,
352
+ # or <code>nil</code> if such a group does not exist.
353
+ #
354
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
355
+ # m[:begin] #=> "THX"
356
+ # m[:middle] #=> "1"
357
+ # m[:end] #=> "138"
358
+
359
+ def [](*idx)
360
+ if idx[0].is_a?(Symbol)
361
+ k = to_index( idx[0] )
362
+ k && old_aref(k)
363
+ else
364
+ old_aref(*idx)
365
+ end
366
+ end
367
+
368
+ alias old_begin :begin
369
+
370
+ # call-seq:
371
+ # mtch.begin(n) => integer
372
+ # mtch.begin => integer
373
+ # mtch.begin(symbol) => integer
374
+ #
375
+ # Returns the offset of the start of the <em>n</em>th element of the match
376
+ # array in the string.
377
+ #
378
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
379
+ # m.begin(0) #=> 1
380
+ # m.begin(2) #=> 2
381
+ #
382
+ # If no arguments are given, the index of the
383
+ # first matching character is returned.
384
+ #
385
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
386
+ # m.begin #=> 1
387
+ #
388
+ # If the argument is a symbol, then the beginning of the
389
+ # corresponding named group is returned, or <code>nil</code>
390
+ # if the group does not exist.
391
+ #
392
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
393
+ # m.begin(:middle) #=> 3
394
+
395
+ def begin(*idx)
396
+ if idx[0].is_a?(Symbol)
397
+ k = to_index( idx[0] )
398
+ k && old_begin(k)
399
+ elsif idx.empty?
400
+ old_begin( 0 )
401
+ else
402
+ old_begin(*idx)
403
+ end
404
+ end
405
+
406
+ alias old_end :end
407
+
408
+ # call-seq:
409
+ # mtch.end(n) => integer
410
+ #
411
+ # Returns the offset of the character immediately following the end of the
412
+ # <em>n</em>th element of the match array in the string.
413
+ #
414
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
415
+ # m.end(0) #=> 7
416
+ # m.end(2) #=> 3
417
+ #
418
+ # If no arguments are given, the offset immediately
419
+ # following the end of the entire match is returned.
420
+ #
421
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
422
+ # m.end #=> 7
423
+ #
424
+ # If the argument is a symbol, then the end of the
425
+ # corresponding named group is returned, or <code>nil</code>
426
+ # if the group does not exist.
427
+ #
428
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
429
+ # m.end(:middle) #=> 4
430
+
431
+ def end(*idx)
432
+ if idx[0].is_a?(Symbol)
433
+ k = to_index( idx[0] )
434
+ k && old_end(k)
435
+ elsif idx.empty?
436
+ old_end( 0 )
437
+ else
438
+ old_end(*idx)
439
+ end
440
+ end
441
+
442
+ alias old_offset :offset
443
+
444
+ # call-seq:
445
+ # mtch.offset(n) => array
446
+ # mtch.offset => array
447
+ # mtch.offset(symbol) => array
448
+ #
449
+ # Returns a two-element array containing the beginning and ending offsets of
450
+ # the <em>n</em>th match.
451
+ #
452
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
453
+ # m.offset(0) #=> [1, 7]
454
+ # m.offset(4) #=> [6, 7]
455
+ #
456
+ # If no arguments are given, the offsets of the entire
457
+ # sequence are returned.
458
+ #
459
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
460
+ # m.offset #=> [1, 7]
461
+ #
462
+ # If the argument is a symbol, then the offsets of the
463
+ # corresponding named group are returned, or <code>nil</code>
464
+ # if the group does not exist.
465
+ #
466
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
467
+ # m.offset(:middle) #=> [3, 4]
468
+
469
+ def offset(*idx)
470
+ if idx[0].is_a?(Symbol)
471
+ k = to_index( idx[0] )
472
+ k && old_offset(k)
473
+ elsif idx.empty?
474
+ old_offset( 0 )
475
+ else
476
+ old_offset(*idx)
477
+ end
478
+ end
371
479
  end
@@ -27,7 +27,7 @@ class ORegexpTestCase < Test::Unit::TestCase
27
27
  end
28
28
 
29
29
  def test_bad_initialization
30
- assert_raises(Exception) do
30
+ assert_raises(ArgumentError) do
31
31
  reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.))" )
32
32
  end
33
33
  end
@@ -53,7 +53,7 @@ class ORegexpTestCase < Test::Unit::TestCase
53
53
  string = 'My favorite fruits are (?#fruit1), (?#fruit2), and (?#fruit3)'
54
54
  assert_equal( "My favorite fruits are *, *, and *", reg.gsub( string, '*' ) )
55
55
  fruits = { "fruit1" => "apples", "fruit2" => "bananas", "fruit3" => "grapes" }
56
- assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |text, match| fruits[match[1]]} )
56
+ assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |match| fruits[match[1]]} )
57
57
  end
58
58
 
59
59
  def test_eql
@@ -74,10 +74,23 @@ class ORegexpTestCase < Test::Unit::TestCase
74
74
 
75
75
  assert_equal( "Upper case\n", result )
76
76
  end
77
-
77
+
78
+ def test_case_eql_compat
79
+ # === method should not raise when used in case statements
80
+ a = Time.now
81
+ result = ""
82
+ case a
83
+ when /./ ; result = "rgx"
84
+ when Oniguruma::ORegexp.new('.'); result = "ore"
85
+ else; result = "else"
86
+ end
87
+ assert_equal( "else", result )
88
+ end
89
+
78
90
  def test_operator_match
79
91
  assert_equal( nil, Oniguruma::ORegexp.new( 'SIT' ) =~ "insensitive" )
80
92
  assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', :options => Oniguruma::OPTION_IGNORECASE ) =~ "insensitive" )
93
+ assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', 'i' ) =~ "insensitive" )
81
94
  end
82
95
 
83
96
  # def test_operator_match_2
@@ -96,6 +109,8 @@ class ORegexpTestCase < Test::Unit::TestCase
96
109
  def test_kcode
97
110
  reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
98
111
  assert_equal( Oniguruma::ENCODING_ASCII, reg.kcode )
112
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)", '', 'SJIS' )
113
+ assert_equal( Oniguruma::ENCODING_SJIS, reg.kcode )
99
114
  end
100
115
 
101
116
  def test_options
@@ -106,6 +121,40 @@ class ORegexpTestCase < Test::Unit::TestCase
106
121
  string = '(?<=\n)\\.*ocatarinetabelachitchix'
107
122
  assert_equal( string, Oniguruma::ORegexp.new( string ).source )
108
123
  end
124
+
125
+ def test_named_sub_backrefs
126
+ re = Oniguruma::ORegexp.new('(?<pre>\w+?)\d+(?<after>\w+)')
127
+ assert_equal(' def123abc ', re.sub('abc123def', ' \<after>123\<pre> ') )
128
+ end
129
+
130
+ def test_named_sub_backrefs_dupes
131
+ re = Oniguruma::ORegexp.new('(?<pre>\w+?)\d+(?<pre>\w+)')
132
+ assert_equal('123def', re.sub('abc123def', '123\<pre>') )
133
+ end
134
+
135
+ def test_backref_set_for_match
136
+ re = Oniguruma::ORegexp.new('Date:(\d{4})/(\d{2})/(\d{2})')
137
+ assert re.match( "Date:2007/03/25" )
138
+ assert_not_nil $~
139
+ assert_equal "2007", $1
140
+ assert_equal "03", $2
141
+ assert_equal "25", $3
142
+ end
143
+
144
+ def test_backref_set_for_match_op
145
+ re = Oniguruma::ORegexp.new('Date:(\d{4})/(\d{2})/(\d{2})')
146
+ assert re =~ "Date:2007/03/25"
147
+ assert_not_nil $~
148
+ assert_equal "2007", $1
149
+ assert_equal "03", $2
150
+ assert_equal "25", $3
151
+ end
152
+
153
+ def test_multibyte_named_backrefs
154
+ r = Oniguruma::ORegexp.new('(?<группа>test).+(\k<группа>)', :encoding => Oniguruma::ENCODING_UTF8)
155
+ assert_equal "should !test!", r.sub("should test this damned test", '!\<группа>!')
156
+ end
157
+
109
158
  end
110
159
 
111
160
  class MatchDataTestCase < Test::Unit::TestCase
@@ -123,6 +172,7 @@ class MatchDataTestCase < Test::Unit::TestCase
123
172
 
124
173
  def test_begin
125
174
  matches = @reg.match( "THX1138." )
175
+ assert_equal( 1, matches.begin )
126
176
  assert_equal( 1, matches.begin(0) )
127
177
  assert_equal( 2, matches.begin(2) )
128
178
  end
@@ -134,6 +184,7 @@ class MatchDataTestCase < Test::Unit::TestCase
134
184
 
135
185
  def test_end
136
186
  matches = @reg.match( "THX1138." )
187
+ assert_equal( 7, matches.end )
137
188
  assert_equal( 7, matches.end(0) )
138
189
  assert_equal( 3, matches.end(2) )
139
190
  end
@@ -146,6 +197,7 @@ class MatchDataTestCase < Test::Unit::TestCase
146
197
 
147
198
  def test_offset
148
199
  matches = @reg.match( "THX1138." )
200
+ assert_equal( [1, 7], matches.offset )
149
201
  assert_equal( [1, 7], matches.offset(0) )
150
202
  assert_equal( [6, 7], matches.offset(4) )
151
203
  end
@@ -189,9 +241,20 @@ class MatchDataTestCase < Test::Unit::TestCase
189
241
  def test_match_all
190
242
  reg = Oniguruma::ORegexp.new( 'ca' )
191
243
  matches = reg.match_all( 'ocatacachaca' )
244
+ a = []
245
+ matches.each { |m| a << m.offset(0) }
246
+ assert_equal( [ [1,3], [5,7], [10,12] ], a)
192
247
  assert_equal( 3, matches.size )
193
- assert_equal( 7, matches.position(2) )
194
- assert_equal( "ca", matches.string[matches.begin(1)...matches.end(1)])
248
+ assert_equal( 10, matches[2].begin( 0 ) )
249
+ assert_equal( "ca", matches[1].string[matches[1].begin( 0 )...matches[1].end( 0 )])
250
+ end
251
+
252
+ def test_scan
253
+ reg = Oniguruma::ORegexp.new( 'ca' )
254
+ a = []
255
+ matches = reg.match_all( 'ocatacachaca' ) { |m| a << m.offset(0) }
256
+ #assert_kind_of(Oniguruma::MultiMatchData, matches)
257
+ assert_equal( [ [1,3], [5,7], [10,12] ], a)
195
258
  end
196
259
 
197
260
  def test_match_empty_string
@@ -205,12 +268,32 @@ class MatchDataTestCase < Test::Unit::TestCase
205
268
  reg = Oniguruma::ORegexp.new( '(?<begin>\()(?<body>.*)(?<end>\))', :options => Oniguruma::OPTION_MULTILINE )
206
269
  matches = reg.match( "blah (content) blah" )
207
270
  assert_not_nil( matches )
271
+ assert_equal $~, matches
208
272
  assert_equal( '(', matches[:begin] )
209
273
  assert_equal( 'content', matches[:body] )
210
274
  assert_equal( ')', matches[:end] )
211
275
  assert_equal( nil, matches[:inexistent])
212
276
  end
213
277
 
278
+ def test_multibyte_named_backrefs
279
+ r = Oniguruma::ORegexp.new('(?<имя>test).+(\k<имя>)', :encoding => Oniguruma::ENCODING_UTF8)
280
+ assert_equal "should TEST", r.sub("should test this damned test") {|m| m[:"имя"].upcase }
281
+ end
282
+
283
+ def test_no_named_backrefs
284
+ r = Oniguruma::ORegexp.new('(.+).+(.+)')
285
+ r.match("text")
286
+ assert_not_nil $~
287
+ assert_equal 0, $~.instance_variables.size
288
+ r = Oniguruma::ORegexp.new('(?<a>.+).+(?<b>.+)')
289
+ r.match("text")
290
+ assert_not_nil $~
291
+ assert_equal 1, $~.instance_variables.size
292
+
293
+ end
294
+
295
+ # casefolding for full Unicode set is not present in versions prior to 5.
296
+ if Oniguruma::VERSION >= '5.0.0'
214
297
  def test_utf8_ignore_case
215
298
  reg = Oniguruma::ORegexp.new( '([а-я])+', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
216
299
  matches = reg.match("Text: Ехал Грека Через Реку")
@@ -222,16 +305,17 @@ class MatchDataTestCase < Test::Unit::TestCase
222
305
 
223
306
  def test_utf8_gsub
224
307
  reg = Oniguruma::ORegexp.new( '([а-я])([а-я])([а-я]+)', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
225
- new_str = reg.gsub("Text: Ехал Грека Через Реку") {|s,m| m[1]*2+m[2]*2+m[3] }
308
+ new_str = reg.gsub("Text: Ехал Грека Через Реку") {|m| m[1]*2+m[2]*2+m[3] }
226
309
  assert_equal("Text: ЕЕххал ГГррека ЧЧеерез РРееку", new_str)
227
310
  end
228
311
 
229
312
  def test_utf8_gsub2
230
313
  reg = Oniguruma::ORegexp.new( '[а-я]', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
231
- new_str = reg.gsub("Text: Ехал Грека Через Реку") {|s,m| s*2 }
314
+ new_str = reg.gsub("Text: Ехал Грека Через Реку") {|m| m[0]*2 }
232
315
  assert_equal("Text: ЕЕххаалл ГГррееккаа ЧЧеерреезз РРееккуу", new_str)
233
316
  end
234
-
317
+ end
318
+
235
319
  def test_sub_compatibility
236
320
  $x = "a.gif"
237
321
  assert_equal("b.gif", $x.osub('.*\.([^\.]+)$', 'b.\1'))
@@ -242,36 +326,36 @@ class MatchDataTestCase < Test::Unit::TestCase
242
326
  assert_equal("<a.gif>", $x.osub('.*\.([^\.]+)$', '<\&>'))
243
327
  assert_equal("a.a.", $x.osub('(gif)', '\`') )
244
328
  end
245
-
246
- class ::String
247
- def ogsub(*args)
248
- Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
249
- end
250
- def ogsub!(*args)
251
- Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
252
- end
253
- def osub(re, *args)
254
- Oniguruma::ORegexp.new( re ).sub(self, *args)
255
- end
256
- end
257
329
 
258
330
  def test_gsub_compat
259
331
  assert_equal("hello".ogsub('[aeiou]', '*') , "h*ll*")
260
332
  assert_equal("hello".ogsub('([aeiou])', '<\1>') , "h<e>ll<o>")
261
333
  i = 0
262
- assert_equal("12345" , Oniguruma::ORegexp.new('.').gsub("hello") {|s,m| i+=1; i.to_s})
263
- assert_equal("214365", Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|s,m| m[2] + m[1] })
334
+ assert_equal("12345" , Oniguruma::ORegexp.new('.').gsub("hello") {|m| i+=1; i.to_s})
335
+ assert_equal("214365", Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|m| m[2] + m[1] })
264
336
  a = "test"
265
337
  a.ogsub!('t', a)
266
338
  assert_equal("testestest", a)
267
339
  end
268
340
 
269
341
  def test_match_compat
270
- t = Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|s,m| "#$2#$1" }
342
+ t = Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|m| "#$2#$1" }
271
343
  assert_equal("214365", t )
272
- t = Oniguruma::ORegexp.new('([aeiou])').gsub("hello") {|s,m| "<#$1>" }
344
+ t = Oniguruma::ORegexp.new('([aeiou])').gsub("hello") {|m| "<#$1>" }
273
345
  assert_equal( "h<e>ll<o>", t)
274
346
  end
275
347
 
348
+ def _u16(str)
349
+ str.unpack("U*").pack("n*")
350
+ end
351
+ puts Oniguruma::VERSION
352
+ if Oniguruma::VERSION >= '4.0.0'
353
+ def test_utf16_gsub
354
+ r = Oniguruma::ORegexp.new( _u16('[aeiou]'), :encoding => Oniguruma::ENCODING_UTF16_BE)
355
+ assert_equal( _u16("h*ll*"), r.gsub( _u16("hello"), _u16('*')) )
356
+ r = Oniguruma::ORegexp.new( _u16('([aeiou])'), :encoding => Oniguruma::ENCODING_UTF16_BE)
357
+ assert_equal( _u16("h<e>\\ll<o>\\"), r.gsub( _u16("hello"), _u16('<\1>\\')) )
358
+ end
359
+ end
276
360
 
277
361
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: oniguruma
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.1
7
- date: 2007-03-25 00:00:00 +01:00
6
+ version: 1.0.0
7
+ date: 2007-03-27 00:00:00 +02:00
8
8
  summary: Bindings for the oniguruma regular expression library
9
9
  require_paths:
10
10
  - lib