oniguruma 0.9.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,34 @@
1
+ == 1.0.0 / 2007-03-27
2
+ * Added documentation for MatchData.
3
+ * Added ogsub, ogsub!, osub and osub! to ::String.
4
+ * Removed ::String definitions from tests.
5
+ * Now the minimal recommended version of oniglib is 5.5 or higher.
6
+ * Removed ugly #if statements from c code.
7
+ * Do not create @named_captures hash if there are no named groups for regexp -- somewhat improves speed for repetitive calls
8
+ * Fixed usage of named backreferences in gsub with non-ascii names
9
+ * Move ORegexp#=~ to C code, make it work just like Regexp#=~, i.e. set $~. Throw ArgumentError instead of Exception if pattern does not compile
10
+ * Fix implementation of ORegexp#===, so it now does not raise errors in case statement anymore
11
+ (resembles plain Ruby Regexp#=== behaviour)
12
+ * Modified begin, end and offset methods in MatchData to handle named groups and default to group 0.
13
+ * Exception is no longer thrown in oregexp_make_match_data.
14
+ * Removed references to MultiMatchData from documentation
15
+ * Removed class MultiMatchData
16
+ * Fix off by one error in region->num_regs usage
17
+ * Fix dumb bug with zero-width matches that made infinite loops. now consume at least one char in gsub and scan
18
+ * ORegexp API changes:
19
+ * Pass only MatchData to sub/gsub with blocks
20
+ oregexp.sub( str ) {|match_data| ... }
21
+ oregexp.gsub( str ) {|match_data| ... }
22
+ * Add ORegexp#scan instead of match_all
23
+ oregexp.scan(str) {|match_data| ... } # => Array of MatchData, or nil
24
+ * Friendly way to set options
25
+ ORegexp.new( pattern, options_str, encoding, syntax)
26
+ ORegexp.new('\w+', 'imsx', 'koi8r', 'perl')
27
+ * Named backreferences in substitutions
28
+ ORegexp.new('(?<pre>\w+)\d+(?<after>\w+)').sub('abc123def', '\<after>123\<pre>') #=> 'def123abc'
29
+ * couple of bugfixes with region's num_regs
30
+ * some docs for substitution methods added
31
+
1
32
  == 0.9.1 / 2007-03-25
2
33
  * FIX: Buggy resolution of numeric codes for encoding and syntax options (Nikolai Lugovoi)
3
34
  * FIX: Buggy implementation of ORegexp#sub and ORegexp#gsub methods. Now code is all C (Nikolai Lugovoi)
data/README.txt CHANGED
@@ -8,6 +8,7 @@ Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regul
8
8
  * Same interface as the standard Regexp class (easy transition!).
9
9
  * Support for named groups, look-ahead, look-behind, and other
10
10
  cool features!
11
+ * Support for other regexp syntaxes (Perl, Python, Java, etc.)
11
12
 
12
13
  == SYNOPSIS:
13
14
 
@@ -23,7 +24,7 @@ Consult the Syntax.txt[link:files/Syntax_txt.html] page.
23
24
 
24
25
  == REQUIREMENTS:
25
26
 
26
- * Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
27
+ * Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 5.5 or higher
27
28
 
28
29
  == INSTALL:
29
30
 
@@ -43,7 +44,7 @@ sudo gem install -r oniguruma
43
44
 
44
45
  == CREDITS:
45
46
 
46
- * N. Lugovoi. ORegexp.sub and ORegexp.gsub code, plus other patches.
47
+ * N. Lugovoi. ORegexp.sub and ORegexp.gsub code and lots of other stuff.
47
48
  * K. Kosako. For his great library.
48
49
  * A lot of the documentation has been copied from the original Ruby Regex documentation.
49
50
 
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require 'hoe'
3
3
 
4
4
  class Hoe; def extra_deps; @extra_deps.reject { |x| Array(x).first == 'hoe' }; end end
5
5
 
6
- Hoe.new('oniguruma', '0.9.1') do |p|
6
+ Hoe.new('oniguruma', '1.0.0') do |p|
7
7
  p.rubyforge_name = 'oniguruma'
8
8
  p.author = 'Dizan Vasquez'
9
9
  p.email = 'dix_ans@yahoo.com'
@@ -101,15 +101,11 @@ static int name_callback(
101
101
  regex_t* reg,
102
102
  struct callback_packet* arg
103
103
  ) {
104
- int i, gn, ref;
105
- OnigRegion *region = arg->region;
104
+ int i, gn;
106
105
  VALUE nameHash = arg->hash;
107
106
 
108
107
  for (i = 0; i < ngroup_num; i++) {
109
108
  gn = group_nums[i];
110
- ref = onig_name_to_backref_number(reg, name, name_end, region);
111
- if (ref != gn )
112
- return 1;
113
109
  rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
114
110
  }
115
111
  return 0;
@@ -124,10 +120,6 @@ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
124
120
  rb_iv_set( self, "@options", options );
125
121
  UChar* pat_ptr = RSTRING(pattern_str)->ptr;
126
122
  int pat_len = RSTRING(pattern_str)->len;
127
- if( pat_len == 0 ) {
128
- rb_raise(rb_eArgError, "Empty pattern makes no sense.");
129
- }
130
-
131
123
  VALUE rOptions = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
132
124
  VALUE rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
133
125
  VALUE rSyntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
@@ -142,16 +134,19 @@ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
142
134
  if (r != ONIG_NORMAL) {
143
135
  char s[ONIG_MAX_ERROR_MESSAGE_LEN];
144
136
  onig_error_code_to_str(s, r, &einfo);
145
- rb_raise(rb_eException, "Oniguruma Error: %s", s);
137
+ rb_raise(rb_eArgError, "Oniguruma Error: %s", s);
146
138
  }
147
139
  return self;
148
140
  }
149
141
 
142
+ /* can't include re.h, since it conflicts with oniguruma typedefs */
150
143
  struct RMatch {
151
144
  struct RBasic basic;
152
145
  VALUE str;
153
146
  struct re_registers *regs;
154
147
  };
148
+ #define RMATCH(obj) (R_CAST(RMatch)(obj))
149
+ void rb_match_busy _((VALUE));
155
150
 
156
151
  static VALUE oregexp_make_match_data(ORegexp * oregexp, OnigRegion * region, VALUE string_str) {
157
152
  VALUE rb_cMatch = rb_const_get(rb_cObject, rb_intern("MatchData")) ;
@@ -163,21 +158,22 @@ static VALUE oregexp_make_match_data(ORegexp * oregexp, OnigRegion * region, VAL
163
158
 
164
159
  match->str = rb_str_new4(string_str);
165
160
  match->regs = ALLOC(struct re_registers);
166
- match->regs->allocated = count+1;
161
+ match->regs->allocated = count;
167
162
  match->regs->num_regs = count;
168
- match->regs->beg = ALLOC_N(int, (count+1));
169
- match->regs->end = ALLOC_N(int, (count+1));
163
+ match->regs->beg = ALLOC_N(int, count);
164
+ match->regs->end = ALLOC_N(int, count);
170
165
 
171
- for ( i = 0; i <= count; i++){
166
+ for ( i = 0; i < count; i++){
172
167
  match->regs->beg[i] = region->beg[i];
173
168
  match->regs->end[i] = region->end[i];
174
169
  }
175
170
  rb_cv_set( kORegexp, "@@last_match", (VALUE)match );
176
171
  packet.region = region;
177
- packet.hash = rb_hash_new();
178
- if( onig_foreach_name(oregexp->reg, name_callback, &packet) )
179
- rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
180
- rb_iv_set((VALUE)match, "@named_captures", packet.hash);
172
+ if( onig_number_of_names( oregexp->reg ) > 0 ) {
173
+ packet.hash = rb_hash_new();
174
+ onig_foreach_name(oregexp->reg, name_callback, &packet);
175
+ rb_iv_set((VALUE)match, "@named_captures", packet.hash);
176
+ }
181
177
  return (VALUE)match;
182
178
  }
183
179
 
@@ -201,9 +197,12 @@ static VALUE oregexp_match( VALUE self, VALUE string ) {
201
197
 
202
198
  OnigRegion *region = onig_region_new();
203
199
  int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
200
+ rb_backref_set(Qnil);
204
201
  if (r >= 0) {
205
202
  VALUE matchData = oregexp_make_match_data( oregexp, region, string_str);
206
203
  onig_region_free(region, 1 );
204
+ rb_backref_set(matchData);
205
+ rb_match_busy(matchData);
207
206
  return matchData;
208
207
  } else if (r == ONIG_MISMATCH) {
209
208
  onig_region_free(region, 1 );
@@ -212,7 +211,7 @@ static VALUE oregexp_match( VALUE self, VALUE string ) {
212
211
  onig_region_free(region, 1 );
213
212
  char s[ONIG_MAX_ERROR_MESSAGE_LEN];
214
213
  onig_error_code_to_str(s, r);
215
- rb_raise(rb_eException, "Oniguruma Error: %s", s);
214
+ rb_raise(rb_eArgError, "Oniguruma Error: %s", s);
216
215
  }
217
216
 
218
217
  }
@@ -233,7 +232,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
233
232
  {
234
233
  ORegexp *oregexp;
235
234
  VALUE ret;
236
- int32_t replIdx = 0;
235
+ int32_t replIdx = 0, name_pos, name_start, name_end ;
237
236
  int32_t replacementLength = RSTRING(repl_text)->len;
238
237
  UChar *replacementText = RSTRING(repl_text)->ptr;
239
238
  UChar *replacementEnd = replacementText + (replacementLength-1);
@@ -254,6 +253,10 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
254
253
  while (replIdx < replacementLength) {
255
254
  OnigCodePoint c = ONIGENC_MBC_TO_CODE(enc, replacementText+replIdx, replacementEnd);
256
255
  int c_len =ONIGENC_MBC_ENC_LEN(enc, replacementText+replIdx) ;
256
+ if( c_len == 0 ) {
257
+ rb_warn("Strange, for %d enc_len is 0", c);
258
+ c_len = 1;
259
+ }
257
260
  replIdx += c_len;
258
261
  if ( c != BACKSLASH) {
259
262
  /* Common case, no substitution, no escaping, */
@@ -311,7 +314,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
311
314
  break;
312
315
  case '+': // last matched group
313
316
  replIdx += c_len;
314
- for(groupNum = region->num_regs; groupNum > 0; groupNum --) {
317
+ for(groupNum = region->num_regs-1; groupNum > 0; groupNum --) {
315
318
  g_start = region->beg[ groupNum ];
316
319
  g_end = region->end[ groupNum ];
317
320
  if( g_start != -1 ) {
@@ -320,7 +323,35 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
320
323
  }
321
324
  }
322
325
  break;
323
-
326
+ case '<': // named group references \<name>
327
+ name_pos = replIdx+c_len;
328
+ name_end = name_start = replIdx+c_len;
329
+ while(name_pos < replacementLength) {
330
+ c = ONIGENC_MBC_TO_CODE(enc, replacementText+name_pos, replacementEnd);
331
+ c_len = ONIGENC_MBC_ENC_LEN(enc, replacementText+name_pos) ;
332
+ name_pos += c_len;
333
+ if( c == '>') break;
334
+ if( ONIGENC_IS_CODE_WORD(enc, c) ) {
335
+ name_end += c_len;
336
+ } else {
337
+ break;
338
+ }
339
+ }
340
+ if( c != '>' || name_end == name_start ) {
341
+ // place backslash and '<'
342
+ rb_str_buf_cat(ret, replacementText+(replIdx-p_len), p_len+c_len);
343
+ replIdx += c_len;
344
+ } else {
345
+ // lookup for group and subst for that value
346
+ groupNum = onig_name_to_backref_number( oregexp->reg,
347
+ replacementText+name_start, replacementText+name_end, region);
348
+ if( groupNum >= 0 ) {
349
+ rb_str_buf_cat(ret, matchText+region->beg[groupNum],
350
+ region->end[groupNum]-region->beg[groupNum]);
351
+ }
352
+ replIdx = name_pos;
353
+ }
354
+ break;
324
355
  default:
325
356
  rb_str_buf_cat(ret, replacementText+(replIdx-p_len), p_len+c_len);
326
357
  replIdx += c_len;
@@ -328,7 +359,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
328
359
  }
329
360
  } else {
330
361
  /* Finally, append the capture group data to the destination. */
331
- if( groupNum < region->num_regs && region->beg[groupNum] >= 0 && region->end[groupNum]>= region->beg[groupNum] ) {
362
+ if( groupNum < region->num_regs && region->beg[groupNum] >= 0 ) {
332
363
  rb_str_buf_cat(ret, matchText+region->beg[groupNum], region->end[groupNum]-region->beg[groupNum]);
333
364
  }
334
365
  }
@@ -359,13 +390,15 @@ oregexp_gsub(self, argc, argv, bang, once, region)
359
390
  VALUE repl;
360
391
  long beg,
361
392
  end,
393
+ len,
362
394
  prev_end;
363
395
  int tainted = 0,
364
396
  iter = 0;
365
397
 
366
398
  VALUE buf, curr_repl, block_res;
367
399
  ORegexp *oregexp;
368
-
400
+ OnigEncoding enc;
401
+
369
402
  if (argc == 1 && rb_block_given_p()) {
370
403
  iter = 1;
371
404
  } else if (argc == 2) {
@@ -392,6 +425,7 @@ oregexp_gsub(self, argc, argv, bang, once, region)
392
425
  }
393
426
  end = 0;
394
427
  buf = rb_str_buf_new(str_len);
428
+ enc = onig_get_encoding( oregexp->reg );
395
429
  do {
396
430
  prev_end = end;
397
431
  beg = region->beg[0];
@@ -400,12 +434,8 @@ oregexp_gsub(self, argc, argv, bang, once, region)
400
434
  if ( iter ) {
401
435
  VALUE match_data = oregexp_make_match_data( oregexp, region, string_str );
402
436
  rb_backref_set(match_data);
403
- if( once )
404
- block_res = rb_yield( match_data );
405
- else {
406
- VALUE match_string = rb_str_new( str_ptr+beg, end-beg);
407
- block_res = rb_yield_values(2, match_string, match_data );
408
- }
437
+ rb_match_busy(match_data);
438
+ block_res = rb_yield( match_data );
409
439
  str_mod_check( string_str, str_ptr, str_len);
410
440
  curr_repl = rb_obj_as_string(block_res);
411
441
  } else {
@@ -414,6 +444,17 @@ oregexp_gsub(self, argc, argv, bang, once, region)
414
444
  rb_str_append(buf, curr_repl);
415
445
  if( once ) break;
416
446
  // find next match
447
+ if( end == beg) {
448
+ /*
449
+ * Always consume at least one character of the input string
450
+ * in order to prevent infinite loops.
451
+ */
452
+ if( str_len <= end )
453
+ break;
454
+ len = ONIGENC_MBC_ENC_LEN(enc, str_ptr + end);
455
+ rb_str_buf_cat( buf, str_ptr+end, len);
456
+ end += len;
457
+ }
417
458
  beg=onig_search(oregexp->reg, str_ptr, str_ptr + str_len,
418
459
  str_ptr+end, str_ptr + str_len,
419
460
  region, ONIG_OPTION_NONE);
@@ -456,28 +497,216 @@ static VALUE oregexp_safe_gsub(self, argc, argv, bang, once)
456
497
  gsub_packet call_args = {self, argc, argv, bang, once, region};
457
498
  return rb_ensure( oregexp_packed_gsub, (VALUE)&call_args, oregexp_cleanup_region, (VALUE)region);
458
499
  }
500
+
501
+ /**
502
+ * call-seq:
503
+ * rxp.gsub(str, replacement)
504
+ * rxp.gsub(str) {|match_data| ... }
505
+ *
506
+ * Returns a copy of _str_ with _all_ occurrences of _rxp_ pattern
507
+ * replaced with either _replacement_ or the value of the block.
508
+ *
509
+ * If a string is used as the replacement, the sequences \1, \2,
510
+ * and so on may be used to interpolate successive groups in the match.
511
+ *
512
+ * In the block form, the current MatchData object is passed in as a
513
+ * parameter. The value returned by the block will be substituted for
514
+ * the match on each call.
515
+ *
516
+ **/
459
517
  static VALUE oregexp_m_gsub(int argc, VALUE *argv, VALUE self) {
460
518
  return oregexp_safe_gsub(self, argc, argv, 0, 0);
461
519
  }
520
+
521
+ /**
522
+ * call-seq:
523
+ * rxp.sub(str, replacement)
524
+ * rxp.sub(str) {|match_data| ... }
525
+ *
526
+ * Returns a copy of _str_ with the _first_ occurrence of _rxp_ pattern
527
+ * replaced with either _replacement_ or the value of the block.
528
+ *
529
+ * If a string is used as the replacement, the sequences \1, \2,
530
+ * and so on may be used to interpolate successive groups in the match.
531
+ *
532
+ * In the block form, the current MatchData object is passed in as a
533
+ * parameter. The value returned by the block will be substituted for
534
+ * the match on each call.
535
+ *
536
+ **/
462
537
  static VALUE oregexp_m_sub(int argc, VALUE *argv, VALUE self) {
463
538
  return oregexp_safe_gsub(self, argc, argv, 0, 1);
464
539
  }
465
540
 
541
+ /**
542
+ * call-seq:
543
+ * rxp.gsub!(str, replacement)
544
+ * rxp.gsub!(str) {|match_data| ... }
545
+ *
546
+ * Performs the substitutions of ORegexp#gsub in place, returning
547
+ * _str_, or _nil_ if no substitutions were performed.
548
+ *
549
+ **/
466
550
  static VALUE oregexp_m_gsub_bang(int argc, VALUE *argv, VALUE self) {
467
551
  return oregexp_safe_gsub(self, argc, argv, 1, 0);
468
552
  }
553
+
554
+ /**
555
+ * call-seq:
556
+ * oregexp.sub!(str, replacement)
557
+ * oregexp.sub!(str) {|match_data| ... }
558
+ *
559
+ * Performs the substitutions of ORegexp#sub in place, returning
560
+ * _str_, or _nil_ if no substitutions were performed.
561
+ *
562
+ **/
469
563
  static VALUE oregexp_m_sub_bang(int argc, VALUE *argv, VALUE self) {
470
564
  return oregexp_safe_gsub(self, argc, argv, 1, 1);
471
565
  }
472
566
 
567
+ static VALUE
568
+ oregexp_scan(VALUE self, VALUE str, OnigRegion * region)
569
+ {
570
+ long beg,
571
+ len,
572
+ end;
573
+ int iter = 0;
574
+
575
+ VALUE matches;
576
+ ORegexp *oregexp;
577
+ OnigEncoding enc;
578
+
579
+ if ( rb_block_given_p()) {
580
+ iter = 1;
581
+ }
582
+ Data_Get_Struct( self, ORegexp, oregexp );
583
+
584
+ VALUE string_str = StringValue( str );
585
+ UChar* str_ptr = RSTRING(string_str)->ptr;
586
+ int str_len = RSTRING(string_str)->len;
587
+ beg = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
588
+ if (beg < 0) {
589
+ /* no match */
590
+ return Qnil;
591
+ }
592
+ matches = rb_ary_new();
593
+ enc = onig_get_encoding( oregexp -> reg );
594
+ do {
595
+ VALUE match_data = oregexp_make_match_data( oregexp, region, string_str );
596
+ end = region->end[0];
597
+ rb_ary_push( matches, match_data );
598
+ if ( iter )
599
+ rb_yield( match_data );
600
+ // find next match
601
+ if( end == beg) {
602
+ /*
603
+ * Always consume at least one character of the input string
604
+ * in order to prevent infinite loops.
605
+ */
606
+ if( str_len <= end )
607
+ break;
608
+ len = ONIGENC_MBC_ENC_LEN(enc, str_ptr + end);
609
+ end += len;
610
+ }
611
+
612
+ beg=onig_search(oregexp->reg, str_ptr, str_ptr + str_len,
613
+ str_ptr+end, str_ptr + str_len,
614
+ region, ONIG_OPTION_NONE);
615
+ } while ( beg >= 0);
616
+
617
+ return matches;
618
+ }
619
+
620
+ struct scan_packet {
621
+ VALUE self, str;
622
+ OnigRegion * region;
623
+ };
624
+ static VALUE oregexp_packed_scan( struct scan_packet * args) {
625
+ return oregexp_scan(args->self, args->str, args->region);
626
+ }
627
+ /**
628
+ * call-seq:
629
+ * rxp.scan(str) # => [matchdata1, matchdata2,...] or nil
630
+ * rxp.scan(str) {|match_data| ... } # => [matchdata1, matchdata2,...] or nil
631
+ *
632
+ * Both forms iterate through _str_, matching the pattern. For each match,
633
+ * a MatchData object is generated and passed to the block, and
634
+ * added to the resulting array of MatchData objects.
635
+ *
636
+ * If _str_ does not match pattern, _nil_ is returned.
637
+ *
638
+ **/
639
+ static VALUE oregexp_m_scan(VALUE self, VALUE str) {
640
+ OnigRegion * region = onig_region_new();
641
+ struct scan_packet call_args = {self, str, region};
642
+ return rb_ensure( oregexp_packed_scan, (VALUE)&call_args, oregexp_cleanup_region, (VALUE)region);
643
+ }
644
+
645
+ /**
646
+ * call-seq:
647
+ * rxp === str => true or false
648
+ *
649
+ * Case Equality---Synonym for <code>ORegexp#=~</code> used in case statements.
650
+ *
651
+ * a = "HELLO"
652
+ * case a
653
+ * when ORegexp.new('^[a-z]*$'); print "Lower case\n"
654
+ * when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
655
+ * else; print "Mixed case\n"
656
+ * end
657
+ *
658
+ * <em>produces:</em>
659
+ *
660
+ * Upper case
661
+ *
662
+ **/
663
+
664
+ static VALUE oregexp_m_eqq(VALUE self, VALUE str) {
665
+ VALUE match;
666
+
667
+ if (TYPE(str) != T_STRING) {
668
+ str = rb_check_string_type(str);
669
+ if (NIL_P(str)) {
670
+ return Qfalse;
671
+ }
672
+ }
673
+ StringValue(str);
674
+ match = oregexp_match(self, str);
675
+ if (Qnil == match) {
676
+ return Qfalse;
677
+ }
678
+ return Qtrue;
679
+ }
680
+ /*
681
+ * call-seq:
682
+ * rxp =~ string => int or nil
683
+ *
684
+ * Matches <code>rxp</code> against <code>string</code>, returning the offset of the
685
+ * start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
686
+ * <code>MatchData</code> or <code>nil</code>.
687
+ *
688
+ * ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
689
+ * ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
690
+ **/
691
+ static VALUE oregexp_match_op(VALUE self, VALUE str) {
692
+ VALUE ret = oregexp_match(self, str);
693
+ if(ret == Qnil)
694
+ return Qnil;
695
+ return INT2FIX(RMATCH(ret)->regs->beg[0]);
696
+ }
697
+
473
698
  void Init_oregexp() {
474
699
  mOniguruma = rb_define_module("Oniguruma");
475
700
  VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
476
701
  rb_define_alloc_func(cORegexp, oregexp_allocate);
477
702
  rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
478
703
  rb_define_method( cORegexp, "match", oregexp_match, 1 );
704
+ rb_define_method( cORegexp, "=~", oregexp_match_op, 1 );
479
705
  rb_define_method( cORegexp, "gsub", oregexp_m_gsub, -1 );
480
706
  rb_define_method( cORegexp, "sub", oregexp_m_sub, -1 );
481
707
  rb_define_method( cORegexp, "gsub!", oregexp_m_gsub_bang, -1 );
482
708
  rb_define_method( cORegexp, "sub!", oregexp_m_sub_bang, -1 );
709
+ rb_define_method( cORegexp, "scan", oregexp_m_scan, 1 );
710
+ rb_define_method( cORegexp, "===", oregexp_m_eqq, 1 );
711
+ rb_define_const( mOniguruma, "VERSION", rb_str_new2(onig_version()) );
483
712
  }
@@ -17,6 +17,20 @@ module Oniguruma
17
17
  OPTION_MAXBIT = OPTION_POSIX_REGION
18
18
  OPTION_DEFAULT = OPTION_NONE
19
19
 
20
+ OPTIONS_SHORTCUTS = {
21
+ 'i' => OPTION_IGNORECASE,
22
+ 'x' => OPTION_EXTEND,
23
+ 'm' => OPTION_MULTILINE,
24
+ 's' => OPTION_SINGLELINE,
25
+ 'l' => OPTION_FIND_LONGEST,
26
+ 'E' => OPTION_FIND_NOT_EMPTY,
27
+ 'S' => OPTION_NEGATE_SINGLELINE,
28
+ 'G' => OPTION_DONT_CAPTURE_GROUP,
29
+ 'g' => OPTION_CAPTURE_GROUP,
30
+ 'B' => OPTION_NOTBOL,
31
+ 'E' => OPTION_NOTEOL,
32
+ }
33
+
20
34
  SYNTAX_ASIS = 0
21
35
  SYNTAX_POSIX_BASIC = 1
22
36
  SYNTAX_POSIX_EXTENDED = 2
@@ -117,8 +131,12 @@ module Oniguruma
117
131
  alias old_initialize initialize
118
132
  # :startdoc:
119
133
 
134
+ # call-seq:
135
+ # ORegexp.new( pattern, options_hash )
136
+ # ORegexp.new( pattern, option_str, encoding_str=nil, syntax_str=nil)
137
+ #
120
138
  # Constructs a new regular expression from <i>pattern</i>, which is a
121
- # <code>String</code>. The paramter <i>options</i> is a <code>Hash</code>
139
+ # <code>String</code>. The second parameter <i>options</i> may be a <code>Hash</code>
122
140
  # of the form:
123
141
  #
124
142
  # <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
@@ -135,9 +153,27 @@ module Oniguruma
135
153
  #
136
154
  # #Accept java syntax on SJIS encoding:
137
155
  # r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
156
+ #
157
+ # Second form uses string shortcuts to set options and encoding:
158
+ # r = ORegexp.new('cat', 'i', 'utf8', 'java')
138
159
 
139
- def initialize( pattern, options = {} )
160
+ def initialize( pattern, *args )
140
161
  defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
162
+ if args[0].is_a?(String)
163
+ options = {}
164
+ option_str, encoding_str, syntax_str = *args
165
+ opt = 0
166
+ option_str.each_byte {|x| opt |= (OPTIONS_SHORTCUTS[x.chr] || 0) }
167
+ options[:options] = opt
168
+ if encoding_str && Oniguruma::const_defined?("ENCODING_#{encoding_str.upcase}")
169
+ options[:encoding] = Oniguruma::const_get("ENCODING_#{encoding_str.upcase}")
170
+ end
171
+ if syntax_str && Oniguruma::const_defined?("SYNTAX_#{syntax_str.upcase}")
172
+ options[:syntax] = Oniguruma::const_get("SYNTAX_#{syntax_str.upcase}")
173
+ end
174
+ else
175
+ options = args[0] || {}
176
+ end
141
177
  old_initialize( pattern, defaults.merge( options ).freeze )
142
178
  end
143
179
 
@@ -241,131 +277,203 @@ module Oniguruma
241
277
  end
242
278
 
243
279
  # call-seq:
244
- # rxp =~ string => int or nil
245
- #
246
- # Matches <code>rxp</code> against <code>string</code>, returning the offset of the
247
- # start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
248
- # <code>MatchData</code> or <code>nil</code>.
249
- #
250
- # ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
251
- # ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
252
-
253
- def =~ string
254
- return nil unless string
255
- m = match( string )
256
- return nil unless m
257
- m.begin(0)
258
- end
259
-
260
- # call-seq:
261
- # rxp === str => true or false
280
+ # rxp.source => str
262
281
  #
263
- # Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
264
- #
265
- # a = "HELLO"
266
- # case a
267
- # when ORegexp.new('^[a-z]*$'); print "Lower case\n"
268
- # when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
269
- # else; print "Mixed case\n"
270
- # end
271
- #
272
- # <em>produces:</em>
273
- #
274
- # Upper case
275
-
276
- alias === =~
277
-
282
+ # Returns the original string of the pattern.
283
+ #
284
+ # ORegexp.new( 'ab+c', 'ix' ).source #=> "ab+c"
278
285
  def source
279
286
  @pattern.freeze
280
287
  end
281
288
 
282
- def match_all string
283
- matches = []
284
- positions = []
285
- position = 0
286
- tmp_string = string
287
- while tmp_string != ""
288
- if m = match( tmp_string )
289
- matches << m
290
- positions << position
291
- tmp_string = m.post_match
292
- position += m.end(0)
293
- #if m.end == m.begin
294
- # tmp_string = tmp_string[1..-1]
295
- # position += 1
296
- #end
297
- else
298
- break
299
- end
300
- end
301
- if matches.size > 0
302
- MultiMatchData.new( string, matches, positions )
303
- else
304
- nil
305
- end
306
- end
289
+ alias match_all scan
290
+
307
291
  end
308
292
 
309
- class MultiMatchData
310
- def initialize( string, matches, positions )
311
- @matches = matches
312
- @positions = positions
313
- @string = string
314
- end
315
-
316
- def position index
317
- @positions[index]
318
- end
319
-
320
- def [] ( value1, value2 = nil )
321
- unless value2
322
- @matches[value1]
323
- else
324
- @matches[value1, value2]
325
- end
326
- end
327
-
328
- def begin index
329
- @matches[index].begin(0) + @positions[index]
330
- end
331
-
332
- def end index
333
- @matches[index].end(0) + @positions[index]
334
- end
335
-
336
- def length
337
- @matches.size
338
- end
339
- alias size length
340
-
341
- def offset index
342
- [self.begin(index), self.end(index) ]
343
- end
344
-
345
- def string
346
- @string.freeze
347
- end
348
-
349
- def to_a
350
- @matches
351
- end
352
-
353
- def each
354
- @matches.size.times do |i|
355
- yield @matches[i], @positions[i]
356
- end
357
- end
293
+ end
294
+
295
+ class ::String
296
+ # Calls <code>Oniguruma::ORegexp#gsub</code> on this string.
297
+ def ogsub(*args)
298
+ Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
358
299
  end
359
300
 
301
+ # Calls <code>Oniguruma::ORegexp#gsub!</code> on this string.
302
+ def ogsub!(*args)
303
+ Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
304
+ end
305
+
306
+ # Calls <code>Oniguruma::ORegexp#sub</code> on this string.
307
+ def osub(re, *args)
308
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
309
+ end
310
+
311
+ # Calls <code>Oniguruma::ORegexp#sub!</code> on this string.
312
+ def osub!(re, *args)
313
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
314
+ end
360
315
  end
316
+
361
317
  class ::MatchData
362
- alias old_aref :[]
363
- def [](*idx)
364
- if idx[0].is_a?(Symbol)
365
- k = @named_captures && @named_captures[idx[0]]
366
- k && old_aref(k)
367
- else
368
- old_aref(*idx)
369
- end
370
- end
318
+ # call-seq:
319
+ # to_index(symbol) => int or nil
320
+ #
321
+ # Returns the group index for the corresponding named group, or
322
+ # <code>nil</code> if the group does not exist.
323
+ #
324
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
325
+ # m.to_index(:begin) #=> 1
326
+ # m.to_index(:unknown) #=> nil
327
+ def to_index symbol
328
+ @named_captures && @named_captures[symbol]
329
+ end
330
+
331
+ alias old_aref :[]
332
+
333
+ # call-seq:
334
+ # mtch[i] => obj
335
+ # mtch[start, length] => array
336
+ # mtch[range] => array
337
+ # mtch[symbol] => obj
338
+ #
339
+ # <code>MatchData</code> acts as an array, and may be
340
+ # accessed using the normal array indexing techniques. <i>mtch</i>[0] is
341
+ # equivalent to the special variable <code>$&</code>, and returns the entire
342
+ # matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
343
+ # of the matched backreferences (portions of the pattern between parentheses).
344
+ #
345
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
346
+ # m[0] #=> "HX1138"
347
+ # m[1, 2] #=> ["H", "X"]
348
+ # m[1..3] #=> ["H", "X", "113"]
349
+ # m[-3, 2] #=> ["X", "113"]
350
+ #
351
+ # If a symbol is used as index, the corresponding named group is returned,
352
+ # or <code>nil</code> if such a group does not exist.
353
+ #
354
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
355
+ # m[:begin] #=> "THX"
356
+ # m[:middle] #=> "1"
357
+ # m[:end] #=> "138"
358
+
359
+ def [](*idx)
360
+ if idx[0].is_a?(Symbol)
361
+ k = to_index( idx[0] )
362
+ k && old_aref(k)
363
+ else
364
+ old_aref(*idx)
365
+ end
366
+ end
367
+
368
+ alias old_begin :begin
369
+
370
+ # call-seq:
371
+ # mtch.begin(n) => integer
372
+ # mtch.begin => integer
373
+ # mtch.begin(symbol) => integer
374
+ #
375
+ # Returns the offset of the start of the <em>n</em>th element of the match
376
+ # array in the string.
377
+ #
378
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
379
+ # m.begin(0) #=> 1
380
+ # m.begin(2) #=> 2
381
+ #
382
+ # If no arguments are given, the index of the
383
+ # first matching character is returned.
384
+ #
385
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
386
+ # m.begin #=> 1
387
+ #
388
+ # If the argument is a symbol, then the beginning of the
389
+ # corresponding named group is returned, or <code>nil</code>
390
+ # if the group does not exist.
391
+ #
392
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
393
+ # m.begin(:middle) #=> 3
394
+
395
+ def begin(*idx)
396
+ if idx[0].is_a?(Symbol)
397
+ k = to_index( idx[0] )
398
+ k && old_begin(k)
399
+ elsif idx.empty?
400
+ old_begin( 0 )
401
+ else
402
+ old_begin(*idx)
403
+ end
404
+ end
405
+
406
+ alias old_end :end
407
+
408
+ # call-seq:
409
+ # mtch.end(n) => integer
410
+ #
411
+ # Returns the offset of the character immediately following the end of the
412
+ # <em>n</em>th element of the match array in the string.
413
+ #
414
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
415
+ # m.end(0) #=> 7
416
+ # m.end(2) #=> 3
417
+ #
418
+ # If no arguments are given, the index of the
419
+ # character immediately following the entire match is returned.
420
+ #
421
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
422
+ # m.end #=> 7
423
+ #
424
+ # If the argument is a symbol, then the end of the
425
+ # corresponding named group is returned, or <code>nil</code>
426
+ # if the group does not exist.
427
+ #
428
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
429
+ # m.end(:middle) #=> 4
430
+
431
+ def end(*idx)
432
+ if idx[0].is_a?(Symbol)
433
+ k = to_index( idx[0] )
434
+ k && old_end(k)
435
+ elsif idx.empty?
436
+ old_end( 0 )
437
+ else
438
+ old_end(*idx)
439
+ end
440
+ end
441
+
442
+ alias old_offset :offset
443
+
444
+ # call-seq:
445
+ # mtch.offset(n) => array
446
+ # mtch.offset => array
447
+ # mtch.offset(symbol) => array
448
+ #
449
+ # Returns a two-element array containing the beginning and ending offsets of
450
+ # the <em>n</em>th match.
451
+ #
452
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
453
+ # m.offset(0) #=> [1, 7]
454
+ # m.offset(4) #=> [6, 7]
455
+ #
456
+ # If no arguments are given, the offsets of the entire
457
+ # sequence are returned.
458
+ #
459
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
460
+ # m.offset #=> [1, 7]
461
+ #
462
+ # If the argument is a symbol, then the offsets of the
463
+ # corresponding named group are returned, or <code>nil</code>
464
+ # if the group does not exist.
465
+ #
466
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
467
+ # m.offset(:middle) #=> [3, 4]
468
+
469
+ def offset(*idx)
470
+ if idx[0].is_a?(Symbol)
471
+ k = to_index( idx[0] )
472
+ k && old_offset(k)
473
+ elsif idx.empty?
474
+ old_offset( 0 )
475
+ else
476
+ old_offset(*idx)
477
+ end
478
+ end
371
479
  end
@@ -27,7 +27,7 @@ class ORegexpTestCase < Test::Unit::TestCase
27
27
  end
28
28
 
29
29
  def test_bad_initialization
30
- assert_raises(Exception) do
30
+ assert_raises(ArgumentError) do
31
31
  reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.))" )
32
32
  end
33
33
  end
@@ -53,7 +53,7 @@ class ORegexpTestCase < Test::Unit::TestCase
53
53
  string = 'My favorite fruits are (?#fruit1), (?#fruit2), and (?#fruit3)'
54
54
  assert_equal( "My favorite fruits are *, *, and *", reg.gsub( string, '*' ) )
55
55
  fruits = { "fruit1" => "apples", "fruit2" => "bananas", "fruit3" => "grapes" }
56
- assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |text, match| fruits[match[1]]} )
56
+ assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |match| fruits[match[1]]} )
57
57
  end
58
58
 
59
59
  def test_eql
@@ -74,10 +74,23 @@ class ORegexpTestCase < Test::Unit::TestCase
74
74
 
75
75
  assert_equal( "Upper case\n", result )
76
76
  end
77
-
77
+
78
+ def test_case_eql_compat
79
+ # === method should not raise when used in case statements
80
+ a = Time.now
81
+ result = ""
82
+ case a
83
+ when /./ ; result = "rgx"
84
+ when Oniguruma::ORegexp.new('.'); result = "ore"
85
+ else; result = "else"
86
+ end
87
+ assert_equal( "else", result )
88
+ end
89
+
78
90
  def test_operator_match
79
91
  assert_equal( nil, Oniguruma::ORegexp.new( 'SIT' ) =~ "insensitive" )
80
92
  assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', :options => Oniguruma::OPTION_IGNORECASE ) =~ "insensitive" )
93
+ assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', 'i' ) =~ "insensitive" )
81
94
  end
82
95
 
83
96
  # def test_operator_match_2
@@ -96,6 +109,8 @@ class ORegexpTestCase < Test::Unit::TestCase
96
109
  def test_kcode
97
110
  reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
98
111
  assert_equal( Oniguruma::ENCODING_ASCII, reg.kcode )
112
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)", '', 'SJIS' )
113
+ assert_equal( Oniguruma::ENCODING_SJIS, reg.kcode )
99
114
  end
100
115
 
101
116
  def test_options
@@ -106,6 +121,40 @@ class ORegexpTestCase < Test::Unit::TestCase
106
121
  string = '(?<=\n)\\.*ocatarinetabelachitchix'
107
122
  assert_equal( string, Oniguruma::ORegexp.new( string ).source )
108
123
  end
124
+
125
+ def test_named_sub_backrefs
126
+ re = Oniguruma::ORegexp.new('(?<pre>\w+?)\d+(?<after>\w+)')
127
+ assert_equal(' def123abc ', re.sub('abc123def', ' \<after>123\<pre> ') )
128
+ end
129
+
130
+ def test_named_sub_backrefs_dupes
131
+ re = Oniguruma::ORegexp.new('(?<pre>\w+?)\d+(?<pre>\w+)')
132
+ assert_equal('123def', re.sub('abc123def', '123\<pre>') )
133
+ end
134
+
135
+ def test_backref_set_for_match
136
+ re = Oniguruma::ORegexp.new('Date:(\d{4})/(\d{2})/(\d{2})')
137
+ assert re.match( "Date:2007/03/25" )
138
+ assert_not_nil $~
139
+ assert_equal "2007", $1
140
+ assert_equal "03", $2
141
+ assert_equal "25", $3
142
+ end
143
+
144
+ def test_backref_set_for_match_op
145
+ re = Oniguruma::ORegexp.new('Date:(\d{4})/(\d{2})/(\d{2})')
146
+ assert re =~ "Date:2007/03/25"
147
+ assert_not_nil $~
148
+ assert_equal "2007", $1
149
+ assert_equal "03", $2
150
+ assert_equal "25", $3
151
+ end
152
+
153
+ def test_multibyte_named_backrefs
154
+ r = Oniguruma::ORegexp.new('(?<группа>test).+(\k<группа>)', :encoding => Oniguruma::ENCODING_UTF8)
155
+ assert_equal "should !test!", r.sub("should test this damned test", '!\<группа>!')
156
+ end
157
+
109
158
  end
110
159
 
111
160
  class MatchDataTestCase < Test::Unit::TestCase
@@ -123,6 +172,7 @@ class MatchDataTestCase < Test::Unit::TestCase
123
172
 
124
173
  def test_begin
125
174
  matches = @reg.match( "THX1138." )
175
+ assert_equal( 1, matches.begin )
126
176
  assert_equal( 1, matches.begin(0) )
127
177
  assert_equal( 2, matches.begin(2) )
128
178
  end
@@ -134,6 +184,7 @@ class MatchDataTestCase < Test::Unit::TestCase
134
184
 
135
185
  def test_end
136
186
  matches = @reg.match( "THX1138." )
187
+ assert_equal( 7, matches.end )
137
188
  assert_equal( 7, matches.end(0) )
138
189
  assert_equal( 3, matches.end(2) )
139
190
  end
@@ -146,6 +197,7 @@ class MatchDataTestCase < Test::Unit::TestCase
146
197
 
147
198
  def test_offset
148
199
  matches = @reg.match( "THX1138." )
200
+ assert_equal( [1, 7], matches.offset )
149
201
  assert_equal( [1, 7], matches.offset(0) )
150
202
  assert_equal( [6, 7], matches.offset(4) )
151
203
  end
@@ -189,9 +241,20 @@ class MatchDataTestCase < Test::Unit::TestCase
189
241
  def test_match_all
190
242
  reg = Oniguruma::ORegexp.new( 'ca' )
191
243
  matches = reg.match_all( 'ocatacachaca' )
244
+ a = []
245
+ matches.each { |m| a << m.offset(0) }
246
+ assert_equal( [ [1,3], [5,7], [10,12] ], a)
192
247
  assert_equal( 3, matches.size )
193
- assert_equal( 7, matches.position(2) )
194
- assert_equal( "ca", matches.string[matches.begin(1)...matches.end(1)])
248
+ assert_equal( 10, matches[2].begin( 0 ) )
249
+ assert_equal( "ca", matches[1].string[matches[1].begin( 0 )...matches[1].end( 0 )])
250
+ end
251
+
252
+ def test_scan
253
+ reg = Oniguruma::ORegexp.new( 'ca' )
254
+ a = []
255
+ matches = reg.match_all( 'ocatacachaca' ) { |m| a << m.offset(0) }
256
+ #assert_kind_of(Oniguruma::MultiMatchData, matches)
257
+ assert_equal( [ [1,3], [5,7], [10,12] ], a)
195
258
  end
196
259
 
197
260
  def test_match_empty_string
@@ -205,12 +268,32 @@ class MatchDataTestCase < Test::Unit::TestCase
205
268
  reg = Oniguruma::ORegexp.new( '(?<begin>\()(?<body>.*)(?<end>\))', :options => Oniguruma::OPTION_MULTILINE )
206
269
  matches = reg.match( "blah (content) blah" )
207
270
  assert_not_nil( matches )
271
+ assert_equal $~, matches
208
272
  assert_equal( '(', matches[:begin] )
209
273
  assert_equal( 'content', matches[:body] )
210
274
  assert_equal( ')', matches[:end] )
211
275
  assert_equal( nil, matches[:inexistent])
212
276
  end
213
277
 
278
+ def test_multibyte_named_backrefs
279
+ r = Oniguruma::ORegexp.new('(?<имя>test).+(\k<имя>)', :encoding => Oniguruma::ENCODING_UTF8)
280
+ assert_equal "should TEST", r.sub("should test this damned test") {|m| m[:"имя"].upcase }
281
+ end
282
+
283
+ def test_no_named_backrefs
284
+ r = Oniguruma::ORegexp.new('(.+).+(.+)')
285
+ r.match("text")
286
+ assert_not_nil $~
287
+ assert_equal 0, $~.instance_variables.size
288
+ r = Oniguruma::ORegexp.new('(?<a>.+).+(?<b>.+)')
289
+ r.match("text")
290
+ assert_not_nil $~
291
+ assert_equal 1, $~.instance_variables.size
292
+
293
+ end
294
+
295
+ # casefolding for full Unicode set is not present in versions prior to 5.
296
+ if Oniguruma::VERSION >= '5.0.0'
214
297
  def test_utf8_ignore_case
215
298
  reg = Oniguruma::ORegexp.new( '([а-я])+', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
216
299
  matches = reg.match("Text: Ехал Грека Через Реку")
@@ -222,16 +305,17 @@ class MatchDataTestCase < Test::Unit::TestCase
222
305
 
223
306
  def test_utf8_gsub
224
307
  reg = Oniguruma::ORegexp.new( '([а-я])([а-я])([а-я]+)', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
225
- new_str = reg.gsub("Text: Ехал Грека Через Реку") {|s,m| m[1]*2+m[2]*2+m[3] }
308
+ new_str = reg.gsub("Text: Ехал Грека Через Реку") {|m| m[1]*2+m[2]*2+m[3] }
226
309
  assert_equal("Text: ЕЕххал ГГррека ЧЧеерез РРееку", new_str)
227
310
  end
228
311
 
229
312
  def test_utf8_gsub2
230
313
  reg = Oniguruma::ORegexp.new( '[а-я]', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
231
- new_str = reg.gsub("Text: Ехал Грека Через Реку") {|s,m| s*2 }
314
+ new_str = reg.gsub("Text: Ехал Грека Через Реку") {|m| m[0]*2 }
232
315
  assert_equal("Text: ЕЕххаалл ГГррееккаа ЧЧеерреезз РРееккуу", new_str)
233
316
  end
234
-
317
+ end
318
+
235
319
  def test_sub_compatibility
236
320
  $x = "a.gif"
237
321
  assert_equal("b.gif", $x.osub('.*\.([^\.]+)$', 'b.\1'))
@@ -242,36 +326,36 @@ class MatchDataTestCase < Test::Unit::TestCase
242
326
  assert_equal("<a.gif>", $x.osub('.*\.([^\.]+)$', '<\&>'))
243
327
  assert_equal("a.a.", $x.osub('(gif)', '\`') )
244
328
  end
245
-
246
- class ::String
247
- def ogsub(*args)
248
- Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
249
- end
250
- def ogsub!(*args)
251
- Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
252
- end
253
- def osub(re, *args)
254
- Oniguruma::ORegexp.new( re ).sub(self, *args)
255
- end
256
- end
257
329
 
258
330
  def test_gsub_compat
259
331
  assert_equal("hello".ogsub('[aeiou]', '*') , "h*ll*")
260
332
  assert_equal("hello".ogsub('([aeiou])', '<\1>') , "h<e>ll<o>")
261
333
  i = 0
262
- assert_equal("12345" , Oniguruma::ORegexp.new('.').gsub("hello") {|s,m| i+=1; i.to_s})
263
- assert_equal("214365", Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|s,m| m[2] + m[1] })
334
+ assert_equal("12345" , Oniguruma::ORegexp.new('.').gsub("hello") {|m| i+=1; i.to_s})
335
+ assert_equal("214365", Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|m| m[2] + m[1] })
264
336
  a = "test"
265
337
  a.ogsub!('t', a)
266
338
  assert_equal("testestest", a)
267
339
  end
268
340
 
269
341
  def test_match_compat
270
- t = Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|s,m| "#$2#$1" }
342
+ t = Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|m| "#$2#$1" }
271
343
  assert_equal("214365", t )
272
- t = Oniguruma::ORegexp.new('([aeiou])').gsub("hello") {|s,m| "<#$1>" }
344
+ t = Oniguruma::ORegexp.new('([aeiou])').gsub("hello") {|m| "<#$1>" }
273
345
  assert_equal( "h<e>ll<o>", t)
274
346
  end
275
347
 
348
+ def _u16(str)
349
+ str.unpack("U*").pack("n*")
350
+ end
351
+ puts Oniguruma::VERSION
352
+ if Oniguruma::VERSION >= '4.0.0'
353
+ def test_utf16_gsub
354
+ r = Oniguruma::ORegexp.new( _u16('[aeiou]'), :encoding => Oniguruma::ENCODING_UTF16_BE)
355
+ assert_equal( _u16("h*ll*"), r.gsub( _u16("hello"), _u16('*')) )
356
+ r = Oniguruma::ORegexp.new( _u16('([aeiou])'), :encoding => Oniguruma::ENCODING_UTF16_BE)
357
+ assert_equal( _u16("h<e>\\ll<o>\\"), r.gsub( _u16("hello"), _u16('<\1>\\')) )
358
+ end
359
+ end
276
360
 
277
361
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: oniguruma
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.1
7
- date: 2007-03-25 00:00:00 +01:00
6
+ version: 1.0.0
7
+ date: 2007-03-27 00:00:00 +02:00
8
8
  summary: Bindings for the oniguruma regular expression library
9
9
  require_paths:
10
10
  - lib