oniguruma 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Manifest.txt +7 -0
- data/README.txt +65 -0
- data/Rakefile +18 -0
- data/ext/extconf.rb +3 -0
- data/ext/oregexp.c +194 -0
- data/lib/oniguruma.rb +491 -0
- data/test/test_oniguruma.rb +214 -0
- metadata +53 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
== ONIGURUMA FOR RUBY:
|
2
|
+
|
3
|
+
Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
|
4
|
+
|
5
|
+
== FEATURES:
|
6
|
+
|
7
|
+
* Increased performance.
|
8
|
+
* Same interface as the standard Regexp class (easy transition!).
|
9
|
+
* Support for named groups, look-ahead, look-behind, and other
|
10
|
+
cool features!
|
11
|
+
|
12
|
+
== SYNOPSIS:
|
13
|
+
|
14
|
+
reg = Oniguruma::ORegexp.new( '(?<before>.*)(a)(?<after>.*)' )
|
15
|
+
match = reg.match( 'terraforming' )
|
16
|
+
puts match[0] # => 'terraforming'
|
17
|
+
puts match[:before] # => 'terr'
|
18
|
+
puts match[:after] # => 'forming'
|
19
|
+
|
20
|
+
== REQUIREMENTS:
|
21
|
+
|
22
|
+
* Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
|
23
|
+
|
24
|
+
== INSTALL:
|
25
|
+
|
26
|
+
sudo gem install -r oniguruma
|
27
|
+
|
28
|
+
== BUGS/PROBLEMS/INCOMPATIBILITIES:
|
29
|
+
|
30
|
+
* <code>ORegexp#~</code> is not implemented.
|
31
|
+
* <code>ORegexp#kcode</code> results are not compatible with <code>Regexp</code>.
|
32
|
+
* <code>ORegexp</code> options set in the string are not visible, this affects
|
33
|
+
<code>ORegexp#options</code>, <code>ORegexp#to_s</code>, <code>ORegexp#inspect</code>
|
34
|
+
and <code>ORegexp#==</code>.
|
35
|
+
|
36
|
+
== TODO:
|
37
|
+
|
38
|
+
* Complete documentation (methods, oniguruma syntax).
|
39
|
+
|
40
|
+
== CREDITS:
|
41
|
+
|
42
|
+
* K.Kosako, for his great library.
|
43
|
+
* A lot of the documentation has been copied from the original Ruby Regexp documentation.
|
44
|
+
|
45
|
+
== LICENSE:
|
46
|
+
|
47
|
+
New BSD License
|
48
|
+
|
49
|
+
Copyright (c) 2007, Dizan Vasquez
|
50
|
+
All rights reserved.
|
51
|
+
|
52
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
53
|
+
|
54
|
+
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
55
|
+
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
|
56
|
+
documentation and/or other materials provided with the distribution.
|
57
|
+
* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this
|
58
|
+
software without specific prior written permission.
|
59
|
+
|
60
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
61
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
62
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
63
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
64
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
65
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hoe'
|
3
|
+
|
4
|
+
# Reopens Hoe to override how its extra dependencies are reported.
class Hoe
  # Returns @extra_deps without the entries named 'hoe'. An entry's name is
  # the entry itself, or its first element when the entry is an array.
  def extra_deps
    @extra_deps.reject do |dependency|
      name, = Array(dependency)
      name == 'hoe'
    end
  end
end
|
5
|
+
|
6
|
+
# Gem specification for oniguruma 0.9.0, declared through Hoe.
Hoe.new('oniguruma', '0.9.0') do |p|
  p.rubyforge_name = 'oniguruma'
  p.author = 'Dizan Vasquez'
  p.email = 'dix_ans@yahoo.com'
  p.summary = 'Bindings for the oniguruma regular expression library'
  # Double quotes are required: in single quotes '\n\n' is the four literal
  # characters backslash-n-backslash-n, not a blank line between paragraphs.
  p.description = p.paragraphs_of('README.txt', 1 ).join("\n\n")
  p.url = 'http://oniguruma.rubyforge.org'
  # The C extension is built through ext/extconf.rb.
  p.spec_extras[:extensions] = ["ext/extconf.rb"]
  p.rdoc_pattern = /^(lib|bin|ext)|txt$/
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end
|
17
|
+
|
18
|
+
|
data/ext/extconf.rb
ADDED
data/ext/oregexp.c
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <oniguruma.h>
|
3
|
+
/*
|
4
|
+
TODO:
|
5
|
+
- Add named backreferences.
|
6
|
+
*/
|
7
|
+
|
8
|
+
typedef struct _oregexp {
|
9
|
+
regex_t * reg;
|
10
|
+
} ORegexp;
|
11
|
+
|
12
|
+
VALUE mOniguruma;
|
13
|
+
VALUE nameHash;
|
14
|
+
|
15
|
+
/*
 * Free function handed to Data_Wrap_Struct by oregexp_allocate: releases
 * the compiled Oniguruma regex held by the wrapper, then the wrapper itself.
 */
static void oregexp_free( ORegexp * wrapper ) {
    regex_t * compiled = wrapper->reg;

    onig_free( compiled );
    free( wrapper );
}
|
19
|
+
|
20
|
+
/*
 * Allocator for Oniguruma::ORegexp: creates an empty ORegexp wrapper (no
 * compiled regex yet) and wraps it in a Ruby object of class klass.
 * The wrapper is released by oregexp_free.
 */
static VALUE oregexp_allocate( VALUE klass ) {
    ORegexp * oregexp = malloc( sizeof( ORegexp ) );

    /* malloc can fail; raise NoMemoryError instead of dereferencing NULL. */
    if( oregexp == NULL )
        rb_memerror();

    oregexp->reg = NULL;
    return Data_Wrap_Struct( klass, 0, oregexp_free, oregexp );
}
|
25
|
+
|
26
|
+
|
27
|
+
static OnigEncodingType * int2encoding( int index ) {
|
28
|
+
switch( index ) {
|
29
|
+
case 0: return ONIG_ENCODING_ASCII;
|
30
|
+
case 1: return ONIG_ENCODING_ISO_8859_1;
|
31
|
+
case 2: return ONIG_ENCODING_ISO_8859_2;
|
32
|
+
case 3: return ONIG_ENCODING_ISO_8859_3;
|
33
|
+
case 4: return ONIG_ENCODING_ISO_8859_4;
|
34
|
+
case 5: return ONIG_ENCODING_ISO_8859_5;
|
35
|
+
case 6: return ONIG_ENCODING_ISO_8859_6;
|
36
|
+
case 7: return ONIG_ENCODING_ISO_8859_7;
|
37
|
+
case 8: return ONIG_ENCODING_ISO_8859_8;
|
38
|
+
case 9: return ONIG_ENCODING_ISO_8859_9;
|
39
|
+
case 10: return ONIG_ENCODING_ISO_8859_10;
|
40
|
+
case 11: return ONIG_ENCODING_ISO_8859_11;
|
41
|
+
case 12: return ONIG_ENCODING_ISO_8859_11;
|
42
|
+
case 13: return ONIG_ENCODING_ISO_8859_13;
|
43
|
+
case 14: return ONIG_ENCODING_ISO_8859_14;
|
44
|
+
case 15: return ONIG_ENCODING_ISO_8859_15;
|
45
|
+
case 16: return ONIG_ENCODING_ISO_8859_16;
|
46
|
+
case 17: return ONIG_ENCODING_UTF8;
|
47
|
+
case 18: return ONIG_ENCODING_UTF16_BE;
|
48
|
+
case 19: return ONIG_ENCODING_UTF16_LE;
|
49
|
+
case 20: return ONIG_ENCODING_UTF32_BE;
|
50
|
+
case 21: return ONIG_ENCODING_UTF32_LE;
|
51
|
+
case 22: return ONIG_ENCODING_EUC_JP;
|
52
|
+
case 23: return ONIG_ENCODING_EUC_TW;
|
53
|
+
case 24: return ONIG_ENCODING_EUC_KR;
|
54
|
+
case 25: return ONIG_ENCODING_EUC_CN;
|
55
|
+
case 26: return ONIG_ENCODING_SJIS;
|
56
|
+
/*case 27: return ONIG_ENCODING_KOI8;*/
|
57
|
+
case 28: return ONIG_ENCODING_KOI8_R;
|
58
|
+
case 29: return ONIG_ENCODING_CP1251;
|
59
|
+
case 30: return ONIG_ENCODING_BIG5;
|
60
|
+
case 31: return ONIG_ENCODING_GB18030;
|
61
|
+
case 32: return ONIG_ENCODING_UNDEF;
|
62
|
+
}
|
63
|
+
return ONIG_ENCODING_UNDEF;
|
64
|
+
}
|
65
|
+
|
66
|
+
static OnigSyntaxType * int2syntax( int index ) {
|
67
|
+
switch( index ) {
|
68
|
+
case 0: return ONIG_SYNTAX_ASIS;
|
69
|
+
case 1: return ONIG_SYNTAX_POSIX_BASIC;
|
70
|
+
case 2: return ONIG_SYNTAX_POSIX_EXTENDED;
|
71
|
+
case 3: return ONIG_SYNTAX_EMACS;
|
72
|
+
case 4: return ONIG_SYNTAX_GREP;
|
73
|
+
case 5: return ONIG_SYNTAX_GNU_REGEX;
|
74
|
+
case 6: return ONIG_SYNTAX_JAVA;
|
75
|
+
case 7: return ONIG_SYNTAX_PERL;
|
76
|
+
case 8: return ONIG_SYNTAX_PERL_NG;
|
77
|
+
case 9: return ONIG_SYNTAX_RUBY;
|
78
|
+
case 10: return ONIG_SYNTAX_DEFAULT;
|
79
|
+
}
|
80
|
+
return ONIG_SYNTAX_DEFAULT;
|
81
|
+
}
|
82
|
+
|
83
|
+
static int name_callback(
|
84
|
+
const UChar* name,
|
85
|
+
const UChar* name_end,
|
86
|
+
int ngroup_num,
|
87
|
+
int* group_nums,
|
88
|
+
regex_t* reg,
|
89
|
+
void* arg
|
90
|
+
) {
|
91
|
+
int i, gn, ref;
|
92
|
+
OnigRegion *region = (OnigRegion* )arg;
|
93
|
+
|
94
|
+
for (i = 0; i < ngroup_num; i++) {
|
95
|
+
gn = group_nums[i];
|
96
|
+
ref = onig_name_to_backref_number(reg, name, name_end, region);
|
97
|
+
if (ref != gn )
|
98
|
+
rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
|
99
|
+
rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
|
100
|
+
}
|
101
|
+
return 0;
|
102
|
+
}
|
103
|
+
|
104
|
+
/*
 * ORegexp#initialize(pattern, options)
 *
 * Compiles pattern with Oniguruma and stores the compiled regex in the
 * wrapped ORegexp struct. options is a Hash with the keys :options,
 * :encoding and :syntax, each holding an integer. The pattern string and
 * the options hash are kept in @pattern and @options.
 *
 * Raises a Ruby exception carrying Oniguruma's error message when the
 * pattern does not compile. Returns self.
 */
static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
    ORegexp *oregexp;
    VALUE pattern_str, rOptions, rEncoding, rSyntax;
    UChar *pat_ptr;
    long pat_len;
    int iOptions, r;
    OnigEncodingType *encoding;
    OnigSyntaxType *syntax;
    OnigErrorInfo einfo;

    Data_Get_Struct( self, ORegexp, oregexp );
    Check_Type( options, T_HASH );

    pattern_str = StringValue( pattern );
    rb_iv_set( self, "@pattern", pattern_str );
    rb_iv_set( self, "@options", options );
    pat_ptr = (UChar* )RSTRING(pattern_str)->ptr;
    pat_len = RSTRING(pattern_str)->len;

    rOptions  = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
    rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
    rSyntax   = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );

    /* The hash values are Ruby Fixnums: convert them with NUM2INT before
       using them as table indexes. Passing the raw VALUE selects the wrong
       entry, because a VALUE is the tagged representation of the number. */
    iOptions = NUM2INT( rOptions );
    encoding = int2encoding( NUM2INT( rEncoding ) );
    syntax   = int2syntax( NUM2INT( rSyntax ) );

    /* initialize may be invoked again on the same object; release the
       previously compiled regex so it is not leaked. */
    if( oregexp->reg != NULL ) {
        onig_free( oregexp->reg );
        oregexp->reg = NULL;
    }

    r = onig_new(&(oregexp->reg), pat_ptr, pat_ptr + pat_len, iOptions, encoding, syntax, &einfo);
    if (r != ONIG_NORMAL) {
        char s[ONIG_MAX_ERROR_MESSAGE_LEN];
        onig_error_code_to_str( (UChar* )s, r, &einfo);
        rb_raise(rb_eException, "Oniguruma Error: %s", s);
    }
    return self;
}
|
132
|
+
|
133
|
+
/*
|
134
|
+
* call-seq:
|
135
|
+
* rxp.match(str) => matchdata or nil
|
136
|
+
*
|
137
|
+
* Returns a <code>MatchData</code> object describing the match, or
|
138
|
+
* <code>nil</code> if there was no match. This is equivalent to retrieving the
|
139
|
+
* value of the special variable <code>$~</code> following a normal match.
|
140
|
+
*
|
141
|
+
* /(.)(.)(.)/.match("abc")[2] #=> "b"
|
142
|
+
*/
|
143
|
+
/*
 * ORegexp#match(str)
 *
 * Searches str with the compiled regex. On success builds an
 * Oniguruma::MatchData from the subject string, the arrays of group start
 * and end offsets and the hash of named groups, stores it in the class
 * variable @@last_match of Oniguruma::ORegexp and returns it. Returns nil
 * when nothing matches and raises a Ruby exception with Oniguruma's
 * message on a search error.
 */
static VALUE oregexp_match( VALUE self, VALUE string ) {
    ORegexp *oregexp;
    VALUE string_str, begins, ends, kMatchData, kORegexp, matchData;
    /* Stack reference that keeps the names hash reachable for the garbage
       collector while the callback fills it; the nameHash global alone is
       not registered with the GC inside this function. */
    volatile VALUE names;
    UChar *str_ptr, *str_end;
    OnigRegion *region;
    int r, i;

    Data_Get_Struct( self, ORegexp, oregexp );

    string_str = StringValue( string );
    str_ptr = (UChar* )RSTRING(string_str)->ptr;
    str_end = str_ptr + RSTRING(string_str)->len;

    region = onig_region_new();
    r = onig_search(oregexp->reg, str_ptr, str_end, str_ptr, str_end, region, ONIG_OPTION_NONE);

    if (r >= 0) {
        begins = rb_ary_new();
        ends = rb_ary_new();
        names = rb_hash_new();
        nameHash = names;   /* name_callback writes into this global */

        onig_foreach_name(oregexp->reg, name_callback, (void* )region);

        for (i = 0; i < region->num_regs; i++) {
            rb_ary_push( begins, INT2FIX( region->beg[i] ) );
            rb_ary_push( ends, INT2FIX( region->end[i] ) );
        }

        /* All offsets are copied; free the region before calling
           MatchData.new so an exception raised there cannot leak it. */
        onig_region_free(region, 1 );

        kMatchData = rb_const_get( mOniguruma, rb_intern( "MatchData" ) );
        kORegexp = rb_const_get( mOniguruma, rb_intern( "ORegexp" ) );
        matchData = rb_funcall(kMatchData, rb_intern("new"), 4, string_str, begins, ends, names );
        rb_cv_set( kORegexp, "@@last_match", matchData );
        return matchData;
    }

    onig_region_free(region, 1 );
    if (r == ONIG_MISMATCH)
        return Qnil;

    {
        char s[ONIG_MAX_ERROR_MESSAGE_LEN];
        onig_error_code_to_str( (UChar* )s, r);
        rb_raise(rb_eException, "Oniguruma Error: %s", s);
    }
    return Qnil; /* not reached: rb_raise does not return */
}
|
186
|
+
|
187
|
+
void Init_oregexp() {
|
188
|
+
mOniguruma = rb_define_module("Oniguruma");
|
189
|
+
VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
|
190
|
+
rb_define_alloc_func(cORegexp, oregexp_allocate);
|
191
|
+
rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
|
192
|
+
rb_define_method( cORegexp, "match", oregexp_match, 1 );
|
193
|
+
|
194
|
+
}
|
data/lib/oniguruma.rb
ADDED
@@ -0,0 +1,491 @@
|
|
1
|
+
require 'oregexp'
|
2
|
+
|
3
|
+
module Oniguruma
|
4
|
+
OPTION_NONE = 0
|
5
|
+
OPTION_IGNORECASE = 1
|
6
|
+
OPTION_EXTEND = (OPTION_IGNORECASE << 1)
|
7
|
+
OPTION_MULTILINE = (OPTION_EXTEND << 1)
|
8
|
+
OPTION_SINGLELINE = (OPTION_MULTILINE << 1)
|
9
|
+
OPTION_FIND_LONGEST = (OPTION_SINGLELINE << 1)
|
10
|
+
OPTION_FIND_NOT_EMPTY = (OPTION_FIND_LONGEST << 1)
|
11
|
+
OPTION_NEGATE_SINGLELINE = (OPTION_FIND_NOT_EMPTY << 1)
|
12
|
+
OPTION_DONT_CAPTURE_GROUP = (OPTION_NEGATE_SINGLELINE << 1)
|
13
|
+
OPTION_CAPTURE_GROUP = (OPTION_DONT_CAPTURE_GROUP << 1)
|
14
|
+
OPTION_NOTBOL = (OPTION_CAPTURE_GROUP << 1)
|
15
|
+
OPTION_NOTEOL = (OPTION_NOTBOL << 1)
|
16
|
+
OPTION_POSIX_REGION = (OPTION_NOTEOL << 1)
|
17
|
+
OPTION_MAXBIT = OPTION_POSIX_REGION
|
18
|
+
OPTION_DEFAULT = OPTION_NONE
|
19
|
+
|
20
|
+
SYNTAX_ASIS = 0
|
21
|
+
SYNTAX_POSIX_BASIC = 1
|
22
|
+
SYNTAX_POSIX_EXTENDED = 2
|
23
|
+
SYNTAX_EMACS = 3
|
24
|
+
SYNTAX_GREP = 4
|
25
|
+
SYNTAX_GNU_REGEX = 5
|
26
|
+
SYNTAX_JAVA = 6
|
27
|
+
SYNTAX_PERL = 7
|
28
|
+
SYNTAX_PERL_NG = 8
|
29
|
+
SYNTAX_RUBY = 9
|
30
|
+
SYNTAX_DEFAULT = 10
|
31
|
+
|
32
|
+
ENCODING_ASCII = 0
|
33
|
+
ENCODING_ISO_8859_1 = 1
|
34
|
+
ENCODING_ISO_8859_2 = 2
|
35
|
+
ENCODING_ISO_8859_3 = 3
|
36
|
+
ENCODING_ISO_8859_4 = 4
|
37
|
+
ENCODING_ISO_8859_5 = 5
|
38
|
+
ENCODING_ISO_8859_6 = 6
|
39
|
+
ENCODING_ISO_8859_7 = 7
|
40
|
+
ENCODING_ISO_8859_8 = 8
|
41
|
+
ENCODING_ISO_8859_9 = 9
|
42
|
+
ENCODING_ISO_8859_10 = 10
|
43
|
+
ENCODING_ISO_8859_11 = 11
|
44
|
+
ENCODING_ISO_8859_12 = 12
|
45
|
+
ENCODING_ISO_8859_13 = 13
|
46
|
+
ENCODING_ISO_8859_14 = 14
|
47
|
+
ENCODING_ISO_8859_15 = 15
|
48
|
+
ENCODING_ISO_8859_16 = 16
|
49
|
+
ENCODING_UTF8 = 17
|
50
|
+
ENCODING_UTF16_BE = 18
|
51
|
+
ENCODING_UTF16_LE = 19
|
52
|
+
ENCODING_UTF32_BE = 20
|
53
|
+
ENCODING_UTF32_LE = 21
|
54
|
+
ENCODING_EUC_JP = 22
|
55
|
+
ENCODING_EUC_TW = 23
|
56
|
+
ENCODING_EUC_KR = 24
|
57
|
+
ENCODING_EUC_CN = 25
|
58
|
+
ENCODING_SJIS = 26
|
59
|
+
ENCODING_KOI8 = 27
|
60
|
+
ENCODING_KOI8_R = 28
|
61
|
+
ENCODING_CP1251 = 29
|
62
|
+
ENCODING_BIG5 = 30
|
63
|
+
ENCODING_GB18030 = 31
|
64
|
+
ENCODING_UNDEF = 32
|
65
|
+
|
66
|
+
|
67
|
+
class ORegexp
|
68
|
+
|
69
|
+
class << self
|
70
|
+
# :stopdoc:
|
71
|
+
alias compile new
|
72
|
+
# :startdoc:
|
73
|
+
|
74
|
+
# call-seq:
|
75
|
+
# ORegexp.escape(str) => a_str
|
76
|
+
# ORegexp.quote(str) => a_str
|
77
|
+
#
|
78
|
+
# Escapes any characters that would have special meaning in a regular
|
79
|
+
# expression. Returns a new escaped string, or self if no characters are
|
80
|
+
# escaped. For any string,
|
81
|
+
# <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true.
|
82
|
+
#
|
83
|
+
# ORegexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
|
84
|
+
#
|
85
|
+
|
86
|
+
# Hands its arguments to the standard Regexp.escape and returns the
# escaped string it produces.
def escape( *args )
  escaped = Regexp.escape( *args )
  escaped
end
|
89
|
+
# :stopdoc:
|
90
|
+
alias quote escape
|
91
|
+
# :startdoc:
|
92
|
+
|
93
|
+
# call-seq:
|
94
|
+
# ORegexp.last_match => matchdata
|
95
|
+
# ORegexp.last_match(fixnum) => str
|
96
|
+
#
|
97
|
+
# The first form returns the <code>MatchData</code> object generated by the
|
98
|
+
# last successful pattern match. The second form returns the nth field in this
|
99
|
+
# <code>MatchData</code> object.
|
100
|
+
#
|
101
|
+
# ORegexp.new( 'c(.)t' ) =~ 'cat' #=> 0
|
102
|
+
# ORegexp.last_match #=> #<MatchData:0x401b3d30>
|
103
|
+
# ORegexp.last_match(0) #=> "cat"
|
104
|
+
# ORegexp.last_match(1) #=> "a"
|
105
|
+
# ORegexp.last_match(2) #=> nil
|
106
|
+
|
107
|
+
# Returns the MatchData stored in @@last_match, or its field +index+ when
# an index is given. Returns nil when @@last_match has not been set yet,
# instead of failing with NameError on the uninitialized class variable.
def last_match( index = nil)
  latest = defined?( @@last_match ) ? @@last_match : nil
  return latest unless index
  latest && latest[index]
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# :stopdoc:
|
117
|
+
alias old_initialize initialize
|
118
|
+
# :startdoc:
|
119
|
+
|
120
|
+
# Constructs a new regular expression from <i>pattern</i>, which is a
|
121
|
+
# <code>String</code>. The parameter <i>options</i> is a <code>Hash</code>
|
122
|
+
# of the form:
|
123
|
+
#
|
124
|
+
# <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
|
125
|
+
#
|
126
|
+
# Where <code>option_value</code> is a bitwise <code>OR</code> of
|
127
|
+
# <code>Oniguruma::OPTION_XXX</code> constants; <code>encoding_value</code>
|
128
|
+
# is one of <code>Oniguruma::ENCODING_XXX</code> constants; and
|
129
|
+
# <code>syntax_value</code> is one of <code>Oniguruma::SYNTAX_XXX</code>
|
130
|
+
# constants.
|
131
|
+
#
|
132
|
+
# r1 = ORegexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
|
133
|
+
# r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE ) #=> /cat/i
|
134
|
+
# r3 = ORegexp.new('dog', :options => OPTION_EXTEND ) #=> /dog/x
|
135
|
+
#
|
136
|
+
# #Accept java syntax on SJIS encoding:
|
137
|
+
# r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
|
138
|
+
|
139
|
+
# Fills in the default :options, :encoding and :syntax values for any key
# missing from +options+, freezes the merged hash and passes it with
# +pattern+ to the original initializer.
def initialize( pattern, options = {} )
  settings = {
    :options  => OPTION_DEFAULT,
    :encoding => ENCODING_ASCII,
    :syntax   => SYNTAX_DEFAULT
  }.merge( options )
  settings.freeze
  old_initialize( pattern, settings )
end
|
143
|
+
|
144
|
+
# call-seq:
|
145
|
+
# rxp == other_rxp => true or false
|
146
|
+
# rxp.eql?(other_rxp) => true or false
|
147
|
+
#
|
148
|
+
# Equality---Two regexps are equal if their patterns are identical, they have
|
149
|
+
# the same character set code, and their <code>#casefold?</code> values are the
|
150
|
+
# same.
|
151
|
+
|
152
|
+
# Two regexps are equal when their sources, encodings (kcode) and
# casefold? flags are all equal. An object that does not respond to
# source, kcode and casefold? (nil, a String, ...) is never equal; it no
# longer raises NoMethodError.
def == regexp
  comparable = regexp.respond_to?( :source ) &&
               regexp.respond_to?( :kcode ) &&
               regexp.respond_to?( :casefold? )
  return false unless comparable
  @pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
end
|
155
|
+
alias eql? ==
|
156
|
+
|
157
|
+
# call-seq:
|
158
|
+
# rxp.casefold? => true or false
|
159
|
+
#
|
160
|
+
# Returns the value of the case-insensitive flag.
|
161
|
+
|
162
|
+
# True when the OPTION_IGNORECASE bit is set in the stored option flags.
def casefold?
  flags = @options[:options]
  (flags & OPTION_IGNORECASE) > 0
end
|
165
|
+
|
166
|
+
# call-seq:
|
167
|
+
# rxp.kcode => int
|
168
|
+
#
|
169
|
+
# Returns the character set code for the regexp.
|
170
|
+
# Returns the value stored under :encoding in the options hash.
def kcode
  encoding = @options[:encoding]
  encoding
end
|
173
|
+
|
174
|
+
# call-seq:
|
175
|
+
# rxp.options => fixnum
|
176
|
+
#
|
177
|
+
# Returns the set of bits corresponding to the options used when creating this
|
178
|
+
# ORegexp (see <code>ORegexp::new</code> for details). Note that additional bits
|
179
|
+
# may be set in the returned options: these are used internally by the regular
|
180
|
+
# expression code. These extra bits are ignored if the options are passed to
|
181
|
+
# <code>ORegexp::new</code>.
|
182
|
+
#
|
183
|
+
# Oniguruma::OPTION_IGNORECASE #=> 1
|
184
|
+
# Oniguruma::OPTION_EXTEND #=> 2
|
185
|
+
# Oniguruma::OPTION_MULTILINE #=> 4
|
186
|
+
#
|
187
|
+
# Regexp.new(r.source, :options => Oniguruma::OPTION_EXTEND ) #=> 2
|
188
|
+
|
189
|
+
# Returns the value stored under :options in the options hash.
def options
  flags = @options[:options]
  flags
end
|
192
|
+
|
193
|
+
# call-seq:
|
194
|
+
# rxp.to_s => str
|
195
|
+
#
|
196
|
+
# Returns a string containing the regular expression and its options (using the
|
197
|
+
# <code>(?xxx:yyy)</code> notation. This string can be fed back in to
|
198
|
+
# <code>Regexp::new</code> to a regular expression with the same semantics as
|
199
|
+
# the original. (However, <code>Regexp#==</code> may not return true when
|
200
|
+
# comparing the two, as the source of the regular expression itself may
|
201
|
+
# differ, as the example shows). <code>Regexp#inspect</code> produces a
|
202
|
+
# generally more readable version of <i>rxp</i>.
|
203
|
+
#
|
204
|
+
# r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
|
205
|
+
# s1 = r1.to_s #=> "(?ix-m:ab+c)"
|
206
|
+
# r2 = ORegexp.new(s1) #=> /(?ix-m:ab+c)/
|
207
|
+
# r1 == r2 #=> false
|
208
|
+
# r1.source #=> "ab+c"
|
209
|
+
# r2.source #=> "(?ix-m:ab+c)"
|
210
|
+
|
211
|
+
# Returns "(?<on>-<off>)" followed by the pattern, where <on> lists the
# letters i, m, x whose option bits are set and <off> those that are not
# (the "-<off>" part is omitted when all three are set).
#
# The pattern is appended verbatim. Escaping it would turn every
# metacharacter into a literal, so the result could not be compiled back
# into a regexp with the same meaning.
def to_s
  flags = @options[:options]
  letters = [ ["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND] ]
  enabled, disabled = letters.partition { |letter, bit| (flags & bit) > 0 }

  prefix = "(?" + enabled.map { |pair| pair.first }.join
  prefix += "-" + disabled.map { |pair| pair.first }.join unless disabled.empty?
  prefix + ")" + @pattern
end
|
225
|
+
|
226
|
+
|
227
|
+
# call-seq:
|
228
|
+
# rxp.inspect => string
|
229
|
+
#
|
230
|
+
# Returns a readable version of <i>rxp</i>
|
231
|
+
#
|
232
|
+
# ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect => /cat/im
|
233
|
+
# ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s => (?im-x)cat
|
234
|
+
|
235
|
+
# Returns the pattern between slashes followed by the letters i, m, x for
# each option bit that is set, e.g. /cat/im.
#
# The pattern is shown verbatim; escaping it would display a different
# regular expression from the one that was compiled.
def inspect
  flags = @options[:options]
  letters = [ ["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND] ]
  suffix = letters.select { |letter, bit| (flags & bit) > 0 }.map { |pair| pair.first }.join
  "/" + @pattern + "/" + suffix
end
|
242
|
+
|
243
|
+
# call-seq:
|
244
|
+
# rxp =~ string => int or nil
|
245
|
+
#
|
246
|
+
# Matches <code>rxp</code> against <code>string</code>, returning the offset of the
|
247
|
+
# start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
|
248
|
+
# <code>MatchData</code> or <code>nil</code>.
|
249
|
+
#
|
250
|
+
# ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
|
251
|
+
# ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
|
252
|
+
|
253
|
+
# Matches against +string+ and returns the start offset of the match, or
# nil when +string+ is nil/false or nothing matches.
def =~ string
  found = string && match( string )
  found ? found.begin : nil
end
|
259
|
+
|
260
|
+
# call-seq:
|
261
|
+
# rxp === str => true or false
|
262
|
+
#
|
263
|
+
# Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
|
264
|
+
#
|
265
|
+
# a = "HELLO"
|
266
|
+
# case a
|
267
|
+
# when ORegexp.new('^[a-z]*$'); print "Lower case\n"
|
268
|
+
# when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
|
269
|
+
# else; print "Mixed case\n"
|
270
|
+
# end
|
271
|
+
#
|
272
|
+
# <em>produces:</em>
|
273
|
+
#
|
274
|
+
# Upper case
|
275
|
+
|
276
|
+
alias === =~
|
277
|
+
|
278
|
+
# Returns a frozen copy of the pattern string. Freezing a copy leaves
# @pattern itself unfrozen; that is the very object the caller passed to
# ORegexp.new.
def source
  @pattern.dup.freeze
end
|
281
|
+
|
282
|
+
# Collects successive matches in +string+. After each match the search
# continues in the text that follows it. Returns a MultiMatchData holding
# the matches and, for each one, the offset in +string+ of the piece of
# text it was found in; returns nil when there is no match at all.
def match_all string
  matches = []
  positions = []
  position = 0
  tmp_string = string
  while tmp_string != ""
    m = match( tmp_string )
    break unless m
    matches << m
    positions << position
    if m.end == 0
      # Zero-width match at the very start: post_match would be the same
      # text again and the loop would never end, so step over one
      # character instead.
      tmp_string = tmp_string[1..-1]
      position += 1
    else
      tmp_string = m.post_match
      position += m.end
    end
  end
  matches.empty? ? nil : MultiMatchData.new( string, matches, positions )
end
|
307
|
+
|
308
|
+
# Returns a copy of +string+ with the first match replaced by
# +replacement+, or by the block's result for the matched text when no
# replacement is given. Returns +string+ itself when nothing matches.
#
# The text is replaced at the position of the match. Substituting the
# first literal occurrence of the matched text in the whole string would
# hit the wrong place whenever the same text also occurs earlier.
def sub string, replacement = nil
  m = match( string )
  return string unless m
  replacement = yield m[0] unless replacement
  # String#sub on the matched text alone keeps the usual handling of
  # backslash sequences in the replacement.
  m.pre_match + m[0].sub( m[0], replacement ) + m.post_match
end
|
317
|
+
|
318
|
+
# Returns a copy of +string+ with every match found by match_all replaced
# by +replacement+, or by the block's result when no replacement is given
# (the block receives the matched text and the match object). Returns
# +string+ itself when nothing matches.
#
# The result is assembled from the match offsets, so each replacement
# lands where its match was found, not on the first literal occurrence of
# the matched text.
def gsub string, replacement = nil
  matches = match_all( string )
  return string unless matches
  fixed_replacement = replacement
  rebuilt = ""
  cursor = 0
  matches.each do |m, offset|
    replacement = yield( m[0], m ) unless fixed_replacement
    # offset is where the searched piece starts in string; m.begin and
    # m.end are relative to that piece.
    rebuilt += string[cursor...( offset + m.begin )]
    rebuilt += m[0].sub( m[0], replacement )
    cursor = offset + m.end
  end
  rebuilt + string[cursor..-1]
end
|
330
|
+
|
331
|
+
# Replaces the first match in +string+ in place with +replacement+, or
# with the block's result for the matched text when no replacement is
# given. Returns +string+ in every case.
#
# The text is replaced at the position of the match, not at the first
# literal occurrence of the matched text.
def sub! string, replacement = nil
  m = match( string )
  return string unless m
  replacement = yield m[0] unless replacement
  # Take both sides before the string is modified.
  head = m.pre_match
  tail = m.post_match
  string.replace( head + m[0].sub( m[0], replacement ) + tail )
end
|
340
|
+
|
341
|
+
# Replaces every match found by match_all in +string+ in place with
# +replacement+, or with the block's result when no replacement is given
# (the block receives the matched text and the match object). Returns
# +string+ in every case.
#
# The new content is assembled from the match offsets, so each
# replacement lands where its match was found, not on the first literal
# occurrence of the matched text.
def gsub! string, replacement = nil
  matches = match_all( string )
  return string unless matches
  fixed_replacement = replacement
  rebuilt = ""
  cursor = 0
  matches.each do |m, offset|
    replacement = yield( m[0], m ) unless fixed_replacement
    # offset is where the searched piece starts in string; m.begin and
    # m.end are relative to that piece.
    rebuilt += string[cursor...( offset + m.begin )]
    rebuilt += m[0].sub( m[0], replacement )
    cursor = offset + m.end
  end
  string.replace( rebuilt + string[cursor..-1] )
end
|
352
|
+
end
|
353
|
+
|
354
|
+
# The matches collected from one subject string, together with the offset
# in that string of the piece of text each match was found in. Offsets
# reported by #begin, #end and #offset are relative to the whole string.
class MultiMatchData
  include Enumerable

  # string::    the subject string
  # matches::   the match objects, in order
  # positions:: for each match, the offset added to its own begin/end
  def initialize( string, matches, positions )
    @matches = matches
    @positions = positions
    @string = string
  end

  # Offset stored for the match at +index+.
  def position index
    @positions[index]
  end

  # With one argument returns the match at that index (or the slice for a
  # range); with two arguments returns the slice (start, length).
  def []( value1, value2 = nil )
    value2 ? @matches[value1, value2] : @matches[value1]
  end

  # Start of match +index+, relative to the whole string.
  def begin index
    @matches[index].begin + @positions[index]
  end

  # End of match +index+, relative to the whole string.
  def end index
    @matches[index].end + @positions[index]
  end

  # Number of matches.
  def length
    @matches.size
  end
  alias size length

  # [begin, end] of match +index+, relative to the whole string.
  def offset index
    [self.begin(index), self.end(index) ]
  end

  # Frozen copy of the subject string. A copy is frozen so that the
  # caller's own string object stays modifiable.
  def string
    @string.dup.freeze
  end

  # The array of match objects.
  def to_a
    @matches
  end

  # Yields each match together with its stored offset.
  def each
    @matches.each_with_index do |m, i|
      yield m, @positions[i]
    end
  end
end
|
404
|
+
|
405
|
+
# Result of a single match: the subject string, the start and end offsets
# of every group (group 0 is the whole match) and a hash mapping group
# names (symbols) to group numbers.
class MatchData
  # string:: the subject string
  # starts:: start offset of each group
  # ends::   end offset of each group
  # names::  Hash of :name => group number
  def initialize( string, starts, ends, names )
    @string = string
    @starts = starts
    @ends = ends
    # A negative start offset marks a group that took no part in the
    # match; its text is nil rather than the empty string a negative
    # range would slice out.
    @matches = @starts.zip( @ends ).map do |first, last|
      first < 0 ? nil : @string[first...last]
    end
    @match_count = @matches.size
    @start_pos = 0 # not read anywhere in this class
    @names = names
  end

  # With one argument returns the text of the group given by number,
  # range or name (nil for an unknown name); with two arguments returns
  # the slice (start, length) of the group texts.
  def []( value1, value2 = nil )
    return @matches[value1, value2] if value2
    index = to_index( value1 )
    index ? @matches[index] : nil
  end

  # Translates a group name (Symbol or String) into its group number via
  # the names hash; any other value is returned unchanged.
  def to_index name
    case name
    when Symbol then @names[name]
    when String then @names[name.to_sym]
    else name
    end
  end

  # Start offset of the given group (default: the whole match).
  def begin index = 0
    @starts[to_index( index )]
  end

  # End offset of the given group (default: the whole match).
  def end index = 0
    @ends[to_index( index )]
  end

  # Texts of all groups except group 0.
  def captures
    @matches[1..-1]
  end

  # Number of groups, including group 0.
  def length
    @match_count
  end
  alias size length

  # [start, end] offsets of the given group (default: the whole match).
  def offset index = 0
    [@starts[to_index( index )], @ends[to_index( index )]]
  end

  # The part of the subject string after the whole match.
  def post_match
    @string[@ends[0], @string.length]
  end

  # The part of the subject string before the whole match.
  def pre_match
    @string[0, @starts[0]]
  end

  # Group texts for which the block returns true.
  def select &block
    @matches.select( &block )
  end

  # Frozen copy of the subject string. A copy is frozen so that the
  # caller's own string object stays modifiable.
  def string
    @string.dup.freeze
  end

  # Array of all group texts, group 0 first.
  def to_a
    @matches
  end

  # Text of the whole match.
  def to_s
    @matches[0]
  end

  # Group texts for the given numbers or names, in the order requested.
  def values_at *values
    values.map { |v| self[v] }
  end
end
|
490
|
+
end
|
491
|
+
|
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'oniguruma'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
|
5
|
+
class ORegexpTestCase < Test::Unit::TestCase
|
6
|
+
def test_initialization
|
7
|
+
assert_nothing_raised do
|
8
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_compile
|
13
|
+
assert_nothing_raised do
|
14
|
+
reg = Oniguruma::ORegexp.compile( "(3.)(.*)(3.)" )
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_escape
|
19
|
+
assert_equal( '\\\\\*\?\{\}\.', Oniguruma::ORegexp.escape('\\*?{}.') )
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_last_match
|
23
|
+
assert_equal( 0, Oniguruma::ORegexp.new( 'c(.)t') =~ 'cat' )
|
24
|
+
assert_equal( "cat", Oniguruma::ORegexp.last_match(0) )
|
25
|
+
assert_equal( "a", Oniguruma::ORegexp.last_match(1) )
|
26
|
+
assert_equal( nil, Oniguruma::ORegexp.last_match(2) )
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_bad_initialization
|
30
|
+
assert_raises(Exception) do
|
31
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.))" )
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_match
|
36
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
37
|
+
assert_not_nil( reg.match( "12345634" ) )
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_no_match
|
41
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
42
|
+
assert_nil( reg.match( "12145614" ) )
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_sub
|
46
|
+
reg = Oniguruma::ORegexp.new( 'pe')
|
47
|
+
assert_equal( "**nelope", reg.sub( 'penelope', '**' ) )
|
48
|
+
assert_equal( "++nelope", reg.sub( 'penelope' ) { |m| '++' })
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_gsub
|
52
|
+
reg = Oniguruma::ORegexp.new( '\(\?#(\w+?)\)')
|
53
|
+
string = 'My favorite fruits are (?#fruit1), (?#fruit2), and (?#fruit3)'
|
54
|
+
assert_equal( "My favorite fruits are *, *, and *", reg.gsub( string, '*' ) )
|
55
|
+
fruits = { "fruit1" => "apples", "fruit2" => "bananas", "fruit3" => "grapes" }
|
56
|
+
assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |text, match| fruits[match[1]]} )
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_eql
|
60
|
+
assert_equal( Oniguruma::ORegexp.new( 'expression'), Oniguruma::ORegexp.new( 'expression') )
|
61
|
+
assert_not_equal( Oniguruma::ORegexp.new( 'expression'), Oniguruma::ORegexp.new( 'expresssion') )
|
62
|
+
assert_not_equal( Oniguruma::ORegexp.new( 'expression', :encoding => Oniguruma::ENCODING_ASCII ), Oniguruma::ORegexp.new( 'expression', :encoding => Oniguruma::ENCODING_ISO_8859_1 ) )
|
63
|
+
assert_not_equal( Oniguruma::ORegexp.new( 'expression', :options => Oniguruma::OPTION_IGNORECASE ), Oniguruma::ORegexp.new( 'expression', :options => Oniguruma::OPTION_NONE ) )
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_case_eql
|
67
|
+
a = "HELLO"
|
68
|
+
result = ""
|
69
|
+
case a
|
70
|
+
when Oniguruma::ORegexp.new('^[a-z]*$'); result = "Lower case\n"
|
71
|
+
when Oniguruma::ORegexp.new('^[A-Z]*$'); result = "Upper case\n"
|
72
|
+
else; result = "Mixed case\n"
|
73
|
+
end
|
74
|
+
|
75
|
+
assert_equal( "Upper case\n", result )
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_operator_match
|
79
|
+
assert_equal( nil, Oniguruma::ORegexp.new( 'SIT' ) =~ "insensitive" )
|
80
|
+
assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', :options => Oniguruma::OPTION_IGNORECASE ) =~ "insensitive" )
|
81
|
+
end
|
82
|
+
|
83
|
+
# def test_operator_match_2
|
84
|
+
# $_ = "input data"
|
85
|
+
# assert_equal( 7, ~Oniguruma::ORegexp.new( 'at' ) )
|
86
|
+
# end
|
87
|
+
|
88
|
+
def test_inspect
|
89
|
+
assert_equal( "/cat/im", Oniguruma::ORegexp.new( 'cat', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_MULTILINE ).inspect )
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_to_s
|
93
|
+
assert_equal( "(?im-x)cat", Oniguruma::ORegexp.new( 'cat', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_MULTILINE ).to_s )
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_kcode
|
97
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
98
|
+
assert_equal( Oniguruma::ENCODING_ASCII, reg.kcode )
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_options
|
102
|
+
assert_equal( 3, Oniguruma::ORegexp.new( 'abc', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_EXTEND ).options )
|
103
|
+
end
|
104
|
+
|
105
|
+
# ORegexp#source must hand back the pattern string exactly as it was given,
# including look-behind syntax and escaped characters.
def test_source
  pattern = '(?<=\n)\\.*ocatarinetabelachitchix'
  regexp = Oniguruma::ORegexp.new( pattern )
  assert_equal( pattern, regexp.source )
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# Tests for the match objects returned by Oniguruma::ORegexp#match and
# Oniguruma::ORegexp#match_all.
#
# Most tests apply the shared pattern '(.)(.)(\d+)(\d)' to "THX1138."; the
# assertions below expect the overall match "HX1138" (offsets 1...7) with the
# groups "H", "X", "113" and "8".
class MatchDataTestCase < Test::Unit::TestCase
  # Builds the four-group pattern shared by most tests.
  def setup
    @reg = Oniguruma::ORegexp.new( '(.)(.)(\d+)(\d)' )
  end

  # Indexing by integer, by (start, length), by range and with a negative start.
  def test_square_brackets
    matches = @reg.match( "THX1138." )
    assert_equal( "HX1138", matches[0] )
    assert_equal( ["H", "X"], matches[1, 2] )
    assert_equal( ["H", "X", "113"], matches[1..3] )
    assert_equal( ["X", "113"], matches[-3, 2] )
  end

  # Start offsets of the whole match and of the second group.
  def test_begin
    matches = @reg.match( "THX1138." )
    assert_equal( 1, matches.begin(0) )
    assert_equal( 2, matches.begin(2) )
  end

  # captures lists the groups only, without the whole match.
  def test_captures
    matches = @reg.match( "THX1138." )
    assert_equal( ["H", "X", "113", "8" ], matches.captures )
  end

  # End offsets (exclusive) of the whole match and of the second group.
  def test_end
    matches = @reg.match( "THX1138." )
    assert_equal( 7, matches.end(0) )
    assert_equal( 3, matches.end(2) )
  end

  # length and size both count the whole match plus the four groups.
  def test_size
    matches = @reg.match( "THX1138." )
    assert_equal( 5, matches.length )
    assert_equal( 5, matches.size )
  end

  # offset returns the [begin, end] pair for a group.
  def test_offset
    matches = @reg.match( "THX1138." )
    assert_equal( [1, 7], matches.offset(0) )
    assert_equal( [6, 7], matches.offset(4) )
  end

  # post_match is the text following the overall match.
  def test_post_match
    matches = @reg.match( "THX1138: The Movie" )
    assert_equal( ": The Movie", matches.post_match )
  end

  # pre_match is the text preceding the overall match.
  def test_pre_match
    matches = @reg.match( "THX1138." )
    assert_equal( "T", matches.pre_match )
  end

  # select filters the matched values (whole match included) with a block.
  def test_select
    matches = @reg.match( "THX1138: The Movie" )
    assert_equal( ["HX1138", "113"], matches.select{ |v| v =~ /\d\d/} )
  end

  # string returns the subject string, and that string must be frozen.
  def test_string
    matches = @reg.match( "THX1138." )
    assert_equal( "THX1138.", matches.string )
    assert( matches.string.frozen? )
  end

  # to_a lists the whole match followed by every group.
  def test_to_a
    matches = @reg.match( "THX1138." )
    assert_equal( ["HX1138", "H", "X", "113", "8" ], matches.to_a )
  end

  # to_s is the text of the whole match.
  def test_to_s
    matches = @reg.match( "THX1138." )
    assert_equal( "HX1138", matches.to_s )
  end

  # values_at accepts several indexes at once, including negative ones.
  def test_values_at
    matches = @reg.match( "THX1138: The Movie" )
    assert_equal( ["HX1138", "X", "113"], matches.values_at( 0, 2, -2) )
  end

  # match_all collects every occurrence of the pattern; 'ca' occurs three
  # times in the subject.
  def test_match_all
    reg = Oniguruma::ORegexp.new( 'ca' )
    matches = reg.match_all( 'ocatacachaca' )
    assert_equal( 3, matches.size )
    # NOTE(review): the third 'ca' starts at index 10 of the subject, while 7
    # is where the second occurrence ends -- confirm what position(i) is meant
    # to report against the implementation of match_all.
    assert_equal( 7, matches.position(2) )
    # begin(i)/end(i) must delimit the i-th occurrence inside the full subject.
    assert_equal( "ca", matches.string[matches.begin(1)...matches.end(1)])
  end

  # The lazy '\s*?' may match nothing at all, so against five newlines only
  # the first one is consumed and four remain in post_match.
  def test_match_empty_string
    reg = Oniguruma::ORegexp.new( '^\s*?(\n|\r)', :options => Oniguruma::OPTION_MULTILINE )
    matches = reg.match( "\n\n\n\n\n" )
    assert_not_nil( matches )
    assert_equal( "\n\n\n\n", matches.post_match )
  end

  # Named groups are reachable by symbol; an unknown name yields nil.
  def test_group_by_name
    reg = Oniguruma::ORegexp.new( '(?<begin>\()(?<body>.*)(?<end>\))', :options => Oniguruma::OPTION_MULTILINE )
    matches = reg.match( "blah (content) blah" )
    assert_not_nil( matches )
    assert_equal( '(', matches[:begin] )
    assert_equal( 'content', matches[:body] )
    assert_equal( ')', matches[:end] )
    # assert_nil mirrors the assert_not_nil used above and reports nil
    # expectations more clearly than assert_equal( nil, ... ).
    assert_nil( matches[:inexistent] )
  end
end
|
metadata
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: oniguruma
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.9.0
|
7
|
+
date: 2007-03-22 00:00:00 +01:00
|
8
|
+
summary: Bindings for the oniguruma regular expression library
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
- ext
|
12
|
+
email: dix_ans@yahoo.com
|
13
|
+
homepage: http://oniguruma.rubyforge.org
|
14
|
+
rubyforge_project: oniguruma
|
15
|
+
description: Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
|
16
|
+
autorequire:
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
platform: ruby
|
27
|
+
signing_key:
|
28
|
+
cert_chain:
|
29
|
+
post_install_message:
|
30
|
+
authors:
|
31
|
+
- Dizan Vasquez
|
32
|
+
files:
|
33
|
+
- History.txt
|
34
|
+
- Manifest.txt
|
35
|
+
- README.txt
|
36
|
+
- Rakefile
|
37
|
+
- lib/oniguruma.rb
|
38
|
+
- ext/oregexp.c
|
39
|
+
- test/test_oniguruma.rb
|
40
|
+
test_files:
|
41
|
+
- test/test_oniguruma.rb
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
extra_rdoc_files: []
|
45
|
+
|
46
|
+
executables: []
|
47
|
+
|
48
|
+
extensions:
|
49
|
+
- ext/extconf.rb
|
50
|
+
requirements: []
|
51
|
+
|
52
|
+
dependencies: []
|
53
|
+
|