oniguruma 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/Manifest.txt +7 -0
- data/README.txt +65 -0
- data/Rakefile +18 -0
- data/ext/extconf.rb +3 -0
- data/ext/oregexp.c +194 -0
- data/lib/oniguruma.rb +491 -0
- data/test/test_oniguruma.rb +214 -0
- metadata +53 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
== ONIGURUMA FOR RUBY:
|
2
|
+
|
3
|
+
Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
|
4
|
+
|
5
|
+
== FEATURES:
|
6
|
+
|
7
|
+
* Increased performance.
|
8
|
+
* Same interface as the standard Regexp class (easy transition!).
|
9
|
+
* Support for named groups, look-ahead, look-behind, and other
|
10
|
+
cool features!
|
11
|
+
|
12
|
+
== SYNOPSIS:
|
13
|
+
|
14
|
+
reg = Oniguruma::ORegexp.new( '(?<before>.*)(a)(?<after>.*)' )
|
15
|
+
match = reg.match( 'terraforming' )
|
16
|
+
puts match[0] # => 'terraforming'
|
17
|
+
puts match[:before] # => 'terr'
|
18
|
+
puts match[:after] # => 'forming'
|
19
|
+
|
20
|
+
== REQUIREMENTS:
|
21
|
+
|
22
|
+
* Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
|
23
|
+
|
24
|
+
== INSTALL:
|
25
|
+
|
26
|
+
sudo gem install -r oniguruma
|
27
|
+
|
28
|
+
== BUGS/PROBLEMS/INCOMPATIBILITIES:
|
29
|
+
|
30
|
+
* <code>ORegexp#~</code> is not implemented.
|
31
|
+
* <code>ORegexp#kcode</code> results are not compatible with <code>Regexp</code>.
|
32
|
+
* <code>ORegexp</code> options set in the string are not visible, this affects
|
33
|
+
<code>ORegexp#options</code>, <code>ORegexp#to_s</code>, <code>ORegexp#inspect</code>
|
34
|
+
and <code>ORegexp#==</code>.
|
35
|
+
|
36
|
+
== TODO:
|
37
|
+
|
38
|
+
* Complete documentation (methods, oniguruma syntax).
|
39
|
+
|
40
|
+
== CREDITS:
|
41
|
+
|
42
|
+
* K.Kosako, for his great library.
|
43
|
+
* A lot of the documentation has been copied from the original Ruby Regexp documentation.
|
44
|
+
|
45
|
+
== LICENSE:
|
46
|
+
|
47
|
+
New BSD License
|
48
|
+
|
49
|
+
Copyright (c) 2007, Dizan Vasquez
|
50
|
+
All rights reserved.
|
51
|
+
|
52
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
53
|
+
|
54
|
+
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
55
|
+
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
|
56
|
+
documentation and/or other materials provided with the distribution.
|
57
|
+
* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this
|
58
|
+
software without specific prior written permission.
|
59
|
+
|
60
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
61
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
62
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
63
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
64
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
65
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hoe'
|
3
|
+
|
4
|
+
class Hoe; def extra_deps; @extra_deps.reject { |x| Array(x).first == 'hoe' }; end end
|
5
|
+
|
6
|
+
Hoe.new('oniguruma', '0.9.0') do |p|
|
7
|
+
p.rubyforge_name = 'oniguruma'
|
8
|
+
p.author = 'Dizan Vasquez'
|
9
|
+
p.email = 'dix_ans@yahoo.com'
|
10
|
+
p.summary = 'Bindings for the oniguruma regular expression library'
|
11
|
+
# Double quotes so the separator is a real blank line; in single quotes '\n\n'
# is four literal characters (backslash, n, backslash, n).
p.description = p.paragraphs_of('README.txt', 1 ).join("\n\n")
|
12
|
+
p.url = 'http://oniguruma.rubyforge.org'
|
13
|
+
p.spec_extras[:extensions] = ["ext/extconf.rb"]
|
14
|
+
p.rdoc_pattern = /^(lib|bin|ext)|txt$/
|
15
|
+
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
|
16
|
+
end
|
17
|
+
|
18
|
+
|
data/ext/extconf.rb
ADDED
data/ext/oregexp.c
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <oniguruma.h>
|
3
|
+
/*
|
4
|
+
TODO:
|
5
|
+
- Add named backreferences.
|
6
|
+
*/
|
7
|
+
|
8
|
+
typedef struct _oregexp {
|
9
|
+
regex_t * reg;
|
10
|
+
} ORegexp;
|
11
|
+
|
12
|
+
VALUE mOniguruma;
|
13
|
+
VALUE nameHash;
|
14
|
+
|
15
|
+
static void oregexp_free( ORegexp * oregexp) {
|
16
|
+
onig_free( oregexp->reg );
|
17
|
+
free( oregexp );
|
18
|
+
}
|
19
|
+
|
20
|
+
static VALUE oregexp_allocate( VALUE klass ) {
|
21
|
+
ORegexp * oregexp = malloc( sizeof( ORegexp ) );
|
22
|
+
oregexp->reg = NULL;
|
23
|
+
return Data_Wrap_Struct( klass, 0, oregexp_free, oregexp );
|
24
|
+
}
|
25
|
+
|
26
|
+
|
27
|
+
static OnigEncodingType * int2encoding( int index ) {
|
28
|
+
switch( index ) {
|
29
|
+
case 0: return ONIG_ENCODING_ASCII;
|
30
|
+
case 1: return ONIG_ENCODING_ISO_8859_1;
|
31
|
+
case 2: return ONIG_ENCODING_ISO_8859_2;
|
32
|
+
case 3: return ONIG_ENCODING_ISO_8859_3;
|
33
|
+
case 4: return ONIG_ENCODING_ISO_8859_4;
|
34
|
+
case 5: return ONIG_ENCODING_ISO_8859_5;
|
35
|
+
case 6: return ONIG_ENCODING_ISO_8859_6;
|
36
|
+
case 7: return ONIG_ENCODING_ISO_8859_7;
|
37
|
+
case 8: return ONIG_ENCODING_ISO_8859_8;
|
38
|
+
case 9: return ONIG_ENCODING_ISO_8859_9;
|
39
|
+
case 10: return ONIG_ENCODING_ISO_8859_10;
|
40
|
+
case 11: return ONIG_ENCODING_ISO_8859_11;
|
41
|
+
case 12: return ONIG_ENCODING_ISO_8859_11;
|
42
|
+
case 13: return ONIG_ENCODING_ISO_8859_13;
|
43
|
+
case 14: return ONIG_ENCODING_ISO_8859_14;
|
44
|
+
case 15: return ONIG_ENCODING_ISO_8859_15;
|
45
|
+
case 16: return ONIG_ENCODING_ISO_8859_16;
|
46
|
+
case 17: return ONIG_ENCODING_UTF8;
|
47
|
+
case 18: return ONIG_ENCODING_UTF16_BE;
|
48
|
+
case 19: return ONIG_ENCODING_UTF16_LE;
|
49
|
+
case 20: return ONIG_ENCODING_UTF32_BE;
|
50
|
+
case 21: return ONIG_ENCODING_UTF32_LE;
|
51
|
+
case 22: return ONIG_ENCODING_EUC_JP;
|
52
|
+
case 23: return ONIG_ENCODING_EUC_TW;
|
53
|
+
case 24: return ONIG_ENCODING_EUC_KR;
|
54
|
+
case 25: return ONIG_ENCODING_EUC_CN;
|
55
|
+
case 26: return ONIG_ENCODING_SJIS;
|
56
|
+
/*case 27: return ONIG_ENCODING_KOI8;*/
|
57
|
+
case 28: return ONIG_ENCODING_KOI8_R;
|
58
|
+
case 29: return ONIG_ENCODING_CP1251;
|
59
|
+
case 30: return ONIG_ENCODING_BIG5;
|
60
|
+
case 31: return ONIG_ENCODING_GB18030;
|
61
|
+
case 32: return ONIG_ENCODING_UNDEF;
|
62
|
+
}
|
63
|
+
return ONIG_ENCODING_UNDEF;
|
64
|
+
}
|
65
|
+
|
66
|
+
static OnigSyntaxType * int2syntax( int index ) {
|
67
|
+
switch( index ) {
|
68
|
+
case 0: return ONIG_SYNTAX_ASIS;
|
69
|
+
case 1: return ONIG_SYNTAX_POSIX_BASIC;
|
70
|
+
case 2: return ONIG_SYNTAX_POSIX_EXTENDED;
|
71
|
+
case 3: return ONIG_SYNTAX_EMACS;
|
72
|
+
case 4: return ONIG_SYNTAX_GREP;
|
73
|
+
case 5: return ONIG_SYNTAX_GNU_REGEX;
|
74
|
+
case 6: return ONIG_SYNTAX_JAVA;
|
75
|
+
case 7: return ONIG_SYNTAX_PERL;
|
76
|
+
case 8: return ONIG_SYNTAX_PERL_NG;
|
77
|
+
case 9: return ONIG_SYNTAX_RUBY;
|
78
|
+
case 10: return ONIG_SYNTAX_DEFAULT;
|
79
|
+
}
|
80
|
+
return ONIG_SYNTAX_DEFAULT;
|
81
|
+
}
|
82
|
+
|
83
|
+
static int name_callback(
|
84
|
+
const UChar* name,
|
85
|
+
const UChar* name_end,
|
86
|
+
int ngroup_num,
|
87
|
+
int* group_nums,
|
88
|
+
regex_t* reg,
|
89
|
+
void* arg
|
90
|
+
) {
|
91
|
+
int i, gn, ref;
|
92
|
+
OnigRegion *region = (OnigRegion* )arg;
|
93
|
+
|
94
|
+
for (i = 0; i < ngroup_num; i++) {
|
95
|
+
gn = group_nums[i];
|
96
|
+
ref = onig_name_to_backref_number(reg, name, name_end, region);
|
97
|
+
if (ref != gn )
|
98
|
+
rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
|
99
|
+
rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
|
100
|
+
}
|
101
|
+
return 0;
|
102
|
+
}
|
103
|
+
|
104
|
+
static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
|
105
|
+
ORegexp *oregexp;
|
106
|
+
Data_Get_Struct( self, ORegexp, oregexp );
|
107
|
+
|
108
|
+
VALUE pattern_str = StringValue( pattern );
|
109
|
+
rb_iv_set( self, "@pattern", pattern_str );
|
110
|
+
rb_iv_set( self, "@options", options );
|
111
|
+
UChar* pat_ptr = RSTRING(pattern_str)->ptr;
|
112
|
+
int pat_len = RSTRING(pattern_str)->len;
|
113
|
+
|
114
|
+
VALUE rOptions = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
|
115
|
+
VALUE rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
|
116
|
+
VALUE rSyntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
|
117
|
+
int iOptions = NUM2INT( rOptions );
|
118
|
+
int iEncoding = int2encoding( rEncoding );
|
119
|
+
int iSyntax = int2syntax( rSyntax );
|
120
|
+
|
121
|
+
|
122
|
+
int r;
|
123
|
+
OnigErrorInfo einfo;
|
124
|
+
r = onig_new(&(oregexp->reg), pat_ptr, pat_ptr + pat_len, iOptions, iEncoding, iSyntax, &einfo);
|
125
|
+
if (r != ONIG_NORMAL) {
|
126
|
+
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
127
|
+
onig_error_code_to_str(s, r, &einfo);
|
128
|
+
rb_raise(rb_eException, "Oniguruma Error: %s", s);
|
129
|
+
}
|
130
|
+
return self;
|
131
|
+
}
|
132
|
+
|
133
|
+
/*
|
134
|
+
* call-seq:
|
135
|
+
* rxp.match(str) => matchdata or nil
|
136
|
+
*
|
137
|
+
* Returns a <code>MatchData</code> object describing the match, or
|
138
|
+
* <code>nil</code> if there was no match. This is equivalent to retrieving the
|
139
|
+
* value of the special variable <code>$~</code> following a normal match.
|
140
|
+
*
|
141
|
+
* /(.)(.)(.)/.match("abc")[2] #=> "b"
|
142
|
+
*/
|
143
|
+
static VALUE oregexp_match( VALUE self, VALUE string ) {
|
144
|
+
ORegexp *oregexp;
|
145
|
+
Data_Get_Struct( self, ORegexp, oregexp );
|
146
|
+
|
147
|
+
VALUE string_str = StringValue( string );
|
148
|
+
UChar* str_ptr = RSTRING(string_str)->ptr;
|
149
|
+
int str_len = RSTRING(string_str)->len;
|
150
|
+
|
151
|
+
OnigRegion *region = onig_region_new();
|
152
|
+
int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
|
153
|
+
if (r >= 0) {
|
154
|
+
|
155
|
+
VALUE begins = rb_ary_new();
|
156
|
+
VALUE ends = rb_ary_new();
|
157
|
+
nameHash = rb_hash_new();
|
158
|
+
|
159
|
+
onig_foreach_name(oregexp->reg, name_callback, (void* )region);
|
160
|
+
|
161
|
+
|
162
|
+
int i;
|
163
|
+
|
164
|
+
for (i = 0; i < region->num_regs; i++) {
|
165
|
+
rb_ary_push( begins, INT2FIX( region->beg[i] ) );
|
166
|
+
rb_ary_push( ends, INT2FIX( region->end[i] ) );
|
167
|
+
}
|
168
|
+
VALUE kMatchData = rb_const_get( mOniguruma, rb_intern( "MatchData" ) );
|
169
|
+
VALUE kORegexp = rb_const_get( mOniguruma, rb_intern( "ORegexp" ) );
|
170
|
+
VALUE matchData = rb_funcall(kMatchData, rb_intern("new"), 4, string_str, begins, ends, nameHash );
|
171
|
+
rb_cv_set( kORegexp, "@@last_match", matchData );
|
172
|
+
|
173
|
+
onig_region_free(region, 1 );
|
174
|
+
return matchData;
|
175
|
+
} else if (r == ONIG_MISMATCH) {
|
176
|
+
onig_region_free(region, 1 );
|
177
|
+
return Qnil;
|
178
|
+
} else {
|
179
|
+
onig_region_free(region, 1 );
|
180
|
+
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
181
|
+
onig_error_code_to_str(s, r);
|
182
|
+
rb_raise(rb_eException, "Oniguruma Error: %s", s);
|
183
|
+
}
|
184
|
+
|
185
|
+
}
|
186
|
+
|
187
|
+
void Init_oregexp() {
|
188
|
+
mOniguruma = rb_define_module("Oniguruma");
|
189
|
+
VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
|
190
|
+
rb_define_alloc_func(cORegexp, oregexp_allocate);
|
191
|
+
rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
|
192
|
+
rb_define_method( cORegexp, "match", oregexp_match, 1 );
|
193
|
+
|
194
|
+
}
|
data/lib/oniguruma.rb
ADDED
@@ -0,0 +1,491 @@
|
|
1
|
+
require 'oregexp'
|
2
|
+
|
3
|
+
module Oniguruma
|
4
|
+
OPTION_NONE = 0
|
5
|
+
OPTION_IGNORECASE = 1
|
6
|
+
OPTION_EXTEND = (OPTION_IGNORECASE << 1)
|
7
|
+
OPTION_MULTILINE = (OPTION_EXTEND << 1)
|
8
|
+
OPTION_SINGLELINE = (OPTION_MULTILINE << 1)
|
9
|
+
OPTION_FIND_LONGEST = (OPTION_SINGLELINE << 1)
|
10
|
+
OPTION_FIND_NOT_EMPTY = (OPTION_FIND_LONGEST << 1)
|
11
|
+
OPTION_NEGATE_SINGLELINE = (OPTION_FIND_NOT_EMPTY << 1)
|
12
|
+
OPTION_DONT_CAPTURE_GROUP = (OPTION_NEGATE_SINGLELINE << 1)
|
13
|
+
OPTION_CAPTURE_GROUP = (OPTION_DONT_CAPTURE_GROUP << 1)
|
14
|
+
OPTION_NOTBOL = (OPTION_CAPTURE_GROUP << 1)
|
15
|
+
OPTION_NOTEOL = (OPTION_NOTBOL << 1)
|
16
|
+
OPTION_POSIX_REGION = (OPTION_NOTEOL << 1)
|
17
|
+
OPTION_MAXBIT = OPTION_POSIX_REGION
|
18
|
+
OPTION_DEFAULT = OPTION_NONE
|
19
|
+
|
20
|
+
SYNTAX_ASIS = 0
|
21
|
+
SYNTAX_POSIX_BASIC = 1
|
22
|
+
SYNTAX_POSIX_EXTENDED = 2
|
23
|
+
SYNTAX_EMACS = 3
|
24
|
+
SYNTAX_GREP = 4
|
25
|
+
SYNTAX_GNU_REGEX = 5
|
26
|
+
SYNTAX_JAVA = 6
|
27
|
+
SYNTAX_PERL = 7
|
28
|
+
SYNTAX_PERL_NG = 8
|
29
|
+
SYNTAX_RUBY = 9
|
30
|
+
SYNTAX_DEFAULT = 10
|
31
|
+
|
32
|
+
ENCODING_ASCII = 0
|
33
|
+
ENCODING_ISO_8859_1 = 1
|
34
|
+
ENCODING_ISO_8859_2 = 2
|
35
|
+
ENCODING_ISO_8859_3 = 3
|
36
|
+
ENCODING_ISO_8859_4 = 4
|
37
|
+
ENCODING_ISO_8859_5 = 5
|
38
|
+
ENCODING_ISO_8859_6 = 6
|
39
|
+
ENCODING_ISO_8859_7 = 7
|
40
|
+
ENCODING_ISO_8859_8 = 8
|
41
|
+
ENCODING_ISO_8859_9 = 9
|
42
|
+
ENCODING_ISO_8859_10 = 10
|
43
|
+
ENCODING_ISO_8859_11 = 11
|
44
|
+
ENCODING_ISO_8859_12 = 12
|
45
|
+
ENCODING_ISO_8859_13 = 13
|
46
|
+
ENCODING_ISO_8859_14 = 14
|
47
|
+
ENCODING_ISO_8859_15 = 15
|
48
|
+
ENCODING_ISO_8859_16 = 16
|
49
|
+
ENCODING_UTF8 = 17
|
50
|
+
ENCODING_UTF16_BE = 18
|
51
|
+
ENCODING_UTF16_LE = 19
|
52
|
+
ENCODING_UTF32_BE = 20
|
53
|
+
ENCODING_UTF32_LE = 21
|
54
|
+
ENCODING_EUC_JP = 22
|
55
|
+
ENCODING_EUC_TW = 23
|
56
|
+
ENCODING_EUC_KR = 24
|
57
|
+
ENCODING_EUC_CN = 25
|
58
|
+
ENCODING_SJIS = 26
|
59
|
+
ENCODING_KOI8 = 27
|
60
|
+
ENCODING_KOI8_R = 28
|
61
|
+
ENCODING_CP1251 = 29
|
62
|
+
ENCODING_BIG5 = 30
|
63
|
+
ENCODING_GB18030 = 31
|
64
|
+
ENCODING_UNDEF = 32
|
65
|
+
|
66
|
+
|
67
|
+
class ORegexp
|
68
|
+
|
69
|
+
class << self
|
70
|
+
# :stopdoc:
|
71
|
+
alias compile new
|
72
|
+
# :startdoc:
|
73
|
+
|
74
|
+
# call-seq:
|
75
|
+
# ORegexp.escape(str) => a_str
|
76
|
+
# ORegexp.quote(str) => a_str
|
77
|
+
#
|
78
|
+
# Escapes any characters that would have special meaning in a regular
|
79
|
+
# expression. Returns a new escaped string, or self if no characters are
|
80
|
+
# escaped. For any string,
|
81
|
+
# <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true.
|
82
|
+
#
|
83
|
+
# ORegexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
|
84
|
+
#
|
85
|
+
|
86
|
+
def escape( *args )
|
87
|
+
Regexp.escape( *args )
|
88
|
+
end
|
89
|
+
# :stopdoc:
|
90
|
+
alias quote escape
|
91
|
+
# :startdoc:
|
92
|
+
|
93
|
+
# call-seq:
|
94
|
+
# ORegexp.last_match => matchdata
|
95
|
+
# ORegexp.last_match(fixnum) => str
|
96
|
+
#
|
97
|
+
# The first form returns the <code>MatchData</code> object generated by the
|
98
|
+
# last successful pattern match. The second form returns the nth field in this
|
99
|
+
# <code>MatchData</code> object.
|
100
|
+
#
|
101
|
+
# ORegexp.new( 'c(.)t' ) =~ 'cat' #=> 0
|
102
|
+
# ORegexp.last_match #=> #<MatchData:0x401b3d30>
|
103
|
+
# ORegexp.last_match(0) #=> "cat"
|
104
|
+
# ORegexp.last_match(1) #=> "a"
|
105
|
+
# ORegexp.last_match(2) #=> nil
|
106
|
+
|
107
|
+
def last_match( index = nil)
|
108
|
+
if index
|
109
|
+
@@last_match[index]
|
110
|
+
else
|
111
|
+
@@last_match
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# :stopdoc:
|
117
|
+
alias old_initialize initialize
|
118
|
+
# :startdoc:
|
119
|
+
|
120
|
+
# Constructs a new regular expression from <i>pattern</i>, which is a
|
121
|
+
# <code>String</code>. The parameter <i>options</i> is a <code>Hash</code>
|
122
|
+
# of the form:
|
123
|
+
#
|
124
|
+
# <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
|
125
|
+
#
|
126
|
+
# Where <code>option_value</code> is a bitwise <code>OR</code> of
|
127
|
+
# <code>Oniguruma::OPTION_XXX</code> constants; <code>encoding_value</code>
|
128
|
+
# is one of <code>Oniguruma::ENCODING_XXX</code> constants; and
|
129
|
+
# <code>syntax_value</code> is one of <code>Oniguruma::SYNTAX_XXX</code>
|
130
|
+
# constants.
|
131
|
+
#
|
132
|
+
# r1 = ORegexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
|
133
|
+
# r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE ) #=> /cat/i
|
134
|
+
# r3 = ORegexp.new('dog', :options => OPTION_EXTEND ) #=> /dog/x
|
135
|
+
#
|
136
|
+
# #Accept java syntax on SJIS encoding:
|
137
|
+
# r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
|
138
|
+
|
139
|
+
def initialize( pattern, options = {} )
|
140
|
+
defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
|
141
|
+
old_initialize( pattern, defaults.merge( options ).freeze )
|
142
|
+
end
|
143
|
+
|
144
|
+
# call-seq:
|
145
|
+
# rxp == other_rxp => true or false
|
146
|
+
# rxp.eql?(other_rxp) => true or false
|
147
|
+
#
|
148
|
+
# Equality---Two regexps are equal if their patterns are identical, they have
|
149
|
+
# the same character set code, and their <code>#casefold?</code> values are the
|
150
|
+
# same.
|
151
|
+
|
152
|
+
def == regexp
|
153
|
+
@pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
|
154
|
+
end
|
155
|
+
alias eql? ==
|
156
|
+
|
157
|
+
# call-seq:
|
158
|
+
# rxp.casefold? => true or false
|
159
|
+
#
|
160
|
+
# Returns the value of the case-insensitive flag.
|
161
|
+
|
162
|
+
def casefold?
|
163
|
+
(@options[:options] & OPTION_IGNORECASE) > 0
|
164
|
+
end
|
165
|
+
|
166
|
+
# call-seq:
|
167
|
+
# rxp.kcode => int
|
168
|
+
#
|
169
|
+
# Returns the character set code for the regexp.
|
170
|
+
def kcode
|
171
|
+
@options[:encoding]
|
172
|
+
end
|
173
|
+
|
174
|
+
# call-seq:
|
175
|
+
# rxp.options => fixnum
|
176
|
+
#
|
177
|
+
# Returns the set of bits corresponding to the options used when creating this
|
178
|
+
# ORegexp (see <code>ORegexp::new</code> for details). Note that additional bits
|
179
|
+
# may be set in the returned options: these are used internally by the regular
|
180
|
+
# expression code. These extra bits are ignored if the options are passed to
|
181
|
+
# <code>ORegexp::new</code>.
|
182
|
+
#
|
183
|
+
# Oniguruma::OPTION_IGNORECASE #=> 1
|
184
|
+
# Oniguruma::OPTION_EXTEND #=> 2
|
185
|
+
# Oniguruma::OPTION_MULTILINE #=> 4
|
186
|
+
#
|
187
|
+
# ORegexp.new( 'abc', :options => Oniguruma::OPTION_EXTEND ).options #=> 2
|
188
|
+
|
189
|
+
def options
|
190
|
+
@options[:options]
|
191
|
+
end
|
192
|
+
|
193
|
+
# call-seq:
|
194
|
+
# rxp.to_s => str
|
195
|
+
#
|
196
|
+
# Returns a string containing the regular expression and its options (using the
|
197
|
+
# <code>(?xxx:yyy)</code> notation. This string can be fed back in to
|
198
|
+
# <code>Regexp::new</code> to a regular expression with the same semantics as
|
199
|
+
# the original. (However, <code>Regexp#==</code> may not return true when
|
200
|
+
# comparing the two, as the source of the regular expression itself may
|
201
|
+
# differ, as the example shows). <code>Regexp#inspect</code> produces a
|
202
|
+
# generally more readable version of <i>rxp</i>.
|
203
|
+
#
|
204
|
+
# r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
|
205
|
+
# s1 = r1.to_s #=> "(?ix-m:ab+c)"
|
206
|
+
# r2 = ORegexp.new(s1) #=> /(?ix-m:ab+c)/
|
207
|
+
# r1 == r2 #=> false
|
208
|
+
# r1.source #=> "ab+c"
|
209
|
+
# r2.source #=> "(?ix-m:ab+c)"
|
210
|
+
|
211
|
+
def to_s
|
212
|
+
opt_str = "(?"
|
213
|
+
opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
|
214
|
+
opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
|
215
|
+
opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
|
216
|
+
unless opt_str == "(?imx"
|
217
|
+
opt_str += "-"
|
218
|
+
opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) == 0
|
219
|
+
opt_str += "m" if (@options[:options] & OPTION_MULTILINE) == 0
|
220
|
+
opt_str += "x" if (@options[:options] & OPTION_EXTEND) == 0
|
221
|
+
end
|
222
|
+
opt_str += ")"
|
223
|
+
opt_str + ORegexp.escape( @pattern )
|
224
|
+
end
|
225
|
+
|
226
|
+
|
227
|
+
# call-seq:
|
228
|
+
# rxp.inspect => string
|
229
|
+
#
|
230
|
+
# Returns a readable version of <i>rxp</i>
|
231
|
+
#
|
232
|
+
# ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect => /cat/im
|
233
|
+
# ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s => (?im-x)cat
|
234
|
+
|
235
|
+
def inspect
|
236
|
+
opt_str = ""
|
237
|
+
opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
|
238
|
+
opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
|
239
|
+
opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
|
240
|
+
"/" + ORegexp.escape( @pattern ) + "/" + opt_str
|
241
|
+
end
|
242
|
+
|
243
|
+
# call-seq:
|
244
|
+
# rxp =~ string => int or nil
|
245
|
+
#
|
246
|
+
# Matches <code>rxp</code> against <code>string</code>, returning the offset of the
|
247
|
+
# start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
|
248
|
+
# <code>MatchData</code> or <code>nil</code>.
|
249
|
+
#
|
250
|
+
# ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
|
251
|
+
# ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
|
252
|
+
|
253
|
+
def =~ string
|
254
|
+
return nil unless string
|
255
|
+
m = match( string )
|
256
|
+
return nil unless m
|
257
|
+
m.begin
|
258
|
+
end
|
259
|
+
|
260
|
+
# call-seq:
|
261
|
+
# rxp === str => true or false
|
262
|
+
#
|
263
|
+
# Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
|
264
|
+
#
|
265
|
+
# a = "HELLO"
|
266
|
+
# case a
|
267
|
+
# when ORegexp.new('^[a-z]*$'); print "Lower case\n"
|
268
|
+
# when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
|
269
|
+
# else; print "Mixed case\n"
|
270
|
+
# end
|
271
|
+
#
|
272
|
+
# <em>produces:</em>
|
273
|
+
#
|
274
|
+
# Upper case
|
275
|
+
|
276
|
+
alias === =~
|
277
|
+
|
278
|
+
def source
|
279
|
+
@pattern.freeze
|
280
|
+
end
|
281
|
+
|
282
|
+
def match_all string
|
283
|
+
matches = []
|
284
|
+
positions = []
|
285
|
+
position = 0
|
286
|
+
tmp_string = string
|
287
|
+
while tmp_string != ""
|
288
|
+
if m = match( tmp_string )
|
289
|
+
matches << m
|
290
|
+
positions << position
|
291
|
+
tmp_string = m.post_match
|
292
|
+
position += m.end
|
293
|
+
#if m.end == m.begin
|
294
|
+
# tmp_string = tmp_string[1..-1]
|
295
|
+
# position += 1
|
296
|
+
#end
|
297
|
+
else
|
298
|
+
break
|
299
|
+
end
|
300
|
+
end
|
301
|
+
if matches.size > 0
|
302
|
+
MultiMatchData.new( string, matches, positions )
|
303
|
+
else
|
304
|
+
nil
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
def sub string, replacement = nil
|
309
|
+
matches = match( string )
|
310
|
+
if matches
|
311
|
+
replacement = yield matches[0] unless replacement
|
312
|
+
# Rebuild the result around the span the regexp actually matched. Calling
# String#sub on the whole string with the matched text would replace its first
# literal occurrence, which can sit before the real match (e.g. with a
# look-behind). The inner sub keeps String#sub's handling of the replacement.
matches.pre_match + matches[0].sub( matches[0], replacement ) + matches.post_match
|
313
|
+
else
|
314
|
+
return string
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
def gsub string, replacement = nil
|
319
|
+
result = string
|
320
|
+
matches = match_all( string )
|
321
|
+
string_replace = replacement
|
322
|
+
if matches
|
323
|
+
matches.each do |m, p|
|
324
|
+
replacement = yield( m[0], m ) unless string_replace
|
325
|
+
result = result.sub( m[0], replacement )
|
326
|
+
end
|
327
|
+
end
|
328
|
+
result
|
329
|
+
end
|
330
|
+
|
331
|
+
def sub! string, replacement = nil
|
332
|
+
matches = match( string )
|
333
|
+
if matches
|
334
|
+
replacement = yield matches[0] unless replacement
|
335
|
+
# Overwrite exactly the span the regexp matched. String#sub! with the matched
# text would replace its first literal occurrence, which can sit before the
# real match (e.g. with a look-behind). The modified string is returned, as
# before.
string[matches.begin, matches[0].length] = matches[0].sub( matches[0], replacement )
string
|
336
|
+
else
|
337
|
+
return string
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
341
|
+
def gsub! string, replacement = nil
|
342
|
+
matches = match_all( string )
|
343
|
+
string_replace = replacement
|
344
|
+
if matches
|
345
|
+
matches.each do |m, p|
|
346
|
+
replacement = yield( m[0], m ) unless string_replace
|
347
|
+
string.sub!( m[0], replacement )
|
348
|
+
end
|
349
|
+
end
|
350
|
+
string
|
351
|
+
end
|
352
|
+
end
|
353
|
+
|
354
|
+
class MultiMatchData
|
355
|
+
def initialize( string, matches, positions )
|
356
|
+
@matches = matches
|
357
|
+
@positions = positions
|
358
|
+
@string = string
|
359
|
+
end
|
360
|
+
|
361
|
+
def position index
|
362
|
+
@positions[index]
|
363
|
+
end
|
364
|
+
|
365
|
+
def [] ( value1, value2 = nil )
|
366
|
+
unless value2
|
367
|
+
@matches[value1]
|
368
|
+
else
|
369
|
+
@matches[value1, value2]
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
def begin index
|
374
|
+
@matches[index].begin + @positions[index]
|
375
|
+
end
|
376
|
+
|
377
|
+
def end index
|
378
|
+
@matches[index].end + @positions[index]
|
379
|
+
end
|
380
|
+
|
381
|
+
def length
|
382
|
+
@matches.size
|
383
|
+
end
|
384
|
+
alias size length
|
385
|
+
|
386
|
+
def offset index
|
387
|
+
[self.begin(index), self.end(index) ]
|
388
|
+
end
|
389
|
+
|
390
|
+
def string
|
391
|
+
@string.freeze
|
392
|
+
end
|
393
|
+
|
394
|
+
def to_a
|
395
|
+
@matches
|
396
|
+
end
|
397
|
+
|
398
|
+
def each
|
399
|
+
@matches.size.times do |i|
|
400
|
+
yield @matches[i], @positions[i]
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
class MatchData
|
406
|
+
def initialize( string, starts, ends, names )
|
407
|
+
@string = string
|
408
|
+
@starts = starts
|
409
|
+
@ends = ends
|
410
|
+
@matches = []
|
411
|
+
@starts.size.times do |i|
|
412
|
+
@matches << @string[@starts[i]...@ends[i]]
|
413
|
+
end
|
414
|
+
@match_count = @matches.size
|
415
|
+
@start_pos = 0
|
416
|
+
@names = names
|
417
|
+
end
|
418
|
+
|
419
|
+
def [] ( value1, value2 = nil )
|
420
|
+
unless value2
|
421
|
+
if index = to_index( value1 )
|
422
|
+
@matches[index]
|
423
|
+
else
|
424
|
+
nil
|
425
|
+
end
|
426
|
+
else
|
427
|
+
@matches[value1, value2]
|
428
|
+
end
|
429
|
+
end
|
430
|
+
|
431
|
+
def to_index name
|
432
|
+
if name.is_a? Symbol
|
433
|
+
@names[name]
|
434
|
+
else
|
435
|
+
name
|
436
|
+
end
|
437
|
+
end
|
438
|
+
|
439
|
+
def begin index = 0
|
440
|
+
@starts[to_index( index )]
|
441
|
+
end
|
442
|
+
|
443
|
+
def end index = 0
|
444
|
+
@ends[to_index( index )]
|
445
|
+
end
|
446
|
+
|
447
|
+
def captures
|
448
|
+
@matches[1..-1]
|
449
|
+
end
|
450
|
+
|
451
|
+
def length
|
452
|
+
@match_count
|
453
|
+
end
|
454
|
+
alias size length
|
455
|
+
|
456
|
+
def offset index = 0
|
457
|
+
[@starts[to_index( index )], @ends[to_index( index )]]
|
458
|
+
end
|
459
|
+
|
460
|
+
def post_match
|
461
|
+
@string[@ends[0], @string.length]
|
462
|
+
end
|
463
|
+
|
464
|
+
def pre_match
|
465
|
+
@string[0, @starts[0]]
|
466
|
+
end
|
467
|
+
|
468
|
+
def select &block
|
469
|
+
@matches.select( &block )
|
470
|
+
end
|
471
|
+
|
472
|
+
def string
|
473
|
+
@string.freeze
|
474
|
+
end
|
475
|
+
|
476
|
+
def to_a
|
477
|
+
@matches
|
478
|
+
end
|
479
|
+
|
480
|
+
def to_s
|
481
|
+
@matches[0]
|
482
|
+
end
|
483
|
+
|
484
|
+
def values_at *values
|
485
|
+
result = []
|
486
|
+
values.each { |v| result << @matches[v] }
|
487
|
+
result
|
488
|
+
end
|
489
|
+
end
|
490
|
+
end
|
491
|
+
|
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'oniguruma'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
|
5
|
+
class ORegexpTestCase < Test::Unit::TestCase
|
6
|
+
def test_initialization
|
7
|
+
assert_nothing_raised do
|
8
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_compile
|
13
|
+
assert_nothing_raised do
|
14
|
+
reg = Oniguruma::ORegexp.compile( "(3.)(.*)(3.)" )
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_escape
|
19
|
+
assert_equal( '\\\\\*\?\{\}\.', Oniguruma::ORegexp.escape('\\*?{}.') )
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_last_match
|
23
|
+
assert_equal( 0, Oniguruma::ORegexp.new( 'c(.)t') =~ 'cat' )
|
24
|
+
assert_equal( "cat", Oniguruma::ORegexp.last_match(0) )
|
25
|
+
assert_equal( "a", Oniguruma::ORegexp.last_match(1) )
|
26
|
+
assert_equal( nil, Oniguruma::ORegexp.last_match(2) )
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_bad_initialization
|
30
|
+
assert_raises(Exception) do
|
31
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.))" )
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_match
|
36
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
37
|
+
assert_not_nil( reg.match( "12345634" ) )
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_no_match
|
41
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
42
|
+
assert_nil( reg.match( "12145614" ) )
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_sub
|
46
|
+
reg = Oniguruma::ORegexp.new( 'pe')
|
47
|
+
assert_equal( "**nelope", reg.sub( 'penelope', '**' ) )
|
48
|
+
assert_equal( "++nelope", reg.sub( 'penelope' ) { |m| '++' })
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_gsub
|
52
|
+
reg = Oniguruma::ORegexp.new( '\(\?#(\w+?)\)')
|
53
|
+
string = 'My favorite fruits are (?#fruit1), (?#fruit2), and (?#fruit3)'
|
54
|
+
assert_equal( "My favorite fruits are *, *, and *", reg.gsub( string, '*' ) )
|
55
|
+
fruits = { "fruit1" => "apples", "fruit2" => "bananas", "fruit3" => "grapes" }
|
56
|
+
assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |text, match| fruits[match[1]]} )
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_eql
|
60
|
+
assert_equal( Oniguruma::ORegexp.new( 'expression'), Oniguruma::ORegexp.new( 'expression') )
|
61
|
+
assert_not_equal( Oniguruma::ORegexp.new( 'expression'), Oniguruma::ORegexp.new( 'expresssion') )
|
62
|
+
assert_not_equal( Oniguruma::ORegexp.new( 'expression', :encoding => Oniguruma::ENCODING_ASCII ), Oniguruma::ORegexp.new( 'expression', :encoding => Oniguruma::ENCODING_ISO_8859_1 ) )
|
63
|
+
assert_not_equal( Oniguruma::ORegexp.new( 'expression', :options => Oniguruma::OPTION_IGNORECASE ), Oniguruma::ORegexp.new( 'expression', :options => Oniguruma::OPTION_NONE ) )
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_case_eql
|
67
|
+
a = "HELLO"
|
68
|
+
result = ""
|
69
|
+
case a
|
70
|
+
when Oniguruma::ORegexp.new('^[a-z]*$'); result = "Lower case\n"
|
71
|
+
when Oniguruma::ORegexp.new('^[A-Z]*$'); result = "Upper case\n"
|
72
|
+
else; result = "Mixed case\n"
|
73
|
+
end
|
74
|
+
|
75
|
+
assert_equal( "Upper case\n", result )
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_operator_match
|
79
|
+
assert_equal( nil, Oniguruma::ORegexp.new( 'SIT' ) =~ "insensitive" )
|
80
|
+
assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', :options => Oniguruma::OPTION_IGNORECASE ) =~ "insensitive" )
|
81
|
+
end
|
82
|
+
|
83
|
+
# def test_operator_match_2
|
84
|
+
# $_ = "input data"
|
85
|
+
# assert_equal( 7, ~Oniguruma::ORegexp.new( 'at' ) )
|
86
|
+
# end
|
87
|
+
|
88
|
+
def test_inspect
|
89
|
+
assert_equal( "/cat/im", Oniguruma::ORegexp.new( 'cat', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_MULTILINE ).inspect )
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_to_s
|
93
|
+
assert_equal( "(?im-x)cat", Oniguruma::ORegexp.new( 'cat', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_MULTILINE ).to_s )
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_kcode
|
97
|
+
reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
|
98
|
+
assert_equal( Oniguruma::ENCODING_ASCII, reg.kcode )
|
99
|
+
end
|
100
|
+
|
101
|
+
# ORegexp#options is expected to return the numeric option mask given at
# construction time.
def test_options
  flags = Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_EXTEND
  reg = Oniguruma::ORegexp.new( 'abc', :options => flags )
  # NOTE(review): 3 is presumably IGNORECASE | EXTEND as a raw integer - confirm against the constants.
  assert_equal( 3, reg.options )
end
|
104
|
+
|
105
|
+
# ORegexp#source is expected to hand back the exact pattern string the
# regexp was created from, look-behind and escapes included.
def test_source
  pattern = '(?<=\n)\\.*ocatarinetabelachitchix'
  reg = Oniguruma::ORegexp.new( pattern )
  assert_equal( pattern, reg.source )
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# Tests for the match objects returned by Oniguruma::ORegexp#match and
# ORegexp#match_all. Most tests run the four-group pattern built in #setup
# against "THX1138.", where the expected overall match is "HX1138"
# (offsets 1...7) with captures "H", "X", "113" and "8".
class MatchDataTestCase < Test::Unit::TestCase
  def setup
    @reg = Oniguruma::ORegexp.new( '(.)(.)(\d+)(\d)' )
  end

  # Indexing by integer, by (start, length) and by range, including a
  # negative start index.
  def test_square_brackets
    matches = thx_match
    assert_equal( "HX1138", matches[0] )
    assert_equal( ["H", "X"], matches[1, 2] )
    assert_equal( ["H", "X", "113"], matches[1..3] )
    assert_equal( ["X", "113"], matches[-3, 2] )
  end

  # Start offsets of the whole match and of group 2.
  def test_begin
    matches = thx_match
    assert_equal( 1, matches.begin(0) )
    assert_equal( 2, matches.begin(2) )
  end

  # Captures exclude the whole-match entry.
  def test_captures
    matches = thx_match
    assert_equal( ["H", "X", "113", "8" ], matches.captures )
  end

  # End offsets of the whole match and of group 2.
  def test_end
    matches = thx_match
    assert_equal( 7, matches.end(0) )
    assert_equal( 3, matches.end(2) )
  end

  # length and size both count the whole match plus the four groups.
  def test_size
    matches = thx_match
    assert_equal( 5, matches.length )
    assert_equal( 5, matches.size )
  end

  # offset returns [begin, end] pairs.
  def test_offset
    matches = thx_match
    assert_equal( [1, 7], matches.offset(0) )
    assert_equal( [6, 7], matches.offset(4) )
  end

  # Text following the overall match.
  def test_post_match
    matches = thx_match( "THX1138: The Movie" )
    assert_equal( ": The Movie", matches.post_match )
  end

  # Text preceding the overall match.
  def test_pre_match
    matches = thx_match
    assert_equal( "T", matches.pre_match )
  end

  # select filters the matched strings with the given block.
  def test_select
    matches = thx_match( "THX1138: The Movie" )
    assert_equal( ["HX1138", "113"], matches.select{ |v| v =~ /\d\d/} )
  end

  # string returns the subject text and is expected to be frozen.
  def test_string
    matches = thx_match
    assert_equal( "THX1138.", matches.string )
    assert( matches.string.frozen? )
  end

  # to_a lists the whole match followed by every group.
  def test_to_a
    matches = thx_match
    assert_equal( ["HX1138", "H", "X", "113", "8" ], matches.to_a )
  end

  # to_s is the text of the whole match.
  def test_to_s
    matches = thx_match
    assert_equal( "HX1138", matches.to_s )
  end

  # values_at accepts several indices, negative ones included.
  def test_values_at
    matches = thx_match( "THX1138: The Movie" )
    assert_equal( ["HX1138", "X", "113"], matches.values_at( 0, 2, -2) )
  end

  # match_all collects every occurrence of the pattern in the subject.
  def test_match_all
    reg = Oniguruma::ORegexp.new( 'ca' )
    matches = reg.match_all( 'ocatacachaca' )
    assert_equal( 3, matches.size )
    assert_equal( 7, matches.position(2) )
    assert_equal( "ca", matches.string[matches.begin(1)...matches.end(1)])
  end

  # A lazy \s*? followed by a line break is expected to consume only the
  # first newline, leaving the remaining four in post_match.
  def test_match_empty_string
    reg = Oniguruma::ORegexp.new( '^\s*?(\n|\r)', :options => Oniguruma::OPTION_MULTILINE )
    matches = reg.match( "\n\n\n\n\n" )
    assert_not_nil( matches )
    assert_equal( "\n\n\n\n", matches.post_match )
  end

  # Named groups are looked up with symbols; an unknown name is expected to
  # yield nil.
  def test_group_by_name
    reg = Oniguruma::ORegexp.new( '(?<begin>\()(?<body>.*)(?<end>\))', :options => Oniguruma::OPTION_MULTILINE )
    matches = reg.match( "blah (content) blah" )
    assert_not_nil( matches )
    assert_equal( '(', matches[:begin] )
    assert_equal( 'content', matches[:body] )
    assert_equal( ')', matches[:end] )
    assert_nil( matches[:inexistent] )
  end

  private

  # Runs the shared pattern from #setup against +subject+. The default
  # literal is evaluated on every call, so each test gets its own string.
  def thx_match( subject = "THX1138." )
    @reg.match( subject )
  end
end
|
metadata
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: oniguruma
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.9.0
|
7
|
+
date: 2007-03-22 00:00:00 +01:00
|
8
|
+
summary: Bindings for the oniguruma regular expression library
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
- ext
|
12
|
+
email: dix_ans@yahoo.com
|
13
|
+
homepage: http://oniguruma.rubyforge.org
|
14
|
+
rubyforge_project: oniguruma
|
15
|
+
description: Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
|
16
|
+
autorequire:
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
platform: ruby
|
27
|
+
signing_key:
|
28
|
+
cert_chain:
|
29
|
+
post_install_message:
|
30
|
+
authors:
|
31
|
+
- Dizan Vasquez
|
32
|
+
files:
|
33
|
+
- History.txt
|
34
|
+
- Manifest.txt
|
35
|
+
- README.txt
|
36
|
+
- Rakefile
|
37
|
+
- lib/oniguruma.rb
|
38
|
+
- ext/oregexp.c
|
39
|
+
- test/test_oniguruma.rb
|
40
|
+
test_files:
|
41
|
+
- test/test_oniguruma.rb
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
extra_rdoc_files: []
|
45
|
+
|
46
|
+
executables: []
|
47
|
+
|
48
|
+
extensions:
|
49
|
+
- ext/extconf.rb
|
50
|
+
requirements: []
|
51
|
+
|
52
|
+
dependencies: []
|
53
|
+
|