oniguruma 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,5 @@
1
+ == 1.0.0 / 2007-03-19
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
data/Manifest.txt ADDED
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/oniguruma.rb
6
+ ext/oregexp.c
7
+ test/test_oniguruma.rb
data/README.txt ADDED
@@ -0,0 +1,65 @@
1
+ == ONIGURUMA FOR RUBY:
2
+
3
+ Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
4
+
5
+ == FEATURES:
6
+
7
+ * Increased performance.
8
+ * Same interface as the standard Regexp class (easy transition!).
9
+ * Support for named groups, look-ahead, look-behind, and other
10
+ cool features!
11
+
12
+ == SYNOPSIS:
13
+
14
+ reg = Oniguruma::ORegexp.new( '(?<before>.*)(a)(?<after>.*)' )
15
+ match = reg.match( 'terraforming' )
16
+ puts match[0] # => 'terraforming'
17
+ puts match[:before] # => 'terr'
18
+ puts match[:after] # => 'forming'
19
+
20
+ == REQUIREMENTS:
21
+
22
+ * Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
23
+
24
+ == INSTALL:
25
+
26
+ sudo gem install -r oniguruma
27
+
28
+ == BUGS/PROBLEMS/INCOMPATIBILITIES:
29
+
30
+ * <code>ORegexp#~</code> is not implemented.
31
+ * <code>ORegexp#kcode</code> results are not compatible with <code>Regexp</code>.
32
+ * <code>ORegexp</code> options set in the string are not visible, this affects
33
+ <code>ORegexp#options</code>, <code>ORegexp#to_s</code>, <code>ORegexp#inspect</code>
34
+ and <code>ORegexp#==</code>.
35
+
36
+ == TODO:
37
+
38
+ * Complete documentation (methods, oniguruma syntax).
39
+
40
+ == CREDITS:
41
+
42
+ * K.Kosako, for his great library.
43
+ * A lot of the documentation has been copied from the original Ruby Regexp documentation.
44
+
45
+ == LICENSE:
46
+
47
+ New BSD License
48
+
49
+ Copyright (c) 2007, Dizan Vasquez
50
+ All rights reserved.
51
+
52
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
53
+
54
+ * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
55
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
56
+ documentation and/or other materials provided with the distribution.
57
+ * Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this
58
+ software without specific prior written permission.
59
+
60
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
61
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
62
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
63
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
64
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
65
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
require 'rubygems'
require 'hoe'

# Drop any 'hoe' entry from the dependency list Hoe computes for the gem.
class Hoe
  def extra_deps
    @extra_deps.reject { |dep| Array(dep).first == 'hoe' }
  end
end

# Gem specification for the oniguruma bindings.
Hoe.new('oniguruma', '0.9.0') do |p|
  p.rubyforge_name = 'oniguruma'
  p.author = 'Dizan Vasquez'
  p.email = 'dix_ans@yahoo.com'
  p.summary = 'Bindings for the oniguruma regular expression library'
  # Double quotes are required here: '\n\n' in single quotes is a literal
  # backslash-n sequence, not a paragraph separator.
  p.description = p.paragraphs_of('README.txt', 1).join("\n\n")
  p.url = 'http://oniguruma.rubyforge.org'
  # The C extension is built from ext/extconf.rb at install time.
  p.spec_extras[:extensions] = ["ext/extconf.rb"]
  p.rdoc_pattern = /^(lib|bin|ext)|txt$/
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
end
17
+
18
+
data/ext/extconf.rb ADDED
@@ -0,0 +1,3 @@
1
require 'mkmf'

# Stop at configure time with a clear message when libonig is missing,
# instead of generating a Makefile that can only fail at link time.
unless have_library("onig")
  abort "oniguruma: the Oniguruma library (libonig) could not be found"
end

# Generates the Makefile that builds the "oregexp" extension.
create_makefile( "oregexp" )
data/ext/oregexp.c ADDED
@@ -0,0 +1,194 @@
1
+ #include <ruby.h>
2
+ #include <oniguruma.h>
3
+ /*
4
+ TODO:
5
+ - Add named backreferences.
6
+ */
7
+
8
+ typedef struct _oregexp {
9
+ regex_t * reg;
10
+ } ORegexp;
11
+
12
+ VALUE mOniguruma;
13
+ VALUE nameHash;
14
+
15
+ static void oregexp_free( ORegexp * oregexp) {
16
+ onig_free( oregexp->reg );
17
+ free( oregexp );
18
+ }
19
+
20
+ static VALUE oregexp_allocate( VALUE klass ) {
21
+ ORegexp * oregexp = malloc( sizeof( ORegexp ) );
22
+ oregexp->reg = NULL;
23
+ return Data_Wrap_Struct( klass, 0, oregexp_free, oregexp );
24
+ }
25
+
26
+
27
+ static OnigEncodingType * int2encoding( int index ) {
28
+ switch( index ) {
29
+ case 0: return ONIG_ENCODING_ASCII;
30
+ case 1: return ONIG_ENCODING_ISO_8859_1;
31
+ case 2: return ONIG_ENCODING_ISO_8859_2;
32
+ case 3: return ONIG_ENCODING_ISO_8859_3;
33
+ case 4: return ONIG_ENCODING_ISO_8859_4;
34
+ case 5: return ONIG_ENCODING_ISO_8859_5;
35
+ case 6: return ONIG_ENCODING_ISO_8859_6;
36
+ case 7: return ONIG_ENCODING_ISO_8859_7;
37
+ case 8: return ONIG_ENCODING_ISO_8859_8;
38
+ case 9: return ONIG_ENCODING_ISO_8859_9;
39
+ case 10: return ONIG_ENCODING_ISO_8859_10;
40
+ case 11: return ONIG_ENCODING_ISO_8859_11;
41
+ case 12: return ONIG_ENCODING_ISO_8859_11;
42
+ case 13: return ONIG_ENCODING_ISO_8859_13;
43
+ case 14: return ONIG_ENCODING_ISO_8859_14;
44
+ case 15: return ONIG_ENCODING_ISO_8859_15;
45
+ case 16: return ONIG_ENCODING_ISO_8859_16;
46
+ case 17: return ONIG_ENCODING_UTF8;
47
+ case 18: return ONIG_ENCODING_UTF16_BE;
48
+ case 19: return ONIG_ENCODING_UTF16_LE;
49
+ case 20: return ONIG_ENCODING_UTF32_BE;
50
+ case 21: return ONIG_ENCODING_UTF32_LE;
51
+ case 22: return ONIG_ENCODING_EUC_JP;
52
+ case 23: return ONIG_ENCODING_EUC_TW;
53
+ case 24: return ONIG_ENCODING_EUC_KR;
54
+ case 25: return ONIG_ENCODING_EUC_CN;
55
+ case 26: return ONIG_ENCODING_SJIS;
56
+ /*case 27: return ONIG_ENCODING_KOI8;*/
57
+ case 28: return ONIG_ENCODING_KOI8_R;
58
+ case 29: return ONIG_ENCODING_CP1251;
59
+ case 30: return ONIG_ENCODING_BIG5;
60
+ case 31: return ONIG_ENCODING_GB18030;
61
+ case 32: return ONIG_ENCODING_UNDEF;
62
+ }
63
+ return ONIG_ENCODING_UNDEF;
64
+ }
65
+
66
+ static OnigSyntaxType * int2syntax( int index ) {
67
+ switch( index ) {
68
+ case 0: return ONIG_SYNTAX_ASIS;
69
+ case 1: return ONIG_SYNTAX_POSIX_BASIC;
70
+ case 2: return ONIG_SYNTAX_POSIX_EXTENDED;
71
+ case 3: return ONIG_SYNTAX_EMACS;
72
+ case 4: return ONIG_SYNTAX_GREP;
73
+ case 5: return ONIG_SYNTAX_GNU_REGEX;
74
+ case 6: return ONIG_SYNTAX_JAVA;
75
+ case 7: return ONIG_SYNTAX_PERL;
76
+ case 8: return ONIG_SYNTAX_PERL_NG;
77
+ case 9: return ONIG_SYNTAX_RUBY;
78
+ case 10: return ONIG_SYNTAX_DEFAULT;
79
+ }
80
+ return ONIG_SYNTAX_DEFAULT;
81
+ }
82
+
83
+ static int name_callback(
84
+ const UChar* name,
85
+ const UChar* name_end,
86
+ int ngroup_num,
87
+ int* group_nums,
88
+ regex_t* reg,
89
+ void* arg
90
+ ) {
91
+ int i, gn, ref;
92
+ OnigRegion *region = (OnigRegion* )arg;
93
+
94
+ for (i = 0; i < ngroup_num; i++) {
95
+ gn = group_nums[i];
96
+ ref = onig_name_to_backref_number(reg, name, name_end, region);
97
+ if (ref != gn )
98
+ rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
99
+ rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
100
+ }
101
+ return 0;
102
+ }
103
+
104
+ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
105
+ ORegexp *oregexp;
106
+ Data_Get_Struct( self, ORegexp, oregexp );
107
+
108
+ VALUE pattern_str = StringValue( pattern );
109
+ rb_iv_set( self, "@pattern", pattern_str );
110
+ rb_iv_set( self, "@options", options );
111
+ UChar* pat_ptr = RSTRING(pattern_str)->ptr;
112
+ int pat_len = RSTRING(pattern_str)->len;
113
+
114
+ VALUE rOptions = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
115
+ VALUE rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
116
+ VALUE rSyntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
117
+ int iOptions = NUM2INT( rOptions );
118
+ int iEncoding = int2encoding( rEncoding );
119
+ int iSyntax = int2syntax( rSyntax );
120
+
121
+
122
+ int r;
123
+ OnigErrorInfo einfo;
124
+ r = onig_new(&(oregexp->reg), pat_ptr, pat_ptr + pat_len, iOptions, iEncoding, iSyntax, &einfo);
125
+ if (r != ONIG_NORMAL) {
126
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
127
+ onig_error_code_to_str(s, r, &einfo);
128
+ rb_raise(rb_eException, "Oniguruma Error: %s", s);
129
+ }
130
+ return self;
131
+ }
132
+
133
+ /*
134
+ * call-seq:
135
+ * rxp.match(str) => matchdata or nil
136
+ *
137
+ * Returns a <code>MatchData</code> object describing the match, or
138
+ * <code>nil</code> if there was no match. This is equivalent to retrieving the
139
+ * value of the special variable <code>$~</code> following a normal match.
140
+ *
141
+ * /(.)(.)(.)/.match("abc")[2] #=> "b"
142
+ */
143
+ static VALUE oregexp_match( VALUE self, VALUE string ) {
144
+ ORegexp *oregexp;
145
+ Data_Get_Struct( self, ORegexp, oregexp );
146
+
147
+ VALUE string_str = StringValue( string );
148
+ UChar* str_ptr = RSTRING(string_str)->ptr;
149
+ int str_len = RSTRING(string_str)->len;
150
+
151
+ OnigRegion *region = onig_region_new();
152
+ int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
153
+ if (r >= 0) {
154
+
155
+ VALUE begins = rb_ary_new();
156
+ VALUE ends = rb_ary_new();
157
+ nameHash = rb_hash_new();
158
+
159
+ onig_foreach_name(oregexp->reg, name_callback, (void* )region);
160
+
161
+
162
+ int i;
163
+
164
+ for (i = 0; i < region->num_regs; i++) {
165
+ rb_ary_push( begins, INT2FIX( region->beg[i] ) );
166
+ rb_ary_push( ends, INT2FIX( region->end[i] ) );
167
+ }
168
+ VALUE kMatchData = rb_const_get( mOniguruma, rb_intern( "MatchData" ) );
169
+ VALUE kORegexp = rb_const_get( mOniguruma, rb_intern( "ORegexp" ) );
170
+ VALUE matchData = rb_funcall(kMatchData, rb_intern("new"), 4, string_str, begins, ends, nameHash );
171
+ rb_cv_set( kORegexp, "@@last_match", matchData );
172
+
173
+ onig_region_free(region, 1 );
174
+ return matchData;
175
+ } else if (r == ONIG_MISMATCH) {
176
+ onig_region_free(region, 1 );
177
+ return Qnil;
178
+ } else {
179
+ onig_region_free(region, 1 );
180
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
181
+ onig_error_code_to_str(s, r);
182
+ rb_raise(rb_eException, "Oniguruma Error: %s", s);
183
+ }
184
+
185
+ }
186
+
187
+ void Init_oregexp() {
188
+ mOniguruma = rb_define_module("Oniguruma");
189
+ VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
190
+ rb_define_alloc_func(cORegexp, oregexp_allocate);
191
+ rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
192
+ rb_define_method( cORegexp, "match", oregexp_match, 1 );
193
+
194
+ }
data/lib/oniguruma.rb ADDED
@@ -0,0 +1,491 @@
1
+ require 'oregexp'
2
+
3
+ module Oniguruma
4
# Option flags: each one is a distinct bit so they can be OR-ed together.
OPTION_NONE               = 0
OPTION_IGNORECASE         = 1
OPTION_EXTEND             = 1 << 1
OPTION_MULTILINE          = 1 << 2
OPTION_SINGLELINE         = 1 << 3
OPTION_FIND_LONGEST       = 1 << 4
OPTION_FIND_NOT_EMPTY     = 1 << 5
OPTION_NEGATE_SINGLELINE  = 1 << 6
OPTION_DONT_CAPTURE_GROUP = 1 << 7
OPTION_CAPTURE_GROUP      = 1 << 8
OPTION_NOTBOL             = 1 << 9
OPTION_NOTEOL             = 1 << 10
OPTION_POSIX_REGION       = 1 << 11
OPTION_MAXBIT             = OPTION_POSIX_REGION
OPTION_DEFAULT            = OPTION_NONE

# Syntax selectors (plain indexes, not bit flags).
SYNTAX_ASIS           = 0
SYNTAX_POSIX_BASIC    = 1
SYNTAX_POSIX_EXTENDED = 2
SYNTAX_EMACS          = 3
SYNTAX_GREP           = 4
SYNTAX_GNU_REGEX      = 5
SYNTAX_JAVA           = 6
SYNTAX_PERL           = 7
SYNTAX_PERL_NG        = 8
SYNTAX_RUBY           = 9
SYNTAX_DEFAULT        = 10

# Encoding selectors (plain indexes, not bit flags).
ENCODING_ASCII       = 0
ENCODING_ISO_8859_1  = 1
ENCODING_ISO_8859_2  = 2
ENCODING_ISO_8859_3  = 3
ENCODING_ISO_8859_4  = 4
ENCODING_ISO_8859_5  = 5
ENCODING_ISO_8859_6  = 6
ENCODING_ISO_8859_7  = 7
ENCODING_ISO_8859_8  = 8
ENCODING_ISO_8859_9  = 9
ENCODING_ISO_8859_10 = 10
ENCODING_ISO_8859_11 = 11
ENCODING_ISO_8859_12 = 12
ENCODING_ISO_8859_13 = 13
ENCODING_ISO_8859_14 = 14
ENCODING_ISO_8859_15 = 15
ENCODING_ISO_8859_16 = 16
ENCODING_UTF8        = 17
ENCODING_UTF16_BE    = 18
ENCODING_UTF16_LE    = 19
ENCODING_UTF32_BE    = 20
ENCODING_UTF32_LE    = 21
ENCODING_EUC_JP      = 22
ENCODING_EUC_TW      = 23
ENCODING_EUC_KR      = 24
ENCODING_EUC_CN      = 25
ENCODING_SJIS        = 26
ENCODING_KOI8        = 27
ENCODING_KOI8_R      = 28
ENCODING_CP1251      = 29
ENCODING_BIG5        = 30
ENCODING_GB18030     = 31
ENCODING_UNDEF       = 32
65
+
66
+
67
class ORegexp

  # No match has been recorded yet; without this, ORegexp.last_match would
  # raise NameError until the first successful match.
  @@last_match = nil

  class << self
    # :stopdoc:
    alias compile new
    # :startdoc:

    # call-seq:
    #     ORegexp.escape(str)   => a_str
    #     ORegexp.quote(str)    => a_str
    #
    # Escapes any characters that would have special meaning in a regular
    # expression. Delegates to <code>Regexp.escape</code>.
    #
    #    ORegexp.escape('\\*?{}.')   #=> \\\\\*\?\{\}\.
    #

    def escape( *args )
      Regexp.escape( *args )
    end
    # :stopdoc:
    alias quote escape
    # :startdoc:

    # call-seq:
    #    ORegexp.last_match           => matchdata
    #    ORegexp.last_match(fixnum)   => str
    #
    # The first form returns the <code>MatchData</code> object generated by the
    # last successful pattern match. The second form returns the nth field in this
    # <code>MatchData</code> object. Both return <code>nil</code> when no
    # match has been recorded.
    #
    #    ORegexp.new( 'c(.)t' ) =~ 'cat'       #=> 0
    #    ORegexp.last_match                    #=> #<MatchData:0x401b3d30>
    #    ORegexp.last_match(0)                 #=> "cat"
    #    ORegexp.last_match(1)                 #=> "a"
    #    ORegexp.last_match(2)                 #=> nil

    def last_match( index = nil )
      return @@last_match unless index
      @@last_match && @@last_match[index]
    end
  end

  # :stopdoc:
  alias old_initialize initialize
  # :startdoc:

  # Constructs a new regular expression from <i>pattern</i>, which is a
  # <code>String</code>. The parameter <i>options</i> is a <code>Hash</code>
  # of the form:
  #
  # <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
  #
  # Where <code>option_value</code> is a bitwise <code>OR</code> of
  # <code>Oniguruma::OPTION_XXX</code> constants; <code>encoding_value</code>
  # is one of <code>Oniguruma::ENCODING_XXX</code> constants; and
  # <code>syntax_value</code> is one of <code>Oniguruma::SYNTAX_XXX</code>
  # constants. Missing keys default to <code>OPTION_DEFAULT</code>,
  # <code>ENCODING_ASCII</code> and <code>SYNTAX_DEFAULT</code>.
  #
  #    r1 = ORegexp.new('^a-z+:\\s+\w+')                                            #=> /^a-z+:\s+\w+/
  #    r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE )                      #=> /cat/i
  #    r3 = ORegexp.new('dog', :options => OPTION_EXTEND )                          #=> /dog/x
  #
  #    #Accept java syntax on SJIS encoding:
  #    r4 = ORegexp.new('ape', :syntax  => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/

  def initialize( pattern, options = {} )
    defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
    old_initialize( pattern, defaults.merge( options ).freeze )
  end

  # call-seq:
  #    rxp == other_rxp      => true or false
  #    rxp.eql?(other_rxp)   => true or false
  #
  # Equality---Two regexps are equal if their patterns are identical, they have
  # the same character set code, and their <code>#casefold?</code> values are the
  # same. Objects that do not offer <code>source</code>, <code>kcode</code>
  # and <code>casefold?</code> are never equal to an ORegexp.

  def == regexp
    return false unless regexp.respond_to?( :source ) &&
                        regexp.respond_to?( :kcode ) &&
                        regexp.respond_to?( :casefold? )
    @pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
  end
  alias eql? ==

  # Hash code built from the same three attributes that <code>==</code>
  # compares, so equal regexps can be used interchangeably as Hash keys.

  def hash
    [@pattern, kcode, casefold?].hash
  end

  # call-seq:
  #    rxp.casefold?   => true or false
  #
  # Returns the value of the case-insensitive flag.

  def casefold?
    (@options[:options] & OPTION_IGNORECASE) > 0
  end

  # call-seq:
  #    rxp.kcode        => int
  #
  # Returns the character set code for the regexp (the <code>:encoding</code>
  # value it was created with).
  def kcode
    @options[:encoding]
  end

  # call-seq:
  #    rxp.options   => fixnum
  #
  # Returns the set of bits given as <code>:options</code> when creating this
  # ORegexp (see <code>ORegexp::new</code> for details).
  #
  #    Oniguruma::OPTION_IGNORECASE                                 #=> 1
  #    Oniguruma::OPTION_EXTEND                                     #=> 2
  #    Oniguruma::OPTION_MULTILINE                                  #=> 4
  #
  #    ORegexp.new(r.source, :options => Oniguruma::OPTION_EXTEND ).options  #=> 2

  def options
    @options[:options]
  end

  # call-seq:
  #    rxp.to_s   => str
  #
  # Returns the pattern prefixed with an inline option group that lists the
  # enabled flags among i, m and x and, after a dash, the disabled ones. The
  # pattern itself is emitted verbatim so that the result keeps its meaning
  # when it is compiled again.
  #
  #    r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
  #    s1 = r1.to_s                                                             #=> "(?ix-m)ab+c"
  #    r2 = ORegexp.new(s1)
  #    r1 == r2                                                                 #=> false
  #    r1.source                                                                #=> "ab+c"
  #    r2.source                                                                #=> "(?ix-m)ab+c"

  def to_s
    enabled = option_letters( true )
    disabled = option_letters( false )
    prefix = "(?" + enabled
    prefix += "-" + disabled unless disabled.empty?
    prefix + ")" + @pattern
  end


  # call-seq:
  #    rxp.inspect   => string
  #
  # Returns a readable version of <i>rxp</i>. Only unescaped forward slashes
  # in the pattern are escaped, so the delimiters stay unambiguous.
  #
  #    ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect  => /cat/im
  #    ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s     => (?im-x)cat

  def inspect
    # The first alternative consumes backslash pairs untouched, so a slash
    # that is already escaped is not escaped twice.
    body = @pattern.gsub( %r{(\\.)|/}m ) { $1 || '\/' }
    "/" + body + "/" + option_letters( true )
  end

  # call-seq:
  #    rxp =~ string  => int or nil
  #
  # Matches <code>rxp</code> against <code>string</code>, returning the offset of the
  # start of the match or <code>nil</code> if the match failed or
  # <code>string</code> is <code>nil</code>.
  #
  #    ORegexp.new( 'SIT' ) =~ "insensitive"                                 #=>    nil
  #    ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive"  #=>    5

  def =~ string
    return nil unless string
    m = match( string )
    return nil unless m
    m.begin( 0 )
  end

  # call-seq:
  #    rxp === str   => int or nil
  #
  # Case Equality---Synonym for <code>ORegexp#=~</code> used in case statements.
  #
  #    a = "HELLO"
  #    case a
  #    when ORegexp.new('^[a-z]*$'); print "Lower case\n"
  #    when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
  #    else;                         print "Mixed case\n"
  #    end
  #
  # <em>produces:</em>
  #
  #    Upper case

  alias === =~

  # Returns the pattern string this regexp was created from (frozen).

  def source
    @pattern.freeze
  end

  # Matches repeatedly, left to right, each search starting where the
  # previous match ended. Returns a <code>MultiMatchData</code> with every
  # match and the offset of the substring each one was found in, or
  # <code>nil</code> when nothing matched.
  #
  # Each search runs on the remaining substring, so a match and its offsets
  # are relative to that substring.

  def match_all string
    matches, positions = scan_matches( string )
    if matches.empty?
      nil
    else
      MultiMatchData.new( string, matches, positions )
    end
  end

  # Returns a copy of <code>string</code> with the first match replaced by
  # <code>replacement</code>, or by the value of the block, which receives
  # the matched text. Returns <code>string</code> itself when nothing matches.
  # Backslash sequences in the replacement are interpreted by
  # <code>String#sub</code>.

  def sub string, replacement = nil
    m = match( string )
    return string unless m
    splice_matches( string, [m], [0] ) { |md| replacement || yield( md[0] ) }
  end

  # Like #sub, but replaces every match found by the #match_all scan. The
  # block receives the matched text and its <code>MatchData</code>.

  def gsub string, replacement = nil
    matches, positions = scan_matches( string )
    return string if matches.empty?
    splice_matches( string, matches, positions ) { |md| replacement || yield( md[0], md ) }
  end

  # In-place version of #sub: modifies and returns <code>string</code>.

  def sub! string, replacement = nil
    m = match( string )
    return string unless m
    string.replace( splice_matches( string, [m], [0] ) { |md| replacement || yield( md[0] ) } )
  end

  # In-place version of #gsub: modifies and returns <code>string</code>.

  def gsub! string, replacement = nil
    matches, positions = scan_matches( string )
    return string if matches.empty?
    string.replace( splice_matches( string, matches, positions ) { |md| replacement || yield( md[0], md ) } )
  end

  private

  # Letters (in i, m, x order) of the flags whose state equals +enabled+.
  def option_letters( enabled )
    letters = ""
    [["i", OPTION_IGNORECASE], ["m", OPTION_MULTILINE], ["x", OPTION_EXTEND]].each do |letter, bit|
      letters += letter if ((@options[:options] & bit) > 0) == enabled
    end
    letters
  end

  # Collects successive matches as [matches, positions]; positions[i] is the
  # offset in +string+ of the substring matches[i] was searched in.
  def scan_matches( string )
    matches = []
    positions = []
    position = 0
    rest = string
    while rest != ""
      m = match( rest )
      break unless m
      matches << m
      positions << position
      consumed = m.end( 0 )
      # A zero-width match consumes nothing; step over one character so the
      # scan always advances instead of finding the same spot forever.
      consumed += 1 if consumed == m.begin( 0 )
      rest = rest[consumed..-1] || ""
      position += consumed
    end
    [matches, positions]
  end

  # Rebuilds +string+ with each match replaced, at its own position, by the
  # value the block returns for that match.
  def splice_matches( string, matches, positions )
    result = string[0, 0]
    cursor = 0
    matches.each_with_index do |m, i|
      text = m[0]
      result << string[cursor...(positions[i] + m.begin( 0 ))]
      # Running String#sub on the matched text alone keeps its handling of
      # backslash sequences in the replacement.
      result << text.sub( text, yield( m ) )
      cursor = positions[i] + m.end( 0 )
    end
    result << string[cursor..-1]
  end
end
353
+
354
# Result of ORegexp#match_all: the successive matches plus, for each one,
# the offset within the original string of the substring it was found in.
class MultiMatchData
  # string:: the string that was scanned
  # matches:: match objects, each relative to its own substring
  # positions:: offset of each substring within +string+
  def initialize( string, matches, positions )
    @matches = matches
    @positions = positions
    # Keep a private frozen copy: freezing the caller's own String from
    # #string would be a surprising side effect.
    @string = string.dup.freeze
  end

  # Offset in the original string of the substring match +index+ was found in.
  def position index
    @positions[index]
  end

  # Array-style access to the matches: [index], [range] or [start, length].
  def [] ( value1, value2 = nil )
    return @matches[value1, value2] if value2
    @matches[value1]
  end

  # Start offset of match +index+, relative to the original string.
  def begin index
    @matches[index].begin( 0 ) + @positions[index]
  end

  # End offset of match +index+, relative to the original string.
  def end index
    @matches[index].end( 0 ) + @positions[index]
  end

  # Number of matches.
  def length
    @matches.size
  end
  alias size length

  # [begin, end] of match +index+, relative to the original string.
  def offset index
    [self.begin(index), self.end(index) ]
  end

  # Frozen copy of the string that was scanned.
  def string
    @string
  end

  # The matches, in the order they were found.
  def to_a
    @matches
  end

  # Yields each match together with its substring offset.
  def each
    @matches.each_with_index do |match, i|
      yield match, @positions[i]
    end
  end
end
404
+
405
# Describes one successful match: the whole match (index 0) and its groups,
# addressable by number or by name.
class MatchData
  # string:: the string that was searched
  # starts:: start offset of the match (index 0) and of each group
  # ends:: end offset of the match (index 0) and of each group
  # names:: Hash mapping group-name Symbols to group numbers
  #
  # A negative offset marks a group that did not take part in the match; such
  # a group has a nil text and nil offsets.
  def initialize( string, starts, ends, names )
    # Private frozen copy, so #string never freezes the caller's own object.
    @string = string.dup.freeze
    @starts = starts.map { |pos| pos < 0 ? nil : pos }
    @ends = ends.map { |pos| pos < 0 ? nil : pos }
    @matches = []
    @starts.each_with_index do |start, i|
      finish = @ends[i]
      @matches << ( start && finish ? @string[start...finish] : nil )
    end
    @names = names
  end

  # md[index], md[:name], md["name"], md[range] or md[start, length].
  # Returns nil for an unknown group name.
  def [] ( value1, value2 = nil )
    return @matches[value1, value2] if value2
    index = to_index( value1 )
    index ? @matches[index] : nil
  end

  # Translates a group name (Symbol or String) into its group number; any
  # other value is returned unchanged. Unknown names give nil.
  def to_index name
    case name
    when Symbol then @names[name]
    when String then @names[name.to_sym]
    else name
    end
  end

  # Start offset of the given group (the whole match by default).
  def begin index = 0
    @starts[to_index( index )]
  end

  # End offset of the given group (the whole match by default).
  def end index = 0
    @ends[to_index( index )]
  end

  # Texts of the groups, without the whole match.
  def captures
    @matches[1..-1]
  end

  # Number of entries: the whole match plus every group.
  def length
    @matches.size
  end
  alias size length

  # [begin, end] of the given group (the whole match by default).
  def offset index = 0
    position = to_index( index )
    [@starts[position], @ends[position]]
  end

  # Text following the whole match.
  def post_match
    @string[@ends[0], @string.length]
  end

  # Text preceding the whole match.
  def pre_match
    @string[0, @starts[0]]
  end

  # Entries (whole match and groups) for which the block is true.
  def select &block
    @matches.select( &block )
  end

  # Frozen copy of the string that was searched.
  def string
    @string
  end

  # Whole match followed by the groups.
  def to_a
    @matches
  end

  # Text of the whole match.
  def to_s
    @matches[0]
  end

  # Entries at each of the given indexes, in the order requested.
  def values_at *values
    values.map { |v| @matches[v] }
  end
end
490
+ end
491
+
@@ -0,0 +1,214 @@
1
+ require 'oniguruma'
2
+ require 'test/unit'
3
+
4
+
5
# Exercises the ORegexp class: construction, class-level helpers,
# substitution, comparison and the string representations.
class ORegexpTestCase < Test::Unit::TestCase
  # Lets the tests refer to ORegexp and the OPTION_/ENCODING_ constants
  # without the Oniguruma:: prefix.
  include Oniguruma

  def test_initialization
    assert_nothing_raised { ORegexp.new( "(3.)(.*)(3.)" ) }
  end

  def test_compile
    assert_nothing_raised { ORegexp.compile( "(3.)(.*)(3.)" ) }
  end

  def test_escape
    assert_equal( '\\\\\*\?\{\}\.', ORegexp.escape('\\*?{}.') )
  end

  def test_last_match
    offset = ( ORegexp.new( 'c(.)t') =~ 'cat' )
    assert_equal( 0, offset )
    assert_equal( "cat", ORegexp.last_match(0) )
    assert_equal( "a", ORegexp.last_match(1) )
    assert_nil( ORegexp.last_match(2) )
  end

  # The pattern has an unbalanced closing parenthesis.
  def test_bad_initialization
    assert_raises(Exception) { ORegexp.new( "(3.)(.*)(3.))" ) }
  end

  def test_match
    assert_not_nil( ORegexp.new( "(3.)(.*)(3.)" ).match( "12345634" ) )
  end

  def test_no_match
    assert_nil( ORegexp.new( "(3.)(.*)(3.)" ).match( "12145614" ) )
  end

  def test_sub
    reg = ORegexp.new( 'pe')
    with_string = reg.sub( 'penelope', '**' )
    with_block = reg.sub( 'penelope' ) { |m| '++' }
    assert_equal( "**nelope", with_string )
    assert_equal( "++nelope", with_block )
  end

  def test_gsub
    reg = ORegexp.new( '\(\?#(\w+?)\)')
    string = 'My favorite fruits are (?#fruit1), (?#fruit2), and (?#fruit3)'
    fruits = { "fruit1" => "apples", "fruit2" => "bananas", "fruit3" => "grapes" }

    assert_equal( "My favorite fruits are *, *, and *", reg.gsub( string, '*' ) )
    named = reg.gsub( string ) { |text, match| fruits[match[1]] }
    assert_equal( "My favorite fruits are apples, bananas, and grapes", named )
  end

  def test_eql
    assert_equal( ORegexp.new( 'expression'), ORegexp.new( 'expression') )
    assert_not_equal( ORegexp.new( 'expression'), ORegexp.new( 'expresssion') )
    assert_not_equal( ORegexp.new( 'expression', :encoding => ENCODING_ASCII ),
                      ORegexp.new( 'expression', :encoding => ENCODING_ISO_8859_1 ) )
    assert_not_equal( ORegexp.new( 'expression', :options => OPTION_IGNORECASE ),
                      ORegexp.new( 'expression', :options => OPTION_NONE ) )
  end

  def test_case_eql
    a = "HELLO"
    result = case a
             when ORegexp.new('^[a-z]*$') then "Lower case\n"
             when ORegexp.new('^[A-Z]*$') then "Upper case\n"
             else "Mixed case\n"
             end

    assert_equal( "Upper case\n", result )
  end

  def test_operator_match
    assert_nil( ORegexp.new( 'SIT' ) =~ "insensitive" )
    assert_equal( 5, ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" )
  end

  # def test_operator_match_2
  #    $_ = "input data"
  #    assert_equal( 7, ~Oniguruma::ORegexp.new( 'at' ) )
  # end

  def test_inspect
    reg = ORegexp.new( 'cat', :options => OPTION_IGNORECASE | OPTION_MULTILINE )
    assert_equal( "/cat/im", reg.inspect )
  end

  def test_to_s
    reg = ORegexp.new( 'cat', :options => OPTION_IGNORECASE | OPTION_MULTILINE )
    assert_equal( "(?im-x)cat", reg.to_s )
  end

  def test_kcode
    assert_equal( ENCODING_ASCII, ORegexp.new( "(3.)(.*)(3.)" ).kcode )
  end

  def test_options
    reg = ORegexp.new( 'abc', :options => OPTION_IGNORECASE | OPTION_EXTEND )
    assert_equal( 3, reg.options )
  end

  def test_source
    string = '(?<=\n)\\.*ocatarinetabelachitchix'
    assert_equal( string, ORegexp.new( string ).source )
  end
end
110
+
111
# Exercises the MatchData objects returned by ORegexp#match, plus
# ORegexp#match_all and named groups.
class MatchDataTestCase < Test::Unit::TestCase
  # Lets the tests refer to ORegexp and the OPTION_ constants without the
  # Oniguruma:: prefix.
  include Oniguruma

  def setup
    @reg = ORegexp.new( '(.)(.)(\d+)(\d)' )
  end

  def test_square_brackets
    matches = thx
    assert_equal( "HX1138", matches[0] )
    assert_equal( ["H", "X"], matches[1, 2] )
    assert_equal( ["H", "X", "113"], matches[1..3] )
    assert_equal( ["X", "113"], matches[-3, 2] )
  end

  def test_begin
    matches = thx
    assert_equal( 1, matches.begin(0) )
    assert_equal( 2, matches.begin(2) )
  end

  def test_captures
    assert_equal( ["H", "X", "113", "8" ], thx.captures )
  end

  def test_end
    matches = thx
    assert_equal( 7, matches.end(0) )
    assert_equal( 3, matches.end(2) )
  end

  def test_size
    matches = thx
    assert_equal( 5, matches.length )
    assert_equal( 5, matches.size )
  end

  def test_offset
    matches = thx
    assert_equal( [1, 7], matches.offset(0) )
    assert_equal( [6, 7], matches.offset(4) )
  end

  def test_post_match
    assert_equal( ": The Movie", thx( "THX1138: The Movie" ).post_match )
  end

  def test_pre_match
    assert_equal( "T", thx.pre_match )
  end

  def test_select
    picked = thx( "THX1138: The Movie" ).select { |v| v =~ /\d\d/ }
    assert_equal( ["HX1138", "113"], picked )
  end

  def test_string
    matches = thx
    assert_equal( "THX1138.", matches.string )
    assert( matches.string.frozen? )
  end

  def test_to_a
    assert_equal( ["HX1138", "H", "X", "113", "8" ], thx.to_a )
  end

  def test_to_s
    assert_equal( "HX1138", thx.to_s )
  end

  def test_values_at
    assert_equal( ["HX1138", "X", "113"], thx( "THX1138: The Movie" ).values_at( 0, 2, -2) )
  end

  def test_match_all
    matches = ORegexp.new( 'ca' ).match_all( 'ocatacachaca' )
    assert_equal( 3, matches.size )
    assert_equal( 7, matches.position(2) )
    assert_equal( "ca", matches.string[matches.begin(1)...matches.end(1)])
  end

  def test_match_empty_string
    reg = ORegexp.new( '^\s*?(\n|\r)', :options => OPTION_MULTILINE )
    matches = reg.match( "\n\n\n\n\n" )
    assert_not_nil( matches )
    assert_equal( "\n\n\n\n", matches.post_match )
  end

  def test_group_by_name
    reg = ORegexp.new( '(?<begin>\()(?<body>.*)(?<end>\))', :options => OPTION_MULTILINE )
    matches = reg.match( "blah (content) blah" )
    assert_not_nil( matches )
    assert_equal( '(', matches[:begin] )
    assert_equal( 'content', matches[:body] )
    assert_equal( ')', matches[:end] )
    assert_nil( matches[:inexistent] )
  end

  private

  # Matches the shared pattern from #setup against +text+.
  def thx( text = "THX1138." )
    @reg.match( text )
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: oniguruma
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.9.0
7
+ date: 2007-03-22 00:00:00 +01:00
8
+ summary: Bindings for the oniguruma regular expression library
9
+ require_paths:
10
+ - lib
11
+ - ext
12
+ email: dix_ans@yahoo.com
13
+ homepage: http://oniguruma.rubyforge.org
14
+ rubyforge_project: oniguruma
15
+ description: Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
16
+ autorequire:
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ - - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ signing_key:
28
+ cert_chain:
29
+ post_install_message:
30
+ authors:
31
+ - Dizan Vasquez
32
+ files:
33
+ - History.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ - Rakefile
37
+ - lib/oniguruma.rb
38
+ - ext/oregexp.c
39
+ - test/test_oniguruma.rb
40
+ test_files:
41
+ - test/test_oniguruma.rb
42
+ rdoc_options: []
43
+
44
+ extra_rdoc_files: []
45
+
46
+ executables: []
47
+
48
+ extensions:
49
+ - ext/extconf.rb
50
+ requirements: []
51
+
52
+ dependencies: []
53
+