oniguruma 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,5 @@
1
+ == 1.0.0 / 2007-03-19
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
data/Manifest.txt ADDED
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/oniguruma.rb
6
+ ext/oregexp.c
7
+ test/test_oniguruma.rb
data/README.txt ADDED
@@ -0,0 +1,65 @@
1
+ == ONIGURUMA FOR RUBY:
2
+
3
+ Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
4
+
5
+ == FEATURES:
6
+
7
+ * Increased performance.
8
+ * Same interface as the standard Regexp class (easy transition!).
9
+ * Support for named groups, look-ahead, look-behind, and other
10
+ cool features!
11
+
12
+ == SYNOPSIS:
13
+
14
+ reg = Oniguruma::ORegexp.new( '(?<before>.*)(a)(?<after>.*)' )
15
+ match = reg.match( 'terraforming' )
16
+ puts match[0] # prints: terraforming
17
+ puts match[:before] # prints: terr
18
+ puts match[:after] # prints: forming
19
+
20
+ == REQUIREMENTS:
21
+
22
+ * Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
23
+
24
+ == INSTALL:
25
+
26
+ sudo gem install -r oniguruma
27
+
28
+ == BUGS/PROBLEMS/INCOMPATIBILITIES:
29
+
30
+ * <code>ORegexp#~</code> is not implemented.
31
+ * <code>ORegexp#kcode</code> results are not compatible with <code>Regexp</code>.
32
+ * <code>ORegexp</code> options set in the string are not visible, this affects
33
+ <code>ORegexp#options</code>, <code>ORegexp#to_s</code>, <code>ORegexp#inspect</code>
34
+ and <code>ORegexp#==</code>.
35
+
36
+ == TODO:
37
+
38
+ * Complete documentation (methods, oniguruma syntax).
39
+
40
+ == CREDITS:
41
+
42
+ * K.Kosako, for his great library.
43
+ * A lot of the documentation has been copied from the original Ruby Regexp documentation.
44
+
45
+ == LICENSE:
46
+
47
+ New BSD License
48
+
49
+ Copyright (c) 2007, Dizan Vasquez
50
+ All rights reserved.
51
+
52
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
53
+
54
+ * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
55
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
56
+ documentation and/or other materials provided with the distribution.
57
+ * Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this
58
+ software without specific prior written permission.
59
+
60
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
61
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
62
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
63
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
64
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
65
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ class Hoe; def extra_deps; @extra_deps.reject { |x| Array(x).first == 'hoe' }; end end
5
+
6
+ Hoe.new('oniguruma', '0.9.0') do |p|
7
+ p.rubyforge_name = 'oniguruma'
8
+ p.author = 'Dizan Vasquez'
9
+ p.email = 'dix_ans@yahoo.com'
10
+ p.summary = 'Bindings for the oniguruma regular expression library'
11
+ p.description = p.paragraphs_of('README.txt', 1 ).join('\n\n')
12
+ p.url = 'http://oniguruma.rubyforge.org'
13
+ p.spec_extras[:extensions] = ["ext/extconf.rb"]
14
+ p.rdoc_pattern = /^(lib|bin|ext)|txt$/
15
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
16
+ end
17
+
18
+
data/ext/extconf.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ have_library("onig")
3
+ create_makefile( "oregexp" )
data/ext/oregexp.c ADDED
@@ -0,0 +1,194 @@
1
+ #include <ruby.h>
2
+ #include <oniguruma.h>
3
+ /*
4
+ TODO:
5
+ - Add named backreferences.
6
+ */
7
+
8
+ typedef struct _oregexp {
9
+ regex_t * reg;
10
+ } ORegexp;
11
+
12
+ VALUE mOniguruma;
13
+ VALUE nameHash;
14
+
15
+ static void oregexp_free( ORegexp * oregexp) {
16
+ onig_free( oregexp->reg );
17
+ free( oregexp );
18
+ }
19
+
20
+ static VALUE oregexp_allocate( VALUE klass ) {
21
+ ORegexp * oregexp = malloc( sizeof( ORegexp ) );
22
+ oregexp->reg = NULL;
23
+ return Data_Wrap_Struct( klass, 0, oregexp_free, oregexp );
24
+ }
25
+
26
+
27
+ static OnigEncodingType * int2encoding( int index ) {
28
+ switch( index ) {
29
+ case 0: return ONIG_ENCODING_ASCII;
30
+ case 1: return ONIG_ENCODING_ISO_8859_1;
31
+ case 2: return ONIG_ENCODING_ISO_8859_2;
32
+ case 3: return ONIG_ENCODING_ISO_8859_3;
33
+ case 4: return ONIG_ENCODING_ISO_8859_4;
34
+ case 5: return ONIG_ENCODING_ISO_8859_5;
35
+ case 6: return ONIG_ENCODING_ISO_8859_6;
36
+ case 7: return ONIG_ENCODING_ISO_8859_7;
37
+ case 8: return ONIG_ENCODING_ISO_8859_8;
38
+ case 9: return ONIG_ENCODING_ISO_8859_9;
39
+ case 10: return ONIG_ENCODING_ISO_8859_10;
40
+ case 11: return ONIG_ENCODING_ISO_8859_11;
41
+ case 12: return ONIG_ENCODING_ISO_8859_11;
42
+ case 13: return ONIG_ENCODING_ISO_8859_13;
43
+ case 14: return ONIG_ENCODING_ISO_8859_14;
44
+ case 15: return ONIG_ENCODING_ISO_8859_15;
45
+ case 16: return ONIG_ENCODING_ISO_8859_16;
46
+ case 17: return ONIG_ENCODING_UTF8;
47
+ case 18: return ONIG_ENCODING_UTF16_BE;
48
+ case 19: return ONIG_ENCODING_UTF16_LE;
49
+ case 20: return ONIG_ENCODING_UTF32_BE;
50
+ case 21: return ONIG_ENCODING_UTF32_LE;
51
+ case 22: return ONIG_ENCODING_EUC_JP;
52
+ case 23: return ONIG_ENCODING_EUC_TW;
53
+ case 24: return ONIG_ENCODING_EUC_KR;
54
+ case 25: return ONIG_ENCODING_EUC_CN;
55
+ case 26: return ONIG_ENCODING_SJIS;
56
+ /*case 27: return ONIG_ENCODING_KOI8;*/
57
+ case 28: return ONIG_ENCODING_KOI8_R;
58
+ case 29: return ONIG_ENCODING_CP1251;
59
+ case 30: return ONIG_ENCODING_BIG5;
60
+ case 31: return ONIG_ENCODING_GB18030;
61
+ case 32: return ONIG_ENCODING_UNDEF;
62
+ }
63
+ return ONIG_ENCODING_UNDEF;
64
+ }
65
+
66
+ static OnigSyntaxType * int2syntax( int index ) {
67
+ switch( index ) {
68
+ case 0: return ONIG_SYNTAX_ASIS;
69
+ case 1: return ONIG_SYNTAX_POSIX_BASIC;
70
+ case 2: return ONIG_SYNTAX_POSIX_EXTENDED;
71
+ case 3: return ONIG_SYNTAX_EMACS;
72
+ case 4: return ONIG_SYNTAX_GREP;
73
+ case 5: return ONIG_SYNTAX_GNU_REGEX;
74
+ case 6: return ONIG_SYNTAX_JAVA;
75
+ case 7: return ONIG_SYNTAX_PERL;
76
+ case 8: return ONIG_SYNTAX_PERL_NG;
77
+ case 9: return ONIG_SYNTAX_RUBY;
78
+ case 10: return ONIG_SYNTAX_DEFAULT;
79
+ }
80
+ return ONIG_SYNTAX_DEFAULT;
81
+ }
82
+
83
+ static int name_callback(
84
+ const UChar* name,
85
+ const UChar* name_end,
86
+ int ngroup_num,
87
+ int* group_nums,
88
+ regex_t* reg,
89
+ void* arg
90
+ ) {
91
+ int i, gn, ref;
92
+ OnigRegion *region = (OnigRegion* )arg;
93
+
94
+ for (i = 0; i < ngroup_num; i++) {
95
+ gn = group_nums[i];
96
+ ref = onig_name_to_backref_number(reg, name, name_end, region);
97
+ if (ref != gn )
98
+ rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
99
+ rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
100
+ }
101
+ return 0;
102
+ }
103
+ /* ORegexp#initialize(pattern, options): compiles pattern with onig_new. options is a Hash whose :options, :encoding and :syntax entries are Integers (encoding/syntax are indexes into int2encoding/int2syntax). Stores @pattern and @options on self; raises Exception carrying Oniguruma's message if compilation fails. */
104
+ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
105
+ ORegexp *oregexp;
106
+ Data_Get_Struct( self, ORegexp, oregexp );
107
+
108
+ VALUE pattern_str = StringValue( pattern );
109
+ rb_iv_set( self, "@pattern", pattern_str );
110
+ rb_iv_set( self, "@options", options );
111
+ UChar* pat_ptr = RSTRING(pattern_str)->ptr;
112
+ int pat_len = RSTRING(pattern_str)->len;
113
+ /* The hash entries are Ruby Integers: convert each with NUM2INT before using it as a C int or table index. */
114
+ VALUE rOptions = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
115
+ VALUE rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
116
+ VALUE rSyntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
117
+ int iOptions = NUM2INT( rOptions );
118
+ OnigEncodingType * iEncoding = int2encoding( NUM2INT( rEncoding ) ); /* keep the pointer type; an int would truncate it on LP64 */
119
+ OnigSyntaxType * iSyntax = int2syntax( NUM2INT( rSyntax ) );
120
+
121
+
122
+ int r;
123
+ OnigErrorInfo einfo;
124
+ r = onig_new(&(oregexp->reg), pat_ptr, pat_ptr + pat_len, iOptions, iEncoding, iSyntax, &einfo);
125
+ if (r != ONIG_NORMAL) {
126
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
127
+ onig_error_code_to_str(s, r, &einfo);
128
+ rb_raise(rb_eException, "Oniguruma Error: %s", s);
129
+ }
130
+ return self;
131
+ }
132
+
133
+ /*
134
+ * call-seq:
135
+ * rxp.match(str) => matchdata or nil
136
+ *
137
+ * Returns a <code>MatchData</code> object describing the match, or
138
+ * <code>nil</code> if there was no match. This is equivalent to retrieving the
139
+ * value of the special variable <code>$~</code> following a normal match.
140
+ *
141
+ * /(.)(.)(.)/.match("abc")[2] #=> "b"
142
+ */
143
+ static VALUE oregexp_match( VALUE self, VALUE string ) {
144
+ ORegexp *oregexp;
145
+ Data_Get_Struct( self, ORegexp, oregexp );
146
+
147
+ VALUE string_str = StringValue( string );
148
+ UChar* str_ptr = RSTRING(string_str)->ptr;
149
+ int str_len = RSTRING(string_str)->len;
150
+
151
+ OnigRegion *region = onig_region_new();
152
+ int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
153
+ if (r >= 0) {
154
+
155
+ VALUE begins = rb_ary_new();
156
+ VALUE ends = rb_ary_new();
157
+ nameHash = rb_hash_new();
158
+
159
+ onig_foreach_name(oregexp->reg, name_callback, (void* )region);
160
+
161
+
162
+ int i;
163
+
164
+ for (i = 0; i < region->num_regs; i++) {
165
+ rb_ary_push( begins, INT2FIX( region->beg[i] ) );
166
+ rb_ary_push( ends, INT2FIX( region->end[i] ) );
167
+ }
168
+ VALUE kMatchData = rb_const_get( mOniguruma, rb_intern( "MatchData" ) );
169
+ VALUE kORegexp = rb_const_get( mOniguruma, rb_intern( "ORegexp" ) );
170
+ VALUE matchData = rb_funcall(kMatchData, rb_intern("new"), 4, string_str, begins, ends, nameHash );
171
+ rb_cv_set( kORegexp, "@@last_match", matchData );
172
+
173
+ onig_region_free(region, 1 );
174
+ return matchData;
175
+ } else if (r == ONIG_MISMATCH) {
176
+ onig_region_free(region, 1 );
177
+ return Qnil;
178
+ } else {
179
+ onig_region_free(region, 1 );
180
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
181
+ onig_error_code_to_str(s, r);
182
+ rb_raise(rb_eException, "Oniguruma Error: %s", s);
183
+ }
184
+
185
+ }
186
+ /* Extension entry point: defines Oniguruma::ORegexp with its allocator and the initialize/match methods, and registers nameHash with the GC. */
187
+ void Init_oregexp() {
188
+ mOniguruma = rb_define_module("Oniguruma");
189
+ VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
190
+ rb_define_alloc_func(cORegexp, oregexp_allocate);
191
+ rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
192
+ rb_define_method( cORegexp, "match", oregexp_match, 1 );
193
+ nameHash = Qnil; rb_global_variable( &nameHash ); /* nameHash lives in a C global, so it must be marked or the hash built during match could be collected */
194
+ }
data/lib/oniguruma.rb ADDED
@@ -0,0 +1,491 @@
1
+ require 'oregexp'
2
+
3
+ module Oniguruma
4
+ OPTION_NONE = 0
5
+ OPTION_IGNORECASE = 1
6
+ OPTION_EXTEND = (OPTION_IGNORECASE << 1)
7
+ OPTION_MULTILINE = (OPTION_EXTEND << 1)
8
+ OPTION_SINGLELINE = (OPTION_MULTILINE << 1)
9
+ OPTION_FIND_LONGEST = (OPTION_SINGLELINE << 1)
10
+ OPTION_FIND_NOT_EMPTY = (OPTION_FIND_LONGEST << 1)
11
+ OPTION_NEGATE_SINGLELINE = (OPTION_FIND_NOT_EMPTY << 1)
12
+ OPTION_DONT_CAPTURE_GROUP = (OPTION_NEGATE_SINGLELINE << 1)
13
+ OPTION_CAPTURE_GROUP = (OPTION_DONT_CAPTURE_GROUP << 1)
14
+ OPTION_NOTBOL = (OPTION_CAPTURE_GROUP << 1)
15
+ OPTION_NOTEOL = (OPTION_NOTBOL << 1)
16
+ OPTION_POSIX_REGION = (OPTION_NOTEOL << 1)
17
+ OPTION_MAXBIT = OPTION_POSIX_REGION
18
+ OPTION_DEFAULT = OPTION_NONE
19
+
20
+ SYNTAX_ASIS = 0
21
+ SYNTAX_POSIX_BASIC = 1
22
+ SYNTAX_POSIX_EXTENDED = 2
23
+ SYNTAX_EMACS = 3
24
+ SYNTAX_GREP = 4
25
+ SYNTAX_GNU_REGEX = 5
26
+ SYNTAX_JAVA = 6
27
+ SYNTAX_PERL = 7
28
+ SYNTAX_PERL_NG = 8
29
+ SYNTAX_RUBY = 9
30
+ SYNTAX_DEFAULT = 10
31
+
32
+ ENCODING_ASCII = 0
33
+ ENCODING_ISO_8859_1 = 1
34
+ ENCODING_ISO_8859_2 = 2
35
+ ENCODING_ISO_8859_3 = 3
36
+ ENCODING_ISO_8859_4 = 4
37
+ ENCODING_ISO_8859_5 = 5
38
+ ENCODING_ISO_8859_6 = 6
39
+ ENCODING_ISO_8859_7 = 7
40
+ ENCODING_ISO_8859_8 = 8
41
+ ENCODING_ISO_8859_9 = 9
42
+ ENCODING_ISO_8859_10 = 10
43
+ ENCODING_ISO_8859_11 = 11
44
+ ENCODING_ISO_8859_12 = 12
45
+ ENCODING_ISO_8859_13 = 13
46
+ ENCODING_ISO_8859_14 = 14
47
+ ENCODING_ISO_8859_15 = 15
48
+ ENCODING_ISO_8859_16 = 16
49
+ ENCODING_UTF8 = 17
50
+ ENCODING_UTF16_BE = 18
51
+ ENCODING_UTF16_LE = 19
52
+ ENCODING_UTF32_BE = 20
53
+ ENCODING_UTF32_LE = 21
54
+ ENCODING_EUC_JP = 22
55
+ ENCODING_EUC_TW = 23
56
+ ENCODING_EUC_KR = 24
57
+ ENCODING_EUC_CN = 25
58
+ ENCODING_SJIS = 26
59
+ ENCODING_KOI8 = 27
60
+ ENCODING_KOI8_R = 28
61
+ ENCODING_CP1251 = 29
62
+ ENCODING_BIG5 = 30
63
+ ENCODING_GB18030 = 31
64
+ ENCODING_UNDEF = 32
65
+
66
+
67
+ class ORegexp
68
+
69
+ class << self
70
+ # :stopdoc:
71
+ alias compile new
72
+ # :startdoc:
73
+
74
+ # call-seq:
75
+ # ORegexp.escape(str) => a_str
76
+ # ORegexp.quote(str) => a_str
77
+ #
78
+ # Escapes any characters that would have special meaning in a regular
79
+ # expression. Returns a new escaped string, or self if no characters are
80
+ # escaped. For any string,
81
+ # <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true.
82
+ #
83
+ # ORegexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
84
+ #
85
+
86
+ def escape( *args )
87
+ Regexp.escape( *args )
88
+ end
89
+ # :stopdoc:
90
+ alias quote escape
91
+ # :startdoc:
92
+
93
+ # call-seq:
94
+ # ORegexp.last_match => matchdata
95
+ # ORegexp.last_match(fixnum) => str
96
+ #
97
+ # The first form returns the <code>MatchData</code> object generated by the
98
+ # last successful pattern match. The second form returns the nth field in this
99
+ # <code>MatchData</code> object.
100
+ #
101
+ # ORegexp.new( 'c(.)t' ) =~ 'cat' #=> 0
102
+ # ORegexp.last_match #=> #<MatchData:0x401b3d30>
103
+ # ORegexp.last_match(0) #=> "cat"
104
+ # ORegexp.last_match(1) #=> "a"
105
+ # ORegexp.last_match(2) #=> nil
106
+
107
+ def last_match( index = nil)
108
+ if index
109
+ @@last_match[index]
110
+ else
111
+ @@last_match
112
+ end
113
+ end
114
+ end
115
+
116
+ # :stopdoc:
117
+ alias old_initialize initialize
118
+ # :startdoc:
119
+
120
+ # Constructs a new regular expression from <i>pattern</i>, which is a
121
+ # <code>String</code>. The parameter <i>options</i> is a <code>Hash</code>
122
+ # of the form:
123
+ #
124
+ # <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
125
+ #
126
+ # Where <code>option_value</code> is a bitwise <code>OR</code> of
127
+ # <code>Oniguruma::OPTION_XXX</code> constants; <code>encoding_value</code>
128
+ # is one of <code>Oniguruma::ENCODING_XXX</code> constants; and
129
+ # <code>syntax_value</code> is one of <code>Oniguruma::SYNTAX_XXX</code>
130
+ # constants.
131
+ #
132
+ # r1 = ORegexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
133
+ # r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE ) #=> /cat/i
134
+ # r3 = ORegexp.new('dog', :options => OPTION_EXTEND ) #=> /dog/x
135
+ #
136
+ # #Accept java syntax on SJIS encoding:
137
+ # r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
138
+
139
+ def initialize( pattern, options = {} )
140
+ defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
141
+ old_initialize( pattern, defaults.merge( options ).freeze )
142
+ end
143
+
144
+ # call-seq:
145
+ # rxp == other_rxp => true or false
146
+ # rxp.eql?(other_rxp) => true or false
147
+ #
148
+ # Equality---Two regexps are equal if their patterns are identical, they have
149
+ # the same character set code, and their <code>#casefold?</code> values are the
150
+ # same.
151
+
152
+ def == regexp
153
+ @pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
154
+ end
155
+ alias eql? ==
156
+
157
+ # call-seq:
158
+ # rxp.casefold? => true or false
159
+ #
160
+ # Returns the value of the case-insensitive flag.
161
+
162
+ def casefold?
163
+ (@options[:options] & OPTION_IGNORECASE) > 0
164
+ end
165
+
166
+ # call-seq:
167
+ # rxp.kcode => int
168
+ #
169
+ # Returns the character set code for the regexp.
170
+ def kcode
171
+ @options[:encoding]
172
+ end
173
+
174
+ # call-seq:
175
+ # rxp.options => fixnum
176
+ #
177
+ # Returns the set of bits corresponding to the options used when creating this
178
+ # ORegexp (see <code>ORegexp::new</code> for details). Note that additional bits
179
+ # may be set in the returned options: these are used internally by the regular
180
+ # expression code. These extra bits are ignored if the options are passed to
181
+ # <code>ORegexp::new</code>.
182
+ #
183
+ # Oniguruma::OPTION_IGNORECASE #=> 1
184
+ # Oniguruma::OPTION_EXTEND #=> 2
185
+ # Oniguruma::OPTION_MULTILINE #=> 4
186
+ #
187
+ # Regexp.new(r.source, :options => Oniguruma::OPTION_EXTEND ) #=> 2
188
+
189
+ def options
190
+ @options[:options]
191
+ end
192
+
193
+ # call-seq:
194
+ # rxp.to_s => str
195
+ #
196
+ # Returns a string containing the regular expression and its options (using the
197
+ # <code>(?xxx:yyy)</code> notation). This string can be fed back in to
198
+ # <code>Regexp::new</code> to a regular expression with the same semantics as
199
+ # the original. (However, <code>Regexp#==</code> may not return true when
200
+ # comparing the two, as the source of the regular expression itself may
201
+ # differ, as the example shows). <code>Regexp#inspect</code> produces a
202
+ # generally more readable version of <i>rxp</i>.
203
+ #
204
+ # r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
205
+ # s1 = r1.to_s #=> "(?ix-m:ab+c)"
206
+ # r2 = ORegexp.new(s1) #=> /(?ix-m:ab+c)/
207
+ # r1 == r2 #=> false
208
+ # r1.source #=> "ab+c"
209
+ # r2.source #=> "(?ix-m:ab+c)"
210
+
211
+ def to_s
212
+ opt_str = "(?"
213
+ opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
214
+ opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
215
+ opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
216
+ unless opt_str == "(?imx"
217
+ opt_str += "-"
218
+ opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) == 0
219
+ opt_str += "m" if (@options[:options] & OPTION_MULTILINE) == 0
220
+ opt_str += "x" if (@options[:options] & OPTION_EXTEND) == 0
221
+ end
222
+ opt_str += ")"
223
+ opt_str + ORegexp.escape( @pattern )
224
+ end
225
+
226
+
227
+ # call-seq:
228
+ # rxp.inspect => string
229
+ #
230
+ # Returns a readable version of <i>rxp</i>
231
+ #
232
+ # ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect => /cat/im
233
+ # ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s => (?im-x)cat
234
+
235
+ def inspect
236
+ opt_str = ""
237
+ opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
238
+ opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
239
+ opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
240
+ "/" + ORegexp.escape( @pattern ) + "/" + opt_str
241
+ end
242
+
243
+ # call-seq:
244
+ # rxp =~ string => int or nil
245
+ #
246
+ # Matches <code>rxp</code> against <code>string</code>, returning the offset of the
247
+ # start of the match or <code>nil</code> if the match failed. Sets $~ to the corresponding
248
+ # <code>MatchData</code> or <code>nil</code>.
249
+ #
250
+ # ORegexp.new( 'SIT' ) =~ "insensitive" #=> nil
251
+ # ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
252
+
253
+ def =~ string
254
+ return nil unless string
255
+ m = match( string )
256
+ return nil unless m
257
+ m.begin
258
+ end
259
+
260
+ # call-seq:
261
+ # rxp === str => true or false
262
+ #
263
+ # Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
264
+ #
265
+ # a = "HELLO"
266
+ # case a
267
+ # when ORegexp.new('^[a-z]*$'); print "Lower case\n"
268
+ # when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
269
+ # else; print "Mixed case\n"
270
+ # end
271
+ #
272
+ # <em>produces:</em>
273
+ #
274
+ # Upper case
275
+
276
+ alias === =~
277
+
278
+ def source
279
+ @pattern.freeze
280
+ end
281
+
282
+ def match_all string
283
+ matches = []
284
+ positions = []
285
+ position = 0
286
+ tmp_string = string
287
+ while tmp_string != ""
288
+ if m = match( tmp_string )
289
+ matches << m
290
+ positions << position
291
+ tmp_string = m.post_match
292
+ position += m.end
293
+ #if m.end == m.begin
294
+ # tmp_string = tmp_string[1..-1]
295
+ # position += 1
296
+ #end
297
+ else
298
+ break
299
+ end
300
+ end
301
+ if matches.size > 0
302
+ MultiMatchData.new( string, matches, positions )
303
+ else
304
+ nil
305
+ end
306
+ end
307
+
308
+ def sub string, replacement = nil
309
+ matches = match( string )
310
+ if matches
311
+ replacement = yield matches[0] unless replacement
312
+ string.sub( matches[0], replacement )
313
+ else
314
+ return string
315
+ end
316
+ end
317
+
318
+ def gsub string, replacement = nil
319
+ result = string
320
+ matches = match_all( string )
321
+ string_replace = replacement
322
+ if matches
323
+ matches.each do |m, p|
324
+ replacement = yield( m[0], m ) unless string_replace
325
+ result = result.sub( m[0], replacement )
326
+ end
327
+ end
328
+ result
329
+ end
330
+
331
+ def sub! string, replacement = nil
332
+ matches = match( string )
333
+ if matches
334
+ replacement = yield matches[0] unless replacement
335
+ string.sub!( matches[0], replacement )
336
+ else
337
+ return string
338
+ end
339
+ end
340
+
341
+ def gsub! string, replacement = nil
342
+ matches = match_all( string )
343
+ string_replace = replacement
344
+ if matches
345
+ matches.each do |m, p|
346
+ replacement = yield( m[0], m ) unless string_replace
347
+ string.sub!( m[0], replacement )
348
+ end
349
+ end
350
+ string
351
+ end
352
+ end
353
+
354
+ class MultiMatchData
355
+ def initialize( string, matches, positions )
356
+ @matches = matches
357
+ @positions = positions
358
+ @string = string
359
+ end
360
+
361
+ def position index
362
+ @positions[index]
363
+ end
364
+
365
+ def [] ( value1, value2 = nil )
366
+ unless value2
367
+ @matches[value1]
368
+ else
369
+ @matches[value1, value2]
370
+ end
371
+ end
372
+
373
+ def begin index
374
+ @matches[index].begin + @positions[index]
375
+ end
376
+
377
+ def end index
378
+ @matches[index].end + @positions[index]
379
+ end
380
+
381
+ def length
382
+ @matches.size
383
+ end
384
+ alias size length
385
+
386
+ def offset index
387
+ [self.begin(index), self.end(index) ]
388
+ end
389
+
390
+ def string
391
+ @string.freeze
392
+ end
393
+
394
+ def to_a
395
+ @matches
396
+ end
397
+
398
+ def each
399
+ @matches.size.times do |i|
400
+ yield @matches[i], @positions[i]
401
+ end
402
+ end
403
+ end
404
+
405
+ class MatchData
406
+ def initialize( string, starts, ends, names )
407
+ @string = string
408
+ @starts = starts
409
+ @ends = ends
410
+ @matches = []
411
+ @starts.size.times do |i|
412
+ @matches << @string[@starts[i]...@ends[i]]
413
+ end
414
+ @match_count = @matches.size
415
+ @start_pos = 0
416
+ @names = names
417
+ end
418
+
419
+ def [] ( value1, value2 = nil )
420
+ unless value2
421
+ if index = to_index( value1 )
422
+ @matches[index]
423
+ else
424
+ nil
425
+ end
426
+ else
427
+ @matches[value1, value2]
428
+ end
429
+ end
430
+
431
+ def to_index name
432
+ if name.is_a? Symbol
433
+ @names[name]
434
+ else
435
+ name
436
+ end
437
+ end
438
+
439
+ def begin index = 0
440
+ @starts[to_index( index )]
441
+ end
442
+
443
+ def end index = 0
444
+ @ends[to_index( index )]
445
+ end
446
+
447
+ def captures
448
+ @matches[1..-1]
449
+ end
450
+
451
+ def length
452
+ @match_count
453
+ end
454
+ alias size length
455
+
456
+ def offset index = 0
457
+ [@starts[to_index( index )], @ends[to_index( index )]]
458
+ end
459
+
460
+ def post_match
461
+ @string[@ends[0], @string.length]
462
+ end
463
+
464
+ def pre_match
465
+ @string[0, @starts[0]]
466
+ end
467
+
468
+ def select &block
469
+ @matches.select( &block )
470
+ end
471
+
472
+ def string
473
+ @string.freeze
474
+ end
475
+
476
+ def to_a
477
+ @matches
478
+ end
479
+
480
+ def to_s
481
+ @matches[0]
482
+ end
483
+
484
+ def values_at *values
485
+ result = []
486
+ values.each { |v| result << @matches[v] }
487
+ result
488
+ end
489
+ end
490
+ end
491
+
@@ -0,0 +1,214 @@
1
+ require 'oniguruma'
2
+ require 'test/unit'
3
+
4
+
5
+ class ORegexpTestCase < Test::Unit::TestCase
6
+ def test_initialization
7
+ assert_nothing_raised do
8
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
9
+ end
10
+ end
11
+
12
+ def test_compile
13
+ assert_nothing_raised do
14
+ reg = Oniguruma::ORegexp.compile( "(3.)(.*)(3.)" )
15
+ end
16
+ end
17
+
18
+ def test_escape
19
+ assert_equal( '\\\\\*\?\{\}\.', Oniguruma::ORegexp.escape('\\*?{}.') )
20
+ end
21
+
22
+ def test_last_match
23
+ assert_equal( 0, Oniguruma::ORegexp.new( 'c(.)t') =~ 'cat' )
24
+ assert_equal( "cat", Oniguruma::ORegexp.last_match(0) )
25
+ assert_equal( "a", Oniguruma::ORegexp.last_match(1) )
26
+ assert_equal( nil, Oniguruma::ORegexp.last_match(2) )
27
+ end
28
+
29
+ def test_bad_initialization
30
+ assert_raises(Exception) do
31
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.))" )
32
+ end
33
+ end
34
+
35
+ def test_match
36
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
37
+ assert_not_nil( reg.match( "12345634" ) )
38
+ end
39
+
40
+ def test_no_match
41
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
42
+ assert_nil( reg.match( "12145614" ) )
43
+ end
44
+
45
+ def test_sub
46
+ reg = Oniguruma::ORegexp.new( 'pe')
47
+ assert_equal( "**nelope", reg.sub( 'penelope', '**' ) )
48
+ assert_equal( "++nelope", reg.sub( 'penelope' ) { |m| '++' })
49
+ end
50
+
51
+ def test_gsub
52
+ reg = Oniguruma::ORegexp.new( '\(\?#(\w+?)\)')
53
+ string = 'My favorite fruits are (?#fruit1), (?#fruit2), and (?#fruit3)'
54
+ assert_equal( "My favorite fruits are *, *, and *", reg.gsub( string, '*' ) )
55
+ fruits = { "fruit1" => "apples", "fruit2" => "bananas", "fruit3" => "grapes" }
56
+ assert_equal( "My favorite fruits are apples, bananas, and grapes", reg.gsub( string ) { |text, match| fruits[match[1]]} )
57
+ end
58
+
59
+ def test_eql
60
+ assert_equal( Oniguruma::ORegexp.new( 'expression'), Oniguruma::ORegexp.new( 'expression') )
61
+ assert_not_equal( Oniguruma::ORegexp.new( 'expression'), Oniguruma::ORegexp.new( 'expresssion') )
62
+ assert_not_equal( Oniguruma::ORegexp.new( 'expression', :encoding => Oniguruma::ENCODING_ASCII ), Oniguruma::ORegexp.new( 'expression', :encoding => Oniguruma::ENCODING_ISO_8859_1 ) )
63
+ assert_not_equal( Oniguruma::ORegexp.new( 'expression', :options => Oniguruma::OPTION_IGNORECASE ), Oniguruma::ORegexp.new( 'expression', :options => Oniguruma::OPTION_NONE ) )
64
+ end
65
+
66
+ def test_case_eql
67
+ a = "HELLO"
68
+ result = ""
69
+ case a
70
+ when Oniguruma::ORegexp.new('^[a-z]*$'); result = "Lower case\n"
71
+ when Oniguruma::ORegexp.new('^[A-Z]*$'); result = "Upper case\n"
72
+ else; result = "Mixed case\n"
73
+ end
74
+
75
+ assert_equal( "Upper case\n", result )
76
+ end
77
+
78
+ def test_operator_match
79
+ assert_equal( nil, Oniguruma::ORegexp.new( 'SIT' ) =~ "insensitive" )
80
+ assert_equal( 5, Oniguruma::ORegexp.new( 'SIT', :options => Oniguruma::OPTION_IGNORECASE ) =~ "insensitive" )
81
+ end
82
+
83
+ # def test_operator_match_2
84
+ # $_ = "input data"
85
+ # assert_equal( 7, ~Oniguruma::ORegexp.new( 'at' ) )
86
+ # end
87
+
88
+ def test_inspect
89
+ assert_equal( "/cat/im", Oniguruma::ORegexp.new( 'cat', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_MULTILINE ).inspect )
90
+ end
91
+
92
+ def test_to_s
93
+ assert_equal( "(?im-x)cat", Oniguruma::ORegexp.new( 'cat', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_MULTILINE ).to_s )
94
+ end
95
+
96
+ def test_kcode
97
+ reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
98
+ assert_equal( Oniguruma::ENCODING_ASCII, reg.kcode )
99
+ end
100
+
101
+ def test_options
102
+ assert_equal( 3, Oniguruma::ORegexp.new( 'abc', :options => Oniguruma::OPTION_IGNORECASE | Oniguruma::OPTION_EXTEND ).options )
103
+ end
104
+
105
+ def test_source
106
+ string = '(?<=\n)\\.*ocatarinetabelachitchix'
107
+ assert_equal( string, Oniguruma::ORegexp.new( string ).source )
108
+ end
109
+ end
110
+
111
+ class MatchDataTestCase < Test::Unit::TestCase
112
+ def setup
113
+ @reg = Oniguruma::ORegexp.new( '(.)(.)(\d+)(\d)' )
114
+ end
115
+
116
+ def test_square_brackets
117
+ matches = @reg.match( "THX1138." )
118
+ assert_equal( "HX1138", matches[0] )
119
+ assert_equal( ["H", "X"], matches[1, 2] )
120
+ assert_equal( ["H", "X", "113"], matches[1..3] )
121
+ assert_equal( ["X", "113"], matches[-3, 2] )
122
+ end
123
+
124
+ def test_begin
125
+ matches = @reg.match( "THX1138." )
126
+ assert_equal( 1, matches.begin(0) )
127
+ assert_equal( 2, matches.begin(2) )
128
+ end
129
+
130
+ def test_captures
131
+ matches = @reg.match( "THX1138." )
132
+ assert_equal( ["H", "X", "113", "8" ], matches.captures )
133
+ end
134
+
135
+ def test_end
136
+ matches = @reg.match( "THX1138." )
137
+ assert_equal( 7, matches.end(0) )
138
+ assert_equal( 3, matches.end(2) )
139
+ end
140
+
141
+ def test_size
142
+ matches = @reg.match( "THX1138." )
143
+ assert_equal( 5, matches.length )
144
+ assert_equal( 5, matches.size )
145
+ end
146
+
147
+ def test_offset
148
+ matches = @reg.match( "THX1138." )
149
+ assert_equal( [1, 7], matches.offset(0) )
150
+ assert_equal( [6, 7], matches.offset(4) )
151
+ end
152
+
153
+ def test_post_match
154
+ matches = @reg.match( "THX1138: The Movie" )
155
+ assert_equal( ": The Movie", matches.post_match )
156
+ end
157
+
158
+ def test_pre_match
159
+ matches = @reg.match( "THX1138." )
160
+ assert_equal( "T", matches.pre_match )
161
+ end
162
+
163
+ def test_select
164
+ matches = @reg.match( "THX1138: The Movie" )
165
+ assert_equal( ["HX1138", "113"], matches.select{ |v| v =~ /\d\d/} )
166
+ end
167
+
168
+ def test_string
169
+ matches = @reg.match( "THX1138." )
170
+ assert_equal( "THX1138.", matches.string )
171
+ assert( matches.string.frozen? )
172
+ end
173
+
174
+ def test_to_a
175
+ matches = @reg.match( "THX1138." )
176
+ assert_equal( ["HX1138", "H", "X", "113", "8" ], matches.to_a )
177
+ end
178
+
179
+ def test_to_s
180
+ matches = @reg.match( "THX1138." )
181
+ assert_equal( "HX1138", matches.to_s )
182
+ end
183
+
184
+ def test_values_at
185
+ matches = @reg.match( "THX1138: The Movie" )
186
+ assert_equal( ["HX1138", "X", "113"], matches.values_at( 0, 2, -2) )
187
+ end
188
+
189
+ def test_match_all
190
+ reg = Oniguruma::ORegexp.new( 'ca' )
191
+ matches = reg.match_all( 'ocatacachaca' )
192
+ assert_equal( 3, matches.size )
193
+ assert_equal( 7, matches.position(2) )
194
+ assert_equal( "ca", matches.string[matches.begin(1)...matches.end(1)])
195
+ end
196
+
197
+ def test_match_empty_string
198
+ reg = Oniguruma::ORegexp.new( '^\s*?(\n|\r)', :options => Oniguruma::OPTION_MULTILINE )
199
+ matches = reg.match( "\n\n\n\n\n" )
200
+ assert_not_nil( matches )
201
+ assert_equal( "\n\n\n\n", matches.post_match )
202
+ end
203
+
204
+ def test_group_by_name
205
+ reg = Oniguruma::ORegexp.new( '(?<begin>\()(?<body>.*)(?<end>\))', :options => Oniguruma::OPTION_MULTILINE )
206
+ matches = reg.match( "blah (content) blah" )
207
+ assert_not_nil( matches )
208
+ assert_equal( '(', matches[:begin] )
209
+ assert_equal( 'content', matches[:body] )
210
+ assert_equal( ')', matches[:end] )
211
+ assert_equal( nil, matches[:inexistent])
212
+ end
213
+
214
+ end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: oniguruma
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.9.0
7
+ date: 2007-03-22 00:00:00 +01:00
8
+ summary: Bindings for the oniguruma regular expression library
9
+ require_paths:
10
+ - lib
11
+ - ext
12
+ email: dix_ans@yahoo.com
13
+ homepage: http://oniguruma.rubyforge.org
14
+ rubyforge_project: oniguruma
15
+ description: Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regular expression library (no need to recompile Ruby).
16
+ autorequire:
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ - - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ signing_key:
28
+ cert_chain:
29
+ post_install_message:
30
+ authors:
31
+ - Dizan Vasquez
32
+ files:
33
+ - History.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ - Rakefile
37
+ - lib/oniguruma.rb
38
+ - ext/oregexp.c
39
+ - test/test_oniguruma.rb
40
+ test_files:
41
+ - test/test_oniguruma.rb
42
+ rdoc_options: []
43
+
44
+ extra_rdoc_files: []
45
+
46
+ executables: []
47
+
48
+ extensions:
49
+ - ext/extconf.rb
50
+ requirements: []
51
+
52
+ dependencies: []
53
+