tre-ruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "bundler", "~> 1.0.0"
10
+ gem "jeweler", "~> 1.5.2"
11
+ gem "rcov", ">= 0"
12
+ gem "rake-compiler", ">= 0.7.7"
13
+ end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Junegunn Choi
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,159 @@
1
+ = tre-ruby
2
+
3
+ tre-ruby is a Ruby binding for the TRE library, which is "a lightweight, robust, and
4
+ efficient POSIX compliant regexp matching library with some exciting features
5
+ such as approximate (fuzzy) matching." Since Ruby has builtin regexp support,
6
+ this gem will only provide the interface for approximate matching, which is
7
+ missing in Ruby.
8
+
9
+ == Prerequisite
10
+
11
+ * TRE library (http://laurikari.net/tre/)
12
+ * Download http://laurikari.net/tre/download/
13
+
14
+ == Installation
15
+
16
+ gem install tre-ruby
17
+
18
+ == Setting up
19
+ TRE is an extension module for String. You can extend a String object with it, or include it into the String class.
20
+
21
+ require 'tre-ruby'
22
+
23
+ # A string object extended to have approximate matching features
24
+ "Clamshell".extend(TRE).ascan /shll/, TRE.fuzziness(2)
25
+
26
+ # Or you can patch the String class so that every String object can have the extended features.
27
+ # (if you think it's appropriate)
28
+ class String
29
+ include TRE
30
+ end
31
+
32
+ "Clamshell".ascan /shll/, TRE.fuzziness(2)
33
+
34
+ == Approximate matching
35
+ TRE module provides the following instance methods.
36
+
37
+ === Methods
38
+
39
+ TRE#axxx methods behave similarly to their String#xxx counterparts,
40
+ except that you cannot make use of virtual global variables ($&, $`, $', ...)
41
+
42
+ * aindex
43
+ * Returns the Range (n...m) of the first match
44
+ * afind
45
+ * Returns the first matching substring
46
+ * ascan
47
+ * Approximate scan. Similar to String#scan.
48
+ * Just like String#scan, TRE#ascan returns Array of Strings or Array of Array of Strings when the given Regexp pattern contains captures.
49
+ * ascan_r
50
+ * Same as ascan, but returns Range objects instead of Strings
51
+ * asub
52
+ * Substitute the first match.
53
+ * agsub
54
+ * Substitute every match.
55
+
56
+ === TRE::AParams
57
+
58
+ Every `a-method' of TRE takes a TRE::AParams object as its last parameter.
59
+ TRE::AParams controls the approximate matching.
60
+
61
+ params = TRE::AParams.new
62
+ params.max_err = 3
63
+
64
+ str.extend(TRE).ascan /abracadabra/, params
65
+
66
+ There is a shortcut class method TRE.fuzziness(n) which is good enough for most cases. It returns a frozen TRE::AParams object with max_err of n.
67
+
68
+ str.extend(TRE).ascan /abracadabra/, TRE.fuzziness(3)
69
+
70
+ == Examples
71
+
72
+ === TRE#aindex, TRE#afind
73
+ You can locate the pattern (String or Regexp) in the string with aindex and afind.
74
+ When the pattern is not found, nil is returned.
75
+
76
+ str = <<-EOF
77
+ She sells sea shells by the sea shore.
78
+ The shells she sells are surely seashells.
79
+ So if she sells shells on the seashore,
80
+ I'm sure she sells seashore shells.
81
+ EOF
82
+
83
+ # Returns the first matching range
84
+ # - TRE.fuzziness(n) returns frozen TRE::AParams object with max_err of n
85
+ str.aindex 'shll', 0, TRE.fuzziness(1)
86
+ # (4...8)
87
+
88
+ # Returns the first matching substring
89
+ str.afind 'shll', 0, TRE.fuzziness(1)
90
+ # "sell"
91
+
92
+ # afind from offset 10
93
+ str.afind 'shll', 10, TRE.fuzziness(1)
94
+ # "shell"
95
+
96
+ # Same for Regexp patterns
97
+ str.aindex /s[hx]ll/, 0, TRE.fuzziness(1)
98
+ # (4...8)
99
+
100
+ === TRE#ascan
101
+ When the pattern is not found, an empty Array is returned.
102
+
103
+ # Scan
104
+ str.ascan /SSELL/i, TRE.fuzziness(2)
105
+ # [" sell", "shell", "shell", " sell", "shell", " sell", "shell", " sell"]
106
+
107
+ str.ascan /(SS)(E)LL/i, TRE.fuzziness(2)
108
+ # [[" sell", " s", "e"], ["shell", "sh", "e"], ["shell", "sh", "e"],
109
+ # [" sell", " s", "e"], ["shell", "sh", "e"], [" sell", " s", "e"],
110
+ # ["shell", "sh", "e"], [" sell", " s", "e"]]
111
+
112
+ # Scan with block
113
+ str.ascan /SSELL/i, TRE.fuzziness(2) do | match_string |
114
+ puts match_string
115
+ end
116
+
117
+ str.ascan /(SS)(E)LL/i, TRE.fuzziness(2) do | first, second |
118
+ puts "#{first} => #{second}"
119
+ end
120
+
121
+ === TRE#asub, TRE#agsub
122
+
123
+ Substitutions.
124
+
125
+ str.asub 'shll', '______', TRE.fuzziness(2)
126
+
127
+ # Blocks are not supported but you can use back references
128
+ str.asub /(SS)(E)LL/i, "___(\\2 / \\1)__", TRE.fuzziness(2)
129
+ str.agsub /(SS)(E)LL/i, "___(\\2 / \\1)__", TRE.fuzziness(2)
130
+
131
+ === Fine-grained control of approximate matching parameters
132
+
133
+ aparams = TRE::AParams.new { |ap|
134
+ ap.cost_ins = 1
135
+ ap.cost_del = 1
136
+ ap.cost_subst = 2
137
+
138
+ ap.max_ins = 10
139
+ ap.max_del = 20
140
+ ap.max_subst = 15
141
+ ap.max_err = 30
142
+ ap.max_cost = 50
143
+ }
144
+ str.ascan(/sea shells/, aparams)
145
+
146
+ == Contributing to tre-ruby
147
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
148
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
149
+ * Fork the project
150
+ * Start a feature/bugfix branch
151
+ * Commit and push until you are happy with your contribution
152
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
153
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
154
+
155
+ == Copyright
156
+
157
+ Copyright (c) 2011 Junegunn Choi. See LICENSE.txt for
158
+ further details.
159
+
@@ -0,0 +1,66 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ jeweler_tasks = Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "tre-ruby"
16
+ gem.homepage = "http://github.com/junegunn/tre-ruby"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{Approximate regular expression matching using TRE}
19
+ gem.description = %Q{Ruby binding for TRE library. Provides interface for approximate regular expression matching.}
20
+ gem.email = "junegunn.c@gmail.com"
21
+ gem.authors = ["Junegunn Choi"]
22
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
23
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
+ # gem.add_runtime_dependency 'jabber4r', '> 0.1'
25
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
26
+
27
+ # For extensions
28
+ # http://karottenreibe.github.com/2009/10/25/jeweler-interlude/
29
+ gem.extensions = FileList['ext/**/extconf.rb']
30
+ gem.files.include 'ext/**/*.c'
31
+ end
32
+ Jeweler::RubygemsDotOrgTasks.new
33
+
34
+ # For rake-compiler
35
+ # http://karottenreibe.github.com/2009/10/25/jeweler-interlude/
36
+ require 'rake/extensiontask'
37
+ #jeweler_tasks.gemspec.version = jeweler_tasks.jeweler.version
38
+ Rake::ExtensionTask.new('tre', jeweler_tasks.gemspec) do |ext|
39
+ ext.lib_dir = 'lib/tre-ruby'
40
+ end
41
+
42
+ require 'rake/testtask'
43
+ Rake::TestTask.new(:test) do |test|
44
+ test.libs << 'lib' << 'test'
45
+ test.pattern = 'test/**/test_*.rb'
46
+ test.verbose = true
47
+ end
48
+
49
+ require 'rcov/rcovtask'
50
+ Rcov::RcovTask.new do |test|
51
+ test.libs << 'test'
52
+ test.pattern = 'test/**/test_*.rb'
53
+ test.verbose = true
54
+ end
55
+
56
+ task :default => :test
57
+
58
# RDoc task.
# 'rake/rdoctask' was deprecated and later removed from Rake; the task now
# ships with RDoc itself. Prefer the RDoc-provided task and fall back to the
# legacy Rake one only on old installations that lack 'rdoc/task'.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # VERSION ends with a newline; strip it so it does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "tre-ruby #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,39 @@
1
+ require 'mkmf'
2
+
3
# Aborts the extension build, reporting that the named dependency is missing.
# @param [String] d Description of the missing item
def x(d)
  message = "** #{d} is missing."
  abort message
end
6
+
7
# Aborts the extension build with instructions on how to obtain and install
# the TRE library.
def xtre
  instructions = <<-MISSING
**
**
** TRE library is missing !!
** *************************
**
** You can download the source or the binaries from http://laurikari.net/tre/
**
** = e.g. To install TRE from the source code
**
** tar -xvjf tre-0.8.0.tar.bz2
** cd tre-0.8.0
** ./configure
** make
** sudo make install
**
**
  MISSING
  abort instructions
end
27
+
28
+ # TRE
29
+ xtre unless have_library('tre')
30
+ xtre unless have_header('tre/tre.h')
31
+
32
+ # Multi-byte support
33
+ # TRE_WCHAR
34
+ %w[TRE_MULTIBYTE TRE_APPROX].each do | macro |
35
+ x "Macro #{macro}" unless have_macro macro, 'tre/tre.h'
36
+ end
37
+
38
+ create_makefile('tre-ruby/tre')
39
+
@@ -0,0 +1,194 @@
1
+ #include "ruby.h"
2
+ #include "tre/tre.h"
3
+
4
+ VALUE mTRE;
5
+
6
+ #define _TRE_RUBY_APARAM_OVERRIDE(P) \
7
+ VALUE P = rb_funcall(params, rb_intern(#P), 0); \
8
+ if (P != Qnil) aparams->P = NUM2INT(P)
9
+ static void
10
+ tre_build_aparams(regaparams_t* aparams, VALUE params) {
11
+ // Sets to default
12
+ tre_regaparams_default(aparams);
13
+
14
+ // Then override
15
+ _TRE_RUBY_APARAM_OVERRIDE(cost_ins);
16
+ _TRE_RUBY_APARAM_OVERRIDE(cost_del);
17
+ _TRE_RUBY_APARAM_OVERRIDE(cost_subst);
18
+ _TRE_RUBY_APARAM_OVERRIDE(max_cost);
19
+ _TRE_RUBY_APARAM_OVERRIDE(max_ins);
20
+ _TRE_RUBY_APARAM_OVERRIDE(max_del);
21
+ _TRE_RUBY_APARAM_OVERRIDE(max_subst);
22
+ _TRE_RUBY_APARAM_OVERRIDE(max_err);
23
+ }
24
+ #undef _TRE_RUBY_APARAM_OVERRIDE
25
+
26
+ static void
27
+ tre_compile_regex(regex_t* preg, VALUE pattern, VALUE ignore_case, VALUE multi_line) {
28
+ Check_Type(pattern, T_STRING);
29
+
30
+ int cflags = REG_EXTENDED;
31
+ if (ignore_case == Qtrue) cflags = cflags | REG_ICASE;
32
+ if (multi_line == Qfalse) cflags = cflags | REG_NEWLINE;
33
+
34
+ reg_errcode_t result = tre_regncomp(preg, StringValuePtr(pattern), RSTRING_LEN(pattern), cflags);
35
+
36
+ if (result == REG_OK) return;
37
+
38
+ switch (result) {
39
+ case REG_NOMATCH:
40
+ rb_raise(rb_eRuntimeError, "No match.");
41
+ break;
42
+ case REG_BADPAT:
43
+ rb_raise(rb_eRuntimeError, "Invalid regexp.");
44
+ break;
45
+ case REG_ECOLLATE:
46
+ rb_raise(rb_eRuntimeError, "Unknown collating element.");
47
+ break;
48
+ case REG_ECTYPE:
49
+ rb_raise(rb_eRuntimeError, "Unknown character class name.");
50
+ break;
51
+ case REG_EESCAPE:
52
+ rb_raise(rb_eRuntimeError, "Trailing backslash.");
53
+ break;
54
+ case REG_ESUBREG:
55
+ rb_raise(rb_eRuntimeError, "Invalid back reference.");
56
+ break;
57
+ case REG_EBRACK:
58
+ rb_raise(rb_eRuntimeError, "\"[]\" imbalance");
59
+ break;
60
+ case REG_EPAREN:
61
+ rb_raise(rb_eRuntimeError, "\"\\(\\)\" or \"()\" imbalance");
62
+ break;
63
+ case REG_EBRACE:
64
+ rb_raise(rb_eRuntimeError, "\"\\{\\}\" or \"{}\" imbalance");
65
+ break;
66
+ case REG_BADBR:
67
+ rb_raise(rb_eRuntimeError, "Invalid content of {}");
68
+ break;
69
+ case REG_ERANGE:
70
+ rb_raise(rb_eRuntimeError, "Invalid use of range operator");
71
+ break;
72
+ case REG_ESPACE:
73
+ rb_raise(rb_eRuntimeError, "Out of memory.");
74
+ break;
75
+ case REG_BADRPT:
76
+ rb_raise(rb_eRuntimeError, "Invalid use of repetition operators.");
77
+ break;
78
+ default:
79
+ rb_raise(rb_eRuntimeError, "Unknown Error");
80
+ break;
81
+ }
82
+ }
83
+
84
+ static VALUE
85
+ tre_traverse(VALUE pattern, VALUE string, long char_offset, VALUE params,
86
+ VALUE ignore_case, VALUE multi_line, int num_captures, VALUE repeat) {
87
+
88
+ // Compile once
89
+ regex_t preg;
90
+ tre_compile_regex(&preg, pattern, ignore_case, multi_line);
91
+
92
+ // Build regaparams
93
+ regaparams_t aparams;
94
+ tre_build_aparams(&aparams, params);
95
+
96
+ // Match data
97
+ regamatch_t match;
98
+ regmatch_t pmatch[num_captures + 1];
99
+ // memset(&match, 0, sizeof(match));
100
+ match.nmatch = num_captures + 1;
101
+ match.pmatch = pmatch;
102
+
103
+ // Scan
104
+ VALUE arr = rb_ary_new();
105
+ long char_offset_acc = char_offset;
106
+ // rb_global_variable(&arr);
107
+
108
+ while (1) {
109
+ // Get substring to start with
110
+ long len = RSTRING_LEN(string) - char_offset;
111
+ if (char_offset >= len) break;
112
+ string = rb_str_substr(string, char_offset, len);
113
+
114
+ int result = tre_reganexec(&preg, StringValuePtr(string), len, &match, aparams, 0);
115
+
116
+ if (result == REG_NOMATCH) break;
117
+
118
+ // Fill in array with ranges
119
+ VALUE subarr;
120
+ if (match.nmatch == 1)
121
+ subarr = arr; // Fake
122
+ else {
123
+ subarr = rb_ary_new();
124
+ // rb_global_variable(&subarr);
125
+ }
126
+
127
+ unsigned int i;
128
+ for (i = 0; i < match.nmatch; ++i)
129
+ if (match.pmatch[i].rm_so == -1)
130
+ rb_ary_push(subarr, Qnil);
131
+ else {
132
+ VALUE range = rb_range_new(
133
+ LONG2NUM( char_offset_acc + rb_str_sublen(string, match.pmatch[i].rm_so) ),
134
+ LONG2NUM( char_offset_acc + rb_str_sublen(string, match.pmatch[i].rm_eo) ),
135
+ 1);
136
+ // rb_global_variable(&range);
137
+
138
+ rb_ary_push(subarr, range);
139
+ }
140
+ if (match.nmatch > 1) rb_ary_push(arr, subarr);
141
+
142
+ // Stop or proceed
143
+ if (repeat == Qfalse)
144
+ break;
145
+ else {
146
+ char_offset = rb_str_sublen(string, match.pmatch[0].rm_eo);
147
+ if (char_offset == 0) char_offset = 1; // Weird case
148
+ char_offset_acc += char_offset;
149
+ }
150
+ }
151
+
152
+ // Free once
153
+ tre_regfree(&preg);
154
+
155
+ return arr;
156
+ }
157
+
158
+ static VALUE
159
+ tre_aindex(int argc, VALUE *argv, VALUE self) {
160
+ VALUE pattern, string, char_offset, params, ignore_case, multi_line;
161
+ rb_scan_args(argc, argv, "60", &pattern, &string, &char_offset, &params, &ignore_case, &multi_line);
162
+
163
+ Check_Type(string, T_STRING);
164
+
165
+ VALUE rarray = tre_traverse(pattern, string, NUM2LONG(char_offset), params,
166
+ ignore_case, multi_line, 0, Qfalse);
167
+
168
+ if (RARRAY_LEN(rarray) == 0)
169
+ return Qnil;
170
+ else
171
+ return rb_ary_entry(rarray, 0);
172
+ }
173
+
174
+ static VALUE
175
+ tre_ascan(int argc, VALUE *argv, VALUE self) {
176
+ VALUE pattern, string, char_offset, params, ignore_case, multi_line, num_captures, repeat;
177
+ rb_scan_args(argc, argv, "80", &pattern, &string, &char_offset, &params,
178
+ &ignore_case, &multi_line, &num_captures, &repeat);
179
+
180
+ Check_Type(string, T_STRING);
181
+
182
+ VALUE rarray = tre_traverse(pattern, string, NUM2LONG(char_offset), params,
183
+ ignore_case, multi_line, NUM2INT(num_captures), repeat);
184
+
185
+ return rarray;
186
+ }
187
+
188
+ void
189
+ Init_tre() {
190
+ mTRE = rb_define_module("TRE");
191
+ rb_define_private_method(mTRE, "__aindex", tre_aindex, -1);
192
+ rb_define_private_method(mTRE, "__ascan", tre_ascan, -1);
193
+ }
194
+
@@ -0,0 +1,227 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'tre-ruby/tre'
4
+
5
# Approximate (fuzzy) matching methods for String objects.
# Extend a String with this module or include it into a String class.
# The native methods __aindex and __ascan are provided by the C extension.
module TRE
  # Returns a frozen TRE::AParams object with the given fuzziness (max_err).
  # The object is cached, so the same instance is returned for the same value.
  # @param [Integer] max_err
  # @return [TRE::AParams]
  def TRE.fuzziness max_err
    @@fuzzies ||= {}
    return @@fuzzies[max_err] if @@fuzzies.has_key? max_err

    param = TRE::AParams.new
    param.max_err = max_err
    param.freeze
    @@fuzzies[max_err] = param
  end

  # Locates the pattern in the string and returns the Range object for the first match
  # @param [String/Regexp] pattern
  # @param [Integer] offset
  # @param [TRE::AParams] params
  # @return [Range] nil when the pattern is not found
  # @raise [ArgumentError] when offset or params is of the wrong type
  def aindex pattern, offset = 0, params = TRE.fuzziness(0)
    raise ArgumentError.new("Invalid parameter") unless params.is_a? TRE::AParams
    # Integer rather than Fixnum: Fixnum no longer exists in current Ruby
    raise ArgumentError.new("Invalid offset parameter") unless offset.is_a? Integer

    input = parse_pattern pattern
    __aindex(input[:source], self, offset, params,
             input[:ignore_case], input[:multi_line])
  end

  # Locates the pattern in the string and returns the first matching substring.
  # @param [String/Regexp] pattern
  # @param [Integer] offset
  # @param [TRE::AParams] params
  # @return [String] nil when the pattern is not found
  def afind pattern, offset = 0, params = TRE.fuzziness(0)
    range = aindex pattern, offset, params

    range && self[range]
  end

  # Scans for the pattern in the String and returns Array of matching substrings
  # or Array of Array of Strings when the given pattern contains Regexp captures.
  # When a block is given, the matches are yielded and self is returned.
  # @param [String/Regexp] pattern
  # @param [TRE::AParams] params
  # @return [Array]
  def ascan pattern, params = TRE.fuzziness(0), &block
    result = ascan_r(pattern, params).map { |e|
      case e
      when Array
        # Capture slots that did not match are nil; they must not be used
        # as an index. Everything from the first nil on is dropped.
        e.map { |ee| ee && self[ee] }.take_while { |ee| ee }
      when Range
        self[e]
      else
        raise RuntimeError.new
      end
    }
    return result unless block_given?
    yield_scan_result result, &block
  end

  # Same as TRE#ascan, but returns Array of Range objects instead of String objects.
  # @param [String/Regexp] pattern
  # @param [TRE::AParams] params
  # @return [Array]
  def ascan_r pattern, params = TRE.fuzziness(0), &block
    ascan_r_impl pattern, params, true, &block
  end

  # Returns a copy of the String with the first match substituted
  # @param [String/Regexp] pattern
  # @param [String] replacement
  # @param [TRE::AParams] params
  # @return [String]
  def asub pattern, replacement, params = TRE.fuzziness(0), &block
    asub_impl pattern, replacement, params, false, &block
  end

  # Substitutes the first match in place
  # @param [String/Regexp] pattern
  # @param [String] replacement
  # @param [TRE::AParams] params
  # @return [String] self
  def asub! pattern, replacement, params = TRE.fuzziness(0), &block
    self.replace asub(pattern, replacement, params, &block)
  end

  # Returns a copy of the String with every match substituted
  # @param [String/Regexp] pattern
  # @param [String] replacement
  # @param [TRE::AParams] params
  # @return [String]
  def agsub pattern, replacement, params = TRE.fuzziness(0), &block
    asub_impl pattern, replacement, params, true, &block
  end

  # Substitutes every match in place
  # @param [String/Regexp] pattern
  # @param [String] replacement
  # @param [TRE::AParams] params
  # @return [String] self
  def agsub! pattern, replacement, params = TRE.fuzziness(0), &block
    self.replace agsub(pattern, replacement, params, &block)
  end

  # Parameters for approximate matching.
  class AParams
    # Default cost of an inserted character.
    attr_accessor :cost_ins
    # Default cost of a deleted character.
    attr_accessor :cost_del
    # Default cost of a substituted character.
    attr_accessor :cost_subst

    # Maximum allowed cost of a match.
    attr_accessor :max_cost
    # Maximum allowed number of inserts.
    attr_accessor :max_ins
    # Maximum allowed number of deletes.
    attr_accessor :max_del
    # Maximum allowed number of substitutes.
    attr_accessor :max_subst
    # Maximum allowed number of errors total.
    attr_accessor :max_err

    # Creates a AParams object with default values.
    # Yields the new object when a block is given.
    def initialize
      self.cost_ins   = 1
      self.cost_del   = 1
      self.cost_subst = 1
      self.max_cost   = nil
      self.max_ins    = nil
      self.max_del    = nil
      self.max_subst  = nil
      self.max_err    = 0

      yield self if block_given?
    end
  end

private
  # TODO: not implemented yet
  def amatch pattern, offset = 0, params = TRE.fuzziness(0)
    raise NotImplementedError
  end

  # Breaks the pattern down into the arguments expected by the native methods.
  # @param [String/Regexp] pattern
  # @return [Hash] :source, :ignore_case, :multi_line, :num_captures
  # @raise [ArgumentError] when the Regexp carries an unsupported flag
  # @raise [TypeError] when the pattern is neither a String nor a Regexp
  def parse_pattern pattern
    ret = {}
    case pattern
    when Regexp
      ret[:source] = pattern.source

      opts = pattern.options
      ret[:multi_line] = (opts & Regexp::MULTILINE) > 0
      opts &= ~Regexp::MULTILINE
      ret[:ignore_case] = (opts & Regexp::IGNORECASE) > 0
      opts &= ~Regexp::IGNORECASE
      opts &= ~Regexp::FIXEDENCODING # FIXME
      raise ArgumentError.new("Unsupported Regexp flag provided") if opts > 0

      # Pessimistic estimation of the number of captures:
      # escaped parentheses are counted as well, surplus slots come back as nil
      ret[:num_captures] = ret[:source].each_char.count { |c| c == '(' }
    when String
      ret[:source] = Regexp.escape pattern
      ret[:num_captures] = 0
    else
      # Without this the native method fails with an obscure TypeError about nil
      raise TypeError.new("Pattern must be a String or a Regexp")
    end

    ret
  end

  # Shared implementation of the scanning methods.
  # @param [String/Regexp] pattern
  # @param [TRE::AParams] params
  # @param [Boolean] repeat false to stop after the first match
  # @return [Array] Ranges (or Arrays of Ranges with captures); self when a block is given
  def ascan_r_impl pattern, params, repeat, &block
    raise ArgumentError.new("Invalid parameter") unless params.is_a? TRE::AParams

    input = parse_pattern pattern
    result = __ascan(input[:source], self, 0, params,
                     input[:ignore_case], input[:multi_line], input[:num_captures], repeat)

    return result unless block_given?
    yield_scan_result result, &block
  end

  # Shared implementation of asub and agsub.
  # Back references (\1, \2, ...) in the replacement are only expanded
  # when the pattern contains captures.
  # @param [String/Regexp] pattern
  # @param [String] replacement
  # @param [TRE::AParams] params
  # @param [Boolean] repeat true to substitute every match
  # @return [String]
  # @raise [NotImplementedError] when a block or a non-String replacement is given
  def asub_impl pattern, replacement, params, repeat, &block
    raise NotImplementedError.new if block_given?
    raise NotImplementedError.new unless replacement.is_a? String

    ret = self.dup

    # The ranges refer to positions in the original string. Substituting from
    # the last match to the first keeps the positions of the matches that are
    # still to be processed valid even when the replacement has a different length.
    ascan_r_impl(pattern, params, repeat).reverse_each do | ranges |
      # Captures
      if ranges.is_a? Array
        whole, *captures = ranges
        # Single pass with a block: the captured text is inserted literally
        # and is never scanned for back references itself.
        repl = replacement.gsub(/\\(\d+)/) do | reference |
          number = Regexp.last_match(1).to_i
          if number.between?(1, captures.length)
            range = captures[number - 1]
            range ? self[range] : '' # Capture that did not participate
          else
            reference # Not a known group: left untouched
          end
        end
        ret[whole] = repl
      else
        ret[ranges] = replacement
      end
    end

    ret
  end

  # Yields each scan result to the block, mimicking String#scan:
  # with captures only the captures are yielded (as one Array when the block
  # takes exactly one parameter, otherwise as separate arguments).
  # @param [Array] result
  # @return [String] self
  def yield_scan_result result, &block
    return self if result.empty?

    # With captures
    if result.first.is_a?(Array)
      if block.arity == 1
        result.each { |r| yield r[1..-1] }
      else
        result.each { |r| yield(*r[1..-1]) }
      end
    # Without captures
    else
      result.each { |r| yield r }
    end
    self
  end
end
227
+
@@ -0,0 +1,17 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ require 'tre-ruby'
15
+
16
+ class Test::Unit::TestCase
17
+ end
@@ -0,0 +1,210 @@
1
+ # encoding: UTF-8
2
+
3
+ $LOAD_PATH.unshift File.dirname(__FILE__) # FIXME
4
+ require 'helper'
5
+
6
+ class TestTRE < Test::Unit::TestCase
7
+ class TREString < String
8
+ include TRE
9
+ end
10
+
11
+ def test_extend
12
+ assert_equal 2...4, "aaba".extend(TRE).aindex('ba')
13
+ assert_equal [2...4], "aaba".extend(TRE).ascan_r(/ba/)
14
+ assert_equal ["ba"], "aaba".extend(TRE).ascan(/ba/)
15
+ end
16
+
17
+ def test_include
18
+ # String including TRE module would affect the subsequent tests
19
+ # String.send :include, TRE
20
+ # So, we use TREString here.
21
+
22
+ str = TREString.new "aaba"
23
+ assert_equal 2...4, str.aindex('ba')
24
+ assert_equal [2...4], str.ascan_r(/ba/)
25
+ assert_equal ["ba"], str.ascan(/ba/)
26
+ end
27
+
28
+ def test_privacy
29
+ str = TREString.new "aaba"
30
+ assert_raise(NoMethodError) { str.__ascan 1,1,1,1,1,1,1 }
31
+ assert_raise(NoMethodError) { str.__aindex 1,1,1,1,1,1 }
32
+ end
33
+
34
+ def test_aparams
35
+ aparams = TRE::AParams.new
36
+ assert_equal 0, aparams.max_err
37
+ aparams.max_err = 5
38
+ assert_equal 5, aparams.max_err
39
+ end
40
+
41
+ def test_fuzziness
42
+ fuzz1 = TRE.fuzziness 1
43
+ assert_equal TRE::AParams, fuzz1.class
44
+ assert_equal 1, fuzz1.max_err
45
+ assert fuzz1.frozen?
46
+
47
+ fuzz2 = TRE.fuzziness 2
48
+ assert_equal TRE::AParams, fuzz2.class
49
+ assert_equal 2, fuzz2.max_err
50
+ assert fuzz2.frozen?
51
+
52
+ # Same frozen object for same fuzziness
53
+ assert_equal fuzz1.object_id, TRE.fuzziness(1).object_id
54
+ assert_equal fuzz2.object_id, TRE.fuzziness(2).object_id
55
+ assert_not_equal fuzz1.object_id, fuzz2.object_id
56
+ end
57
+
58
+ def test_aindex_string
59
+ params = TRE::AParams.new
60
+
61
+ # String patterns
62
+ params.max_err = 1
63
+ assert_equal 'would', TWISTER[ TWISTER.aindex('tould', 0, params) ]
64
+ assert_equal nil, TWISTER.aindex('toult', 0, params)
65
+
66
+ params.max_err = 2
67
+ assert_equal 'would', TWISTER[ TWISTER.aindex('toult', 0, params) ]
68
+ assert_equal 'could', TWISTER[ TWISTER.aindex('toult', 400, params) ]
69
+ end
70
+
71
+ def test_aindex_regex
72
+ params = TRE.fuzziness 1
73
+
74
+ # Regex patterns
75
+ assert_equal 'would', TWISTER[ TWISTER.aindex /TOULD/i, 0, params ]
76
+ assert_equal nil, TWISTER.aindex(/toult/i, 0, params)
77
+
78
+ # Frozen: cannot modify
79
+ assert_raise(RuntimeError) { params.max_err = 2 }
80
+
81
+ # Warm AParams
82
+ params = TRE::AParams.new
83
+ params.max_err = 2
84
+
85
+ assert_equal nil, TWISTER.aindex(/TOULT/, 0, params)
86
+ assert_equal 'would', TWISTER[ TWISTER.aindex(/TOULT/i, 0, params) ]
87
+ assert_equal 'could', TWISTER[ TWISTER.aindex(/TOULT/i, 400, params) ]
88
+ assert_equal 'could', TWISTER[ TWISTER.aindex(/TOULT/i, 400, params) ]
89
+ assert_equal 'could', TWISTER[ TWISTER.aindex(/T((O)U(L))T/i, 400, params) ]
90
+
91
+ # afind shortcut
92
+ assert_equal 'would', TWISTER.afind(/TOULT/i, 0, params)
93
+ assert_equal 'could', TWISTER.afind(/TOULT/i, 400, params)
94
+ assert_equal 'could', TWISTER.afind(/TOULT/i, 400, params)
95
+ assert_equal 'could', TWISTER.afind(/T((O)U(L))T/i, 400, params)
96
+ end
97
+
98
+ def test_regex_flags
99
+ # Test for case-insensitivity
100
+ str = TREString.new "A\nB"
101
+ assert_equal nil, str.aindex(/b/)
102
+ assert_equal (2...3), str.aindex(/b/i)
103
+
104
+ # Test for multiline
105
+ assert_equal (0...1), str.aindex(/.*/)
106
+ assert_equal (0...3), str.aindex(/.*/m)
107
+
108
+ # Test for multiline and case-insensitivity
109
+ assert_equal (0...1), str.aindex(/a.*?b?/i)
110
+ assert_equal (0...3), str.aindex(/a.*?b?/im)
111
+
112
+ # Test for unsupported x flag
113
+ assert_raise(ArgumentError) { TWISTER.aindex(/a/x) }
114
+ end
115
+
116
+ def test_ascan_r
117
+ result = TWISTER.ascan_r(/peck/, TRE.fuzziness(2))
118
+ assert_equal Array, result.class
119
+ assert_equal Range, result.first.class
120
+
121
+ result = TWISTER.ascan_r(/p(e)ck/, TRE.fuzziness(2))
122
+ assert_equal Array, result.class
123
+ assert_equal Array, result.first.class
124
+ assert_equal Range, result.first.first.class
125
+ assert_equal Range, result.first.last.class
126
+ end
127
+
128
+ def test_ascan
129
+ # Without blocks
130
+ assert_equal 4, TWISTER.ascan(/peck/, TRE.fuzziness(0)).length
131
+ assert_equal 6, TWISTER.ascan(/peck/, TRE.fuzziness(1)).length
132
+ assert_equal 15, TWISTER.ascan(/peck/, TRE.fuzziness(2)).length
133
+ assert_equal 15, TWISTER.ascan(/(p(e)c)k/, TRE.fuzziness(2)).length
134
+
135
+ # Block given
136
+ TWISTER.ascan(/peck/, TRE.fuzziness(2)) do | a |
137
+ assert a.is_a?(String)
138
+ end
139
+ TWISTER.ascan(/((p)e)ck/, TRE.fuzziness(2)) do | a |
140
+ assert a.is_a?(Array)
141
+ assert_equal 2, a.length
142
+ end
143
+ TWISTER.ascan(/((p)e)ck/, TRE.fuzziness(2)) do | a, b |
144
+ assert a.is_a?(String)
145
+ assert b.is_a?(String)
146
+ end
147
+ end
148
+
149
+ def test_asub
150
+ assert_equal 1, TWISTER.asub(/(pe)(ck)/, '\2\2\1\1', TRE.fuzziness(3)).scan('ckckpepe').length
151
+ end
152
+
153
+ def test_asub!
154
+ copy = TWISTER.dup
155
+ rep = TWISTER.asub(/(pe)(ck)/, '\2\2\1\1', TRE.fuzziness(3))
156
+ copy. asub!(/(pe)(ck)/, '\2\2\1\1', TRE.fuzziness(3))
157
+
158
+ assert_equal copy, rep
159
+ assert_not_equal copy, TWISTER
160
+ end
161
+
162
+ def test_agsub
163
+ assert_equal 15, TWISTER.ascan(/(pe)(ck)/, TRE.fuzziness(2)).length
164
+ assert_equal 4, TWISTER.ascan(/(pe)(ck)/, TRE.fuzziness(2)).select { |m| m[0] == 'peck' }.length
165
+ assert_equal 4, TWISTER.agsub(/(pe)(ck)/, '\2\2\1\1', TRE.fuzziness(2)).scan('ckckpepe').length
166
+
167
+ # TODO: More rigorous tests
168
+ end
169
+
170
+ def test_agsub!
171
+ copy = TWISTER.dup
172
+ rep = TWISTER.agsub(/(pe)(ck)/, '\2\2\1\1', TRE.fuzziness(3))
173
+ copy. agsub!(/(pe)(ck)/, '\2\2\1\1', TRE.fuzziness(3))
174
+
175
+ assert_equal copy, rep
176
+ assert_not_equal copy, TWISTER
177
+ end
178
+
179
+ TWISTER = TREString.new <<-EOF
180
+ She sells sea shells by the sea shore.
181
+ The shells she sells are surely seashells.
182
+ So if she sells shells on the seashore,
183
+ I'm sure she sells seashore shells.
184
+
185
+ Peter Piper picked a peck of pickled peppers.
186
+ Did Peter Piper pick a peck of pickled peppers?
187
+ If Peter Piper picked a peck of pickled peppers,
188
+ where's the peck of pickled peppers Peter Piper picked?
189
+
190
+ How much wood would a woodchuck chuck
191
+ if a woodchuck could chuck wood?
192
+ He would chuck, he would, as much as he could,
193
+ and chuck as much wood as a woodchuck would
194
+ if a woodchuck could chuck wood.
195
+
196
+ Betty Botter had some butter,
197
+ "But," she said, "this butter's bitter.
198
+ If I bake this bitter butter,
199
+ it would make my batter bitter.
200
+ But a bit of better butter--
201
+ that would make my batter better."
202
+
203
+ So she bought a bit of butter,
204
+ better than her bitter butter,
205
+ and she baked it in her batter,
206
+ and the batter was not bitter.
207
+ So 'twas better Betty Botter
208
+ bought a bit of better butter.
209
+ EOF
210
+ end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tre-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Junegunn Choi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-04-12 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: &2153038680 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.0.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2153038680
25
+ - !ruby/object:Gem::Dependency
26
+ name: jeweler
27
+ requirement: &2153037520 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.2
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2153037520
36
+ - !ruby/object:Gem::Dependency
37
+ name: rcov
38
+ requirement: &2153036080 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2153036080
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake-compiler
49
+ requirement: &2153024580 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.7.7
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2153024580
58
+ description: Ruby binding for TRE library. Provides interface for approximate regular
59
+ expression matching.
60
+ email: junegunn.c@gmail.com
61
+ executables: []
62
+ extensions:
63
+ - ext/tre/extconf.rb
64
+ extra_rdoc_files:
65
+ - LICENSE.txt
66
+ - README.rdoc
67
+ files:
68
+ - .document
69
+ - Gemfile
70
+ - LICENSE.txt
71
+ - README.rdoc
72
+ - Rakefile
73
+ - VERSION
74
+ - ext/tre/extconf.rb
75
+ - ext/tre/tre.c
76
+ - lib/tre-ruby.rb
77
+ - test/helper.rb
78
+ - test/test_tre-ruby.rb
79
+ homepage: http://github.com/junegunn/tre-ruby
80
+ licenses:
81
+ - MIT
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ segments:
93
+ - 0
94
+ hash: -2920886793274010137
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubyforge_project:
103
+ rubygems_version: 1.7.2
104
+ signing_key:
105
+ specification_version: 3
106
+ summary: Approximate regular expression matching using TRE
107
+ test_files:
108
+ - test/helper.rb
109
+ - test/test_tre-ruby.rb