fuzzy_match 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.rdoc +94 -0
- data/Rakefile +21 -0
- data/THANKS-WILLIAM-JAMES.rb +37 -0
- data/benchmark/before-with-free.txt +283 -0
- data/benchmark/before-without-last-result.txt +257 -0
- data/benchmark/before.txt +304 -0
- data/benchmark/memory.rb +54 -0
- data/examples/bts_aircraft/5-2-A.htm +10305 -0
- data/examples/bts_aircraft/5-2-B.htm +9576 -0
- data/examples/bts_aircraft/5-2-D.htm +7094 -0
- data/examples/bts_aircraft/5-2-E.htm +2349 -0
- data/examples/bts_aircraft/5-2-G.htm +2922 -0
- data/examples/bts_aircraft/blockings.csv +1 -0
- data/examples/bts_aircraft/identities.csv +1 -0
- data/examples/bts_aircraft/negatives.csv +1 -0
- data/examples/bts_aircraft/number_260.csv +334 -0
- data/examples/bts_aircraft/positives.csv +1 -0
- data/examples/bts_aircraft/test_bts_aircraft.rb +118 -0
- data/examples/bts_aircraft/tighteners.csv +1 -0
- data/examples/first_name_matching.rb +15 -0
- data/examples/icao-bts.xls +0 -0
- data/fuzzy_match.gemspec +32 -0
- data/lib/fuzzy_match/blocking.rb +36 -0
- data/lib/fuzzy_match/cached_result.rb +74 -0
- data/lib/fuzzy_match/identity.rb +23 -0
- data/lib/fuzzy_match/result.rb +17 -0
- data/lib/fuzzy_match/score.rb +125 -0
- data/lib/fuzzy_match/similarity.rb +53 -0
- data/lib/fuzzy_match/stop_word.rb +19 -0
- data/lib/fuzzy_match/tightener.rb +28 -0
- data/lib/fuzzy_match/version.rb +3 -0
- data/lib/fuzzy_match/wrapper.rb +67 -0
- data/lib/fuzzy_match.rb +252 -0
- data/test/helper.rb +12 -0
- data/test/test_blocking.rb +23 -0
- data/test/test_cache.rb +130 -0
- data/test/test_fuzzy_match.rb +190 -0
- data/test/test_fuzzy_match_convoluted.rb.disabled +268 -0
- data/test/test_identity.rb +33 -0
- data/test/test_tightening.rb +10 -0
- metadata +197 -0
data/.document
ADDED
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2011 Brighter Planet, Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
= fuzzy_match
|
2
|
+
|
3
|
+
Find a needle in a haystack based on string similarity (using the Pair Distance algorithm and Levenshtein distance) and regular expressions.
|
4
|
+
|
5
|
+
Replaces {loose_tight_dictionary}[https://github.com/seamusabshere/loose_tight_dictionary] because that was a confusing name.
|
6
|
+
|
7
|
+
== Quickstart
|
8
|
+
|
9
|
+
>> require 'fuzzy_match'
|
10
|
+
=> true
|
11
|
+
>> FuzzyMatch.new(%w{seamus andy ben}).find('Shamus')
|
12
|
+
=> "seamus"
|
13
|
+
|
14
|
+
== String similarity matching
|
15
|
+
|
16
|
+
Uses {Dice's Coefficient}[http://en.wikipedia.org/wiki/Dice's_coefficient] algorithm (aka Pair Distance).
|
17
|
+
|
18
|
+
If that judges two strings to be equally similar to a third string, then Levenshtein distance is used. For example, pair distance considers "RATZ" and "CATZ" to be equally similar to "RITZ" so we invoke Levenshtein.
|
19
|
+
|
20
|
+
>> require 'amatch'
|
21
|
+
=> true
|
22
|
+
>> 'RITZ'.pair_distance_similar 'RATZ'
|
23
|
+
=> 0.3333333333333333
|
24
|
+
>> 'RITZ'.pair_distance_similar 'CATZ' # <-- pair distance can't tell the difference, so we fall back to levenshtein...
|
25
|
+
=> 0.3333333333333333
|
26
|
+
>> 'RITZ'.levenshtein_similar 'RATZ'
|
27
|
+
=> 0.75
|
28
|
+
>> 'RITZ'.levenshtein_similar 'CATZ' # <-- which properly shows that RATZ should win
|
29
|
+
=> 0.5
|
30
|
+
|
31
|
+
== Production use
|
32
|
+
|
33
|
+
Used in production for over 2 years in {Brighter Planet's environmental impact API}[http://impact.brighterplanet.com] and {reference data service}[http://data.brighterplanet.com].
|
34
|
+
|
35
|
+
== Haystacks and how to read them
|
36
|
+
|
37
|
+
The (admittedly imperfect) metaphor is "look for a needle in a haystack"
|
38
|
+
|
39
|
+
* needle - the search term
|
40
|
+
* haystack - the records you are searching (<b>your result will be an object from here</b>)
|
41
|
+
|
42
|
+
So, what if your needle is a string like <tt>youruguay</tt> and your haystack is full of <tt>Country</tt> objects like <tt><Country name:"Uruguay"></tt>?
|
43
|
+
|
44
|
+
>> FuzzyMatch.new(countries, :read => :name).find('youruguay')
|
45
|
+
=> <Country name:"Uruguay">
|
46
|
+
|
47
|
+
== Regular expressions
|
48
|
+
|
49
|
+
You can improve the default matchings with regular expressions.
|
50
|
+
|
51
|
+
* Emphasize important words using <b>blockings</b> and <b>tighteners</b>
|
52
|
+
* Filter out stop words with <b>tighteners</b>
|
53
|
+
* Prevent impossible matches with <b>blockings</b> and <b>identities</b>
|
54
|
+
* Ignore words with <b>stop words</b>
|
55
|
+
|
56
|
+
=== Blockings
|
57
|
+
|
58
|
+
Setting a blocking of <tt>/Airbus/</tt> ensures that strings containing "Airbus" will only be scored against other strings containing "Airbus". A better blocking in this case would probably be <tt>/airbus/i</tt>.
|
59
|
+
|
60
|
+
=== Tighteners
|
61
|
+
|
62
|
+
Adding a tightener like <tt>/(boeing).*(7\d\d)/i</tt> will cause "BOEING COMPANY 747" and "boeing747" to be scored as if they were "BOEING 747" and "boeing 747", respectively. See also "Case sensitivity" below.
|
63
|
+
|
64
|
+
=== Identities
|
65
|
+
|
66
|
+
Adding an identity like <tt>/(F)\-?(\d50)/</tt> ensures that "Ford F-150" and "Ford F-250" never match.
|
67
|
+
|
68
|
+
=== Stop words
|
69
|
+
|
70
|
+
Adding a stop word like <tt>THE</tt> ensures that it is not taken into account when comparing "THE CAT", "THE DAT", and "THE CATT"
|
71
|
+
|
72
|
+
== Case sensitivity
|
73
|
+
|
74
|
+
Scoring is case-insensitive. Everything is downcased before scoring. This is a change from previous versions. Your regexps may still be case-sensitive, though.
|
75
|
+
|
76
|
+
== Examples
|
77
|
+
|
78
|
+
Check out the tests.
|
79
|
+
|
80
|
+
== Speed (and who to thank for the algorithms)
|
81
|
+
|
82
|
+
If you add the amatch[http://flori.github.com/amatch/] gem to your Gemfile, it will use that, which is much faster (but {segfaults have been seen in the wild}[https://github.com/flori/amatch/issues/3]). Thanks {Flori}[https://github.com/flori]!
|
83
|
+
|
84
|
+
Otherwise, pure ruby versions of the string similarity algorithms derived from the {answer to a StackOverflow question}[http://stackoverflow.com/questions/653157/a-better-similarity-ranking-algorithm-for-variable-length-strings] and {the text gem}[https://github.com/threedaymonk/text/blob/master/lib/text/levenshtein.rb] are used. Thanks {marzagao}[http://stackoverflow.com/users/10997/marzagao] and {threedaymonk}[https://github.com/threedaymonk]!
|
85
|
+
|
86
|
+
== Authors
|
87
|
+
|
88
|
+
* Seamus Abshere <seamus@abshere.net>
|
89
|
+
* Ian Hough <ijhough@gmail.com>
|
90
|
+
* Andy Rossmeissl <andy@rossmeissl.net>
|
91
|
+
|
92
|
+
== Copyright
|
93
|
+
|
94
|
+
Copyright 2011 Brighter Planet, Inc.
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
require 'rake/testtask'
|
5
|
+
Rake::TestTask.new(:test) do |test|
|
6
|
+
test.libs << 'lib' << 'test'
|
7
|
+
test.pattern = 'test/**/test_*.rb'
|
8
|
+
test.verbose = true
|
9
|
+
end
|
10
|
+
|
11
|
+
task :default => :test
|
12
|
+
|
13
|
+
require 'rake/rdoctask'
|
14
|
+
Rake::RDocTask.new do |rdoc|
|
15
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
16
|
+
|
17
|
+
rdoc.rdoc_dir = 'rdoc'
|
18
|
+
rdoc.title = "fuzzy_match #{version}"
|
19
|
+
rdoc.rdoc_files.include('README*')
|
20
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
21
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Thanks William James!
|
4
|
+
# http://www.ruby-forum.com/topic/95519#200484
|
5
|
+
def cart_prod(*args)
|
6
|
+
args.inject([[]]){|old,lst|
|
7
|
+
new = []
|
8
|
+
lst.each{|e| new += old.map{|c| c.dup << e }}
|
9
|
+
new
|
10
|
+
}
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'benchmark'
|
14
|
+
|
15
|
+
a = [1,2,3]
|
16
|
+
b = [4,5]
|
17
|
+
Benchmark.bmbm do |x|
|
18
|
+
x.report("native") do
|
19
|
+
500_000.times { a.product(b) }
|
20
|
+
end
|
21
|
+
x.report("william-james") do |x|
|
22
|
+
500_000.times { cart_prod(a, b) }
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# results:
|
27
|
+
# $ ruby foo.rb
|
28
|
+
# Rehearsal -------------------------------------------------
|
29
|
+
# native 0.720000 0.000000 0.720000 ( 0.729319)
|
30
|
+
# william-james 3.620000 0.010000 3.630000 ( 3.629198)
|
31
|
+
# ---------------------------------------- total: 4.350000sec
|
32
|
+
#
|
33
|
+
# user system total real
|
34
|
+
# native 0.710000 0.000000 0.710000 ( 0.708620)
|
35
|
+
# william-james 3.800000 0.000000 3.800000 ( 3.792538)
|
36
|
+
|
37
|
+
# thanks for all the fish!
|
@@ -0,0 +1,283 @@
|
|
1
|
+
1962 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/activesupport-3.0.5/lib/active_support/core_ext/object/blank.rb:68:String
|
2
|
+
1957 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/fastercsv-1.5.4/lib/faster_csv.rb:1632:String
|
3
|
+
342 ./benchmark/../lib/fuzzy_match/wrapper.rb:29:String
|
4
|
+
326 ./benchmark/../lib/fuzzy_match/wrapper.rb:29:Array
|
5
|
+
325 benchmark/memory.rb:21:String
|
6
|
+
325 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table/hasher.rb:20:String
|
7
|
+
325 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table/format/delimited.rb:22:ActiveSupport::OrderedHash
|
8
|
+
325 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table.rb:65:String
|
9
|
+
325 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/activesupport-3.0.5/lib/active_support/ordered_hash.rb:39:Array
|
10
|
+
325 ./benchmark/../lib/fuzzy_match/wrapper.rb:25:FuzzyMatch::Similarity
|
11
|
+
325 ./benchmark/../lib/fuzzy_match/similarity.rb:57:Array
|
12
|
+
325 ./benchmark/../lib/fuzzy_match/similarity.rb:25:FuzzyMatch::Score
|
13
|
+
325 ./benchmark/../lib/fuzzy_match/score.rb:13:Float
|
14
|
+
325 ./benchmark/../lib/fuzzy_match.rb:35:FuzzyMatch::Wrapper
|
15
|
+
320 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table/format/delimited.rb:28:String
|
16
|
+
303 ./benchmark/../lib/fuzzy_match/similarity.rb:21:Float
|
17
|
+
201 ./benchmark/../lib/fuzzy_match/tightener.rb:20:String
|
18
|
+
184 ./benchmark/../lib/fuzzy_match/tightener.rb:14:String
|
19
|
+
140 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch.bundle:0:__node__
|
20
|
+
41 ./benchmark/../lib/fuzzy_match/similarity.rb:49:__node__
|
21
|
+
31 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:27:Regexp
|
22
|
+
28 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:19:__node__
|
23
|
+
22 ./benchmark/../lib/fuzzy_match/similarity.rb:57:__node__
|
24
|
+
22 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:20:__node__
|
25
|
+
21 ./benchmark/../lib/fuzzy_match.rb:199:FuzzyMatch::Blocking
|
26
|
+
17 ./benchmark/../lib/fuzzy_match/similarity.rb:21:__node__
|
27
|
+
16 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch.bundle:0:Class
|
28
|
+
14 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:4:__node__
|
29
|
+
14 ./benchmark/../lib/fuzzy_match/similarity.rb:37:__node__
|
30
|
+
13 ./benchmark/../lib/fuzzy_match/wrapper.rb:15:__node__
|
31
|
+
13 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:27:__node__
|
32
|
+
12 ./benchmark/../lib/fuzzy_match/wrapper.rb:29:__node__
|
33
|
+
12 ./benchmark/../lib/fuzzy_match/wrapper.rb:19:__node__
|
34
|
+
11 ./benchmark/../lib/fuzzy_match/identity.rb:18:__node__
|
35
|
+
11 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:26:__node__
|
36
|
+
11 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:25:__node__
|
37
|
+
11 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:24:__node__
|
38
|
+
10 ./benchmark/../lib/fuzzy_match/similarity.rb:55:__node__
|
39
|
+
10 ./benchmark/../lib/fuzzy_match/similarity.rb:39:__node__
|
40
|
+
10 ./benchmark/../lib/fuzzy_match/similarity.rb:25:__node__
|
41
|
+
10 ./benchmark/../lib/fuzzy_match.rb:193:FuzzyMatch::Identity
|
42
|
+
9 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch.bundle:0:String
|
43
|
+
9 ./benchmark/../lib/fuzzy_match/wrapper.rb:10:__node__
|
44
|
+
9 ./benchmark/../lib/fuzzy_match/similarity.rb:49:String
|
45
|
+
9 ./benchmark/../lib/fuzzy_match/similarity.rb:42:__node__
|
46
|
+
9 ./benchmark/../lib/fuzzy_match/similarity.rb:41:__node__
|
47
|
+
8 ./benchmark/../lib/fuzzy_match/wrapper.rb:31:__node__
|
48
|
+
8 ./benchmark/../lib/fuzzy_match/tightener.rb:27:__node__
|
49
|
+
8 ./benchmark/../lib/fuzzy_match/tightener.rb:14:__node__
|
50
|
+
8 ./benchmark/../lib/fuzzy_match/similarity.rb:38:__node__
|
51
|
+
8 ./benchmark/../lib/fuzzy_match/score.rb:13:__node__
|
52
|
+
8 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:23:__node__
|
53
|
+
8 ./benchmark/../lib/fuzzy_match/blocking.rb:24:__node__
|
54
|
+
7 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:7:__node__
|
55
|
+
7 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:6:__node__
|
56
|
+
7 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:5:__node__
|
57
|
+
7 ./benchmark/../lib/fuzzy_match/wrapper.rb:9:__node__
|
58
|
+
7 ./benchmark/../lib/fuzzy_match/wrapper.rb:25:__node__
|
59
|
+
7 ./benchmark/../lib/fuzzy_match/similarity.rb:45:__node__
|
60
|
+
7 ./benchmark/../lib/fuzzy_match/score.rb:17:__node__
|
61
|
+
7 ./benchmark/../lib/fuzzy_match/identity.rb:19:__node__
|
62
|
+
7 ./benchmark/../lib/fuzzy_match/blocking.rb:27:__node__
|
63
|
+
7 ./benchmark/../lib/fuzzy_match.rb:209:String
|
64
|
+
6 ./benchmark/../lib/fuzzy_match/wrapper.rb:8:__node__
|
65
|
+
6 ./benchmark/../lib/fuzzy_match/similarity.rb:44:__node__
|
66
|
+
6 ./benchmark/../lib/fuzzy_match/similarity.rb:15:__node__
|
67
|
+
6 ./benchmark/../lib/fuzzy_match/similarity.rb:13:__node__
|
68
|
+
6 ./benchmark/../lib/fuzzy_match/score.rb:25:__node__
|
69
|
+
6 ./benchmark/../lib/fuzzy_match/score.rb:21:__node__
|
70
|
+
6 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:10:__node__
|
71
|
+
6 ./benchmark/../lib/fuzzy_match/blocking.rb:22:__node__
|
72
|
+
5 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/fastercsv-1.5.4/lib/faster_csv.rb:1640:String
|
73
|
+
5 ./benchmark/../lib/fuzzy_match/wrapper.rb:34:__node__
|
74
|
+
5 ./benchmark/../lib/fuzzy_match/tightener.rb:9:__node__
|
75
|
+
5 ./benchmark/../lib/fuzzy_match/tightener.rb:19:__node__
|
76
|
+
5 ./benchmark/../lib/fuzzy_match/similarity.rb:8:__node__
|
77
|
+
5 ./benchmark/../lib/fuzzy_match/similarity.rb:33:__node__
|
78
|
+
5 ./benchmark/../lib/fuzzy_match/similarity.rb:29:__node__
|
79
|
+
5 ./benchmark/../lib/fuzzy_match/similarity.rb:12:__node__
|
80
|
+
5 ./benchmark/../lib/fuzzy_match/score.rb:9:__node__
|
81
|
+
5 ./benchmark/../lib/fuzzy_match/result.rb:16:__node__
|
82
|
+
5 ./benchmark/../lib/fuzzy_match/identity.rb:10:__node__
|
83
|
+
5 ./benchmark/../lib/fuzzy_match/blocking.rb:26:__node__
|
84
|
+
5 ./benchmark/../lib/fuzzy_match/blocking.rb:25:__node__
|
85
|
+
5 ./benchmark/../lib/fuzzy_match/blocking.rb:15:__node__
|
86
|
+
4 ./benchmark/../lib/fuzzy_match/wrapper.rb:33:__node__
|
87
|
+
4 ./benchmark/../lib/fuzzy_match/wrapper.rb:30:__node__
|
88
|
+
4 ./benchmark/../lib/fuzzy_match/tightener.rb:20:__node__
|
89
|
+
4 ./benchmark/../lib/fuzzy_match/similarity.rb:59:__node__
|
90
|
+
4 ./benchmark/../lib/fuzzy_match/similarity.rb:54:__node__
|
91
|
+
4 ./benchmark/../lib/fuzzy_match/score.rb:5:__node__
|
92
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:9:__node__
|
93
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:8:__node__
|
94
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:7:__node__
|
95
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:6:__node__
|
96
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:5:__node__
|
97
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:4:__node__
|
98
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:3:__node__
|
99
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:13:__node__
|
100
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:12:__node__
|
101
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:11:__node__
|
102
|
+
4 ./benchmark/../lib/fuzzy_match/result.rb:10:__node__
|
103
|
+
4 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:8:__node__
|
104
|
+
4 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:22:__node__
|
105
|
+
4 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:21:__node__
|
106
|
+
4 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:20:String
|
107
|
+
4 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:11:__node__
|
108
|
+
3 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:8:__node__
|
109
|
+
3 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:3:__node__
|
110
|
+
3 ./benchmark/../lib/fuzzy_match/wrapper.rb:28:__node__
|
111
|
+
3 ./benchmark/../lib/fuzzy_match/wrapper.rb:24:__node__
|
112
|
+
3 ./benchmark/../lib/fuzzy_match/wrapper.rb:18:__node__
|
113
|
+
3 ./benchmark/../lib/fuzzy_match/wrapper.rb:15:String
|
114
|
+
3 ./benchmark/../lib/fuzzy_match/wrapper.rb:14:__node__
|
115
|
+
3 ./benchmark/../lib/fuzzy_match/tightener.rb:8:__node__
|
116
|
+
3 ./benchmark/../lib/fuzzy_match/tightener.rb:26:__node__
|
117
|
+
3 ./benchmark/../lib/fuzzy_match/tightener.rb:18:__node__
|
118
|
+
3 ./benchmark/../lib/fuzzy_match/tightener.rb:13:__node__
|
119
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:7:__node__
|
120
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:6:__node__
|
121
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:58:__node__
|
122
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:56:__node__
|
123
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:48:__node__
|
124
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:36:__node__
|
125
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:32:__node__
|
126
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:28:__node__
|
127
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:24:__node__
|
128
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:20:__node__
|
129
|
+
3 ./benchmark/../lib/fuzzy_match/similarity.rb:11:__node__
|
130
|
+
3 ./benchmark/../lib/fuzzy_match/score.rb:8:__node__
|
131
|
+
3 ./benchmark/../lib/fuzzy_match/score.rb:7:__node__
|
132
|
+
3 ./benchmark/../lib/fuzzy_match/score.rb:24:__node__
|
133
|
+
3 ./benchmark/../lib/fuzzy_match/score.rb:20:__node__
|
134
|
+
3 ./benchmark/../lib/fuzzy_match/score.rb:16:__node__
|
135
|
+
3 ./benchmark/../lib/fuzzy_match/score.rb:12:__node__
|
136
|
+
3 ./benchmark/../lib/fuzzy_match/result.rb:21:__node__
|
137
|
+
3 ./benchmark/../lib/fuzzy_match/result.rb:19:__node__
|
138
|
+
3 ./benchmark/../lib/fuzzy_match/result.rb:15:__node__
|
139
|
+
3 ./benchmark/../lib/fuzzy_match/identity.rb:9:__node__
|
140
|
+
3 ./benchmark/../lib/fuzzy_match/identity.rb:17:__node__
|
141
|
+
3 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:3:__node__
|
142
|
+
3 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:18:__node__
|
143
|
+
3 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:16:String
|
144
|
+
3 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:15:String
|
145
|
+
3 ./benchmark/../lib/fuzzy_match/blocking.rb:33:__node__
|
146
|
+
3 ./benchmark/../lib/fuzzy_match/blocking.rb:14:__node__
|
147
|
+
3 ./benchmark/../lib/fuzzy_match.rb:77:Array
|
148
|
+
2 /Users/seamus/.rvm/rubies/ruby-1.8.7-p334/lib/ruby/1.8/uri/common.rb:387:String
|
149
|
+
2 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:3:String
|
150
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:6:__node__
|
151
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:5:__node__
|
152
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:4:__node__
|
153
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:3:Class
|
154
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:35:__node__
|
155
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:32:__node__
|
156
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:26:__node__
|
157
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:22:__node__
|
158
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:20:__node__
|
159
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:16:__node__
|
160
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:12:__node__
|
161
|
+
2 ./benchmark/../lib/fuzzy_match/wrapper.rb:11:__node__
|
162
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:6:__node__
|
163
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:3:Class
|
164
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:28:__node__
|
165
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:27:String
|
166
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:24:__node__
|
167
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:23:__node__
|
168
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:22:__node__
|
169
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:15:__node__
|
170
|
+
2 ./benchmark/../lib/fuzzy_match/tightener.rb:10:__node__
|
171
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:60:__node__
|
172
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:50:__node__
|
173
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:4:__node__
|
174
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:46:__node__
|
175
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:3:__node__
|
176
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:34:__node__
|
177
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:30:__node__
|
178
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:2:Class
|
179
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:26:__node__
|
180
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:22:__node__
|
181
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:17:__node__
|
182
|
+
2 ./benchmark/../lib/fuzzy_match/similarity.rb:16:__node__
|
183
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:4:Class
|
184
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:26:__node__
|
185
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:22:__node__
|
186
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:18:__node__
|
187
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:17:String
|
188
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:14:__node__
|
189
|
+
2 ./benchmark/../lib/fuzzy_match/score.rb:13:String
|
190
|
+
2 ./benchmark/../lib/fuzzy_match/result.rb:2:Class
|
191
|
+
2 ./benchmark/../lib/fuzzy_match/result.rb:17:__node__
|
192
|
+
2 ./benchmark/../lib/fuzzy_match/identity.rb:7:__node__
|
193
|
+
2 ./benchmark/../lib/fuzzy_match/identity.rb:4:Class
|
194
|
+
2 ./benchmark/../lib/fuzzy_match/identity.rb:23:__node__
|
195
|
+
2 ./benchmark/../lib/fuzzy_match/identity.rb:22:__node__
|
196
|
+
2 ./benchmark/../lib/fuzzy_match/identity.rb:21:__node__
|
197
|
+
2 ./benchmark/../lib/fuzzy_match/identity.rb:11:__node__
|
198
|
+
2 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:7:__node__
|
199
|
+
2 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:6:__node__
|
200
|
+
2 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:5:__node__
|
201
|
+
2 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:23:String
|
202
|
+
2 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:12:__node__
|
203
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:9:Class
|
204
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:34:__node__
|
205
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:32:__node__
|
206
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:30:__node__
|
207
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:29:__node__
|
208
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:23:__node__
|
209
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:16:__node__
|
210
|
+
2 ./benchmark/../lib/fuzzy_match/blocking.rb:12:__node__
|
211
|
+
2 ./benchmark/../lib/fuzzy_match.rb:86:Array
|
212
|
+
1 benchmark/memory.rb:50:String
|
213
|
+
1 benchmark/memory.rb:49:FuzzyMatch
|
214
|
+
1 /Users/seamus/.rvm/rubies/ruby-1.8.7-p334/lib/ruby/1.8/uri/common.rb:492:URI::Generic
|
215
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table/executor.rb:19:Process::Status
|
216
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table/executor.rb:10:Bignum
|
217
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table.rb:63:Array
|
218
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table.rb:121:RemoteTable::Transformer
|
219
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table.rb:116:RemoteTable::Format::Delimited
|
220
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table.rb:111:RemoteTable::Properties
|
221
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/remote_table-1.1.6/lib/remote_table.rb:106:RemoteTable::LocalFile
|
222
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:4:Regexp
|
223
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:4:Array
|
224
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:1:__node__
|
225
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:1:String
|
226
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/amatch-0.2.5/lib/amatch/version.rb:1:Module
|
227
|
+
1 /Users/seamus/.rvm/gems/ruby-1.8.7-p334/gems/activesupport-3.0.5/lib/active_support/core_ext/hash/keys.rb:18:Hash
|
228
|
+
1 ./benchmark/../lib/fuzzy_match/wrapper.rb:3:__node__
|
229
|
+
1 ./benchmark/../lib/fuzzy_match/wrapper.rb:3:String
|
230
|
+
1 ./benchmark/../lib/fuzzy_match/wrapper.rb:25:String
|
231
|
+
1 ./benchmark/../lib/fuzzy_match/wrapper.rb:1:__node__
|
232
|
+
1 ./benchmark/../lib/fuzzy_match/wrapper.rb:10:String
|
233
|
+
1 ./benchmark/../lib/fuzzy_match/tightener.rb:4:String
|
234
|
+
1 ./benchmark/../lib/fuzzy_match/tightener.rb:4:FuzzyMatch::ExtractRegexp
|
235
|
+
1 ./benchmark/../lib/fuzzy_match/tightener.rb:3:__node__
|
236
|
+
1 ./benchmark/../lib/fuzzy_match/tightener.rb:3:String
|
237
|
+
1 ./benchmark/../lib/fuzzy_match/tightener.rb:1:__node__
|
238
|
+
1 ./benchmark/../lib/fuzzy_match/similarity.rb:9:__node__
|
239
|
+
1 ./benchmark/../lib/fuzzy_match/similarity.rb:2:__node__
|
240
|
+
1 ./benchmark/../lib/fuzzy_match/similarity.rb:2:String
|
241
|
+
1 ./benchmark/../lib/fuzzy_match/similarity.rb:25:String
|
242
|
+
1 ./benchmark/../lib/fuzzy_match/similarity.rb:1:__node__
|
243
|
+
1 ./benchmark/../lib/fuzzy_match/score.rb:4:__node__
|
244
|
+
1 ./benchmark/../lib/fuzzy_match/score.rb:4:String
|
245
|
+
1 ./benchmark/../lib/fuzzy_match/score.rb:3:__node__
|
246
|
+
1 ./benchmark/../lib/fuzzy_match/score.rb:1:String
|
247
|
+
1 ./benchmark/../lib/fuzzy_match/score.rb:13:Array
|
248
|
+
1 ./benchmark/../lib/fuzzy_match/score.rb:10:__node__
|
249
|
+
1 ./benchmark/../lib/fuzzy_match/result.rb:2:__node__
|
250
|
+
1 ./benchmark/../lib/fuzzy_match/result.rb:2:String
|
251
|
+
1 ./benchmark/../lib/fuzzy_match/result.rb:1:__node__
|
252
|
+
1 ./benchmark/../lib/fuzzy_match/identity.rb:5:FuzzyMatch::ExtractRegexp
|
253
|
+
1 ./benchmark/../lib/fuzzy_match/identity.rb:4:__node__
|
254
|
+
1 ./benchmark/../lib/fuzzy_match/identity.rb:4:String
|
255
|
+
1 ./benchmark/../lib/fuzzy_match/identity.rb:1:__node__
|
256
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:4:__node__
|
257
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:2:__node__
|
258
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:2:String
|
259
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:2:Module
|
260
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:28:__node__
|
261
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:26:String
|
262
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:25:String
|
263
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:24:String
|
264
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:23:Regexp
|
265
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:1:__node__
|
266
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:17:Hash
|
267
|
+
1 ./benchmark/../lib/fuzzy_match/extract_regexp.rb:10:String
|
268
|
+
1 ./benchmark/../lib/fuzzy_match/blocking.rb:9:__node__
|
269
|
+
1 ./benchmark/../lib/fuzzy_match/blocking.rb:9:String
|
270
|
+
1 ./benchmark/../lib/fuzzy_match/blocking.rb:1:__node__
|
271
|
+
1 ./benchmark/../lib/fuzzy_match/blocking.rb:10:FuzzyMatch::ExtractRegexp
|
272
|
+
1 ./benchmark/../lib/fuzzy_match.rb:62:FuzzyMatch::Wrapper
|
273
|
+
1 ./benchmark/../lib/fuzzy_match.rb:39:String
|
274
|
+
1 ./benchmark/../lib/fuzzy_match.rb:39:FuzzyMatch::Result
|
275
|
+
1 ./benchmark/../lib/fuzzy_match.rb:35:String
|
276
|
+
1 ./benchmark/../lib/fuzzy_match.rb:209:Array
|
277
|
+
1 ./benchmark/../lib/fuzzy_match.rb:199:String
|
278
|
+
1 ./benchmark/../lib/fuzzy_match.rb:198:Array
|
279
|
+
1 ./benchmark/../lib/fuzzy_match.rb:193:String
|
280
|
+
1 ./benchmark/../lib/fuzzy_match.rb:192:Array
|
281
|
+
1 ./benchmark/../lib/fuzzy_match.rb:187:String
|
282
|
+
1 ./benchmark/../lib/fuzzy_match.rb:186:Array
|
283
|
+
1 ./benchmark/../lib/fuzzy_match.rb:101:Array
|