fuzzy-matcher 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,119 @@
1
+ require 'date'
2
+
3
# Pulls calendar dates out of noisy text. Each registered scanner
# approximately matches one date layout; matched pieces are then run
# through a character substitution and parsed with Date.strptime.
#
# FuzzySub and FuzzyDateScanner are defined in other files of the gem.
class FuzzyDate

  # strptime format => regex locating candidates for that format.
  # Groups 1..3 capture the date components in the order they appear in
  # the format string.
  #
  # NOTE(review): in the last two patterns the year group
  # `(19|20[0-9][0-9])` alternates between the literal "19" and "20xx".
  # "19xx or 20xx" may have been intended -- confirm before changing,
  # existing specs may depend on what currently matches.
  FORMAT_2_REGEX = {
    # 03/18/2014 or 3/18/2014
    "%m/%d/%Y" => /(0?[1-9]|1[012])[-\/.~X](0?[1-9]|[12][0-9]|3[01])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,

    # 18/03/2014
    "%d/%m/%Y" => /(0[1-9]|[12][0-9]|3[01])[-\/.~X](0[1-9]|1[012])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,

    # 2014-04-14
    "%Y-%m-%d" => /(20[0-9][0-9])[-.~](0[1-9]|1[012])[-.~](0[1-9]|[12][0-9]|3[01])/,

    # 10APR2014 or 4APR2014
    "%d%b%Y" => /(0?[1-9]|[12][0-9]|3[01])(JAN|FEB|MAR|APR|ApR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(19|20[0-9][0-9])/,

    # April 7, 2014
    "%B %d, %Y" => /(January|February|March|April|May|June|July|August|September|October|November|December) *(0?[1-9]|[12][0-9]|3[01])[,.]? *(19|20[0-9][0-9])/
  }

  # Highest fuzziness #to_date escalates to.
  attr_accessor :max_fuzz

  # Builds one scanner per entry of FORMAT_2_REGEX.
  #
  # max_fuzz - upper fuzziness bound used by #to_date (default 2).
  def initialize(max_fuzz = 2)
    @fsub = FuzzySub.new(FuzzySub::CHAR_2_NUM_SUB)
    @max_fuzz = max_fuzz
    @scanners = []
    FORMAT_2_REGEX.each_pair { |format, regex| register(format, regex) }
  end

  # Adds a scanner for an additional format/regex pair.
  def register(format, regex)
    @scanners << FuzzyDateScanner.new(format, regex)
  end

  # Runs every scanner over +string+ (fuzziness 2 unless given).
  #
  # Returns an array of [matches, format] pairs, one per scanner that
  # found something; scanners without matches are left out.
  def fscan(string, fuzziness = 2)
    @scanners.each_with_object([]) do |scanner, found|
      hits = scanner.fscan!(string, fuzziness)
      found << [hits, scanner.format] unless hits.empty?
    end
  end

  # Rebuilds one match into text that Date.strptime can read.
  #
  # m      - a match: index 0 is the whole matched text, 1..3 the groups.
  # format - the strptime format the match was found with.
  #
  # Numeric groups go through the character substitution (in place), the
  # separator is normalised to the one in +format+, and the format is
  # switched to its two-digit-year variant when the year has fewer than
  # four characters. Formats without a branch keep the raw matched text.
  #
  # Returns [text, format].
  def validaterize(m, format)
    rebuilt =
      case format
      when "%Y-%m-%d"
        day = @fsub.fsub!(m[3])
        month = @fsub.fsub!(m[2])
        year = @fsub.fsub!(m[1])
        "#{year}-#{month}-#{day}"
      when "%m/%d/%Y"
        day = @fsub.fsub!(m[2])
        month = @fsub.fsub!(m[1])
        year = @fsub.fsub!(m[3])
        format = "%m/%d/%y" if year.length < 4
        "#{month}/#{day}/#{year}"
      when "%d/%m/%Y"
        day = @fsub.fsub!(m[1])
        month = @fsub.fsub!(m[2])
        year = @fsub.fsub!(m[3])
        format = "%d/%m/%y" if year.length < 4
        "#{day}/#{month}/#{year}"
      when "%d%b%Y"
        day = @fsub.fsub!(m[1])
        month = m[2].upcase
        year = @fsub.fsub!(m[3])
        format = "%d%b%y" if year.length < 4
        "#{day}#{month}#{year}"
      else
        m[0]
      end

    [rebuilt.strip, format]
  end

  # Converts the output of #fscan into Date objects.
  #
  # Candidates that Date.strptime rejects are dropped silently, since an
  # approximate match is not guaranteed to be a real calendar date.
  def matches_to_dates(matches)
    dates = []
    matches.each do |strings, format|
      strings.each do |candidate|
        text, parse_format = validaterize(candidate, format)
        begin
          dates << Date.strptime(text, parse_format)
        rescue ArgumentError
          # not a valid date for this format; skip it
        end
      end
    end
    dates
  end

  # Looks for dates with fuzziness 1, then 2, ... up to max_fuzz and
  # stops at the first level that yields at least one date.
  #
  # Returns an array of Date (empty when nothing was found).
  def to_date(string)
    dates = []
    1.upto(@max_fuzz) do |fuzz|
      dates = matches_to_dates(fscan(string, fuzz))
      break unless dates.empty?
    end
    dates
  end

end
112
+
113
# Mixin for String-like objects: extend a string with this module to
# call #to_date on it directly.
module FuzzyDateString

  # Searches the receiver for dates via FuzzyDate.
  #
  # max_fuzz - highest fuzziness to try (default 2).
  #
  # Returns whatever FuzzyDate#to_date returns for the receiver.
  def to_date(max_fuzz = 2)
    finder = FuzzyDate.new(max_fuzz)
    finder.to_date(self)
  end

end
@@ -0,0 +1,27 @@
1
+ # {
2
+ # # 03/18/2014 or 3/18/2014
3
+ # "%m/%d/%Y" => /([0 ][1-9]|1[012])[-\/.~X](0[1-9]|[12][0-9]|3[01])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,
4
+ #
5
+ # # 18/03/2014
6
+ # "%d/%m/%Y" => /(0[1-9]|[12][0-9]|3[01])[-\/.~X](0[1-9]|1[012])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,
7
+ #
8
+ # # 2014-04-14
9
+ # "%Y-%m-%d" => /(20[0-9][0-9])[-.~](0[1-9]|1[012])[-.~](0[1-9]|[12][0-9]|3[01])/,
10
+ #
11
+ # # 10APR2014 or 4APR2014
12
+ # "%d%b%Y" => /([0 ][1-9]|[12][0-9]|3[01])(JAN|FEB|MAR|APR|ApR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(19|20[0-9][0-9])/,
13
+ #
14
+ # # April 7, 2014
15
+ # "%B %d, %Y" => /(January|February|March|April|May|June|July|August|September|October|November|December) *(0?[1-9]|[12][0-9]|3[01])[,.]? *(19|20[0-9][0-9])/
16
+ # }
17
+
18
# A FuzzyScanner that also remembers which date format its regex
# stands for.
class FuzzyDateScanner < FuzzyScanner

  # The strptime format associated with this scanner's regex.
  attr_accessor :format

  # format - strptime format string, e.g. "%m/%d/%Y".
  # regex  - pattern stored in @regex (exposed by the parent's accessor).
  def initialize(format, regex)
    @regex = regex
    @format = format
  end

end
@@ -0,0 +1,83 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'tre-ruby'
4
+
5
# Core extension: lets two regexes be glued together with `+`.
class Regexp
  # Builds a new Regexp whose pattern is this pattern followed by r's.
  #
  # Only the pattern text is combined; the new Regexp is created
  # without options, so flags on either operand are not carried over.
  def +(r)
    joined_source = "#{source}#{r.source}"
    Regexp.new(joined_source)
  end
end
10
+
11
# Finds prices that follow a label (e.g. "Total") in noisy text, using
# approximate regex matching from the TRE library.
#
# FuzzySub and Regexp#+ are defined in other parts of the gem.
class FuzzyPrice

  # <digits><separator><two digits>, with an optional leading "$".
  # The separator class lists its members literally: ".", "_", em dash
  # and "-". The hyphen is kept last so it cannot be read as a range
  # operator (a hyphen between "_" and the em dash would span every
  # character in between, lowercase letters included).
  FUZZY_PRICE_REGEX = /\$?([1-9]*[0-9])[._—-]([0-9][0-9])/
  TOTAL_TEXT_REGEX = /(Total|TOTAL|Total Applied) +/

  # Highest fuzziness #to_price escalates to.
  attr_accessor :max_fuzz
  # Override for the price pattern; the reader below supplies the default.
  attr_writer :price_regex

  # max_fuzz - upper fuzziness bound used by #to_price (default 2).
  def initialize(max_fuzz = 2)
    @max_fuzz = max_fuzz
    @fsub = FuzzySub.new FuzzySub::CHAR_2_NUM_SUB
  end

  # The price pattern in use; FUZZY_PRICE_REGEX until one is assigned.
  def price_regex
    @price_regex ||= FUZZY_PRICE_REGEX
  end

  # Scans +file+ line by line for +text_regex+ immediately followed by
  # a price.
  #
  # file         - the text to search; split on "\n".
  # text_regex   - Regexp for the label preceding the price.
  # fuzzy_thresh - fuzziness passed to TRE for both passes (default 2).
  #
  # Returns an array of Floats, one per price found.
  def find_price(file, text_regex, fuzzy_thresh = 2)
    prices = []
    # Use the configurable pattern here as well, so that a custom
    # price_regex applies to both matching passes.
    labelled_price = text_regex + price_regex

    file.split("\n").each do |line|
      hits = line.extend(TRE).ascan labelled_price, TRE.fuzziness(fuzzy_thresh)
      hits.each do |hit|
        # match the price regex separately again to improve accuracy
        price_hit = (hit[0].extend(TRE).ascan price_regex, TRE.fuzziness(fuzzy_thresh))[0]
        next unless price_hit

        whole = @fsub.fsub!(price_hit[1])
        cents = @fsub.fsub!(price_hit[2])
        next if whole.empty? || cents.empty?

        # an approximate match may have pulled a "$" into a group
        prices << "#{whole}.#{cents}".delete("$").to_f
      end
    end

    prices
  end

  # Tries #find_price with fuzziness 1, 2, ... up to max_fuzz and stops
  # at the first level that yields at least one price.
  #
  # Returns an array of Floats (empty when nothing was found).
  def to_price(string, regex)
    prices = []
    1.upto(@max_fuzz) do |fuzz|
      prices = find_price(string, regex, fuzz)
      break unless prices.empty?
    end
    prices
  end

end
76
+
77
# Mixin for String-like objects: extend a string with this module to
# call #to_price on it directly.
module FuzzyPriceString

  # Searches the receiver for prices via FuzzyPrice.
  #
  # regex    - Regexp for the label that precedes the price.
  # max_fuzz - highest fuzziness to try (default 2).
  #
  # Returns whatever FuzzyPrice#to_price returns for the receiver.
  def to_price(regex, max_fuzz = 2)
    finder = FuzzyPrice.new(max_fuzz)
    finder.to_price(self, regex)
  end

end
@@ -0,0 +1,14 @@
1
+ require 'tre-ruby'
2
+
3
# Approximate regex scanner built on the TRE library.
class FuzzyScanner

  # The pattern handed to TRE when scanning.
  attr_accessor :regex

  # Scans +str+ for approximate matches of #regex.
  #
  # str       - text to scan. It is modified: newlines are replaced by
  #             spaces in place and the string is extended with TRE.
  # fuzziness - value passed to TRE.fuzziness (default 2).
  #
  # Returns the matches reported by TRE with duplicates removed.
  def fscan!(str, fuzziness = 2)
    str.gsub!(/\n/, " ")
    fuzzy_text = str.extend(TRE)
    fuzzy_text.ascan(regex, TRE.fuzziness(fuzziness)).uniq
  end

end
@@ -0,0 +1,33 @@
1
# Rewrites characters in a string according to a substitution table.
class FuzzySub

  # Default table mapping characters to digits. Each key is a SET of
  # characters, not a word: every character in the key is replaced by
  # the mapped value.
  CHAR_2_NUM_SUB = {
    "A" => "4",
    "OoD" => "0",
    "liI," => "1",
    "q" => "4" # could be 9
  }

  # The substitution table in use (character set => replacement).
  attr_accessor :sub_hash

  # sub_hash - Hash of character-set String => replacement String.
  def initialize(sub_hash)
    @sub_hash = sub_hash
  end

  # Applies every entry of the table to +word+, in table order.
  #
  # word - the String to rewrite; it is modified in place.
  #
  # Returns the same (modified) String.
  def fsub!(word)
    sub_hash.each do |chars, replacement|
      # Escape the key so characters that are special inside a character
      # class ("^", "-", "]", "\") are treated as plain members of the set.
      word.gsub!(/[#{Regexp.escape(chars)}]/, replacement)
    end
    word
  end

end
24
+
25
# Mixin for String-like objects: extend a string with this module to
# call #fsub! on it directly.
module FuzzySubString

  # Applies the substitutions in +sub_hash+ to the receiver via FuzzySub.
  #
  # Returns whatever FuzzySub#fsub! returns for the receiver.
  def fsub!(sub_hash)
    FuzzySub.new(sub_hash).fsub!(self)
  end

end
33
+
@@ -0,0 +1,37 @@
1
# Looks up known words in a text using approximate matching from the
# TRE library.
class FuzzyWords

  # Highest fuzziness #find_word escalates to.
  attr_accessor :max_fuzz

  # max_fuzz - upper fuzziness bound (default 4).
  def initialize(max_fuzz = 4)
    @max_fuzz = max_fuzz
  end

  # Searches +file+ for each entry of +words+.
  #
  # Returns an array of { word:, matches: } hashes, one per word that
  # had at least one match; words without matches are left out.
  def find_words(file, words)
    words.each_with_object([]) do |word, found|
      hits = find_word(file, word)
      found << { word: word, matches: hits } unless hits.empty?
    end
  end

  # Searches +text+ for +word+ with fuzziness 1, 2, ... up to max_fuzz
  # and stops at the first level that yields a match. +text+ is
  # extended with TRE as a side effect.
  #
  # Returns the matches reported by TRE (empty when nothing was found).
  def find_word(text, word)
    matches = []
    1.upto(@max_fuzz) do |fuzz|
      matches = text.extend(TRE).ascan(word, TRE.fuzziness(fuzz))
      break unless matches.empty?
    end
    matches
  end

end
28
+
29
# Mixin for String-like objects: extend a string with this module to
# call #fuzzy_match_words on it directly.
module FuzzyWordsString

  # Searches the receiver for each entry of +words+ via FuzzyWords.
  #
  # words    - list of words to look for.
  # max_fuzz - highest fuzziness to try (default 4).
  #
  # Returns whatever FuzzyWords#find_words returns for the receiver.
  def fuzzy_match_words(words, max_fuzz = 4)
    finder = FuzzyWords.new(max_fuzz)
    finder.find_words(self, words)
  end

end
36
+
37
+
@@ -0,0 +1,5 @@
1
# Namespace holding the gem's version.
module FuzzyMatcher
  # Gem version string.
  VERSION = '0.0.1'
end
@@ -0,0 +1,6 @@
1
+ require 'fuzzy_matcher/fuzzy_sub'
2
+ require "fuzzy_matcher/fuzzy_scanner"
3
+ require "fuzzy_matcher/fuzzy_date_scanner"
4
+ require "fuzzy_matcher/fuzzy_date"
5
+ require "fuzzy_matcher/fuzzy_price"
6
+ require "fuzzy_matcher/fuzzy_words"
@@ -0,0 +1,99 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyDateScanner do
4
+
5
+ before do
6
+ # 03/18/2014 or 3/18/2014
7
+ @fs1 = FuzzyDateScanner.new "%m/%d/%Y", /([0 ][1-9]|1[012])[-\/.~X](0[1-9]|[12][0-9]|3[01])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/
8
+
9
+ # 18/03/2014
10
+ @fs2 = FuzzyDateScanner.new "%d/%m/%Y", /(0[1-9]|[12][0-9]|3[01])[-\/.~X](0[1-9]|1[012])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/
11
+ end
12
+
13
+ describe "Fuzzily find matching date!" do
14
+
15
+ describe "finds the matches" do
16
+
17
+ describe "m/d/Y" do
18
+ it "finds perfectly matched text for %m/%d/%y with fuzziness 0" do
19
+ matches = @fs1.fscan!("03/18/14", 0)
20
+ matches.length.must_equal 1
21
+ matches[0][0].must_equal "03/18/14"
22
+ end
23
+
24
+ it "finds perfectly matched text for %m/%d/%Y with fuzziness 0" do
25
+ matches = @fs1.fscan!("03/18/2014", 0)
26
+ matches.length.must_equal 1
27
+ matches[0][0].must_equal "03/18/2014"
28
+ end
29
+
30
+ it "finds 1-edit matched text with fuzziness 1" do
31
+ matches = @fs1.fscan!("03/18/201A", 1)
32
+ matches.length.must_equal 1
33
+ matches[0][0].must_equal "03/18/201A"
34
+ end
35
+ end
36
+
37
+ describe "d/m/Y" do
38
+ it "finds perfectly matched text for %d/%m/%y with fuzziness 0" do
39
+ matches = @fs2.fscan!("23/08/14", 0)
40
+ matches.length.must_equal 1
41
+ matches[0][0].must_equal "23/08/14"
42
+ end
43
+
44
+ it "finds perfectly matched text for %d/%m/%Y with fuzziness 0" do
45
+ matches = @fs2.fscan!("13/08/2014", 0)
46
+ matches.length.must_equal 1
47
+ matches[0][0].must_equal "13/08/2014"
48
+ end
49
+
50
+ it "finds 1-edit matched text with fuzziness 1" do
51
+ matches = @fs2.fscan!("18/01/201A", 1)
52
+ matches.length.must_equal 1
53
+ matches[0][0].must_equal "18/01/201A"
54
+ end
55
+ end
56
+
57
+ end
58
+
59
+ describe "cannot find the matches" do
60
+
61
+ describe "m/d/Y" do
62
+ it "cannot find 1-subbed text with fuzziness 0 (by default one sub costs 1)" do
63
+ matches = @fs1.fscan!("18/18/14", 0)
64
+ matches.must_be_empty
65
+ end
66
+
67
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
68
+ matches = @fs1.fscan!("18/18/I4", 1)
69
+ matches.must_be_empty
70
+ end
71
+
72
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
73
+ matches = @fs1.fscan!(" 3/I6/2DI4", 1)
74
+ matches.must_be_empty
75
+ end
76
+ end
77
+
78
+ describe "d/m/Y" do
79
+ it "cannot find 1-subbed text with fuzziness 0 (by default one sub costs 1)" do
80
+ matches = @fs2.fscan!("08/18/14", 0)
81
+ matches.must_be_empty
82
+ end
83
+
84
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
85
+ matches = @fs2.fscan!("08/18/I4", 1)
86
+ matches.must_be_empty
87
+ end
88
+
89
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
90
+ matches = @fs2.fscan!("13/I6/20I4", 1)
91
+ matches.must_be_empty
92
+ end
93
+ end
94
+
95
+ end
96
+
97
+ end
98
+
99
+ end
@@ -0,0 +1,77 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyDate do
4
+
5
+ before do
6
+ @sample1 = "Re9 Trans Dafe/TIme CashIer\n003 7269 2014-O4-1q l8:21 O1972808\n610984528 BCCESSORIES $20,00 $12 DO\nTrans DIscounf $8.00"
7
+ @sample2 = " 4/17/2014 1:23 PM }"
8
+ @sample3 = "4ApR2O,4"
9
+ @fd = FuzzyDate.new 1
10
+ end
11
+
12
+
13
+ describe "it scans the dates from text" do
14
+
15
+ it "gets dates matches for %Y-%m-%d with default fuzziness 1" do
16
+ matches = @fd.fscan @sample1
17
+ matches.length.must_equal 2
18
+ matches[0][1].must_equal "%Y-%m-%d"
19
+ matches[1][0][0][0].must_equal "21 O19" #unfortunately scanned a wrong date becoz fuzzy is 2
20
+ end
21
+
22
+ it "gets dates matches for %d%b%Y with default fuzziness 1" do
23
+ matches = @fd.fscan @sample3
24
+ matches.length.must_equal 1
25
+ matches[0][1].must_equal "%d%b%Y"
26
+ matches[0][0][0][0].must_equal "4ApR2O,4" #unfortunately scanned a wrong date becoz fuzzy is 2
27
+ end
28
+
29
+ it "gets dates matches for %m/%d/%Y with fuzziness 1" do
30
+ matches = @fd.fscan @sample2, 1
31
+ matches.length.must_equal 1
32
+ matches[0][1].must_equal "%m/%d/%Y"
33
+ end
34
+
35
+ end
36
+
37
+ describe "it returns date" do
38
+
39
+ it "scans and find the date with max fuzziness 1" do
40
+ @fd.max_fuzz = 3
41
+ dates = @fd.to_date @sample3
42
+ dates.length.must_equal 1
43
+ dates[0].to_s.must_equal "2014-04-04"
44
+ end
45
+
46
+ it "scans and find the date with max fuzziness 2" do
47
+ dates = @fd.to_date @sample2
48
+ dates.length.must_equal 1
49
+ dates[0].to_s.must_equal "2014-04-17"
50
+ end
51
+
52
+ end
53
+
54
+ describe "it scans but cannot find date with fuzziness 1" do
55
+
56
+ it "scans and returns the date with max fuzziness 1" do
57
+ @fd.max_fuzz = 1
58
+ dates = @fd.to_date @sample1
59
+ dates.length.must_equal 0
60
+ end
61
+
62
+ it "scans and returns the date with max fuzziness 1" do
63
+ @fd.max_fuzz = 2
64
+ dates = @fd.to_date @sample1
65
+ dates.length.must_equal 1
66
+ dates[0].to_s.must_equal "2014-04-14"
67
+ end
68
+
69
+ it "string can extend this module scans and returns the date with max fuzziness 1" do
70
+ dates = @sample1.extend(FuzzyDateString).to_date 2
71
+ dates.length.must_equal 1
72
+ end
73
+
74
+ end
75
+
76
+
77
+ end
@@ -0,0 +1,62 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'minitest_helper'
4
+
5
+ describe FuzzyPrice do
6
+
7
+ before do
8
+ @fp = FuzzyPrice.new 1
9
+ @sample0 = "Total 71.75"
10
+ @sample1 = ": % ` ’ ' Total . k` 27.32“"
11
+ @sample2 = "TofaI $26.18"
12
+ @sample3 = "T0faI 55.IA"
13
+ @sample4 = "T0faI 55.15"
14
+ @sample5 = "Total 35—O3"
15
+ @sample6 = "TOTAL $10.13"
16
+ end
17
+
18
+ describe "Finds price total" do
19
+
20
+ it "finds price total with fuzziness 1" do
21
+ price = @fp.to_price @sample0, FuzzyPrice::TOTAL_TEXT_REGEX
22
+ price.length.must_equal 1
23
+ price[0].must_equal 71.75
24
+ end
25
+
26
+ it "finds price total with fuzziness 2" do
27
+ @fp.max_fuzz = 2
28
+ price = @fp.to_price @sample2, FuzzyPrice::TOTAL_TEXT_REGEX
29
+ price.length.must_equal 1
30
+ price[0].must_equal 26.18
31
+
32
+ price = @fp.to_price @sample5, FuzzyPrice::TOTAL_TEXT_REGEX
33
+ price.length.must_equal 1
34
+ price[0].must_equal 35.03
35
+ end
36
+
37
+ it "finds price total with fuzz 3" do
38
+ @fp.max_fuzz = 3
39
+ price = @fp.to_price @sample4, FuzzyPrice::TOTAL_TEXT_REGEX
40
+ price.length.must_equal 1
41
+ price[0].must_equal 55.15
42
+ end
43
+
44
+ end
45
+
46
+ describe "Cannot find price total" do
47
+
48
+ it "cannot find price total with fuzziness 1" do
49
+ @fp.max_fuzz = 1
50
+ price = @fp.to_price @sample3, FuzzyPrice::TOTAL_TEXT_REGEX
51
+ price.length.must_equal 0
52
+ end
53
+
54
+ it "cannot find price total with fuzz 2, text too messy" do
55
+ price = @fp.to_price @sample1, FuzzyPrice::TOTAL_TEXT_REGEX
56
+ price.length.must_equal 0
57
+ end
58
+
59
+ end
60
+
61
+
62
+ end
@@ -0,0 +1,44 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyScanner do
4
+
5
+ before do
6
+ @fs = FuzzyScanner.new
7
+ @fs.regex = /([0 ][1-9]|1[012])[-\/.~X](0[1-9]|[12][0-9]|3[01])[-\/.~X]([0-9][0-9]$)/
8
+ end
9
+
10
+ describe "Fuzzily find matching text!" do
11
+
12
+ describe "finds the matches" do
13
+
14
+ it "finds perfectly matched text with fuzziness 0" do
15
+ matches = @fs.fscan!("03/18/14", 0)
16
+ matches.length.must_equal 1
17
+ matches[0][0].must_equal "03/18/14"
18
+ end
19
+
20
+ it "finds 1-edit matched text with fuzziness 1" do
21
+ matches = @fs.fscan!("03/I8/14", 1)
22
+ matches.length.must_equal 1
23
+ matches[0][0].must_equal "03/I8/14"
24
+ end
25
+
26
+ end
27
+
28
+ describe "cannot find the matches" do
29
+
30
+ it "cannot find 2-edit text with fuzziness 1" do
31
+ matches = @fs.fscan!("03/18/IA", 1)
32
+ matches.must_be_empty
33
+ end
34
+
35
+ it "cannot find matches with regex not matching" do
36
+ matches = @fs.fscan!("18/18/14", 0)
37
+ matches.must_be_empty
38
+ end
39
+
40
+ end
41
+
42
+ end
43
+
44
+ end
@@ -0,0 +1,25 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzySub do
4
+
5
+ before do
6
+ @fs = FuzzySub.new({ "A" => "4" })
7
+ end
8
+
9
+ describe "Find text to sub!" do
10
+
11
+ it "must only sub the should subbed words" do
12
+ @fs.fsub!("O3/18/1A").must_equal "O3/18/14"
13
+ end
14
+
15
+ end
16
+
17
+ describe "String extends fuzzy sub" do
18
+
19
+ it "String extends fuzzy sub should be able to do fuzzy sub!" do
20
+ "O3/18/1A".extend(FuzzySubString).fsub!({ "A" => "4" }).must_equal "O3/18/14"
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,47 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyWords do
4
+
5
+ before do
6
+ @fw = FuzzyWords.new
7
+ @words = ["JALAPENO CHILE", "CHILE D/ARBOL", "GRAPEFRUIT LARGE", "HASS SML AVOCADO", "MICHL KORS MS",
8
+ "LAUREN PETITE", "BROWN CHICKEN", "ASN/TAS DRIED NOODLE MED", "KOBE VENOMENON YOT"]
9
+
10
+ @test1 = " JALAPENO CHILE\n2.6l lb @ $O.69/lb $l.8O F\nCHILE D/ARBOL \nl.O8 lb @ $3,49/lb $3.77 F "
11
+ @test2 = " \n2.6l lb @ $O.69/lb $l.8O F\nCHIL \nl.O8 lb @ $3,49/lb $3.77 F "
12
+ @test3 = File.read File.expand_path("../../fixtures/sample1.txt", __FILE__)
13
+ @unreadable_test4 = File.read File.expand_path("../../fixtures/sample2.txt", __FILE__)
14
+ end
15
+
16
+ describe "Find text matches words" do
17
+
18
+ it "finds the similar words" do
19
+ words = @fw.find_words(@test1, @words)
20
+ words.length.must_equal 2
21
+ end
22
+
23
+ it "finds the similar words" do
24
+ words = @fw.find_words(@test3, ["BUTTERFLY PRINT TOP:MULTI"])
25
+ words.length.must_equal 1
26
+ words[0][:word].must_equal "BUTTERFLY PRINT TOP:MULTI"
27
+ words[0][:matches].must_equal ["BUTTERFLY PRINT TOP:MULTI"]
28
+ end
29
+
30
+ end
31
+
32
+ describe "Cannot find test matched words" do
33
+
34
+ it "does not find the similar words" do
35
+ words = @fw.find_words(@test2, @words)
36
+ words.length.must_equal 0
37
+ end
38
+
39
+ it "does not find any words with max_fuzz 7" do
40
+ @fw.max_fuzz = 7
41
+ words = @fw.find_words(@unreadable_test4, @words)
42
+ words.length.must_equal 0
43
+ end
44
+
45
+ end
46
+
47
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require "minitest/autorun"
4
+ require "fuzzy_matcher"
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy-matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Qi He
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-04-18 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! ' Fuzzy matcher looks for fuzzy matches such as words, digits, etc.
15
+ in a string of text using regex or string.
16
+
17
+ '
18
+ email: qihe229@gmail.com
19
+ executables: []
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - lib/fuzzy_matcher/fuzzy_date.rb
24
+ - lib/fuzzy_matcher/fuzzy_date_scanner.rb
25
+ - lib/fuzzy_matcher/fuzzy_price.rb
26
+ - lib/fuzzy_matcher/fuzzy_scanner.rb
27
+ - lib/fuzzy_matcher/fuzzy_sub.rb
28
+ - lib/fuzzy_matcher/fuzzy_words.rb
29
+ - lib/fuzzy_matcher/version.rb
30
+ - lib/fuzzy_matcher.rb
31
+ - spec/fuzzy_matcher/fuzzy_date_scanner_spec.rb
32
+ - spec/fuzzy_matcher/fuzzy_date_spec.rb
33
+ - spec/fuzzy_matcher/fuzzy_price_spec.rb
34
+ - spec/fuzzy_matcher/fuzzy_scanner_spec.rb
35
+ - spec/fuzzy_matcher/fuzzy_sub_spec.rb
36
+ - spec/fuzzy_matcher/fuzzy_words_spec.rb
37
+ - spec/minitest_helper.rb
38
+ homepage: http://github.com/he9qi/fuzzy_matcher
39
+ licenses:
40
+ - MIT
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.23
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: Fuzzy matches words, digits, etc. in a string of text.
63
+ test_files:
64
+ - spec/fuzzy_matcher/fuzzy_date_scanner_spec.rb
65
+ - spec/fuzzy_matcher/fuzzy_date_spec.rb
66
+ - spec/fuzzy_matcher/fuzzy_price_spec.rb
67
+ - spec/fuzzy_matcher/fuzzy_scanner_spec.rb
68
+ - spec/fuzzy_matcher/fuzzy_sub_spec.rb
69
+ - spec/fuzzy_matcher/fuzzy_words_spec.rb
70
+ - spec/minitest_helper.rb