fuzzy-matcher 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
1
+ require 'date'
2
+
3
# Finds calendar dates in noisy text (e.g. OCR output).
#
# Each supported layout is a pair of a Date.strptime format and a regex. The
# regex is matched approximately by a FuzzyDateScanner, the captured pieces
# are cleaned up with FuzzySub (look-alike characters -> digits) and the
# result is handed to Date.strptime.
class FuzzyDate

  # strptime format => regex used to locate candidates for that format.
  # The capture groups are consumed positionally by #validaterize.
  #
  # NOTE(review): in the last two patterns `(19|20[0-9][0-9])` alternates
  # between the literal "19" and a full 20xx year, so "1999" is captured as
  # "19". It is left untouched here because changing it changes which fuzzy
  # matches are returned; confirm against the specs before tightening it.
  FORMAT_2_REGEX = {
    # 03/18/2014 or 3/18/2014
    "%m/%d/%Y" => /(0?[1-9]|1[012])[-\/.~X](0?[1-9]|[12][0-9]|3[01])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,

    # 18/03/2014
    "%d/%m/%Y" => /(0[1-9]|[12][0-9]|3[01])[-\/.~X](0[1-9]|1[012])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,

    # 2014-04-14
    "%Y-%m-%d" => /(20[0-9][0-9])[-.~](0[1-9]|1[012])[-.~](0[1-9]|[12][0-9]|3[01])/,

    # 10APR2014 or 4APR2014
    "%d%b%Y" => /(0?[1-9]|[12][0-9]|3[01])(JAN|FEB|MAR|APR|ApR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(19|20[0-9][0-9])/,

    # April 7, 2014
    "%B %d, %Y" => /(January|February|March|April|May|June|July|August|September|October|November|December) *(0?[1-9]|[12][0-9]|3[01])[,.]? *(19|20[0-9][0-9])/
  }

  attr_accessor :max_fuzz

  # max_fuzz - highest fuzziness level #to_date will try (default 2).
  def initialize(max_fuzz=2)
    @fsub = FuzzySub.new FuzzySub::CHAR_2_NUM_SUB
    @max_fuzz = max_fuzz
    @scanners = []
    FORMAT_2_REGEX.each { |format, regex| register format, regex }
  end

  # Adds a scanner for an additional format/regex pair.
  def register(format, regex)
    @scanners << FuzzyDateScanner.new(format, regex)
  end

  # Runs every registered scanner over string (fuzziness 2 by default).
  #
  # Returns an array of [matches, format] pairs, one per scanner that found
  # at least one match; scanners without matches are left out.
  def fscan(string, fuzziness=2)
    @scanners.map do |scanner|
      found = scanner.fscan!(string, fuzziness)
      [found, scanner.format] unless found.empty?
    end.compact
  end

  # Rebuilds one match into a string Date.strptime can parse.
  #
  # m      - one match: m[0] is the whole matched text, m[1..3] the capture
  #          groups of the regex registered for format.
  # format - the strptime format the match was found with.
  #
  # Numeric pieces go through FuzzySub so look-alike characters become
  # digits. When the year ends up shorter than four characters the format is
  # switched to its two-digit-year (%y) variant. Formats without a branch
  # below fall through with the matched text unchanged.
  #
  # Returns [string, format].
  def validaterize(m, format)
    str = m[0]

    case format
    when "%Y-%m-%d"
      date = @fsub.fsub!(m[3])
      mont = @fsub.fsub!(m[2])
      year = @fsub.fsub!(m[1])
      str = "#{year}-#{mont}-#{date}"
    when "%m/%d/%Y"
      date = @fsub.fsub!(m[2])
      mont = @fsub.fsub!(m[1])
      year = @fsub.fsub!(m[3])
      format = "%m/%d/%y" if year.length < 4
      str = "#{mont}/#{date}/#{year}"
    when "%d/%m/%Y"
      date = @fsub.fsub!(m[1])
      mont = @fsub.fsub!(m[2])
      year = @fsub.fsub!(m[3])
      format = "%d/%m/%y" if year.length < 4
      str = "#{date}/#{mont}/#{year}"
    when "%d%b%Y"
      date = @fsub.fsub!(m[1])
      mont = m[2].upcase
      year = @fsub.fsub!(m[3])
      format = "%d%b%y" if year.length < 4
      str = "#{date}#{mont}#{year}"
    when "%B %d, %Y"
      # The regex tolerates a missing comma, a "." instead of "," and any
      # amount of spacing; strptime does not, so rebuild the canonical form.
      date = @fsub.fsub!(m[2])
      mont = m[1]
      year = @fsub.fsub!(m[3])
      format = "%B %d, %y" if year.length < 4
      str = "#{mont} #{date}, #{year}"
    end

    [str.strip, format]
  end

  # Converts the output of #fscan into Date objects.
  #
  # Candidates that Date.strptime rejects are skipped on purpose: fuzzy
  # matching is expected to produce some strings that are not real dates.
  def matches_to_dates(matches)
    dates = []
    matches.each do |found, format|
      found.each do |match|
        text, strp_format = validaterize(match, format)
        begin
          dates << Date.strptime(text, strp_format)
        rescue ArgumentError
          # not a valid date for this format - ignore the candidate
        end
      end
    end
    dates
  end

  # Looks for dates with increasing fuzziness, from 1 up to max_fuzz, and
  # stops at the first level that yields at least one valid date.
  #
  # Returns an array of Date (empty when nothing was found).
  def to_date(string)
    dates = []
    1.upto(@max_fuzz) do |fuzz|
      dates = matches_to_dates(fscan(string, fuzz))
      break unless dates.empty?
    end
    dates
  end

end
112
+
113
# Mixin that lets an object (typically a String) look for dates in itself
# via FuzzyDate.
module FuzzyDateString

  # max_fuzz - highest fuzziness level handed to FuzzyDate (default 2).
  #
  # Returns whatever FuzzyDate#to_date returns for the receiver.
  def to_date(max_fuzz=2)
    finder = FuzzyDate.new(max_fuzz)
    finder.to_date(self)
  end

end
@@ -0,0 +1,27 @@
1
+ # {
2
+ # # 03/18/2014 or 3/18/2014
3
+ # "%m/%d/%Y" => /([0 ][1-9]|1[012])[-\/.~X](0[1-9]|[12][0-9]|3[01])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,
4
+ #
5
+ # # 18/03/2014
6
+ # "%d/%m/%Y" => /(0[1-9]|[12][0-9]|3[01])[-\/.~X](0[1-9]|1[012])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/,
7
+ #
8
+ # # 2014-04-14
9
+ # "%Y-%m-%d" => /(20[0-9][0-9])[-.~](0[1-9]|1[012])[-.~](0[1-9]|[12][0-9]|3[01])/,
10
+ #
11
+ # # 10APR2014 or 4APR2014
12
+ # "%d%b%Y" => /([0 ][1-9]|[12][0-9]|3[01])(JAN|FEB|MAR|APR|ApR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(19|20[0-9][0-9])/,
13
+ #
14
+ # # April 7, 2014
15
+ # "%B %d, %Y" => /(January|February|March|April|May|June|July|August|September|October|November|December) *(0?[1-9]|[12][0-9]|3[01])[,.]? *(19|20[0-9][0-9])/
16
+ # }
17
+
18
# A FuzzyScanner that also remembers which strptime format its regex
# belongs to.
class FuzzyDateScanner < FuzzyScanner

  attr_accessor :format

  # format - strptime format string associated with the regex.
  # regex  - pattern stored in @regex.
  def initialize(format, regex)
    @format, @regex = format, regex
  end

end
@@ -0,0 +1,83 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'tre-ruby'
4
+
5
# Core extension: lets two patterns be glued together with `+`.
class Regexp
  # Returns a new Regexp whose source is this pattern's source followed by
  # r's source. Only the source text is combined; the new Regexp is built
  # without any options.
  def +(r)
    left  = source
    right = r.source
    Regexp.new(left + right)
  end
end
10
+
11
# Extracts prices that follow a label (for example "Total") from noisy text
# using approximate regex matching (TRE).
class FuzzyPrice

  # Optional "$", the whole part, ONE separator character, two-digit cents.
  # The hyphen is placed first in the bracket expression so it is a literal:
  # written as `_-—` it would form a range from "_" to the em dash, which
  # matches letters and does not match "-" itself.
  FUZZY_PRICE_REGEX = /\$?([1-9]*[0-9])[-._—]([0-9][0-9])/
  TOTAL_TEXT_REGEX = /(Total|TOTAL|Total Applied) +/

  attr_accessor :max_fuzz
  attr_writer :price_regex

  # max_fuzz - highest fuzziness level #to_price will try (default 2).
  def initialize(max_fuzz=2)
    @max_fuzz = max_fuzz
    @fsub = FuzzySub.new FuzzySub::CHAR_2_NUM_SUB
  end

  # Pattern used to recognise a price; FUZZY_PRICE_REGEX unless overridden
  # through #price_regex=.
  def price_regex
    @price_regex ||= FUZZY_PRICE_REGEX
  end

  # Scans file line by line for text_regex immediately followed by a price.
  #
  # file         - text to search; split on "\n".
  # text_regex   - Regexp for the label that precedes the price.
  # fuzzy_thresh - fuzziness passed to TRE (default 2).
  #
  # Relies on Regexp#+ (defined in this file) to join the two patterns.
  #
  # Returns an array of Float, one per price found.
  def find_price(file, text_regex, fuzzy_thresh=2)
    prices = []
    # Use the configured price pattern in both passes so that a custom
    # price_regex is honoured when locating the labelled line as well.
    labelled = text_regex + price_regex

    file.split("\n").each do |line|
      hits = line.extend(TRE).ascan labelled, TRE.fuzziness(fuzzy_thresh)
      hits.each do |hit|
        # match the price regex separately again to improve accuracy
        price = (hit[0].extend(TRE).ascan price_regex, TRE.fuzziness(fuzzy_thresh))[0]
        next unless price

        whole = price[1]
        cents = price[2]
        next if whole.nil? || cents.nil?

        # turn look-alike characters (O, l, ...) into digits
        whole = @fsub.fsub!(whole)
        cents = @fsub.fsub!(cents)
        next if whole.empty? || cents.empty?

        # a fuzzy match can leave a "$" inside a group - drop it
        prices << "#{whole}.#{cents}".delete("$").to_f
      end
    end
    prices
  end

  # Tries #find_price with fuzziness 1 up to max_fuzz and returns the first
  # non-empty result (or an empty array).
  def to_price(string, regex)
    prices = []
    1.upto(@max_fuzz) do |fuzz|
      prices = find_price string, regex, fuzz
      break unless prices.empty?
    end
    prices
  end

end
76
+
77
# Mixin that lets an object (typically a String) look for prices in itself
# via FuzzyPrice.
module FuzzyPriceString

  # regex    - label pattern handed to FuzzyPrice#to_price.
  # max_fuzz - highest fuzziness level handed to FuzzyPrice (default 2).
  #
  # Returns whatever FuzzyPrice#to_price returns for the receiver.
  def to_price(regex, max_fuzz=2)
    pricer = FuzzyPrice.new(max_fuzz)
    pricer.to_price(self, regex)
  end

end
@@ -0,0 +1,14 @@
1
+ require 'tre-ruby'
2
+
3
# Approximate regex scanner built on TRE.
class FuzzyScanner

  attr_accessor :regex

  # Scans str for approximate matches of #regex.
  #
  # str       - text to scan. Newlines are replaced with spaces IN PLACE
  #             (hence the bang). A frozen string cannot be edited or
  #             extended with TRE, so in that case a private copy is
  #             scanned and the caller's object is left alone.
  # fuzziness - value passed to TRE.fuzziness (default 2).
  #
  # Returns the matches reported by TRE with duplicates removed.
  def fscan!(str, fuzziness=2)
    str = str.dup if str.frozen?
    str.gsub!(/\n/, " ")
    found = str.extend(TRE).ascan regex, TRE.fuzziness(fuzziness)
    found.uniq
  end

end
@@ -0,0 +1,33 @@
1
# Replaces look-alike characters according to a substitution table.
class FuzzySub

  # Characters commonly misread for digits. Each key is a SET of characters;
  # every character in the set is replaced by the value.
  CHAR_2_NUM_SUB = {
    "A" => "4",
    "OoD" => "0",
    "liI," => "1",
    "q" => "4" #could be 9
  }

  attr_accessor :sub_hash

  # sub_hash - Hash of "characters to replace" => "replacement".
  def initialize(sub_hash)
    @sub_hash = sub_hash
  end

  # Applies every substitution to word IN PLACE, in the hash's iteration
  # order, and returns word.
  #
  # Key characters are matched literally: the pattern is built with
  # Regexp.union so that "-", "^", "]" or "\" in a key are not interpreted
  # as character-class syntax, and an empty key simply matches nothing.
  def fsub!(word)
    sub_hash.each do |chars, replacement|
      word.gsub!(Regexp.union(chars.to_s.each_char.to_a), replacement)
    end
    word
  end

end
24
+
25
# Mixin that lets an object (typically a String) run a FuzzySub over itself.
module FuzzySubString

  # sub_hash - substitution table handed to FuzzySub.new.
  #
  # Returns whatever FuzzySub#fsub! returns for the receiver.
  def fsub!(sub_hash)
    FuzzySub.new(sub_hash).fsub!(self)
  end

end
33
+
@@ -0,0 +1,37 @@
1
# Looks for approximate occurrences of known words in a text using TRE.
class FuzzyWords

  attr_accessor :max_fuzz

  # max_fuzz - highest fuzziness level #find_word will try (default 4).
  def initialize(max_fuzz=4)
    @max_fuzz = max_fuzz
  end

  # Searches file for each entry of words.
  #
  # Returns an array of { word: <entry>, matches: <matches> } hashes, one
  # for every entry that produced at least one match.
  def find_words(file, words)
    words.each_with_object([]) do |candidate, found|
      hits = find_word(file, candidate)
      found << { word: candidate, matches: hits } unless hits.empty?
    end
  end

  # Searches text for word with fuzziness 1 up to max_fuzz, stopping at the
  # first level that yields matches.
  #
  # Returns the matches from that level, or the (empty) result of the last
  # level tried.
  def find_word(text, word)
    hits = []
    1.upto(@max_fuzz) do |level|
      hits = text.extend(TRE).ascan(word, TRE.fuzziness(level))
      break unless hits.empty?
    end
    hits
  end

end
28
+
29
# Mixin that lets an object (typically a String) search itself for known
# words via FuzzyWords.
module FuzzyWordsString

  # words    - entries handed to FuzzyWords#find_words.
  # max_fuzz - highest fuzziness level handed to FuzzyWords (default 4).
  #
  # Returns whatever FuzzyWords#find_words returns for the receiver.
  def fuzzy_match_words(words, max_fuzz=4)
    matcher = FuzzyWords.new(max_fuzz)
    matcher.find_words(self, words)
  end

end
36
+
37
+
@@ -0,0 +1,5 @@
1
# Namespace holding the gem's version information.
module FuzzyMatcher
  # Version string of this release.
  VERSION = '0.0.1'
end
@@ -0,0 +1,6 @@
1
+ require 'fuzzy_matcher/fuzzy_sub'
2
+ require "fuzzy_matcher/fuzzy_scanner"
3
+ require "fuzzy_matcher/fuzzy_date_scanner"
4
+ require "fuzzy_matcher/fuzzy_date"
5
+ require "fuzzy_matcher/fuzzy_price"
6
+ require "fuzzy_matcher/fuzzy_words"
@@ -0,0 +1,99 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyDateScanner do
4
+
5
+ before do
6
+ # 03/18/2014 or 3/18/2014
7
+ @fs1 = FuzzyDateScanner.new "%m/%d/%Y", /([0 ][1-9]|1[012])[-\/.~X](0[1-9]|[12][0-9]|3[01])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/
8
+
9
+ # 18/03/2014
10
+ @fs2 = FuzzyDateScanner.new "%d/%m/%Y", /(0[1-9]|[12][0-9]|3[01])[-\/.~X](0[1-9]|1[012])[-\/.~X](20[0-9][0-9]|[0-9][0-9]$)/
11
+ end
12
+
13
+ describe "Fuzzily find matching date!" do
14
+
15
+ describe "finds the matches" do
16
+
17
+ describe "m/d/Y" do
18
+ it "finds perfectly matched text for %m/%d/%y with fuzziness 0" do
19
+ matches = @fs1.fscan!("03/18/14", 0)
20
+ matches.length.must_equal 1
21
+ matches[0][0].must_equal "03/18/14"
22
+ end
23
+
24
+ it "finds perfectly matched text for %m/%d/%Y with fuzziness 0" do
25
+ matches = @fs1.fscan!("03/18/2014", 0)
26
+ matches.length.must_equal 1
27
+ matches[0][0].must_equal "03/18/2014"
28
+ end
29
+
30
+ it "finds 1-edit matched text with fuzziness 1" do
31
+ matches = @fs1.fscan!("03/18/201A", 1)
32
+ matches.length.must_equal 1
33
+ matches[0][0].must_equal "03/18/201A"
34
+ end
35
+ end
36
+
37
+ describe "d/m/Y" do
38
+ it "finds perfectly matched text for %d/%m/%y with fuzziness 0" do
39
+ matches = @fs2.fscan!("23/08/14", 0)
40
+ matches.length.must_equal 1
41
+ matches[0][0].must_equal "23/08/14"
42
+ end
43
+
44
+ it "finds perfectly matched text for %d/%m/%Y with fuzziness 0" do
45
+ matches = @fs2.fscan!("13/08/2014", 0)
46
+ matches.length.must_equal 1
47
+ matches[0][0].must_equal "13/08/2014"
48
+ end
49
+
50
+ it "finds 1-edit matched text with fuzziness 1" do
51
+ matches = @fs2.fscan!("18/01/201A", 1)
52
+ matches.length.must_equal 1
53
+ matches[0][0].must_equal "18/01/201A"
54
+ end
55
+ end
56
+
57
+ end
58
+
59
+ describe "cannot find the matches" do
60
+
61
+ describe "m/d/Y" do
62
+ it "cannot find 1-subbed text with fuzziness 0 (by default one sub costs 1)" do
63
+ matches = @fs1.fscan!("18/18/14", 0)
64
+ matches.must_be_empty
65
+ end
66
+
67
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
68
+ matches = @fs1.fscan!("18/18/I4", 1)
69
+ matches.must_be_empty
70
+ end
71
+
72
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
73
+ matches = @fs1.fscan!(" 3/I6/2DI4", 1)
74
+ matches.must_be_empty
75
+ end
76
+ end
77
+
78
+ describe "d/m/Y" do
79
+ it "cannot find 1-subbed text with fuzziness 0 (by default one sub costs 1)" do
80
+ matches = @fs2.fscan!("08/18/14", 0)
81
+ matches.must_be_empty
82
+ end
83
+
84
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
85
+ matches = @fs2.fscan!("08/18/I4", 1)
86
+ matches.must_be_empty
87
+ end
88
+
89
+ it "cannot find 2-subbed text with fuzziness 1 (by default one sub costs 1)" do
90
+ matches = @fs2.fscan!("13/I6/20I4", 1)
91
+ matches.must_be_empty
92
+ end
93
+ end
94
+
95
+ end
96
+
97
+ end
98
+
99
+ end
@@ -0,0 +1,77 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyDate do
4
+
5
+ before do
6
+ @sample1 = "Re9 Trans Dafe/TIme CashIer\n003 7269 2014-O4-1q l8:21 O1972808\n610984528 BCCESSORIES $20,00 $12 DO\nTrans DIscounf $8.00"
7
+ @sample2 = " 4/17/2014 1:23 PM }"
8
+ @sample3 = "4ApR2O,4"
9
+ @fd = FuzzyDate.new 1
10
+ end
11
+
12
+
13
+ describe "it scans the dates from text" do
14
+
15
+ it "gets dates matches for %Y-%m-%d with default fuzziness 1" do
16
+ matches = @fd.fscan @sample1
17
+ matches.length.must_equal 2
18
+ matches[0][1].must_equal "%Y-%m-%d"
19
+ matches[1][0][0][0].must_equal "21 O19" #unfortunately scanned a wrong date becoz fuzzy is 2
20
+ end
21
+
22
+ it "gets dates matches for %d%b%Y with default fuzziness 1" do
23
+ matches = @fd.fscan @sample3
24
+ matches.length.must_equal 1
25
+ matches[0][1].must_equal "%d%b%Y"
26
+ matches[0][0][0][0].must_equal "4ApR2O,4" #unfortunately scanned a wrong date becoz fuzzy is 2
27
+ end
28
+
29
+ it "gets dates matches for %m/%d/%Y with fuzziness 1" do
30
+ matches = @fd.fscan @sample2, 1
31
+ matches.length.must_equal 1
32
+ matches[0][1].must_equal "%m/%d/%Y"
33
+ end
34
+
35
+ end
36
+
37
+ describe "it returns date" do
38
+
39
+ it "scans and find the date with max fuzziness 1" do
40
+ @fd.max_fuzz = 3
41
+ dates = @fd.to_date @sample3
42
+ dates.length.must_equal 1
43
+ dates[0].to_s.must_equal "2014-04-04"
44
+ end
45
+
46
+ it "scans and find the date with max fuzziness 2" do
47
+ dates = @fd.to_date @sample2
48
+ dates.length.must_equal 1
49
+ dates[0].to_s.must_equal "2014-04-17"
50
+ end
51
+
52
+ end
53
+
54
+ describe "it scans but cannot find date with fuzziness 1" do
55
+
56
+ it "scans and returns the date with max fuzziness 1" do
57
+ @fd.max_fuzz = 1
58
+ dates = @fd.to_date @sample1
59
+ dates.length.must_equal 0
60
+ end
61
+
62
+ it "scans and returns the date with max fuzziness 1" do
63
+ @fd.max_fuzz = 2
64
+ dates = @fd.to_date @sample1
65
+ dates.length.must_equal 1
66
+ dates[0].to_s.must_equal "2014-04-14"
67
+ end
68
+
69
+ it "string can extend this module scans and returns the date with max fuzziness 1" do
70
+ dates = @sample1.extend(FuzzyDateString).to_date 2
71
+ dates.length.must_equal 1
72
+ end
73
+
74
+ end
75
+
76
+
77
+ end
@@ -0,0 +1,62 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'minitest_helper'
4
+
5
+ describe FuzzyPrice do
6
+
7
+ before do
8
+ @fp = FuzzyPrice.new 1
9
+ @sample0 = "Total 71.75"
10
+ @sample1 = ": % ` ’ ' Total . k` 27.32“"
11
+ @sample2 = "TofaI $26.18"
12
+ @sample3 = "T0faI 55.IA"
13
+ @sample4 = "T0faI 55.15"
14
+ @sample5 = "Total 35—O3"
15
+ @sample6 = "TOTAL $10.13"
16
+ end
17
+
18
+ describe "Finds price total" do
19
+
20
+ it "finds price total with fuzziness 1" do
21
+ price = @fp.to_price @sample0, FuzzyPrice::TOTAL_TEXT_REGEX
22
+ price.length.must_equal 1
23
+ price[0].must_equal 71.75
24
+ end
25
+
26
+ it "finds price total with fuzziness 2" do
27
+ @fp.max_fuzz = 2
28
+ price = @fp.to_price @sample2, FuzzyPrice::TOTAL_TEXT_REGEX
29
+ price.length.must_equal 1
30
+ price[0].must_equal 26.18
31
+
32
+ price = @fp.to_price @sample5, FuzzyPrice::TOTAL_TEXT_REGEX
33
+ price.length.must_equal 1
34
+ price[0].must_equal 35.03
35
+ end
36
+
37
+ it "finds price total with fuzz 3" do
38
+ @fp.max_fuzz = 3
39
+ price = @fp.to_price @sample4, FuzzyPrice::TOTAL_TEXT_REGEX
40
+ price.length.must_equal 1
41
+ price[0].must_equal 55.15
42
+ end
43
+
44
+ end
45
+
46
+ describe "Cannot find price total" do
47
+
48
+ it "cannot find price total with fuzziness 1" do
49
+ @fp.max_fuzz = 1
50
+ price = @fp.to_price @sample3, FuzzyPrice::TOTAL_TEXT_REGEX
51
+ price.length.must_equal 0
52
+ end
53
+
54
+ it "cannot find price total with fuzz 2, text too messy" do
55
+ price = @fp.to_price @sample1, FuzzyPrice::TOTAL_TEXT_REGEX
56
+ price.length.must_equal 0
57
+ end
58
+
59
+ end
60
+
61
+
62
+ end
@@ -0,0 +1,44 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyScanner do
4
+
5
+ before do
6
+ @fs = FuzzyScanner.new
7
+ @fs.regex = /([0 ][1-9]|1[012])[-\/.~X](0[1-9]|[12][0-9]|3[01])[-\/.~X]([0-9][0-9]$)/
8
+ end
9
+
10
+ describe "Fuzzily find matching text!" do
11
+
12
+ describe "finds the matches" do
13
+
14
+ it "finds perfectly matched text with fuzziness 0" do
15
+ matches = @fs.fscan!("03/18/14", 0)
16
+ matches.length.must_equal 1
17
+ matches[0][0].must_equal "03/18/14"
18
+ end
19
+
20
+ it "finds 1-edit matched text with fuzziness 1" do
21
+ matches = @fs.fscan!("03/I8/14", 1)
22
+ matches.length.must_equal 1
23
+ matches[0][0].must_equal "03/I8/14"
24
+ end
25
+
26
+ end
27
+
28
+ describe "cannot find the matches" do
29
+
30
+ it "cannot find 2-edit text with fuzziness 1" do
31
+ matches = @fs.fscan!("03/18/IA", 1)
32
+ matches.must_be_empty
33
+ end
34
+
35
+ it "cannot find matches with regex not matching" do
36
+ matches = @fs.fscan!("18/18/14", 0)
37
+ matches.must_be_empty
38
+ end
39
+
40
+ end
41
+
42
+ end
43
+
44
+ end
@@ -0,0 +1,25 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzySub do
4
+
5
+ before do
6
+ @fs = FuzzySub.new({ "A" => "4" })
7
+ end
8
+
9
+ describe "Find text to sub!" do
10
+
11
+ it "must only sub the should subbed words" do
12
+ @fs.fsub!("O3/18/1A").must_equal "O3/18/14"
13
+ end
14
+
15
+ end
16
+
17
+ describe "String extends fuzzy sub" do
18
+
19
+ it "String extends fuzzy sub should be able to do fuzzy sub!" do
20
+ "O3/18/1A".extend(FuzzySubString).fsub!({ "A" => "4" }).must_equal "O3/18/14"
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,47 @@
1
+ require 'minitest_helper'
2
+
3
+ describe FuzzyWords do
4
+
5
+ before do
6
+ @fw = FuzzyWords.new
7
+ @words = ["JALAPENO CHILE", "CHILE D/ARBOL", "GRAPEFRUIT LARGE", "HASS SML AVOCADO", "MICHL KORS MS",
8
+ "LAUREN PETITE", "BROWN CHICKEN", "ASN/TAS DRIED NOODLE MED", "KOBE VENOMENON YOT"]
9
+
10
+ @test1 = " JALAPENO CHILE\n2.6l lb @ $O.69/lb $l.8O F\nCHILE D/ARBOL \nl.O8 lb @ $3,49/lb $3.77 F "
11
+ @test2 = " \n2.6l lb @ $O.69/lb $l.8O F\nCHIL \nl.O8 lb @ $3,49/lb $3.77 F "
12
+ @test3 = File.read File.expand_path("../../fixtures/sample1.txt", __FILE__)
13
+ @unreadable_test4 = File.read File.expand_path("../../fixtures/sample2.txt", __FILE__)
14
+ end
15
+
16
+ describe "Find text matches words" do
17
+
18
+ it "finds the similar words" do
19
+ words = @fw.find_words(@test1, @words)
20
+ words.length.must_equal 2
21
+ end
22
+
23
+ it "finds the similar words" do
24
+ words = @fw.find_words(@test3, ["BUTTERFLY PRINT TOP:MULTI"])
25
+ words.length.must_equal 1
26
+ words[0][:word].must_equal "BUTTERFLY PRINT TOP:MULTI"
27
+ words[0][:matches].must_equal ["BUTTERFLY PRINT TOP:MULTI"]
28
+ end
29
+
30
+ end
31
+
32
+ describe "Cannot find test matched words" do
33
+
34
+ it "does not find the similar words" do
35
+ words = @fw.find_words(@test2, @words)
36
+ words.length.must_equal 0
37
+ end
38
+
39
+ it "does not find any words with max_fuzz 7" do
40
+ @fw.max_fuzz = 7
41
+ words = @fw.find_words(@unreadable_test4, @words)
42
+ words.length.must_equal 0
43
+ end
44
+
45
+ end
46
+
47
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require "minitest/autorun"
4
+ require "fuzzy_matcher"
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy-matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Qi He
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-04-18 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! ' Fuzzy matcher looks for fuzzy matches such as words, digits, etc.
15
+ in a string of text using regex or string.
16
+
17
+ '
18
+ email: qihe229@gmail.com
19
+ executables: []
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - lib/fuzzy_matcher/fuzzy_date.rb
24
+ - lib/fuzzy_matcher/fuzzy_date_scanner.rb
25
+ - lib/fuzzy_matcher/fuzzy_price.rb
26
+ - lib/fuzzy_matcher/fuzzy_scanner.rb
27
+ - lib/fuzzy_matcher/fuzzy_sub.rb
28
+ - lib/fuzzy_matcher/fuzzy_words.rb
29
+ - lib/fuzzy_matcher/version.rb
30
+ - lib/fuzzy_matcher.rb
31
+ - spec/fuzzy_matcher/fuzzy_date_scanner_spec.rb
32
+ - spec/fuzzy_matcher/fuzzy_date_spec.rb
33
+ - spec/fuzzy_matcher/fuzzy_price_spec.rb
34
+ - spec/fuzzy_matcher/fuzzy_scanner_spec.rb
35
+ - spec/fuzzy_matcher/fuzzy_sub_spec.rb
36
+ - spec/fuzzy_matcher/fuzzy_words_spec.rb
37
+ - spec/minitest_helper.rb
38
+ homepage: http://github.com/he9qi/fuzzy_matcher
39
+ licenses:
40
+ - MIT
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.23
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: Fuzzy matches words, digits, etc. in a string of text.
63
+ test_files:
64
+ - spec/fuzzy_matcher/fuzzy_date_scanner_spec.rb
65
+ - spec/fuzzy_matcher/fuzzy_date_spec.rb
66
+ - spec/fuzzy_matcher/fuzzy_price_spec.rb
67
+ - spec/fuzzy_matcher/fuzzy_scanner_spec.rb
68
+ - spec/fuzzy_matcher/fuzzy_sub_spec.rb
69
+ - spec/fuzzy_matcher/fuzzy_words_spec.rb
70
+ - spec/minitest_helper.rb