fuzzy_set 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b8ca0bdbe55c19972e9df5c589d606a81b94270d
4
+ data.tar.gz: da1885c2d00aa8ba8043f72294cbad17e90bc8ec
5
+ SHA512:
6
+ metadata.gz: b0b987aa8cd7d0143424fe6d6fcff33f90dccf7a1c7fb60d5bb23c699fb1df229119f0e8de97405bb682cd0ecbc631beed51a19800460e67c0e8f4385f690ff0
7
+ data.tar.gz: 18a257eb888a1feffbfecb9065116b19add560c5354a598f75605f1660a132d861abe86fd69f6afc641b2341647cbb0e64a4128fb70de3bf6f65d28b11df6556
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /spec/examples.txt
10
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0-p647
4
+ - 2.1.7
5
+ - 2.2.3
6
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
source 'https://rubygems.org'

# Runtime and development dependencies are declared in fuzzy_set.gemspec.
gemspec

# Extra tooling that is only placed in the :test group.
gem 'codeclimate-test-reporter', group: :test
gem 'guard', group: :test
gem 'guard-rspec', group: :test
gem 'growl', group: :test
data/Guardfile ADDED
@@ -0,0 +1,14 @@
1
guard :rspec, cmd: "bundle exec rspec" do
  require "guard/rspec/dsl"
  helper = Guard::RSpec::Dsl.new(self)
  specs = helper.rspec

  # The spec helper and the spec support files both map to the spec directory.
  [specs.spec_helper, specs.spec_support].each do |pattern|
    watch(pattern) { specs.spec_dir }
  end

  # Spec files are watched as they are.
  watch(specs.spec_files)

  # Ruby files under lib are mapped to their spec files by the DSL helper.
  helper.watch_spec_files_for(helper.ruby.lib_files)
end
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Manuel Hutter
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # FuzzySet
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/fuzzy_set`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'fuzzy_set'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install fuzzy_set
22
+
23
+ ## Usage
24
+
25
+ ```ruby
26
+ require 'fuzzy_set'
27
+
28
+ states = open('states.txt').read.split(/\n/)
29
+ fs = FuzzySet.new(*states)
30
+
31
+ fs.exact_match('michigan!') # => "Michigan"
32
+ fs.exact_match('mischigen') # => nil
33
+
34
+ fs.get('mischigen')
35
+ # => ["Michigan", "Wisconsin", "Mississippi", "Minnesota", "Missouri"]
36
+ ```
37
+
38
+ ## Development
39
+
40
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` (or just `rake`) to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
41
+
42
+ To install this gem onto your local machine, run `bundle exec rake install`.
43
+
44
+ ## Contributing
45
+
46
+ Bug reports and pull requests are welcome on GitHub at https://github.com/mhutter/fuzzy_set.
47
+
48
+
49
+ ## License
50
+
51
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Define the `spec` task and make it what a bare `rake` runs.
RSpec::Core::RakeTask.new(:spec)
task default: :spec
data/bin/console ADDED
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby

# Starts a Pry session with Bundler set up and fuzzy_set already required.
require 'bundler/setup'
require 'fuzzy_set'
require 'pry'

Pry.start
data/bin/setup ADDED
@@ -0,0 +1,5 @@
1
#!/bin/bash
# Installs the project's dependencies via Bundler.
# Abort on errors, on unset variables and on failures inside pipelines.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'

bundle install
data/fuzzy_set.gemspec ADDED
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'fuzzy_set/version'

Gem::Specification.new do |spec|
  spec.name          = 'fuzzy_set'
  spec.version       = FuzzySet::VERSION
  spec.authors       = ['Manuel Hutter']
  spec.email         = ['manuel@hutter.io']

  # Summary and description differ on purpose: RubyGems warns at build time
  # when both fields are identical.
  spec.summary       = 'FuzzySet allows you to fuzzy-search Strings!'
  spec.description   = 'A set of strings that can be searched fuzzily: ' \
                       'candidates are looked up through a trigram index ' \
                       'and ranked by their cosine similarity to the query.'
  spec.homepage      = 'https://github.com/mhutter/fuzzy_set'
  spec.license       = 'MIT'

  # Package every git-tracked file except tests, specs and features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'string-similarity', '~> 1.0'

  spec.add_development_dependency 'bundler', '~> 1.10'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'simplecov'
end
@@ -0,0 +1,14 @@
1
class String
  # Break the string apart into overlapping substrings of length +n+.
  #
  # The string is first wrapped in one "-" on each side, so the first and
  # the last gram mark the string boundaries.
  #
  # @example
  #   'foobar'.ngram(3)
  #   # => ["-fo", "foo", "oob", "oba", "bar", "ar-"]
  #
  # @param n [Integer] length of each gram, must be at least 2
  # @return [Array<String>] the grams in order of appearance; empty when the
  #   padded string is shorter than +n+
  # @raise [ArgumentError] if +n+ is smaller than 2
  def ngram(n)
    raise ArgumentError, "n must be > 1, is #{n}" if n < 2

    "-#{self}-".each_char.each_cons(n).map(&:join)
  end
end
data/lib/fuzzy_set.rb ADDED
@@ -0,0 +1,106 @@
1
+ require 'string/similarity'
2
+
3
+ require 'fuzzy_set/version'
4
+ require 'core_ext/string'
5
+
6
# FuzzySet implements a fuzzy-searchable set of strings.
#
# As a set, it cannot contain duplicate elements. Every item is stored as a
# string and indexed by the n-grams (of size NGRAM_SIZE) of its normalized
# form.
class FuzzySet
  NGRAM_SIZE = 3

  # @param items [Array<#to_s>] initial items, see {#add}
  def initialize(*items)
    @items = []       # items in insertion order; the position is the item id
    @denormalize = {} # normalized string => original item
    @index = {}       # n-gram => ids of the items containing it

    add(*items)
  end

  # Normalizes +query+, and looks up an entry by its normalized value.
  #
  # @param query [String] search query
  # @return [String, nil] matched (denormalized) value or `nil`
  def exact_match(query)
    @denormalize[normalize(query)]
  end

  # Add one or more +items+ to the set.
  #
  # Each item will be converted into a string and indexed upon adding.
  # Items that are already present are skipped; the remaining items are
  # still added.
  #
  # @param items [#to_s] item(s) to add
  # @return [FuzzySet] +self+
  def add(*items)
    items.each do |item|
      item = item.to_s
      # `next`, not `return`: a duplicate must not abort the whole batch.
      next if @items.include?(item)

      id = _add(item)
      calculate_grams_for(normalize(item), id)
    end
    self
  end

  # @see #add
  def <<(item)
    add(item)
  end

  # Fuzzy-find a string based on +query+
  #
  # 1. normalize +query+
  # 2. check for an exact match and return, if present
  # 3. find matches based on Ngrams
  # 4. sort matches by their cosine similarity to +query+
  #
  # The similarity is computed between the normalized query and the items
  # as they were added (not their normalized form).
  #
  # @param query [String] search query
  # @return [Array<String>] the exact match as the only element, or else all
  #   items sharing at least one n-gram with the query, most similar first;
  #   empty if nothing matches
  def get(query)
    query = normalize(query)

    # check for exact match
    exact = @denormalize[query]
    return [exact] if exact

    match_ids = query.ngram(NGRAM_SIZE).flat_map { |gram| @index.fetch(gram, []) }.uniq
    matches = match_ids.map { |id| @items[id] }

    # sort matches by their cosine distance to query
    matches.sort_by { |match| 1.0 - String::Similarity.cosine(query, match) }
  end

  # The item is converted with +to_s+ first, exactly like {#add} does, so
  # anything that was added can also be found again.
  #
  # @param item [#to_s] item to look for
  # @return [Boolean] +true+ if the given +item+ is present in the set.
  def include?(item)
    @items.include?(item.to_s)
  end

  # @return [Integer] Number of elements in the set.
  def length
    @items.length
  end
  alias_method :size, :length

  # @return [Boolean] +true+, if there are no items yet.
  def empty?
    @items.empty?
  end

  private

  # Normalize a string by removing all non-word characters
  # except spaces and then converting it to lowercase.
  #
  # NOTE(review): `\w` only matches ASCII word characters in Ruby, so
  # non-ASCII letters are stripped as well - confirm this is intended.
  def normalize(str)
    str.gsub(/[^\w ]/, '').downcase
  end

  # Store +item+ and remember how to get back from its normalized form.
  #
  # NOTE(review): two different items with the same normalized form share one
  # @denormalize entry; the one added last wins.
  #
  # @return [Integer] id (position in @items) of the stored item
  def _add(item)
    @items.push(item)
    @denormalize[normalize(item)] = item
    # #add rejects duplicates, so the new item is always the last one.
    @items.length - 1
  end

  # Register +id+ under every n-gram of +string+.
  def calculate_grams_for(string, id)
    string.ngram(NGRAM_SIZE).each do |gram|
      (@index[gram] ||= []) << id
    end
  end
end
@@ -0,0 +1,3 @@
1
class FuzzySet
  # Version of the gem; frozen so it cannot be modified at runtime.
  VERSION = '1.0.0'.freeze
end
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Hutter
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: string-similarity
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: FuzzySet allows you to fuzzy-search Strings!
98
+ email:
99
+ - manuel@hutter.io
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".rspec"
106
+ - ".travis.yml"
107
+ - Gemfile
108
+ - Guardfile
109
+ - LICENSE.txt
110
+ - README.md
111
+ - Rakefile
112
+ - bin/console
113
+ - bin/setup
114
+ - fuzzy_set.gemspec
115
+ - lib/core_ext/string.rb
116
+ - lib/fuzzy_set.rb
117
+ - lib/fuzzy_set/version.rb
118
+ homepage: https://github.com/mhutter/fuzzy_set
119
+ licenses:
120
+ - MIT
121
+ metadata: {}
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ required_rubygems_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ requirements: []
137
+ rubyforge_project:
138
+ rubygems_version: 2.4.5.1
139
+ signing_key:
140
+ specification_version: 4
141
+ summary: FuzzySet allows you to fuzzy-search Strings!
142
+ test_files: []
143
+ has_rdoc: