fuzzy-prompt 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +8 -0
- data/fuzzy.gemspec +26 -0
- data/lib/fuzzy.rb +63 -0
- data/lib/fuzzy/version.rb +3 -0
- data/test/fuzzy_test.rb +59 -0
- metadata +124 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2126de9c18c2f17e0b24606812e0e2e91414aae7
|
4
|
+
data.tar.gz: 13c301285bd18cc56fd40719b29ada64ff523890
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7f46d6b58fce91375a5ae82e4ac9db89c5282d9b47ed5fb2c4987d2e44c21b5a099de570899e476dc451b0e967008a2a7044ae332bf74f86f5ca19fc5d27b9a8
|
7
|
+
data.tar.gz: 4e5fc6ec854fd3363c4016c7558896777616ed5a5c956405d6b8ebbd1f1baf0535ad62ea7fac2f76ea19548477bf033872d100c42a1e0ecbf386c87086ba7aa4
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Sudhir Jonathan
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Fuzzy
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'fuzzy-prompt', require: 'fuzzy'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install fuzzy-prompt
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/fuzzy.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'fuzzy/version'
|
5
|
+
|
6
|
+
# Gem specification for fuzzy-prompt, a fuzzy tokenizer and ranker.
Gem::Specification.new do |spec|
  spec.name          = "fuzzy-prompt"
  spec.version       = Fuzzy::VERSION
  spec.authors       = ["Sudhir Jonathan"]
  spec.email         = ["sudhir.j@gmail.com"]
  spec.description   = %q{Tokenizes, fuzzes and scores strings - good for autocomplete}
  spec.summary       = %q{Fuzzy tokenizer and ranker}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/fuzzy.rb calls `blank?` and `String#parameterize`, which are
  # Active Support core extensions. Active Support is published on RubyGems
  # as "activesupport" (no underscore); that is the gem providing
  # `require 'active_support/all'`.
  spec.add_dependency "i18n"
  spec.add_dependency "activesupport"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "minitest"
end
|
data/lib/fuzzy.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require "fuzzy/version"
|
2
|
+
|
3
|
+
# Prefix-based scoring of weighted groups of terms (e.g. for autocomplete).
#
# NOTE: this code calls `blank?` and `String#parameterize`, which are
# Active Support core extensions; this file does not require them itself, so
# the caller must have loaded them before using the scorer.
module Fuzzy
  class Scorer
    # A token (String) paired with its numeric weight/score.
    Token = Struct.new(:token, :weight)

    # A group of raw terms sharing one weight.
    TermSet = Struct.new(:weight, :terms) do
      # Unique word tokens obtained by parameterizing every term and
      # splitting the result on '-'.
      def cleaned_terms
        terms.flat_map { |t| t.parameterize.split('-') }.to_set
      end

      # Weight given to each token of this set: the set's share of
      # +total_weight+, divided evenly by the number of raw terms.
      def token_weight(total_weight)
        weight.fdiv(total_weight * terms.size)
      end

      # Tokens for every cleaned term, each carrying #token_weight.
      def tokens(total_weight)
        cleaned_terms.map { |term| Token.new(term, token_weight(total_weight)) }
      end
    end

    # corpus - Array of Hashes with :weight and :terms keys (may be nil).
    #          Entries with a blank weight or blank terms are ignored, and
    #          blank terms inside an entry are dropped.
    def initialize(corpus)
      @corpus = (corpus || [])
        .reject { |c| c[:weight].blank? || c[:terms].blank? }
        .map { |c| TermSet.new(c[:weight], c[:terms].reject { |t| t.blank? }) }
        # An entry whose terms were all blank (e.g. [nil, '']) yields no
        # tokens, so it must not count towards the total weight either.
        .reject { |set| set.terms.empty? }
      @total_weight = @corpus.sum { |c| c.weight }
      @weighted_tokens = @corpus.flat_map { |c| c.tokens(@total_weight) }
    end

    # Scores +query+ against every weighted token. A token contributes
    # (query length / token length) * token weight when it starts with
    # +query+, and nothing otherwise.
    #
    # Returns the mean of the positive contributions, or 0 when there are none.
    def rank(query)
      scores = @weighted_tokens.map do |wt|
        length_score = wt.token.start_with?(query) ? query.length.fdiv(wt.token.length) : 0
        length_score * wt.weight
      end
      score_count = scores.count { |s| s > 0 }
      return 0 if score_count.zero?

      scores.sum / score_count
    end

    # Returns the Set of every non-empty prefix of every cleaned term.
    def tokenize
      @corpus.flat_map { |c| c.cleaned_terms.to_a }.flat_map do |str|
        (1..str.length).map { |len| str.slice(0, len) }
      end.to_set
    end

    # Returns an Array of Tokens: each prefix from #tokenize with its #rank.
    def tokens
      tokenize.map { |t| Token.new(t, rank(t)) }
    end

    # Returns #tokens with weights linearly rescaled so the smallest weight
    # maps to 0 and the largest to 1.
    #
    # An empty corpus yields []. When every token has the same weight there
    # is no range to rescale (the slope would be a division by zero), so all
    # tokens get weight 1.0, consistent with "the maximum maps to 1".
    def normalized_tokens
      basic_tokens = tokens
      return [] if basic_tokens.empty?

      weights = basic_tokens.map(&:weight)
      max = weights.max
      spread = max - weights.min
      return basic_tokens.map { |t| Token.new(t.token, 1.0) } if spread.zero?

      # Linear transform y = m*x + c with m = (1 - 0) / (max - min).
      m = 1.fdiv(spread)
      # Substituting the max point gives 1 = m*max + c.
      c = 1 - (m * max)
      basic_tokens.map { |t| Token.new(t.token, (t.weight * m) + c) }
    end
  end
end
|
data/test/fuzzy_test.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'minitest/spec'
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'active_support/all'
|
5
|
+
require 'fuzzy'
|
6
|
+
|
7
|
+
# Tests for Fuzzy::Scorer ranking, tokenization and weight normalization.
# Assertions follow Minitest's (expected, actual) argument order so failure
# messages report the right side as "expected".
class TestFuzzy < Minitest::Test
  # Whole-word queries score the term set's share of the total weight,
  # split evenly across that set's terms.
  def test_simple_whole_word_scoring
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['a', 'b']},
      {weight: 2, terms: ['c', 'd']}
    ])
    assert_in_delta 2.fdiv(6), sc.rank('a'), 0.001
    assert_in_delta 1.fdiv(6), sc.rank('c'), 0.001
  end

  # Prefix queries are scaled by matched length; blank terms and entries
  # missing a weight or terms are ignored.
  def test_partial_word_scoring
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'efwx', nil]},
      {weight: 2, terms: ['eflm', 'gh', '']},
      {weight: 2, terms: []},
      {terms: []},
      {weight: 23},
      {}
    ])
    assert_in_delta 1.fdiv(6), sc.rank('a'), 0.001
    assert_in_delta 2.fdiv(6), sc.rank('ab'), 0.001
    assert_in_delta 1.fdiv(12), sc.rank('g'), 0.001
    assert_in_delta [1.fdiv(6), 1.fdiv(12)].sum.fdiv(2), sc.rank('ef'), 0.001

    assert sc.rank('a') > sc.rank('b')
    assert sc.rank('ab') > sc.rank('ef')
    assert_equal sc.rank('ab'), sc.rank('efwx')
    assert_equal sc.rank('eflm'), sc.rank('gh')
    assert sc.rank('ef') > sc.rank('g')
    assert sc.rank('efwx') > sc.rank('gh')
    assert sc.rank('gh') > sc.rank('e')
  end

  # tokenize yields every non-empty prefix; tokens pairs each with its rank.
  def test_tokenization
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'cd']},
      {weight: 2, terms: ['ef', 'gh']}
    ])
    assert_equal ['a', 'ab', 'c', 'cd', 'e', 'ef', 'g', 'gh'].to_set, sc.tokenize
    assert_equal sc.tokenize.map { |t| Fuzzy::Scorer::Token.new(t, sc.rank(t)) }, sc.tokens
  end

  # Normalized weights span 0..1 and cover the same tokens as #tokens.
  def test_normalization
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'cd']},
      {weight: 2, terms: ['ef', 'gh']}
    ])
    assert_in_delta 1, sc.normalized_tokens.max_by(&:weight).weight, 0.1
    assert_in_delta 0, sc.normalized_tokens.min_by(&:weight).weight, 0.1
    assert_equal sc.tokens.map(&:token).to_set, sc.normalized_tokens.map(&:token).to_set
  end
end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fuzzy-prompt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sudhir Jonathan
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: i18n
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: active_support
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: minitest
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Tokenizes, fuzzes and scores strings - good for autocomplete
|
84
|
+
email:
|
85
|
+
- sudhir.j@gmail.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- .gitignore
|
91
|
+
- Gemfile
|
92
|
+
- LICENSE.txt
|
93
|
+
- README.md
|
94
|
+
- Rakefile
|
95
|
+
- fuzzy.gemspec
|
96
|
+
- lib/fuzzy.rb
|
97
|
+
- lib/fuzzy/version.rb
|
98
|
+
- test/fuzzy_test.rb
|
99
|
+
homepage: ''
|
100
|
+
licenses:
|
101
|
+
- MIT
|
102
|
+
metadata: {}
|
103
|
+
post_install_message:
|
104
|
+
rdoc_options: []
|
105
|
+
require_paths:
|
106
|
+
- lib
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - '>='
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
requirements: []
|
118
|
+
rubyforge_project:
|
119
|
+
rubygems_version: 2.1.9
|
120
|
+
signing_key:
|
121
|
+
specification_version: 4
|
122
|
+
summary: Fuzzy tokenizer and ranker
|
123
|
+
test_files:
|
124
|
+
- test/fuzzy_test.rb
|