fuzzy-prompt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2126de9c18c2f17e0b24606812e0e2e91414aae7
4
+ data.tar.gz: 13c301285bd18cc56fd40719b29ada64ff523890
5
+ SHA512:
6
+ metadata.gz: 7f46d6b58fce91375a5ae82e4ac9db89c5282d9b47ed5fb2c4987d2e44c21b5a099de570899e476dc451b0e967008a2a7044ae332bf74f86f5ca19fc5d27b9a8
7
+ data.tar.gz: 4e5fc6ec854fd3363c4016c7558896777616ed5a5c956405d6b8ebbd1f1baf0535ad62ea7fac2f76ea19548477bf033872d100c42a1e0ecbf386c87086ba7aa4
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org"

# Dependencies are not listed here; they are declared in fuzzy.gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Sudhir Jonathan
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Fuzzy
2
+
3
+ Tokenizes, fuzzes and scores strings - good for autocomplete.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fuzzy-prompt', require: 'fuzzy'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fuzzy-prompt
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Configure the test task: put the `test` directory on the load path
# and run every file in it whose name ends in `_test.rb`.
Rake::TestTask.new do |task|
  task.libs.push('test')
  task.pattern = 'test/*_test.rb'
end
data/fuzzy.gemspec ADDED
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for fuzzy-prompt.

# Put this gem's lib/ directory on the load path so the version file can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'fuzzy/version'

Gem::Specification.new do |spec|
  spec.name          = "fuzzy-prompt"
  spec.version       = Fuzzy::VERSION
  spec.authors       = ["Sudhir Jonathan"]
  spec.email         = ["sudhir.j@gmail.com"]
  spec.description   = %q{Tokenizes, fuzzes and scores strings - good for autocomplete}
  spec.summary       = %q{Fuzzy tokenizer and ranker}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "i18n"
  # The library providing `require 'active_support/all'` is published under
  # the gem name "activesupport" (no underscore).
  spec.add_dependency "activesupport"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "minitest"
end
data/lib/fuzzy.rb ADDED
@@ -0,0 +1,63 @@
1
+ require "fuzzy/version"
2
+
3
module Fuzzy
  # Scores prefix queries against a weighted corpus of terms.
  #
  # NOTE: this class calls `blank?` and `parameterize` (ActiveSupport core
  # extensions) and `to_set` (Set); the caller must have those loaded.
  class Scorer
    # A token string paired with a numeric weight (or score).
    Token = Struct.new(:token, :weight)

    # A group of terms that share a single weight.
    TermSet = Struct.new(:weight, :terms) do
      # @return [Set<String>] the unique words obtained by parameterizing
      #   each term and splitting the result on '-'
      def cleaned_terms
        terms.flat_map { |t| t.parameterize.split('-') }.to_set
      end

      # @param total_weight [Numeric] sum of the weights of all term sets
      # @return [Float] this set's share of the total weight, divided
      #   evenly between its terms
      def token_weight(total_weight)
        weight.fdiv(total_weight * terms.size)
      end

      # @param total_weight [Numeric] sum of the weights of all term sets
      # @return [Array<Token>] one Token per cleaned term
      def tokens(total_weight)
        share = token_weight(total_weight)
        cleaned_terms.map { |term| Token.new(term, share) }
      end
    end

    # @param corpus [Array<Hash>, nil] entries with `:weight` and `:terms`
    #   keys. Entries with a blank weight or blank terms are ignored, and
    #   blank terms inside an entry are dropped.
    def initialize(corpus)
      usable = (corpus || []).reject { |c| c[:weight].blank? || c[:terms].blank? }
      term_sets = usable.map do |c|
        TermSet.new(c[:weight], c[:terms].reject { |t| t.blank? })
      end
      # An entry whose terms were all blank yields no tokens; drop it so its
      # weight does not inflate the total and deflate every other score.
      @corpus = term_sets.reject { |set| set.terms.empty? }
      @total_weight = @corpus.sum { |c| c.weight }
      @weighted_tokens = @corpus.flat_map { |c| c.tokens(@total_weight) }
    end

    # Scores a query as a prefix of the corpus tokens.
    #
    # Each token that starts with the query contributes
    # (query length / token length) * token weight; the result is the mean
    # over the tokens with a positive contribution.
    #
    # @param query [String, nil]
    # @return [Numeric] 0 when the query is nil, empty, or matches nothing
    def rank(query)
      return 0 if query.nil? || query.empty?

      scores = @weighted_tokens.map do |wt|
        next 0.0 unless wt.token.start_with?(query)
        query.length.fdiv(wt.token.length) * wt.weight
      end
      score_count = scores.count { |s| s > 0 }
      return 0 unless score_count > 0
      scores.sum / score_count
    end

    # @return [Set<String>] every non-empty prefix of every cleaned term
    def tokenize
      @corpus.flat_map { |c| c.cleaned_terms.to_a }.flat_map do |str|
        (1..str.length).map { |len| str.slice(0, len) }
      end.to_set
    end

    # @return [Array<Token>] each prefix from #tokenize paired with its #rank
    def tokens
      tokenize.map { |t| Token.new(t, rank(t)) }
    end

    # Rescales the scores from #tokens linearly so that the lowest becomes 0
    # and the highest becomes 1.
    #
    # @return [Array<Token>] empty when there are no tokens; when every token
    #   has the same score, all of them are given 1.0
    def normalized_tokens
      basic_tokens = tokens
      return [] if basic_tokens.empty?

      min, max = basic_tokens.map(&:weight).minmax
      # With a zero range the slope below would be a division by zero and
      # every weight would come out as NaN.
      return basic_tokens.map { |t| Token.new(t.token, 1.0) } if max == min

      # Calculate m and c values for the linear transform y=mx+c
      # m = (y' - y)/(x' - x)
      m = (1 - 0).fdiv(max - min)
      # Substituting the max values in, we get 1 = m(max) + c
      c = 1 - (m * max)
      basic_tokens.map { |t| Token.new(t.token, (t.weight * m + c)) }
    end
  end
end
@@ -0,0 +1,3 @@
1
module Fuzzy
  # Version string of the gem; fuzzy.gemspec reads it as the spec version.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,59 @@
1
+ require 'minitest'
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'active_support/all'
5
+ require 'fuzzy'
6
+
7
# Tests for Fuzzy::Scorer: ranking, prefix tokenization and normalization.
# Assertions follow Minitest's (expected, actual) argument order so failure
# messages label the two values correctly.
class TestFuzzy < Minitest::Test
  # A query equal to a whole term scores that term's full token weight.
  def test_simple_whole_word_scoring
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['a', 'b']},
      {weight: 2, terms: ['c', 'd']}
    ])
    assert_in_delta 2.fdiv(6), sc.rank('a'), 0.001
    assert_in_delta 1.fdiv(6), sc.rank('c'), 0.001
  end

  # Prefix queries score proportionally to how much of the token they cover;
  # blank terms and incomplete corpus entries are ignored.
  def test_partial_word_scoring
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'efwx', nil]},
      {weight: 2, terms: ['eflm', 'gh', '']},
      {weight: 2, terms: []},
      {terms: []},
      {weight: 23},
      {}
    ])
    assert_in_delta 1.fdiv(6), sc.rank('a'), 0.001
    assert_in_delta 2.fdiv(6), sc.rank('ab'), 0.001
    assert_in_delta 1.fdiv(12), sc.rank('g'), 0.001
    assert_in_delta [1.fdiv(6), 1.fdiv(12)].sum.fdiv(2), sc.rank('ef'), 0.001

    assert sc.rank('a') > sc.rank('b')
    assert sc.rank('ab') > sc.rank('ef')
    assert_equal sc.rank('ab'), sc.rank('efwx')
    assert_equal sc.rank('eflm'), sc.rank('gh')
    assert sc.rank('ef') > sc.rank('g')
    assert sc.rank('efwx') > sc.rank('gh')
    assert sc.rank('gh') > sc.rank('e')
  end

  # tokenize yields every prefix of every term; tokens pairs each with its rank.
  def test_tokenization
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'cd']},
      {weight: 2, terms: ['ef', 'gh']}
    ])
    assert_equal ['a', 'ab', 'c', 'cd', 'e', 'ef', 'g', 'gh'].to_set, sc.tokenize
    expected_tokens = sc.tokenize.map { |t| Fuzzy::Scorer::Token.new(t, sc.rank(t)) }
    assert_equal expected_tokens, sc.tokens
  end

  # Normalized weights span 0..1 and cover the same tokens as #tokens.
  def test_normalization
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'cd']},
      {weight: 2, terms: ['ef', 'gh']}
    ])
    normalized = sc.normalized_tokens
    # The transform pins the extremes to 1 and 0, so a tight delta is safe.
    assert_in_delta 1, normalized.max_by(&:weight).weight, 0.001
    assert_in_delta 0, normalized.min_by(&:weight).weight, 0.001
    assert_equal sc.tokens.map(&:token).to_set, normalized.map(&:token).to_set
  end
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy-prompt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sudhir Jonathan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-10-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: i18n
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: active_support
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Tokenizes, fuzzes and scores strings - good for autocomplete
84
+ email:
85
+ - sudhir.j@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - .gitignore
91
+ - Gemfile
92
+ - LICENSE.txt
93
+ - README.md
94
+ - Rakefile
95
+ - fuzzy.gemspec
96
+ - lib/fuzzy.rb
97
+ - lib/fuzzy/version.rb
98
+ - test/fuzzy_test.rb
99
+ homepage: ''
100
+ licenses:
101
+ - MIT
102
+ metadata: {}
103
+ post_install_message:
104
+ rdoc_options: []
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ requirements: []
118
+ rubyforge_project:
119
+ rubygems_version: 2.1.9
120
+ signing_key:
121
+ specification_version: 4
122
+ summary: Fuzzy tokenizer and ranker
123
+ test_files:
124
+ - test/fuzzy_test.rb