fuzzy-prompt 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2126de9c18c2f17e0b24606812e0e2e91414aae7
4
+ data.tar.gz: 13c301285bd18cc56fd40719b29ada64ff523890
5
+ SHA512:
6
+ metadata.gz: 7f46d6b58fce91375a5ae82e4ac9db89c5282d9b47ed5fb2c4987d2e44c21b5a099de570899e476dc451b0e967008a2a7044ae332bf74f86f5ca19fc5d27b9a8
7
+ data.tar.gz: 4e5fc6ec854fd3363c4016c7558896777616ed5a5c956405d6b8ebbd1f1baf0535ad62ea7fac2f76ea19548477bf033872d100c42a1e0ecbf386c87086ba7aa4
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fuzzy.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Sudhir Jonathan
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Fuzzy
2
+
3
+ Tokenizes, fuzzes and scores strings - good for autocomplete.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fuzzy-prompt'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fuzzy-prompt
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Defines the default `test` Rake task: puts the `test` directory on the load
# path and runs every file matching test/*_test.rb.
Rake::TestTask.new do |task|
  task.libs.push('test')
  task.pattern = 'test/*_test.rb'
end
data/fuzzy.gemspec ADDED
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fuzzy/version'
5
+
6
# Gem specification for fuzzy-prompt.
#
# Packaged files are taken from `git ls-files`, so the gem must be built from
# inside a git checkout.
Gem::Specification.new do |spec|
  spec.name          = "fuzzy-prompt"
  spec.version       = Fuzzy::VERSION
  spec.authors       = ["Sudhir Jonathan"]
  spec.email         = ["sudhir.j@gmail.com"]
  spec.description   = %q{Tokenizes, fuzzes and scores strings - good for autocomplete}
  spec.summary       = %q{Fuzzy tokenizer and ranker}
  spec.homepage      = ""
  spec.license       = "MIT"

  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "i18n"
  # The gem that provides `require 'active_support/...'` is published as
  # "activesupport" (no underscore); "active_support" is a different gem.
  spec.add_dependency "activesupport"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "minitest"
end
data/lib/fuzzy.rb ADDED
@@ -0,0 +1,63 @@
1
+ require "fuzzy/version"
2
+
3
module Fuzzy
  # Scores prefix queries against a weighted corpus of terms.
  #
  # The corpus is an array of hashes of the form
  # `{weight: Numeric, terms: [String, ...]}`. Terms are split into tokens
  # with ActiveSupport's `String#parameterize`; `Object#blank?` is also
  # required, so ActiveSupport's core extensions must be loaded by the caller.
  class Scorer
    # A token (or token prefix) paired with its score.
    Token = Struct.new(:token, :weight)

    # One corpus entry: a weight shared between a list of raw terms.
    TermSet = Struct.new(:weight, :terms) do
      # @return [Set<String>] the unique tokens obtained by parameterizing
      #   every term and splitting the result on '-'
      def cleaned_terms
        terms.flat_map { |t| t.parameterize.split('-') }.to_set
      end

      # @param total_weight [Numeric] sum of the weights of all term sets
      # @return [Float] this set's share of the total weight, divided evenly
      #   between its raw terms
      def token_weight(total_weight)
        weight.fdiv(total_weight * terms.size)
      end

      # @param total_weight [Numeric] sum of the weights of all term sets
      # @return [Array<Token>] one weighted token per cleaned term
      def tokens(total_weight)
        cleaned_terms.map do |term|
          Token.new(term, token_weight(total_weight))
        end
      end
    end

    # @param corpus [Array<Hash>, nil] entries with :weight and :terms keys;
    #   entries with a blank weight or blank terms are ignored
    def initialize(corpus)
      usable = (corpus || []).reject { |c| c[:weight].blank? || c[:terms].blank? }
      term_sets = usable.map { |c| TermSet.new(c[:weight], c[:terms].reject(&:blank?)) }
      # An entry whose terms were all blank (e.g. ['', nil]) contributes no
      # tokens, so it must not dilute the total weight either.
      @corpus = term_sets.reject { |ts| ts.terms.empty? }
      @total_weight = @corpus.sum(&:weight)
      @weighted_tokens = @corpus.flat_map { |c| c.tokens(@total_weight) }
    end

    # Scores a query as the mean, over the tokens it prefixes, of
    # (query length / token length) * token weight.
    #
    # @param query [String]
    # @return [Numeric] 0 when the query prefixes no token
    def rank(query)
      scores = @weighted_tokens.map do |wt|
        length_score = wt.token.start_with?(query) ? query.length.fdiv(wt.token.length) : 0
        length_score * wt.weight
      end
      matched = scores.count { |s| s > 0 }
      return 0 unless matched > 0

      scores.sum / matched
    end

    # @return [Set<String>] every non-empty prefix of every cleaned term
    def tokenize
      @corpus.flat_map { |c| c.cleaned_terms.to_a }.flat_map do |str|
        (1..str.length).map { |len| str.slice(0, len) }
      end.to_set
    end

    # @return [Array<Token>] every prefix from #tokenize paired with its #rank
    def tokens
      tokenize.map { |t| Token.new(t, rank(t)) }
    end

    # Linearly rescales token scores so the lowest becomes 0 and the highest 1.
    #
    # @return [Array<Token>] empty when there are no tokens; when every token
    #   has the same score (so no linear map is defined) each gets 1.0
    def normalized_tokens
      basic_tokens = tokens
      return [] if basic_tokens.empty?

      min, max = basic_tokens.map(&:weight).minmax
      # max == min would make the slope below a division by zero (NaN scores).
      return basic_tokens.map { |t| Token.new(t.token, 1.0) } if max == min

      # Linear transform y = mx + c with m = (1 - 0) / (max - min);
      # substituting the max point gives 1 = m * max + c.
      m = 1.fdiv(max - min)
      c = 1 - (m * max)
      basic_tokens.map { |t| Token.new(t.token, (t.weight * m) + c) }
    end
  end
end
@@ -0,0 +1,3 @@
1
module Fuzzy
  # Released version of the gem; frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,59 @@
1
+ require 'minitest'
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'active_support/all'
5
+ require 'fuzzy'
6
+
7
# Tests for Fuzzy::Scorer: ranking, prefix tokenization and normalization.
# Assertions pass the expected value first, as Minitest's failure messages
# assume.
class TestFuzzy < Minitest::Test
  # Whole-word queries score their term set's per-term share of the weight.
  def test_simple_whole_word_scoring
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['a', 'b']},
      {weight: 2, terms: ['c', 'd']}
    ])
    assert_in_delta 2.fdiv(6), sc.rank('a'), 0.001
    assert_in_delta 1.fdiv(6), sc.rank('c'), 0.001
  end

  # Prefix queries are scaled by matched length; blank terms and incomplete
  # corpus entries are ignored.
  def test_partial_word_scoring
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'efwx', nil]},
      {weight: 2, terms: ['eflm', 'gh', '']},
      {weight: 2, terms: []},
      {terms: []},
      {weight: 23},
      {}
    ])
    assert_in_delta 1.fdiv(6), sc.rank('a'), 0.001
    assert_in_delta 2.fdiv(6), sc.rank('ab'), 0.001
    assert_in_delta 1.fdiv(12), sc.rank('g'), 0.001
    assert_in_delta [1.fdiv(6), 1.fdiv(12)].sum.fdiv(2), sc.rank('ef'), 0.001

    assert_operator sc.rank('a'), :>, sc.rank('b')
    assert_operator sc.rank('ab'), :>, sc.rank('ef')
    assert_equal sc.rank('ab'), sc.rank('efwx')
    assert_equal sc.rank('eflm'), sc.rank('gh')
    assert_operator sc.rank('ef'), :>, sc.rank('g')
    assert_operator sc.rank('efwx'), :>, sc.rank('gh')
    assert_operator sc.rank('gh'), :>, sc.rank('e')
  end

  # Every non-empty prefix of every term becomes a token, ranked by #rank.
  def test_tokenization
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'cd']},
      {weight: 2, terms: ['ef', 'gh']}
    ])
    assert_equal %w[a ab c cd e ef g gh].to_set, sc.tokenize
    assert_equal sc.tokenize.map { |t| Fuzzy::Scorer::Token.new(t, sc.rank(t)) }, sc.tokens
  end

  # Normalized weights span 0..1 and keep the same token set.
  def test_normalization
    sc = Fuzzy::Scorer.new([
      {weight: 4, terms: ['ab', 'cd']},
      {weight: 2, terms: ['ef', 'gh']}
    ])
    assert_in_delta 1, sc.normalized_tokens.max_by(&:weight).weight, 0.1
    assert_in_delta 0, sc.normalized_tokens.min_by(&:weight).weight, 0.1
    assert_equal sc.tokens.map(&:token).to_set, sc.normalized_tokens.map(&:token).to_set
  end
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy-prompt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sudhir Jonathan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-10-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: i18n
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: active_support
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Tokenizes, fuzzes and scores strings - good for autocomplete
84
+ email:
85
+ - sudhir.j@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - .gitignore
91
+ - Gemfile
92
+ - LICENSE.txt
93
+ - README.md
94
+ - Rakefile
95
+ - fuzzy.gemspec
96
+ - lib/fuzzy.rb
97
+ - lib/fuzzy/version.rb
98
+ - test/fuzzy_test.rb
99
+ homepage: ''
100
+ licenses:
101
+ - MIT
102
+ metadata: {}
103
+ post_install_message:
104
+ rdoc_options: []
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ requirements: []
118
+ rubyforge_project:
119
+ rubygems_version: 2.1.9
120
+ signing_key:
121
+ specification_version: 4
122
+ summary: Fuzzy tokenizer and ranker
123
+ test_files:
124
+ - test/fuzzy_test.rb