fuzzzy 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ require 'csv'
2
+ require 'benchmark'
3
+ require 'ruby-prof'
4
+
5
# Helpers for benchmarking and profiling a Fuzzzy search method against the
# city-name fixtures stored in benchmark/data/cities.csv.
#
# Both public entry points (+benchmark+ and +profile+) build the index in the
# Redis database behind Fuzzzy.redis and call +flushdb+ on it when they finish,
# even on failure. Point Fuzzzy.redis at a scratch database before running them.
#
# NOTE(review): +_indexer+ and +_searcher+ are presumably supplied by
# Fuzzzy::Index, which is extended here - confirm against that module.
module FuzzzyBenchmark
  extend Fuzzzy::Index

  module_function

  # Indexes the fixtures, then times +times+ searches for each context with
  # Benchmark.bm.
  #
  # meth       - search method to exercise (e.g. :ngram or :soundex).
  # contexts   - Array of search-context Hashes (:query, :distance, optional
  #              :title used as the report label, ...).
  # index_cntx - Hash merged into both the indexing context and every search
  #              context.
  # times      - number of searches timed per context.
  #
  # When a block is given it is yielded the Benchmark report object and this
  # module, so callers can append their own reports.
  #
  # Returns whatever Benchmark.bm returns.
  def benchmark(meth, contexts, index_cntx = {}, times = 100, &block)
    @search_method = meth
    @times = times
    prepare_indexes(default_context.merge(index_cntx))

    Benchmark.bm do |bm|
      contexts.each do |context|
        report(bm, context.merge(index_cntx))
      end

      yield(bm, self) if block_given?
    end
  ensure
    # Always drop the benchmark index, even when indexing or searching failed.
    Fuzzzy.redis.flushdb
    @search_method = nil
  end

  # Indexes the fixtures, runs +times+ searches for +context+ under ruby-prof
  # and writes graph, call-stack and flat reports to benchmark/reports/.
  #
  # meth       - search method to exercise.
  # context    - search-context Hash (:query, :distance, ...).
  # index_cntx - Hash merged into both the indexing and the search context.
  # times      - number of searches executed while profiling.
  def profile(meth, context, index_cntx = {}, times = 100, &block)
    @search_method = meth
    @times = times
    prepare_indexes(default_context.merge(index_cntx))

    # Build the search context exactly the way benchmark/report do, so the
    # profiled query carries :index_name and :method and hits the same index.
    search_context = default_context.merge(context.merge(index_cntx))

    RubyProf.start
    @times.times { searcher.search(search_context) }
    result = RubyProf.stop

    reports_dir = Fuzzzy.root.join('benchmark', 'reports')
    # Create the target directory up front; otherwise a missing directory
    # only surfaces as Errno::ENOENT after the whole profiling run.
    reports_dir.mkpath

    File.open(reports_dir.join("#{search_method}_graph.html"), 'w') do |file|
      RubyProf::GraphHtmlPrinter.new(result).print(file)
    end
    File.open(reports_dir.join("#{search_method}_callstack.html"), 'w') do |file|
      RubyProf::CallStackPrinter.new(result).print(file)
    end
    File.open(reports_dir.join("#{search_method}_flat.txt"), 'w') do |file|
      RubyProf::FlatPrinter.new(result).print(file)
    end
  ensure
    # Always drop the benchmark index, even when profiling failed.
    Fuzzzy.redis.flushdb
    @search_method = nil
  end

  # Prints the query and its search result, then adds one timed entry
  # (labelled with context[:title], or '' when absent) to +bench+ that runs
  # the search @times times.
  def report(bench, context)
    context = default_context.merge(context)
    result, strings = get_result(context)

    puts "Execute #{@times} times"
    puts "query: '#{context[:query]}', result: '#{result}' => #{strings}"
    bench.report(context[:title] || '') do
      @times.times { searcher.search(context) }
    end
    puts ''
  end

  # Runs a single search and looks up the dictionary string stored in Redis
  # for every returned id.
  #
  # Returns [ids, strings].
  def get_result(cntx)
    result = searcher.search(cntx)
    # Interpolation (rather than String#+) also copes with non-String ids.
    strings = result.map { |id| Fuzzzy.redis.get("fuzzzy:city:name:dictionary:#{id}") }
    [result, strings]
  end

  # Indexes every fixture row (lower-cased name plus id) with the current
  # indexer and prints the elapsed time and Redis' reported memory usage.
  def prepare_indexes(cntx)
    puts "Create index for #{search_method}:"
    puts "#{fixtures.size} names"

    start = Time.now
    fixtures.each do |source|
      indexer.create_index(cntx.merge(
        :dictionary_string => source[:name].downcase,
        :id => source[:id]
      ))
    end

    puts "#{Time.now - start} sec."
    puts "size - #{Fuzzzy.redis.info['used_memory_human']}"
  end

  # Context shared by indexing and searching: the fixed 'city:name' index and
  # the search method currently under test.
  def default_context
    {
      :index_name => 'city:name',
      :method => search_method
    }
  end

  # Indexer for the search method currently under test.
  def indexer
    _indexer(search_method)
  end

  # Searcher for the search method currently under test.
  def searcher
    _searcher(search_method)
  end

  # Rows of benchmark/data/cities.csv as Hashes with symbol keys, loaded once
  # and memoized. Uses Hash#symbolize_keys, which is not core Ruby
  # (presumably ActiveSupport's core extension - confirm it is loaded).
  def fixtures
    @fixtures ||= begin
      csv_path = Fuzzzy.root.join('benchmark', 'data', 'cities.csv').to_s
      rows = []
      CSV.foreach(csv_path, :headers => true, :encoding => 'utf-8') do |row|
        rows << row.to_hash.symbolize_keys
      end
      rows
    end
  end

  # Search method currently under test (nil outside benchmark/profile runs).
  def search_method
    @search_method
  end

  # Overrides the search method under test.
  def search_method=(meth)
    @search_method = meth
  end
end
@@ -0,0 +1,36 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../benchmark", __FILE__)
2
+ require File.expand_path('../../lib/fuzzzy', __FILE__)
3
+ require 'fuzzzy_benchmark'
4
+
5
# Benchmarks the :ngram search method with and without stop-word stripping,
# then profiles the slowest query (1000 iterations) with stripping enabled.
stopword_options = { :strip_stopwords => true }

# [query, maximum distance] pairs shared by both benchmark runs.
plain_queries = [
  ['eastleigh naersouthempton', 4],
  ['alixandropolis', 2],
  ['jenan', 1]
].map do |query, distance|
  { :query => query, :distance => distance }
end

# Same queries, each flagged to strip stop words.
stripped_queries = plain_queries.map { |query| query.merge(stopword_options) }

puts 'Without stripping stopwords'
FuzzzyBenchmark.benchmark(:ngram, plain_queries)

puts 'With stripping stopwords'
FuzzzyBenchmark.benchmark(:ngram, stripped_queries, stopword_options)

FuzzzyBenchmark.profile(:ngram, stripped_queries.first, stopword_options, 1000)
@@ -0,0 +1,36 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../benchmark", __FILE__)
2
+ require File.expand_path('../../lib/fuzzzy', __FILE__)
3
+ require 'fuzzzy_benchmark'
4
+
5
# Benchmarks the :soundex search method with and without stop-word stripping,
# then profiles the slowest query (1000 iterations) with stripping enabled.
stopword_options = { :strip_stopwords => true }

# [query, maximum distance] pairs shared by both benchmark runs.
plain_queries = [
  ['eastleigh naersouthempton', 4],
  ['alixandropolis', 2],
  ['jenan', 1]
].map do |query, distance|
  { :query => query, :distance => distance }
end

# Same queries, each flagged to strip stop words.
stripped_queries = plain_queries.map { |query| query.merge(stopword_options) }

puts 'Without stripping stopwords'
FuzzzyBenchmark.benchmark(:soundex, plain_queries)

puts 'With stripping stopwords'
FuzzzyBenchmark.benchmark(:soundex, stripped_queries, stopword_options)

FuzzzyBenchmark.profile(:soundex, stripped_queries.first, stopword_options, 1000)
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH.unshift File.expand_path("../..", __FILE__)
2
+ require './lib/fuzzzy'
3
+ require 'grape'
4
+
5
# Connect Fuzzzy to the Redis instance described by the REDIS_HOST,
# REDIS_PORT and REDIS_DB environment variables (default: localhost:6379,
# database 0).
#
# redis-rb selects the database through the :db option; the previous
# :database key was not recognised, so REDIS_DB never took effect.
# Environment values arrive as Strings, hence the explicit integer coercion.
Fuzzzy.redis = Redis.new(
  :host => (ENV['REDIS_HOST'] || 'localhost'),
  :port => (ENV['REDIS_PORT'] || 6379).to_i,
  :db => (ENV['REDIS_DB'] || 0).to_i
)

# Mount the HTTP search server as the Rack application.
run Fuzzzy::Server::HTTP
@@ -0,0 +1,175 @@
1
+ ---
2
+ - a
3
+ - about
4
+ - above
5
+ - after
6
+ - again
7
+ - against
8
+ - all
9
+ - am
10
+ - an
11
+ - and
12
+ - any
13
+ - are
14
+ - aren't
15
+ - as
16
+ - at
17
+ - be
18
+ - because
19
+ - been
20
+ - before
21
+ - being
22
+ - below
23
+ - between
24
+ - both
25
+ - but
26
+ - by
27
+ - can't
28
+ - cannot
29
+ - could
30
+ - couldn't
31
+ - did
32
+ - didn't
33
+ - do
34
+ - does
35
+ - doesn't
36
+ - doing
37
+ - don't
38
+ - down
39
+ - during
40
+ - each
41
+ - few
42
+ - for
43
+ - from
44
+ - further
45
+ - had
46
+ - hadn't
47
+ - has
48
+ - hasn't
49
+ - have
50
+ - haven't
51
+ - having
52
+ - he
53
+ - he'd
54
+ - he'll
55
+ - he's
56
+ - her
57
+ - here
58
+ - here's
59
+ - hers
60
+ - herself
61
+ - him
62
+ - himself
63
+ - his
64
+ - how
65
+ - how's
66
+ - i
67
+ - i'd
68
+ - i'll
69
+ - i'm
70
+ - i've
71
+ - if
72
+ - in
73
+ - into
74
+ - is
75
+ - isn't
76
+ - it
77
+ - it's
78
+ - its
79
+ - itself
80
+ - let's
81
+ - me
82
+ - more
83
+ - most
84
+ - mustn't
85
+ - my
86
+ - myself
87
+ - "no"
88
+ - nor
89
+ - not
90
+ - of
91
+ - "off"
92
+ - "on"
93
+ - once
94
+ - only
95
+ - or
96
+ - other
97
+ - ought
98
+ - our
99
+ - ours
100
+ - ourselves
101
+ - out
102
+ - over
103
+ - own
104
+ - same
105
+ - shan't
106
+ - she
107
+ - she'd
108
+ - she'll
109
+ - she's
110
+ - should
111
+ - shouldn't
112
+ - so
113
+ - some
114
+ - such
115
+ - than
116
+ - that
117
+ - that's
118
+ - the
119
+ - their
120
+ - theirs
121
+ - them
122
+ - themselves
123
+ - then
124
+ - there
125
+ - there's
126
+ - these
127
+ - they
128
+ - they'd
129
+ - they'll
130
+ - they're
131
+ - they've
132
+ - this
133
+ - those
134
+ - through
135
+ - to
136
+ - too
137
+ - under
138
+ - until
139
+ - up
140
+ - very
141
+ - was
142
+ - wasn't
143
+ - we
144
+ - we'd
145
+ - we'll
146
+ - we're
147
+ - we've
148
+ - were
149
+ - weren't
150
+ - what
151
+ - what's
152
+ - when
153
+ - when's
154
+ - where
155
+ - where's
156
+ - which
157
+ - while
158
+ - who
159
+ - who's
160
+ - whom
161
+ - why
162
+ - why's
163
+ - with
164
+ - won't
165
+ - would
166
+ - wouldn't
167
+ - you
168
+ - you'd
169
+ - you'll
170
+ - you're
171
+ - you've
172
+ - your
173
+ - yours
174
+ - yourself
175
+ - yourselves
@@ -0,0 +1,44 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "fuzzzy/version"
4
+
5
# Gem specification for fuzzzy. Packaged files, test files and executables
# are taken from what git tracks in the working tree.
Gem::Specification.new do |spec|
  spec.name        = "fuzzzy"
  spec.version     = Fuzzzy::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Undr"]
  spec.email       = ["lilipoper@gmail.com"]
  spec.homepage    = "http://github.com/undr/fuzzzy"
  spec.summary     = "Fuzzy Search client and server"
  spec.description = "Fuzzy Search client and server"

  spec.rubyforge_project = "fuzzzy"

  # Development-only dependencies: [name, optional version requirement].
  [
    ["rspec", ">= 2"],
    ["yard", "~> 0.6.0"],
    ["ruby-debug19"],
    ["bson_ext"],
    ["mongoid"],
    ["pry"],
    ["ruby-prof"]
  ].each do |requirement|
    spec.add_development_dependency(*requirement)
  end

  # Runtime dependencies: [name, optional version requirement].
  [
    ["bundler"],
    ["rake"],
    ["activesupport"],
    ["eventmachine"],
    ["yajl-ruby"],
    ["levenshtein-ffi"],
    ["text"],
    ["grape"],
    ["hiredis"],
    ["redis"],
    ["daemons"],
    ["ZenTest", "4.5.0"],
    ["RubyInline"]
  ].each do |requirement|
    spec.add_dependency(*requirement)
  end

  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  executable_paths   = `git ls-files -- bin/*`.split("\n")
  # Executables are listed by bare file name, without the bin/ prefix.
  spec.executables   = executable_paths.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,129 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ Bundler.require :default, (ENV['RACK_ENV'] || 'development')
4
+ require 'yaml'
5
+ require 'logger'
6
+ require 'active_support'
7
+ require 'yajl'
8
+ require 'levenshtein-ffi'
9
+ require 'text'
10
+ require 'redis/connection/hiredis'
11
+ require 'redis'
12
+
13
# Top-level namespace and process-wide configuration (logger, Redis
# connection, stop words) for the Fuzzzy fuzzy-search library.
#
# The module extends itself, so every method below is called as
# Fuzzzy.<method>.
module Fuzzzy
  extend self
  extend ActiveSupport::Autoload

  # No explicit path given: resolution is left to ActiveSupport::Autoload.
  autoload :Redis
  autoload :Index

  autoload :Indexer, 'fuzzzy/methods/indexer'
  autoload :MethodBase, 'fuzzzy/methods/method_base'

  # Soundex-based indexing and searching.
  module Soundex
    extend ActiveSupport::Autoload

    autoload :Base, 'fuzzzy/methods/soundex/base'
    autoload :Indexer, 'fuzzzy/methods/soundex/indexer'
    autoload :Searcher, 'fuzzzy/methods/soundex/searcher'
  end

  # N-gram-based indexing and searching.
  module Ngram
    extend ActiveSupport::Autoload

    autoload :Base, 'fuzzzy/methods/ngram/base'
    autoload :Indexer, 'fuzzzy/methods/ngram/indexer'
    autoload :Searcher, 'fuzzzy/methods/ngram/searcher'
  end

  # Network front ends.
  module Server
    extend ActiveSupport::Autoload

    autoload :HTTP, 'fuzzzy/server/http'
  end

  # Only expose the Mongoid integration when the application has loaded the
  # top-level Mongoid library. The leading :: makes it explicit that the
  # check is about that library, not the Fuzzzy::Mongoid namespace defined
  # right below.
  if defined?(::Mongoid)
    module Mongoid
      extend ActiveSupport::Autoload

      autoload :Index, 'fuzzzy/orm/mongoid/index'
    end
  end

  # Yields the module itself so settings can be assigned in one place:
  #
  #   Fuzzzy.configure do |config|
  #     config.logger = Logger.new($stdout)
  #     config.redis = ::Redis.new(
  #       :host => 'localhost',
  #       :port => 6379,
  #       :db => 0
  #     )
  #     config.stopwords = %w{the stopwords list}
  #   end
  def configure
    yield self
  end

  # Returns the configured logger, falling back to default_logger the first
  # time it is read. The defined? check (instead of ||=) keeps a logger that
  # was explicitly set to nil disabled.
  def logger
    @logger = default_logger unless defined?(@logger)
    @logger
  end

  # Sets the logger. Accepts a Logger instance, or false/nil to disable
  # logging. Any other value is ignored and the current logger is kept.
  def logger=(logger)
    case logger
    when Logger then @logger = logger
    when false, nil then @logger = nil
    end
  end

  # Returns the Redis connection, creating a default one (localhost:6379,
  # database 0) on first use. ::Redis is the redis-rb client; the bare name
  # Redis inside this module refers to the autoloaded Fuzzzy::Redis.
  def redis
    # redis-rb selects the database with :db (not :database).
    @redis ||= ::Redis.new(
      :host => 'localhost',
      :port => 6379,
      :db => 0
    )
  end

  # Replaces the Redis connection.
  def redis=(connection)
    @redis = connection
  end

  # Returns the active stop-word list, defaulting to default_stopwords.
  def stopwords
    @stopwords ||= default_stopwords
  end

  # Sets the stop-word list from any value load_stopwords understands;
  # duplicates are removed.
  def stopwords=(value)
    @stopwords = load_stopwords(value).uniq
  end

  # Normalises a stop-word specification.
  #
  # options - one of:
  #           Hash             - :stopwords holds a nested specification;
  #                              a truthy :default appends default_stopwords.
  #           Array            - returned as is.
  #           String, Pathname - path of a YAML file to load.
  #           anything else    - yields an empty list.
  #
  # Returns the stop words described by +options+.
  def load_stopwords(options)
    case options
    when Hash
      stops = load_stopwords(options[:stopwords])
      options[:default] ? (stops + default_stopwords) : stops
    when Array
      options
    when String, Pathname
      # An empty YAML document loads as false/nil; treat it as "no stop
      # words" so callers such as stopwords= can keep calling Array methods.
      YAML.load_file(options) || []
    else
      []
    end
  end

  # Returns the English stop words bundled in dictionary/en_stopwords.yml
  # (resolved relative to Fuzzzy.root), loaded once and memoized.
  def default_stopwords
    @default_stopwords ||= load_stopwords(Fuzzzy.root.join('dictionary', 'en_stopwords.yml').to_s)
  end

  # Returns the environment name: Rails.env when Rails is loaded, Sinatra's
  # environment when Sinatra is loaded, otherwise RACK_ENV or 'development'.
  def env
    return Rails.env if defined?(Rails)
    return Sinatra::Base.environment.to_s if defined?(Sinatra)
    ENV["RACK_ENV"] || 'development'
  end

  # Returns the root directory as a Pathname, memoized on first call.
  #
  # NOTE(review): this is the process' current working directory, not the
  # directory the gem is installed in, so default_stopwords only finds
  # dictionary/en_stopwords.yml when the process runs from the gem's
  # checkout - confirm whether that is intended.
  def root
    @root ||= Pathname.new(File.expand_path('.'))
  end

  protected

  # Logger used when none was configured: Rails.logger when available,
  # otherwise a Logger writing to $stdout.
  def default_logger
    defined?(Rails) && Rails.respond_to?(:logger) ? Rails.logger : ::Logger.new($stdout)
  end
end