fuzzzy 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,128 @@
1
+ require 'csv'
2
+ require 'benchmark'
3
+ require 'ruby-prof'
4
+
5
# Benchmark and profiling harness for the Fuzzzy search methods.
#
# Each run indexes the city fixtures from benchmark/data/cities.csv into
# Redis, executes the requested searches and finally calls `flushdb` on
# `Fuzzzy.redis`, so it must only be pointed at a disposable database.
module FuzzzyBenchmark
  extend Fuzzzy::Index

  module_function

  # Runs `times` searches for every context and prints a Benchmark.bm table.
  #
  # meth       - search method name handed to _indexer/_searcher (e.g. :ngram)
  # contexts   - Array of search context hashes (:query, :distance, :title, ...)
  # index_cntx - options merged into the index context and every search context
  # times      - number of searches executed per report
  #
  # Yields the Benchmark report object and this module when a block is given.
  def benchmark(meth, contexts, index_cntx = {}, times = 100, &block)
    @search_method = meth
    @times = times
    prepare_indexes(default_context.merge(index_cntx))

    Benchmark.bm do |bm|
      contexts.each do |context|
        report(bm, context.merge(index_cntx))
      end

      yield(bm, self) if block_given?
    end
  ensure
    # Always drop the benchmark data, even when indexing or searching failed.
    Fuzzzy.redis.flushdb
    @search_method = nil
  end

  # Profiles `times` searches for a single context with ruby-prof and writes
  # graph, call-stack and flat reports into benchmark/reports.
  #
  # The block parameter is accepted for signature compatibility only; it is
  # never called.
  def profile(meth, context, index_cntx = {}, times = 100, &block)
    @search_method = meth
    @times = times
    prepare_indexes(default_context.merge(index_cntx))

    # Build the search context exactly like `benchmark`/`report` do. The raw
    # context lacks the :index_name/:method defaults used to build the index.
    search_cntx = default_context.merge(context).merge(index_cntx)

    RubyProf.start
    begin
      @times.times { searcher.search(search_cntx) }
    ensure
      # Stop the profiler even if a search raises, so it is not left running.
      result = RubyProf.stop
    end

    write_profile_reports(result)
  ensure
    Fuzzzy.redis.flushdb
    @search_method = nil
  end

  # Writes the three ruby-prof reports for `result`, creating the reports
  # directory first when it does not exist yet.
  def write_profile_reports(result)
    reports_dir = Fuzzzy.root.join('benchmark', 'reports')
    reports_dir.mkpath

    [
      [RubyProf::GraphHtmlPrinter, "#{search_method}_graph.html"],
      [RubyProf::CallStackPrinter, "#{search_method}_callstack.html"],
      [RubyProf::FlatPrinter,      "#{search_method}_flat.txt"]
    ].each do |printer_class, file_name|
      File.open(reports_dir.join(file_name), 'w') do |file|
        printer_class.new(result).print(file)
      end
    end
  end

  # Prints the outcome of one sample search, then times @times searches for
  # `context` under the label context[:title] (empty when absent).
  def report(bench, context)
    context = default_context.merge(context)
    result, strings = get_result(context)

    puts "Execute #{@times} times"
    puts "query: '#{context[:query]}', result: '#{result}' => #{strings}"
    bench.report(context[:title] || '') do
      @times.times { searcher.search(context) }
    end
    puts ''
  end

  # Runs one search and resolves the returned ids to their dictionary strings.
  #
  # Returns [ids, strings].
  def get_result(cntx)
    result = searcher.search(cntx)
    # NOTE(review): key layout "fuzzzy:<index_name>:dictionary:<id>" is inferred
    # from the previously hard-coded 'fuzzzy:city:name:dictionary:' prefix;
    # confirm against the indexer. It is identical for the default index name.
    index_name = cntx[:index_name] || default_context[:index_name]
    strings = result.map do |id|
      Fuzzzy.redis.get("fuzzzy:#{index_name}:dictionary:#{id}")
    end
    [result, strings]
  end

  # Indexes every fixture row (downcased :name under its :id) and prints the
  # elapsed time plus Redis' reported memory usage.
  def prepare_indexes(cntx)
    puts "Create index for #{search_method}:"
    puts "#{fixtures.size} names"

    start = Time.now
    fixtures.each do |source|
      indexer.create_index(cntx.merge(
        :dictionary_string => source[:name].downcase,
        :id => source[:id]
      ))
    end

    puts "#{Time.now - start} sec."
    puts "size - #{Fuzzzy.redis.info['used_memory_human']}"
  end

  # Context shared by indexing and searching.
  def default_context
    {
      :index_name => 'city:name',
      :method => search_method
    }
  end

  def indexer
    _indexer(search_method)
  end

  def searcher
    _searcher(search_method)
  end

  # Rows of benchmark/data/cities.csv as hashes with symbol keys, loaded once.
  def fixtures
    @fixtures ||= begin
      path = Fuzzzy.root.join('benchmark', 'data', 'cities.csv').to_s
      rows = []
      CSV.foreach(path, :headers => true, :encoding => 'utf-8') do |row|
        # Symbolize header names by hand so this does not depend on
        # ActiveSupport's Hash#symbolize_keys being loaded.
        rows << row.to_hash.inject({}) do |memo, (key, value)|
          memo[key.respond_to?(:to_sym) ? key.to_sym : key] = value
          memo
        end
      end
      rows
    end
  end

  def search_method
    @search_method
  end

  def search_method=(meth)
    @search_method = meth
  end
end
@@ -0,0 +1,36 @@
1
$LOAD_PATH.unshift File.expand_path("../../benchmark", __FILE__)
require File.expand_path('../../lib/fuzzzy', __FILE__)
require 'fuzzzy_benchmark'

# Sample queries paired with the :distance passed to the searcher for each.
queries = [
  { :query => 'eastleigh naersouthempton', :distance => 4 },
  { :query => 'alixandropolis', :distance => 2 },
  { :query => 'jenan', :distance => 1 }
]

# Option applied to both the index and the searches in the second run.
strip_stopwords = { :strip_stopwords => true }
stripped_queries = queries.map { |query| query.merge(strip_stopwords) }

puts 'Without stripping stopwords'
FuzzzyBenchmark.benchmark(:ngram, queries)

puts 'With stripping stopwords'
FuzzzyBenchmark.benchmark(:ngram, stripped_queries, strip_stopwords)

# Profile the first query with stopword stripping enabled.
FuzzzyBenchmark.profile(:ngram, stripped_queries.first, strip_stopwords, 1000)
@@ -0,0 +1,36 @@
1
$LOAD_PATH.unshift File.expand_path("../../benchmark", __FILE__)
require File.expand_path('../../lib/fuzzzy', __FILE__)
require 'fuzzzy_benchmark'

# Sample queries paired with the :distance passed to the searcher for each.
queries = [
  { :query => 'eastleigh naersouthempton', :distance => 4 },
  { :query => 'alixandropolis', :distance => 2 },
  { :query => 'jenan', :distance => 1 }
]

# Option applied to both the index and the searches in the second run.
strip_stopwords = { :strip_stopwords => true }
stripped_queries = queries.map { |query| query.merge(strip_stopwords) }

puts 'Without stripping stopwords'
FuzzzyBenchmark.benchmark(:soundex, queries)

puts 'With stripping stopwords'
FuzzzyBenchmark.benchmark(:soundex, stripped_queries, strip_stopwords)

# Profile the first query with stopword stripping enabled.
FuzzzyBenchmark.profile(:soundex, stripped_queries.first, strip_stopwords, 1000)
@@ -0,0 +1,11 @@
1
# Rack configuration for the Fuzzzy HTTP server.
$LOAD_PATH.unshift File.expand_path("../..", __FILE__)
require './lib/fuzzzy'
require 'grape'

# The Redis connection is configurable through REDIS_HOST, REDIS_PORT and
# REDIS_DB. redis-rb selects the database with the :db option; the former
# :database key is not a recognised option, so REDIS_DB never took effect.
# Environment values arrive as strings, hence the explicit integer conversion.
Fuzzzy.redis = Redis.new(
  :host => (ENV['REDIS_HOST'] || 'localhost'),
  :port => (ENV['REDIS_PORT'] || 6379).to_i,
  :db   => (ENV['REDIS_DB'] || 0).to_i
)

run Fuzzzy::Server::HTTP
@@ -0,0 +1,175 @@
1
+ ---
2
+ - a
3
+ - about
4
+ - above
5
+ - after
6
+ - again
7
+ - against
8
+ - all
9
+ - am
10
+ - an
11
+ - and
12
+ - any
13
+ - are
14
+ - aren't
15
+ - as
16
+ - at
17
+ - be
18
+ - because
19
+ - been
20
+ - before
21
+ - being
22
+ - below
23
+ - between
24
+ - both
25
+ - but
26
+ - by
27
+ - can't
28
+ - cannot
29
+ - could
30
+ - couldn't
31
+ - did
32
+ - didn't
33
+ - do
34
+ - does
35
+ - doesn't
36
+ - doing
37
+ - don't
38
+ - down
39
+ - during
40
+ - each
41
+ - few
42
+ - for
43
+ - from
44
+ - further
45
+ - had
46
+ - hadn't
47
+ - has
48
+ - hasn't
49
+ - have
50
+ - haven't
51
+ - having
52
+ - he
53
+ - he'd
54
+ - he'll
55
+ - he's
56
+ - her
57
+ - here
58
+ - here's
59
+ - hers
60
+ - herself
61
+ - him
62
+ - himself
63
+ - his
64
+ - how
65
+ - how's
66
+ - i
67
+ - i'd
68
+ - i'll
69
+ - i'm
70
+ - i've
71
+ - if
72
+ - in
73
+ - into
74
+ - is
75
+ - isn't
76
+ - it
77
+ - it's
78
+ - its
79
+ - itself
80
+ - let's
81
+ - me
82
+ - more
83
+ - most
84
+ - mustn't
85
+ - my
86
+ - myself
87
+ - "no"
88
+ - nor
89
+ - not
90
+ - of
91
+ - "off"
92
+ - "on"
93
+ - once
94
+ - only
95
+ - or
96
+ - other
97
+ - ought
98
+ - our
99
+ - ours
100
+ - ourselves
101
+ - out
102
+ - over
103
+ - own
104
+ - same
105
+ - shan't
106
+ - she
107
+ - she'd
108
+ - she'll
109
+ - she's
110
+ - should
111
+ - shouldn't
112
+ - so
113
+ - some
114
+ - such
115
+ - than
116
+ - that
117
+ - that's
118
+ - the
119
+ - their
120
+ - theirs
121
+ - them
122
+ - themselves
123
+ - then
124
+ - there
125
+ - there's
126
+ - these
127
+ - they
128
+ - they'd
129
+ - they'll
130
+ - they're
131
+ - they've
132
+ - this
133
+ - those
134
+ - through
135
+ - to
136
+ - too
137
+ - under
138
+ - until
139
+ - up
140
+ - very
141
+ - was
142
+ - wasn't
143
+ - we
144
+ - we'd
145
+ - we'll
146
+ - we're
147
+ - we've
148
+ - were
149
+ - weren't
150
+ - what
151
+ - what's
152
+ - when
153
+ - when's
154
+ - where
155
+ - where's
156
+ - which
157
+ - while
158
+ - who
159
+ - who's
160
+ - whom
161
+ - why
162
+ - why's
163
+ - with
164
+ - won't
165
+ - would
166
+ - wouldn't
167
+ - you
168
+ - you'd
169
+ - you'll
170
+ - you're
171
+ - you've
172
+ - your
173
+ - yours
174
+ - yourself
175
+ - yourselves
@@ -0,0 +1,44 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "fuzzzy/version"
4
+
5
# Gem specification for fuzzzy: metadata, dependencies and packaged files.
Gem::Specification.new do |spec|
  spec.name        = "fuzzzy"
  spec.version     = Fuzzzy::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Undr"]
  spec.email       = ["lilipoper@gmail.com"]
  spec.homepage    = "http://github.com/undr/fuzzzy"
  spec.summary     = %q{Fuzzy Search client and server}
  spec.description = %q{Fuzzy Search client and server}

  spec.rubyforge_project = "fuzzzy"

  # Development-only dependencies: [name, optional version requirement].
  [
    ["rspec", ">= 2"],
    ["yard", "~> 0.6.0"],
    ["ruby-debug19"],
    ["bson_ext"],
    ["mongoid"],
    ["pry"],
    ["ruby-prof"]
  ].each do |name_and_requirement|
    spec.add_development_dependency(*name_and_requirement)
  end

  # Runtime dependencies: [name, optional version requirement].
  [
    ["bundler"],
    ["rake"],
    ["activesupport"],
    ["eventmachine"],
    ["yajl-ruby"],
    ["levenshtein-ffi"],
    ["text"],
    ["grape"],
    ["hiredis"],
    ["redis"],
    ["daemons"],
    ["ZenTest", "4.5.0"],
    ["RubyInline"]
  ].each do |name_and_requirement|
    spec.add_dependency(*name_and_requirement)
  end

  # File lists come from git, so only tracked files are packaged.
  tracked_files    = `git ls-files`.split("\n")
  tracked_tests    = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binaries = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,129 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ Bundler.require :default, (ENV['RACK_ENV'] || 'development')
4
+ require 'yaml'
5
+ require 'logger'
6
+ require 'active_support'
7
+ require 'yajl'
8
+ require 'levenshtein-ffi'
9
+ require 'text'
10
+ require 'redis/connection/hiredis'
11
+ require 'redis'
12
+
13
# Top-level namespace for the Fuzzzy fuzzy-search library. It also holds the
# library-wide configuration: logger, Redis connection and stopword list.
module Fuzzzy
  extend self
  extend ActiveSupport::Autoload

  autoload :Redis
  autoload :Index

  autoload :Indexer, 'fuzzzy/methods/indexer'
  autoload :MethodBase, 'fuzzzy/methods/method_base'

  module Soundex
    extend ActiveSupport::Autoload

    autoload :Base, 'fuzzzy/methods/soundex/base'
    autoload :Indexer, 'fuzzzy/methods/soundex/indexer'
    autoload :Searcher, 'fuzzzy/methods/soundex/searcher'
  end

  module Ngram
    extend ActiveSupport::Autoload

    autoload :Base, 'fuzzzy/methods/ngram/base'
    autoload :Indexer, 'fuzzzy/methods/ngram/indexer'
    autoload :Searcher, 'fuzzzy/methods/ngram/searcher'
  end

  module Server
    extend ActiveSupport::Autoload

    autoload :HTTP, 'fuzzzy/server/http'
  end

  # The Mongoid integration is only declared when Mongoid is already loaded.
  if defined?(Mongoid)
    module Mongoid
      extend ActiveSupport::Autoload

      autoload :Index, 'fuzzzy/orm/mongoid/index'
    end
  end

  # Yields the module itself so settings can be assigned in one place:
  #
  #   Fuzzzy.configure do |config|
  #     config.logger = Logger.new($stdout)
  #     config.redis = ::Redis.new(
  #       :host => 'localhost',
  #       :port => 6379,
  #       :db => 0
  #     )
  #     config.stopwords = %w{the stopwords list}
  #   end
  def configure
    yield self
  end

  # Returns the configured logger. The default logger is only installed when
  # nothing was ever assigned, so an explicit nil/false keeps logging off.
  def logger
    @logger = default_logger unless defined?(@logger)
    @logger
  end

  # Accepts a ::Logger instance, or false/nil to disable logging. Any other
  # value is ignored and leaves the current logger untouched.
  def logger=(logger)
    case logger
    when Logger then @logger = logger
    when false, nil then @logger = nil
    end
  end

  # Returns the configured Redis connection, lazily creating a connection to
  # localhost:6379, database 0.
  def redis
    # redis-rb selects the database through :db; :database is not an option
    # it recognises and was silently ignored.
    @redis ||= ::Redis.new(
      :host => 'localhost',
      :port => 6379,
      :db => 0
    )
  end

  def redis=(connection)
    @redis = connection
  end

  # Returns the active stopword list, falling back to the bundled English one.
  def stopwords
    @stopwords ||= default_stopwords
  end

  # Sets the stopword list from any value accepted by #load_stopwords;
  # duplicates are removed.
  def stopwords=(value)
    @stopwords = load_stopwords(value).uniq
  end

  # Normalises a stopword specification into an Array.
  #
  # options - one of:
  #           Hash             - :stopwords is loaded recursively; a truthy
  #                              :default appends the default stopwords
  #           Array            - returned as is
  #           String/Pathname  - path of a YAML file to load
  #           anything else    - yields an empty list
  def load_stopwords(options)
    case options
    when Hash
      stops = load_stopwords(options[:stopwords])
      options[:default] ? (stops + default_stopwords) : stops
    when Array
      options
    when String, Pathname
      YAML.load_file(options)
    else
      []
    end
  end

  # Stopwords loaded once from dictionary/en_stopwords.yml under Fuzzzy.root.
  def default_stopwords
    @default_stopwords ||= load_stopwords(Fuzzzy.root.join('dictionary', 'en_stopwords.yml').to_s)
  end

  # Current environment name: Rails', then Sinatra's, then RACK_ENV, and
  # 'development' as the last resort.
  def env
    return Rails.env if defined?(Rails)
    return Sinatra::Base.environment.to_s if defined?(Sinatra)
    ENV["RACK_ENV"] || 'development'
  end

  # Root directory of the library as a Pathname.
  #
  # Resolved relative to this file instead of the process working directory,
  # so bundled files such as dictionary/en_stopwords.yml are found no matter
  # where the process was started.
  # NOTE(review): assumes this file is lib/fuzzzy.rb, one level below the gem
  # root - confirm.
  def root
    @root ||= Pathname.new(File.expand_path('../..', __FILE__))
  end

  protected

  # Rails.logger when Rails is loaded and exposes one, otherwise a Logger
  # writing to standard output.
  def default_logger
    if defined?(Rails) && Rails.respond_to?(:logger)
      Rails.logger
    else
      ::Logger.new($stdout)
    end
  end
end