picky 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/picky +14 -0
- data/lib/bundling.rb +10 -0
- data/lib/constants.rb +9 -0
- data/lib/deployment.rb +212 -0
- data/lib/picky/application.rb +40 -0
- data/lib/picky/cacher/convenience.rb +3 -0
- data/lib/picky/cacher/generator.rb +17 -0
- data/lib/picky/cacher/partial/default.rb +7 -0
- data/lib/picky/cacher/partial/none.rb +19 -0
- data/lib/picky/cacher/partial/strategy.rb +7 -0
- data/lib/picky/cacher/partial/subtoken.rb +91 -0
- data/lib/picky/cacher/partial_generator.rb +15 -0
- data/lib/picky/cacher/similarity/default.rb +7 -0
- data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
- data/lib/picky/cacher/similarity/none.rb +25 -0
- data/lib/picky/cacher/similarity/strategy.rb +7 -0
- data/lib/picky/cacher/similarity_generator.rb +15 -0
- data/lib/picky/cacher/weights/default.rb +7 -0
- data/lib/picky/cacher/weights/logarithmic.rb +39 -0
- data/lib/picky/cacher/weights/strategy.rb +7 -0
- data/lib/picky/cacher/weights_generator.rb +15 -0
- data/lib/picky/configuration/configuration.rb +13 -0
- data/lib/picky/configuration/field.rb +68 -0
- data/lib/picky/configuration/indexes.rb +60 -0
- data/lib/picky/configuration/queries.rb +32 -0
- data/lib/picky/configuration/type.rb +52 -0
- data/lib/picky/cores.rb +101 -0
- data/lib/picky/db/configuration.rb +23 -0
- data/lib/picky/ext/ruby19/extconf.rb +7 -0
- data/lib/picky/ext/ruby19/performant.c +339 -0
- data/lib/picky/extensions/array.rb +45 -0
- data/lib/picky/extensions/hash.rb +11 -0
- data/lib/picky/extensions/module.rb +15 -0
- data/lib/picky/extensions/symbol.rb +18 -0
- data/lib/picky/generator.rb +156 -0
- data/lib/picky/helpers/cache.rb +23 -0
- data/lib/picky/helpers/gc.rb +11 -0
- data/lib/picky/helpers/measuring.rb +45 -0
- data/lib/picky/helpers/search.rb +27 -0
- data/lib/picky/index/bundle.rb +328 -0
- data/lib/picky/index/category.rb +109 -0
- data/lib/picky/index/combined.rb +38 -0
- data/lib/picky/index/type.rb +30 -0
- data/lib/picky/indexers/base.rb +77 -0
- data/lib/picky/indexers/default.rb +3 -0
- data/lib/picky/indexers/field.rb +13 -0
- data/lib/picky/indexers/no_source_specified_error.rb +5 -0
- data/lib/picky/indexers/solr.rb +60 -0
- data/lib/picky/indexes.rb +180 -0
- data/lib/picky/initializers/ext.rb +6 -0
- data/lib/picky/initializers/mysql.rb +22 -0
- data/lib/picky/loader.rb +287 -0
- data/lib/picky/loggers/search.rb +19 -0
- data/lib/picky/performant/array.rb +23 -0
- data/lib/picky/query/allocation.rb +82 -0
- data/lib/picky/query/allocations.rb +131 -0
- data/lib/picky/query/base.rb +124 -0
- data/lib/picky/query/combination.rb +69 -0
- data/lib/picky/query/combinations.rb +106 -0
- data/lib/picky/query/combinator.rb +92 -0
- data/lib/picky/query/full.rb +15 -0
- data/lib/picky/query/live.rb +22 -0
- data/lib/picky/query/qualifiers.rb +73 -0
- data/lib/picky/query/solr.rb +77 -0
- data/lib/picky/query/token.rb +215 -0
- data/lib/picky/query/tokens.rb +102 -0
- data/lib/picky/query/weigher.rb +159 -0
- data/lib/picky/query/weights.rb +55 -0
- data/lib/picky/rack/harakiri.rb +37 -0
- data/lib/picky/results/base.rb +103 -0
- data/lib/picky/results/full.rb +19 -0
- data/lib/picky/results/live.rb +19 -0
- data/lib/picky/routing.rb +165 -0
- data/lib/picky/signals.rb +11 -0
- data/lib/picky/solr/schema_generator.rb +73 -0
- data/lib/picky/sources/base.rb +19 -0
- data/lib/picky/sources/csv.rb +30 -0
- data/lib/picky/sources/db.rb +77 -0
- data/lib/picky/tokenizers/base.rb +130 -0
- data/lib/picky/tokenizers/default.rb +3 -0
- data/lib/picky/tokenizers/index.rb +73 -0
- data/lib/picky/tokenizers/query.rb +70 -0
- data/lib/picky/umlaut_substituter.rb +21 -0
- data/lib/picky-tasks.rb +6 -0
- data/lib/picky.rb +18 -0
- data/lib/tasks/application.rake +5 -0
- data/lib/tasks/cache.rake +53 -0
- data/lib/tasks/framework.rake +4 -0
- data/lib/tasks/index.rake +29 -0
- data/lib/tasks/server.rake +48 -0
- data/lib/tasks/shortcuts.rake +13 -0
- data/lib/tasks/solr.rake +36 -0
- data/lib/tasks/spec.rake +11 -0
- data/lib/tasks/statistics.rake +13 -0
- data/lib/tasks/try.rake +29 -0
- data/prototype_project/Gemfile +23 -0
- data/prototype_project/Rakefile +1 -0
- data/prototype_project/app/README +6 -0
- data/prototype_project/app/application.rb +50 -0
- data/prototype_project/app/application.ru +29 -0
- data/prototype_project/app/db.yml +10 -0
- data/prototype_project/app/logging.rb +20 -0
- data/prototype_project/app/unicorn.ru +10 -0
- data/prototype_project/log/README +1 -0
- data/prototype_project/script/console +34 -0
- data/prototype_project/tmp/README +0 -0
- data/prototype_project/tmp/pids/README +0 -0
- data/spec/ext/performant_spec.rb +64 -0
- data/spec/lib/application_spec.rb +61 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
- data/spec/lib/cacher/partial_generator_spec.rb +35 -0
- data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
- data/spec/lib/cacher/similarity/none_spec.rb +23 -0
- data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
- data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
- data/spec/lib/cacher/weights_generator_spec.rb +21 -0
- data/spec/lib/configuration/configuration_spec.rb +38 -0
- data/spec/lib/configuration/type_spec.rb +49 -0
- data/spec/lib/configuration_spec.rb +8 -0
- data/spec/lib/cores_spec.rb +65 -0
- data/spec/lib/extensions/array_spec.rb +37 -0
- data/spec/lib/extensions/hash_spec.rb +11 -0
- data/spec/lib/extensions/module_spec.rb +27 -0
- data/spec/lib/extensions/symbol_spec.rb +85 -0
- data/spec/lib/generator_spec.rb +135 -0
- data/spec/lib/helpers/cache_spec.rb +35 -0
- data/spec/lib/helpers/gc_spec.rb +71 -0
- data/spec/lib/helpers/measuring_spec.rb +18 -0
- data/spec/lib/helpers/search_spec.rb +50 -0
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
- data/spec/lib/index/bundle_spec.rb +260 -0
- data/spec/lib/index/category_spec.rb +203 -0
- data/spec/lib/indexers/base_spec.rb +73 -0
- data/spec/lib/indexers/field_spec.rb +20 -0
- data/spec/lib/loader_spec.rb +48 -0
- data/spec/lib/loggers/search_spec.rb +19 -0
- data/spec/lib/performant/array_spec.rb +13 -0
- data/spec/lib/query/allocation_spec.rb +194 -0
- data/spec/lib/query/allocations_spec.rb +336 -0
- data/spec/lib/query/base_spec.rb +104 -0
- data/spec/lib/query/combination_spec.rb +90 -0
- data/spec/lib/query/combinations_spec.rb +83 -0
- data/spec/lib/query/combinator_spec.rb +112 -0
- data/spec/lib/query/full_spec.rb +22 -0
- data/spec/lib/query/live_spec.rb +61 -0
- data/spec/lib/query/qualifiers_spec.rb +31 -0
- data/spec/lib/query/solr_spec.rb +51 -0
- data/spec/lib/query/token_spec.rb +297 -0
- data/spec/lib/query/tokens_spec.rb +189 -0
- data/spec/lib/query/weights_spec.rb +47 -0
- data/spec/lib/results/base_spec.rb +233 -0
- data/spec/lib/routing_spec.rb +318 -0
- data/spec/lib/solr/schema_generator_spec.rb +42 -0
- data/spec/lib/sources/db_spec.rb +91 -0
- data/spec/lib/tokenizers/base_spec.rb +61 -0
- data/spec/lib/tokenizers/index_spec.rb +51 -0
- data/spec/lib/tokenizers/query_spec.rb +105 -0
- data/spec/lib/umlaut_substituter_spec.rb +84 -0
- data/spec/specific/speed_spec.rb +55 -0
- metadata +371 -15
- data/README.textile +0 -9
data/lib/tasks/solr.rake
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# sunspot-solr start --solr-home=solr --data-directory=index/development/solr --pid-dir=solr/pids --log-file=log/solr.log
|
|
2
|
+
|
|
3
|
+
namespace :solr do
|
|
4
|
+
|
|
5
|
+
namespace :schema do
|
|
6
|
+
task :generate => :application do
|
|
7
|
+
generator = Solr::SchemaGenerator.new Indexes.configuration
|
|
8
|
+
generator.generate
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
task :index => :application do
|
|
14
|
+
Rake::Task[:"solr:start"].invoke
|
|
15
|
+
sleep 3
|
|
16
|
+
Indexes.index_solr
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def action name
|
|
21
|
+
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{SEARCH_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
|
|
22
|
+
end
|
|
23
|
+
task :start => :application do
|
|
24
|
+
Rake::Task['solr:schema:generate'].invoke
|
|
25
|
+
action :start
|
|
26
|
+
end
|
|
27
|
+
task :stop => :application do
|
|
28
|
+
action :stop
|
|
29
|
+
end
|
|
30
|
+
task :restart => :application do
|
|
31
|
+
action :stop
|
|
32
|
+
sleep 2
|
|
33
|
+
action :start
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
end
|
data/lib/tasks/spec.rake
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
require 'spec'
|
|
2
|
+
require 'spec/rake/spectask'
|
|
3
|
+
|
|
4
|
+
task :default => :spec
|
|
5
|
+
|
|
6
|
+
desc "Run all specs in spec directory (excluding plugin specs)"
|
|
7
|
+
Spec::Rake::SpecTask.new(:spec) do |t|
|
|
8
|
+
spec_root = File.join(File.dirname(__FILE__), '..', '..', 'spec')
|
|
9
|
+
t.spec_opts = ['--options', "\"#{File.join(spec_root, 'spec.opts')}\""]
|
|
10
|
+
t.spec_files = FileList[File.join(spec_root, '**', '*_spec.rb')]
|
|
11
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
namespace :statistics do
|
|
2
|
+
|
|
3
|
+
desc "start the server"
|
|
4
|
+
task :start => :application do
|
|
5
|
+
Statistics.start unless SEARCH_ENVIRONMENT == 'test'
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
desc "stop the server"
|
|
9
|
+
task :stop => :application do
|
|
10
|
+
Statistics.stop unless SEARCH_ENVIRONMENT == 'test'
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
end
|
data/lib/tasks/try.rake
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Tasks for manually testing your engine configuration.
|
|
2
|
+
#
|
|
3
|
+
namespace :try do
|
|
4
|
+
|
|
5
|
+
desc "Try how a given word would be tokenized when indexing (type:field optional)."
|
|
6
|
+
task :index, [:text, :type_and_field] => :application do |_, options|
|
|
7
|
+
text, type_and_field = options.text, options.type_and_field
|
|
8
|
+
|
|
9
|
+
tokenizer = type_and_field ? Indexes.find(*type_and_field.split(':')).tokenizer : Tokenizers::Index.new
|
|
10
|
+
|
|
11
|
+
puts "\"#{text}\" is index tokenized as #{tokenizer.tokenize(text).to_a}"
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
desc "Try how a given word would be tokenized when querying."
|
|
15
|
+
task :query, [:text] => :application do |_, options|
|
|
16
|
+
text = options.text
|
|
17
|
+
|
|
18
|
+
puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.new.tokenize(text).to_a}"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
desc "Try the given text with both the index and the query (type:field optional)."
|
|
22
|
+
task :both, [:text, :type_and_field] => :application do |_, options|
|
|
23
|
+
text, type_and_field = options.text, options.type_and_field
|
|
24
|
+
|
|
25
|
+
Rake::Task[:"try:index"].invoke text, type_and_field
|
|
26
|
+
Rake::Task[:"try:query"].invoke text
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
source :gemcutter
|
|
2
|
+
|
|
3
|
+
# Gems required by Picky.
|
|
4
|
+
#
|
|
5
|
+
gem 'picky'
|
|
6
|
+
gem 'bundler', '>=0.9.26'
|
|
7
|
+
gem 'rack', '1.2.1'
|
|
8
|
+
gem 'rack-mount', '0.6.9'
|
|
9
|
+
gem 'text', '0.2.0'
|
|
10
|
+
|
|
11
|
+
# Should be optional.
|
|
12
|
+
#
|
|
13
|
+
gem 'activesupport', '2.3.8', :require => 'active_support'
|
|
14
|
+
gem 'activerecord', '2.3.8', :require => 'active_record'
|
|
15
|
+
gem 'rsolr', '0.12.1'
|
|
16
|
+
gem 'sunspot', '1.1.0'
|
|
17
|
+
gem 'rack_fast_escape', '2009.06.24'
|
|
18
|
+
gem 'rspec'
|
|
19
|
+
|
|
20
|
+
# Required by your project.
|
|
21
|
+
#
|
|
22
|
+
gem 'unicorn'
|
|
23
|
+
gem 'mysql'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require 'picky-tasks'
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
# This is your application.
|
|
4
|
+
#
|
|
5
|
+
# Have fun with Picky!
|
|
6
|
+
#
|
|
7
|
+
class PickySearch < Application # The App Constant needs to be identical in application.ru.
|
|
8
|
+
|
|
9
|
+
# This is an example with books that you can adapt.
|
|
10
|
+
#
|
|
11
|
+
# Note: Much more is possible, but let's start out easy.
|
|
12
|
+
#
|
|
13
|
+
# Ask me if you have questions!
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
indexes do
|
|
17
|
+
illegal_characters(/[^äöüa-zA-Z0-9\s\/\-\"\&\.]/)
|
|
18
|
+
stopwords(/\b(und|der|die|das|mit|im|ein|des|dem|the|of)\b/)
|
|
19
|
+
split_text_on(/[\s\/\-\"\&\.]/)
|
|
20
|
+
|
|
21
|
+
type :books,
|
|
22
|
+
Sources::DB.new(
|
|
23
|
+
'SELECT id, title, author, isbn13 as isbn FROM books',
|
|
24
|
+
DB.configure(:file => 'app/db.yml')
|
|
25
|
+
),
|
|
26
|
+
field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
|
|
27
|
+
field(:author, :qualifiers => [:s, :author, :auteur]),
|
|
28
|
+
field(:isbn, :qualifiers => [:i, :isbn])
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
queries do
|
|
32
|
+
maximum_tokens 5
|
|
33
|
+
# Note that Picky needs the following characters to
|
|
34
|
+
# pass through, as they are control characters: *"~:
|
|
35
|
+
#
|
|
36
|
+
illegal_characters(/[^a-zA-Z0-9\s\/\-\,\&äöü\"\~\*\:]/)
|
|
37
|
+
stopwords(/\b(und|der|die|das|mit|ein|des|dem|the|of)\b/)
|
|
38
|
+
split_text_on(/[\s\/\-\,\&]+/)
|
|
39
|
+
|
|
40
|
+
# Set some weights according to the position. Note that the order is important.
|
|
41
|
+
#
|
|
42
|
+
options = { :weights => Query::Weights.new([:title] => 6, [:author, :title] => 3) }
|
|
43
|
+
|
|
44
|
+
route %r{^/books/full}, Query::Full.new(Indexes[:books], options)
|
|
45
|
+
route %r{^/books/live}, Query::Live.new(Indexes[:books], options)
|
|
46
|
+
|
|
47
|
+
root 200
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# This file has been generated by Picky.
|
|
2
|
+
#
|
|
3
|
+
|
|
4
|
+
# Require the gem. This loads the search framework.
|
|
5
|
+
#
|
|
6
|
+
require 'picky'
|
|
7
|
+
|
|
8
|
+
# Load your application. This requires the files in lib TODO
|
|
9
|
+
#
|
|
10
|
+
Loader.load_application
|
|
11
|
+
|
|
12
|
+
# Load the data. This loads data from cache files e.g. "some_index/*_index.dump" into constant Indexes::SomeIndex.
|
|
13
|
+
#
|
|
14
|
+
Indexes.load_from_cache
|
|
15
|
+
|
|
16
|
+
# Use Harakiri middleware to kill unicorn child after X requests.
|
|
17
|
+
#
|
|
18
|
+
# See http://vimeo.com/12614970 for more info.
|
|
19
|
+
#
|
|
20
|
+
# Note: Comment out the two Harakiri lines below to disable this.
|
|
21
|
+
#
|
|
22
|
+
Rack::Harakiri.after = 50
|
|
23
|
+
use Rack::Harakiri
|
|
24
|
+
|
|
25
|
+
# Start the application and start accepting requests.
|
|
26
|
+
#
|
|
27
|
+
# Note: Needs to be the same name as in application.rb.
|
|
28
|
+
#
|
|
29
|
+
run PickySearch
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Standard logging.
|
|
2
|
+
#
|
|
3
|
+
require 'logger'
|
|
4
|
+
PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(SEARCH_ROOT, 'log/search.log')))
|
|
5
|
+
|
|
6
|
+
# Example with using the syslog logger.
|
|
7
|
+
# Falling back to the standard log if it isn't available.
|
|
8
|
+
# (For example, because it is used locally and syslog is
|
|
9
|
+
# only available on the servers)
|
|
10
|
+
#
|
|
11
|
+
# begin
|
|
12
|
+
# log_program_name = 'search/query'
|
|
13
|
+
# logger = SyslogLogger.new log_program_name
|
|
14
|
+
# PickyLog = Loggers::Search.new logger
|
|
15
|
+
# puts "Logging on syslog #{log_program_name}."
|
|
16
|
+
# rescue StandardError
|
|
17
|
+
# puts "Could not connect to the syslog, using the normal log."
|
|
18
|
+
# require 'logger'
|
|
19
|
+
# PickyLog = Loggers::Search.new ::Logger.new(File.join(SEARCH_ROOT, 'log/search.log'))
|
|
20
|
+
# end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Logs go here by default.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: utf-8
|
|
3
|
+
#
|
|
4
|
+
irb = 'irb'
|
|
5
|
+
|
|
6
|
+
require 'optparse'
|
|
7
|
+
options = { :sandbox => false, :irb => irb }
|
|
8
|
+
OptionParser.new do |opt|
|
|
9
|
+
opt.banner = "Usage: console [environment] [options]"
|
|
10
|
+
opt.on("--irb=[#{irb}]", 'Invoke a different irb.') { |v| options[:irb] = v }
|
|
11
|
+
opt.parse!(ARGV)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
libs = " -r irb/completion"
|
|
15
|
+
libs << %( -r "picky" )
|
|
16
|
+
|
|
17
|
+
ENV['SEARCH_ENV'] = case ARGV.first
|
|
18
|
+
when "p"; "production"
|
|
19
|
+
when "d"; "development"
|
|
20
|
+
when "t"; "test"
|
|
21
|
+
else
|
|
22
|
+
ARGV.first || ENV['SEARCH_ENV'] || 'development'
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
puts "Use \x1b[1;30mLoader.load_application\x1b[m to load app."
|
|
26
|
+
puts "Use \x1b[1;30mIndexes.load_from_cache\x1b[m after that to load indexes."
|
|
27
|
+
puts "Copy the following line to do just that:"
|
|
28
|
+
puts "\x1b[1;30mLoader.load_application; Indexes.load_from_cache; nil\x1b[m"
|
|
29
|
+
puts ""
|
|
30
|
+
puts "Now you can for example create a query instance."
|
|
31
|
+
puts "\x1b[1;30mfull_books = Query::Full.new(Indexes[:books]); nil\x1b[m"
|
|
32
|
+
puts "and search on it"
|
|
33
|
+
puts "\x1b[1;30mfull_books.search_with_text 'bla'\x1b[m"
|
|
34
|
+
exec "#{options[:irb]} #{libs} --simple-prompt"
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Performant::Array do
|
|
4
|
+
|
|
5
|
+
describe "memory_efficient_intersect" do
|
|
6
|
+
before(:each) do
|
|
7
|
+
GC.disable
|
|
8
|
+
end
|
|
9
|
+
after(:each) do
|
|
10
|
+
GC.enable
|
|
11
|
+
GC.start
|
|
12
|
+
end
|
|
13
|
+
it "should intersect empty arrays correctly" do
|
|
14
|
+
arys = [[3,4], [1,2,3], []]
|
|
15
|
+
|
|
16
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == []
|
|
17
|
+
end
|
|
18
|
+
it "should handle intermediate empty results correctly" do
|
|
19
|
+
arys = [[5,4], [1,2,3], [3,4,5,8,9]]
|
|
20
|
+
|
|
21
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == []
|
|
22
|
+
end
|
|
23
|
+
it "should intersect correctly" do
|
|
24
|
+
arys = [[3,4], [1,2,3], [3,4,5,8,9]]
|
|
25
|
+
|
|
26
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [3]
|
|
27
|
+
end
|
|
28
|
+
it "should intersect correctly again" do
|
|
29
|
+
arys = [[3,4,5,6,7], [1,2,3,5,6,7], [3,4,5,6,7,8,9]]
|
|
30
|
+
|
|
31
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [3,5,6,7]
|
|
32
|
+
end
|
|
33
|
+
it "should intersect many arrays" do
|
|
34
|
+
arys = [[3,4,5,6,7], [1,2,3,5,6,7], [3,4,5,6,7,8,9], [1,2,3,4,5,6,7,8,9,10], [2,3,5,6,7,19], [1,2,3,4,5,6,7,8,9,10], [2,3,5,6,7,19]]
|
|
35
|
+
|
|
36
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [3,5,6,7]
|
|
37
|
+
end
|
|
38
|
+
it "should handle random arrays" do
|
|
39
|
+
proto = Array.new(100, 3_500_000)
|
|
40
|
+
arys = [proto.map { |e| rand e }, proto.map { |e| rand e }, proto.map { |e| rand e }]
|
|
41
|
+
|
|
42
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == arys.inject(arys.shift.dup) { |total, ary| total & ary } # fold Array#& over each remaining array (ary), not over the outer list
|
|
43
|
+
end
|
|
44
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
|
45
|
+
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
|
46
|
+
|
|
47
|
+
# brute force
|
|
48
|
+
Benchmark.realtime do
|
|
49
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
|
|
50
|
+
end.should <= 0.001
|
|
51
|
+
end
|
|
52
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
|
53
|
+
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
|
54
|
+
|
|
55
|
+
# &
|
|
56
|
+
Benchmark.realtime do
|
|
57
|
+
arys.inject(arys.shift.dup) do |total, ary|
|
|
58
|
+
total & ary # intersect with the current array (block param), not the whole list of arrays
|
|
59
|
+
end
|
|
60
|
+
end.should <= 0.0015
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
describe Application do
|
|
6
|
+
|
|
7
|
+
describe 'routing' do
|
|
8
|
+
it 'should be there' do
|
|
9
|
+
lambda { Application.routing }.should_not raise_error
|
|
10
|
+
end
|
|
11
|
+
it "should return a new Routing instance" do
|
|
12
|
+
Application.routing.should be_kind_of(Routing)
|
|
13
|
+
end
|
|
14
|
+
it "should cache the instance" do
|
|
15
|
+
Application.routing.should == Application.routing
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
describe 'call' do
|
|
20
|
+
before(:each) do
|
|
21
|
+
@routes = stub :routes
|
|
22
|
+
Application.stub! :routing => @routes
|
|
23
|
+
end
|
|
24
|
+
it 'should delegate' do
|
|
25
|
+
@routes.should_receive(:call).once.with :env
|
|
26
|
+
|
|
27
|
+
Application.call :env
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
describe "indexes" do
|
|
32
|
+
|
|
33
|
+
end
|
|
34
|
+
describe "indexes_configuration" do
|
|
35
|
+
it 'should be there' do
|
|
36
|
+
lambda { Application.indexes_configuration }.should_not raise_error
|
|
37
|
+
end
|
|
38
|
+
it "should return a new Routing instance" do
|
|
39
|
+
Application.indexes_configuration.should be_kind_of(Configuration::Indexes)
|
|
40
|
+
end
|
|
41
|
+
it "should cache the instance" do
|
|
42
|
+
Application.indexes_configuration.should == Application.indexes_configuration
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
describe "queries" do
|
|
47
|
+
|
|
48
|
+
end
|
|
49
|
+
describe "queries_configuration" do
|
|
50
|
+
it 'should be there' do
|
|
51
|
+
lambda { Application.queries_configuration }.should_not raise_error
|
|
52
|
+
end
|
|
53
|
+
it "should return a new Routing instance" do
|
|
54
|
+
Application.queries_configuration.should be_kind_of(Configuration::Queries)
|
|
55
|
+
end
|
|
56
|
+
it "should cache the instance" do
|
|
57
|
+
Application.queries_configuration.should == Application.queries_configuration
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Cacher::Partial::Subtoken do
|
|
4
|
+
|
|
5
|
+
context 'default down_to' do
|
|
6
|
+
before(:each) do
|
|
7
|
+
@cacher = Cacher::Partial::Subtoken.new
|
|
8
|
+
end
|
|
9
|
+
describe 'down_to' do
|
|
10
|
+
it 'should return the right value' do
|
|
11
|
+
@cacher.down_to.should == 1
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
describe 'generate_from' do
|
|
15
|
+
it 'should generate the right index' do
|
|
16
|
+
@cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
|
|
17
|
+
:florian => [1],
|
|
18
|
+
:floria => [1],
|
|
19
|
+
:flori => [1],
|
|
20
|
+
:flor => [1],
|
|
21
|
+
:flo => [1],
|
|
22
|
+
:fl => [1, 2],
|
|
23
|
+
:f => [1, 2],
|
|
24
|
+
:flavia => [2],
|
|
25
|
+
:flavi => [2],
|
|
26
|
+
:flav => [2],
|
|
27
|
+
:fla => [2]
|
|
28
|
+
}
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
context 'down_to set' do
|
|
33
|
+
context 'default starting_at' do
|
|
34
|
+
before(:each) do
|
|
35
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => 4
|
|
36
|
+
end
|
|
37
|
+
describe 'starting_at' do
|
|
38
|
+
it 'should return the right value' do
|
|
39
|
+
@cacher.starting_at.should == 0
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
describe 'down_to' do
|
|
43
|
+
it 'should return the right value' do
|
|
44
|
+
@cacher.down_to.should == 4
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
describe 'generate_from' do
|
|
48
|
+
it 'should generate the right index' do
|
|
49
|
+
@cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
|
|
50
|
+
:florian => [1],
|
|
51
|
+
:floria => [1],
|
|
52
|
+
:flori => [1],
|
|
53
|
+
:flor => [1],
|
|
54
|
+
:flavia => [2],
|
|
55
|
+
:flavi => [2],
|
|
56
|
+
:flav => [2]
|
|
57
|
+
}
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
context 'starting_at -1' do
|
|
62
|
+
before(:each) do
|
|
63
|
+
@cacher = Cacher::Partial::Subtoken.new :down_to => 4, :starting_at => -1
|
|
64
|
+
end
|
|
65
|
+
describe 'starting_at' do
|
|
66
|
+
it 'should return the right value' do
|
|
67
|
+
@cacher.starting_at.should == -2
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
describe 'down_to' do
|
|
71
|
+
it 'should return the right value' do
|
|
72
|
+
@cacher.down_to.should == 4
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
describe 'generate_from' do
|
|
76
|
+
it 'should generate the right index' do
|
|
77
|
+
@cacher.generate_from( :florian => [1], :flavia => [2] ).should == {
|
|
78
|
+
:floria => [1],
|
|
79
|
+
:flori => [1],
|
|
80
|
+
:flor => [1],
|
|
81
|
+
:flavi => [2],
|
|
82
|
+
:flav => [2]
|
|
83
|
+
}
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Cacher::PartialGenerator do
|
|
4
|
+
|
|
5
|
+
context 'integration' do
|
|
6
|
+
it 'should generate the correct values with a given strategy' do
|
|
7
|
+
generator = Cacher::PartialGenerator.new :meier => [1], :maier => [2]
|
|
8
|
+
|
|
9
|
+
generator.generate(Cacher::Partial::Subtoken.new).should == {
|
|
10
|
+
:meier => [1],
|
|
11
|
+
:meie => [1],
|
|
12
|
+
:mei => [1],
|
|
13
|
+
:me => [1],
|
|
14
|
+
:m => [1, 2],
|
|
15
|
+
:maier => [2],
|
|
16
|
+
:maie => [2],
|
|
17
|
+
:mai => [2],
|
|
18
|
+
:ma => [2]
|
|
19
|
+
}
|
|
20
|
+
end
|
|
21
|
+
it 'should generate the correct values with a given specific strategy' do
|
|
22
|
+
generator = Cacher::PartialGenerator.new :meier => [1], :maier => [2]
|
|
23
|
+
|
|
24
|
+
generator.generate(Cacher::Partial::Subtoken.new(:down_to => 3)).should == {
|
|
25
|
+
:meier => [1],
|
|
26
|
+
:meie => [1],
|
|
27
|
+
:mei => [1],
|
|
28
|
+
:maier => [2],
|
|
29
|
+
:maie => [2],
|
|
30
|
+
:mai => [2]
|
|
31
|
+
}
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
describe Cacher::Similarity::DoubleLevenshtone do
|
|
6
|
+
|
|
7
|
+
before(:each) do
|
|
8
|
+
@similarity = Cacher::Similarity::DoubleLevenshtone.new
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def self.it_should_encode text, expected
|
|
12
|
+
it "should encode #{text.inspect} correctly" do
|
|
13
|
+
@similarity.encoded(text).should == expected
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
def self.it_should_generate_from index, expected
|
|
17
|
+
it "should generate #{expected.inspect} correctly from #{index.inspect}" do
|
|
18
|
+
@similarity.generate_from(index).should == expected
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it_should_encode :meier, :MR
|
|
23
|
+
it_should_encode :grossberger, :KRSP
|
|
24
|
+
it_should_encode :hadelbla, :HTLP
|
|
25
|
+
|
|
26
|
+
it_should_generate_from({}, {})
|
|
27
|
+
it_should_generate_from({ :maier => nil, :meier => nil }, :MR => [:maier, :meier]) # should be correctly ordered
|
|
28
|
+
it_should_generate_from({ :maier => nil, :meier => nil, :hallaballa => nil }, :MR => [:maier, :meier], :HLPL => [:hallaballa])
|
|
29
|
+
it_should_generate_from({ :susan => nil, :susanne => nil, :bruderer => nil }, :SSN => [:susan, :susanne], :PRTR => [:bruderer])
|
|
30
|
+
|
|
31
|
+
describe 'with reduced amount' do
|
|
32
|
+
before(:each) do
|
|
33
|
+
@similarity = Cacher::Similarity::DoubleLevenshtone.new(1)
|
|
34
|
+
end
|
|
35
|
+
it_should_generate_from({ :maier => nil, :meier => nil }, :MR => [:maier])
|
|
36
|
+
it_should_generate_from({ :susan => nil, :susanne => nil, :bruderer => nil }, :SSN => [:susan], :PRTR => [:bruderer])
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
describe 'hashify' do
|
|
40
|
+
it 'should turn an empty list into an empty hash' do
|
|
41
|
+
@similarity.send(:hashify, []).should == {}
|
|
42
|
+
end
|
|
43
|
+
it 'should turn the list into an unordered similarity' do
|
|
44
|
+
@similarity.send(:hashify, [:meier, :maier]).should == { :MR => [:meier, :maier] }
|
|
45
|
+
end
|
|
46
|
+
it 'should turn the list into a encoded hash' do
|
|
47
|
+
@similarity.send(:hashify, [:meier, :maier, :peter]).should == { :MR => [:meier, :maier], :PTR => [:peter] }
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
context 'integration' do
|
|
52
|
+
it 'should return the right ordered array' do
|
|
53
|
+
index = @similarity.generate_from :meier => nil, :maier => nil, :mairai => nil, :mair => nil, :meira => nil
|
|
54
|
+
code = @similarity.encoded :maier
|
|
55
|
+
|
|
56
|
+
index[code].should == [:mair, :maier, :meier, :meira, :mairai]
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
describe Cacher::Similarity::None do
|
|
6
|
+
|
|
7
|
+
before(:each) do
|
|
8
|
+
@similarity = Cacher::Similarity::None.new
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
describe 'encode' do
|
|
12
|
+
it 'should always return nil' do
|
|
13
|
+
@similarity.encoded(:whatever).should == nil
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
describe 'generate_from' do
|
|
18
|
+
it 'should return an empty hash, always' do
|
|
19
|
+
@similarity.generate_from(:anything).should == {}
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Cacher::SimilarityGenerator do
|
|
4
|
+
|
|
5
|
+
context 'integration' do
|
|
6
|
+
it 'should generate the correct values' do
|
|
7
|
+
generator = Cacher::SimilarityGenerator.new :anything_really
|
|
8
|
+
|
|
9
|
+
generator.generate.should == {}
|
|
10
|
+
end
|
|
11
|
+
it 'should generate the correct values with a given strategy' do
|
|
12
|
+
generator = Cacher::SimilarityGenerator.new :meier => nil,
|
|
13
|
+
:maier => nil,
|
|
14
|
+
:mayer => nil,
|
|
15
|
+
:meyer => nil,
|
|
16
|
+
:peter => nil
|
|
17
|
+
|
|
18
|
+
generator.generate(Cacher::Similarity::DoubleLevenshtone.new).should == { :MR => [:meier, :maier, :mayer, :meyer], :PTR => [:peter] }
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
end
|