picky 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/picky +14 -0
- data/lib/bundling.rb +10 -0
- data/lib/constants.rb +9 -0
- data/lib/deployment.rb +212 -0
- data/lib/picky/application.rb +40 -0
- data/lib/picky/cacher/convenience.rb +3 -0
- data/lib/picky/cacher/generator.rb +17 -0
- data/lib/picky/cacher/partial/default.rb +7 -0
- data/lib/picky/cacher/partial/none.rb +19 -0
- data/lib/picky/cacher/partial/strategy.rb +7 -0
- data/lib/picky/cacher/partial/subtoken.rb +91 -0
- data/lib/picky/cacher/partial_generator.rb +15 -0
- data/lib/picky/cacher/similarity/default.rb +7 -0
- data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
- data/lib/picky/cacher/similarity/none.rb +25 -0
- data/lib/picky/cacher/similarity/strategy.rb +7 -0
- data/lib/picky/cacher/similarity_generator.rb +15 -0
- data/lib/picky/cacher/weights/default.rb +7 -0
- data/lib/picky/cacher/weights/logarithmic.rb +39 -0
- data/lib/picky/cacher/weights/strategy.rb +7 -0
- data/lib/picky/cacher/weights_generator.rb +15 -0
- data/lib/picky/configuration/configuration.rb +13 -0
- data/lib/picky/configuration/field.rb +68 -0
- data/lib/picky/configuration/indexes.rb +60 -0
- data/lib/picky/configuration/queries.rb +32 -0
- data/lib/picky/configuration/type.rb +52 -0
- data/lib/picky/cores.rb +101 -0
- data/lib/picky/db/configuration.rb +23 -0
- data/lib/picky/ext/ruby19/extconf.rb +7 -0
- data/lib/picky/ext/ruby19/performant.c +339 -0
- data/lib/picky/extensions/array.rb +45 -0
- data/lib/picky/extensions/hash.rb +11 -0
- data/lib/picky/extensions/module.rb +15 -0
- data/lib/picky/extensions/symbol.rb +18 -0
- data/lib/picky/generator.rb +156 -0
- data/lib/picky/helpers/cache.rb +23 -0
- data/lib/picky/helpers/gc.rb +11 -0
- data/lib/picky/helpers/measuring.rb +45 -0
- data/lib/picky/helpers/search.rb +27 -0
- data/lib/picky/index/bundle.rb +328 -0
- data/lib/picky/index/category.rb +109 -0
- data/lib/picky/index/combined.rb +38 -0
- data/lib/picky/index/type.rb +30 -0
- data/lib/picky/indexers/base.rb +77 -0
- data/lib/picky/indexers/default.rb +3 -0
- data/lib/picky/indexers/field.rb +13 -0
- data/lib/picky/indexers/no_source_specified_error.rb +5 -0
- data/lib/picky/indexers/solr.rb +60 -0
- data/lib/picky/indexes.rb +180 -0
- data/lib/picky/initializers/ext.rb +6 -0
- data/lib/picky/initializers/mysql.rb +22 -0
- data/lib/picky/loader.rb +287 -0
- data/lib/picky/loggers/search.rb +19 -0
- data/lib/picky/performant/array.rb +23 -0
- data/lib/picky/query/allocation.rb +82 -0
- data/lib/picky/query/allocations.rb +131 -0
- data/lib/picky/query/base.rb +124 -0
- data/lib/picky/query/combination.rb +69 -0
- data/lib/picky/query/combinations.rb +106 -0
- data/lib/picky/query/combinator.rb +92 -0
- data/lib/picky/query/full.rb +15 -0
- data/lib/picky/query/live.rb +22 -0
- data/lib/picky/query/qualifiers.rb +73 -0
- data/lib/picky/query/solr.rb +77 -0
- data/lib/picky/query/token.rb +215 -0
- data/lib/picky/query/tokens.rb +102 -0
- data/lib/picky/query/weigher.rb +159 -0
- data/lib/picky/query/weights.rb +55 -0
- data/lib/picky/rack/harakiri.rb +37 -0
- data/lib/picky/results/base.rb +103 -0
- data/lib/picky/results/full.rb +19 -0
- data/lib/picky/results/live.rb +19 -0
- data/lib/picky/routing.rb +165 -0
- data/lib/picky/signals.rb +11 -0
- data/lib/picky/solr/schema_generator.rb +73 -0
- data/lib/picky/sources/base.rb +19 -0
- data/lib/picky/sources/csv.rb +30 -0
- data/lib/picky/sources/db.rb +77 -0
- data/lib/picky/tokenizers/base.rb +130 -0
- data/lib/picky/tokenizers/default.rb +3 -0
- data/lib/picky/tokenizers/index.rb +73 -0
- data/lib/picky/tokenizers/query.rb +70 -0
- data/lib/picky/umlaut_substituter.rb +21 -0
- data/lib/picky-tasks.rb +6 -0
- data/lib/picky.rb +18 -0
- data/lib/tasks/application.rake +5 -0
- data/lib/tasks/cache.rake +53 -0
- data/lib/tasks/framework.rake +4 -0
- data/lib/tasks/index.rake +29 -0
- data/lib/tasks/server.rake +48 -0
- data/lib/tasks/shortcuts.rake +13 -0
- data/lib/tasks/solr.rake +36 -0
- data/lib/tasks/spec.rake +11 -0
- data/lib/tasks/statistics.rake +13 -0
- data/lib/tasks/try.rake +29 -0
- data/prototype_project/Gemfile +23 -0
- data/prototype_project/Rakefile +1 -0
- data/prototype_project/app/README +6 -0
- data/prototype_project/app/application.rb +50 -0
- data/prototype_project/app/application.ru +29 -0
- data/prototype_project/app/db.yml +10 -0
- data/prototype_project/app/logging.rb +20 -0
- data/prototype_project/app/unicorn.ru +10 -0
- data/prototype_project/log/README +1 -0
- data/prototype_project/script/console +34 -0
- data/prototype_project/tmp/README +0 -0
- data/prototype_project/tmp/pids/README +0 -0
- data/spec/ext/performant_spec.rb +64 -0
- data/spec/lib/application_spec.rb +61 -0
- data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
- data/spec/lib/cacher/partial_generator_spec.rb +35 -0
- data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
- data/spec/lib/cacher/similarity/none_spec.rb +23 -0
- data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
- data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
- data/spec/lib/cacher/weights_generator_spec.rb +21 -0
- data/spec/lib/configuration/configuration_spec.rb +38 -0
- data/spec/lib/configuration/type_spec.rb +49 -0
- data/spec/lib/configuration_spec.rb +8 -0
- data/spec/lib/cores_spec.rb +65 -0
- data/spec/lib/extensions/array_spec.rb +37 -0
- data/spec/lib/extensions/hash_spec.rb +11 -0
- data/spec/lib/extensions/module_spec.rb +27 -0
- data/spec/lib/extensions/symbol_spec.rb +85 -0
- data/spec/lib/generator_spec.rb +135 -0
- data/spec/lib/helpers/cache_spec.rb +35 -0
- data/spec/lib/helpers/gc_spec.rb +71 -0
- data/spec/lib/helpers/measuring_spec.rb +18 -0
- data/spec/lib/helpers/search_spec.rb +50 -0
- data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
- data/spec/lib/index/bundle_spec.rb +260 -0
- data/spec/lib/index/category_spec.rb +203 -0
- data/spec/lib/indexers/base_spec.rb +73 -0
- data/spec/lib/indexers/field_spec.rb +20 -0
- data/spec/lib/loader_spec.rb +48 -0
- data/spec/lib/loggers/search_spec.rb +19 -0
- data/spec/lib/performant/array_spec.rb +13 -0
- data/spec/lib/query/allocation_spec.rb +194 -0
- data/spec/lib/query/allocations_spec.rb +336 -0
- data/spec/lib/query/base_spec.rb +104 -0
- data/spec/lib/query/combination_spec.rb +90 -0
- data/spec/lib/query/combinations_spec.rb +83 -0
- data/spec/lib/query/combinator_spec.rb +112 -0
- data/spec/lib/query/full_spec.rb +22 -0
- data/spec/lib/query/live_spec.rb +61 -0
- data/spec/lib/query/qualifiers_spec.rb +31 -0
- data/spec/lib/query/solr_spec.rb +51 -0
- data/spec/lib/query/token_spec.rb +297 -0
- data/spec/lib/query/tokens_spec.rb +189 -0
- data/spec/lib/query/weights_spec.rb +47 -0
- data/spec/lib/results/base_spec.rb +233 -0
- data/spec/lib/routing_spec.rb +318 -0
- data/spec/lib/solr/schema_generator_spec.rb +42 -0
- data/spec/lib/sources/db_spec.rb +91 -0
- data/spec/lib/tokenizers/base_spec.rb +61 -0
- data/spec/lib/tokenizers/index_spec.rb +51 -0
- data/spec/lib/tokenizers/query_spec.rb +105 -0
- data/spec/lib/umlaut_substituter_spec.rb +84 -0
- data/spec/specific/speed_spec.rb +55 -0
- metadata +371 -15
- data/README.textile +0 -9
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# The Array class we all know and love.
|
|
2
|
+
#
|
|
3
|
+
class Array

  # Collapses runs of equal neighbouring elements into a single element.
  #
  # Only adjacent duplicates are removed; equal elements that are separated
  # by a different element are kept.
  #
  #   [1, 1, 2, 1].clustered_uniq # => [1, 2, 1]
  #
  # Returns a new array, the receiver is not modified.
  #
  def clustered_uniq
    each_with_object([]) do |element, result|
      # The explicit empty? check keeps a leading nil: on an empty result,
      # result.last is nil and would otherwise compare equal to the element.
      result << element if result.empty? || element != result.last
    end
  end

  # Replaces each element in place with the value the block returns for
  # (element, index).
  #
  # Returns the receiver.
  #
  def map_with_index!
    each_with_index { |element, index| self[index] = yield(element, index) }
  end

  # Like map_with_index!, but works on (and returns) a shallow copy.
  #
  def map_with_index &block
    dup.map_with_index!(&block)
  end

  # Accesses a random element of this array.
  #
  # Returns nil for an empty array.
  #
  def random
    self[Kernel.rand(length)]
  end

  # Sorts the array in place by Levenshtein distance to the given value.
  #
  # Both the elements and the given value are converted with to_s first.
  # Distances are computed by Text::Levenshtein, which is not defined here
  # and must already be loaded.
  #
  # Will raise if it encounters an element that is not to_s-able.
  #
  def sort_by_levenshtein! from
    from = from.to_s
    sort! do |this, that|
      Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
    end
  end

end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# The original Module class.
|
|
2
|
+
#
|
|
3
|
+
class Module

  # Defines, for each given method name, an instance method that forwards
  # the call (arguments and block included) to every element of a target
  # collection.
  #
  # The last argument must be an options hash with a :to key naming the
  # reader (or expression) that yields the collection, e.g.
  #
  #   each_delegate :dump, :load, :to => :bundles
  #
  # Raises an ArgumentError if no target is supplied.
  #
  def each_delegate *methods
    options = methods.pop
    to      = options.is_a?(Hash) ? options[:to] : nil

    unless to
      raise ArgumentError, "Multi delegation needs a target. Supply an options hash with a :to key as the last argument (e.g. delegate :something, :to => :an_array_reader)."
    end

    methods.each do |method|
      # The method is generated from source so that the target expression is
      # evaluated in the instance on each call.
      definition = "def #{method}(*args, &block)\n#{to}.each{ |t| t.__send__(#{method.inspect}, *args, &block) }\nend\n"
      module_eval(definition, "(__DELEGATION__)", 1)
    end
  end

end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Extending the Symbol class.
|
|
2
|
+
#
|
|
3
|
+
class Symbol

  # Returns this symbol followed by its successively shorter prefixes, down
  # to prefixes of the given minimum length.
  #
  #   :keys.subtokens    # => [:keys, :key, :ke, :k]
  #   :keys.subtokens(2) # => [:keys, :key, :ke]
  #
  # If the minimum length exceeds the symbol's length, only the symbol
  # itself is returned.
  #
  def subtokens down_to_length = 1
    text   = to_s
    length = text.size
    down_to_length = length if length < down_to_length

    tokens = [self]
    # One chop per character that has to go to reach the minimum length.
    (length - down_to_length).times { tokens << text.chop!.to_sym }
    tokens
  end

end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
|
|
3
|
+
module Picky

  # Raised by Generator#generator_for when no generator is registered for
  # the requested identifier.
  #
  # Derives from StandardError (rather than Exception) so that it behaves
  # like a regular application error; rescuing it by name works as before.
  #
  class NoGeneratorException < StandardError; end

  # This is a very simple project generator.
  # Not at all like Padrino's or Rails'.
  # (No diss, just by way of a faster explanation)
  #
  # Basically copies a prototype project into a newly generated directory.
  #
  class Generator

    # Maps generator identifiers (symbols) to generator classes.
    #
    attr_reader :types

    def initialize
      @types = {
        :project => Project
      }
    end

    # Run the generators with this command.
    #
    # This will "route" the commands to the right specific generator:
    # the first element of args selects the generator, the remaining
    # elements are passed to it.
    #
    def generate args
      generator = generator_for(*args)
      generator.generate
    end

    # Returns a generator instance for the given identifier, initialized
    # with the remaining arguments.
    #
    # Raises a NoGeneratorException if the identifier is unknown.
    #
    def generator_for identifier, *args
      generator_class = types[identifier.to_sym]
      raise NoGeneratorException unless generator_class
      generator_for_class generator_class, *args
    end

    # Instantiates the given generator class with the given arguments.
    #
    def generator_for_class klass, *args
      klass.new(*args)
    end

    # Generates a new project by copying the prototype project into a
    # directory named after the project, below the current working directory.
    #
    class Project

      attr_reader :name, :prototype_project_basedir

      # name is the project (and target directory) name. Further arguments
      # are accepted but ignored.
      #
      def initialize name, *args
        @name = name
        @prototype_project_basedir = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'prototype_project'))
      end

      # Creates the target directory and copies all prototype files into it,
      # reporting progress on the way.
      #
      def generate
        exclaim "Setting up Picky project \"#{name}\"."
        create_target_directory
        copy_all_files
        exclaim "\"#{name}\" is a great project name! Have fun :)"
      end

      # Creates the target directory, unless something already exists at
      # that path.
      #
      def create_target_directory
        if File.exist?(target_directory)
          exists target_directory
        else
          FileUtils.mkdir target_directory
          created target_directory
        end
      end

      # Copies every prototype entry except those ending in .textile.
      #
      def copy_all_files
        all_prototype_files.each do |filename|
          next if filename.match(/\.textile$/)
          copy_single_file filename
        end
      end

      # Maps a path inside the prototype project to the corresponding path
      # inside the target directory.
      #
      # The prototype base directory is treated as a literal path prefix
      # (not as a regular expression), so directory names containing
      # characters like "+", "(" or "." are handled correctly. Paths outside
      # the prototype project are returned unchanged.
      #
      def target_filename_for filename
        base = prototype_project_basedir
        return filename unless filename.start_with?(base)
        target_directory + filename[base.length..-1]
      end

      # Copies a single prototype entry, skipping it if the target exists.
      #
      def copy_single_file filename
        target = target_filename_for filename
        if File.exist? target
          exists target
        else
          smart_copy filename, target
        end
      end

      # Well, "smart" ;)
      #
      # Copies filename to target:
      # * a source directory results in a (recursively created) target directory,
      # * a source file is copied, creating missing parent directories first.
      #
      # Raises Errno::ENOENT if the source does not exist. If something
      # already occupies a needed path (Errno::EEXIST), the entry is reported
      # as existing and skipped.
      #
      def smart_copy filename, target
        raise Errno::ENOENT, filename unless File.exist?(filename)

        if File.directory? filename
          FileUtils.mkdir_p target
        else
          FileUtils.mkdir_p File.dirname(target)
          FileUtils.copy_file filename, target
        end
        created target
      rescue Errno::EEXIST
        exists target
      end

      # All files and directories below the prototype project directory.
      #
      def all_prototype_files
        Dir[File.join(prototype_project_basedir, '**', '*')]
      end

      # The absolute path of the directory the project is generated into.
      #
      def target_directory
        File.expand_path File.join(Dir.pwd, name)
      end

      # Reports a created entry (green).
      #
      def created entry
        exclaim "#{entry} \x1b[32mcreated\x1b[m."
      end

      # Reports a skipped, already existing entry (red).
      #
      def exists entry
        exclaim "#{entry} \x1b[31mexists\x1b[m, skipping."
      end

      # Prints the given message to standard output.
      #
      def exclaim something
        puts something
      end

    end

  end

end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
module Helpers
  module Cache
    # This is a simple cache.
    # The store needs to be able to answer to [] and []=.
    #
    # Returns the value stored under key if it is truthy. Otherwise calls
    # the block, stores its result under key and returns that result.
    #
    # Note that nil and false results are never treated as cached: the block
    # runs again on the next call for that key.
    #
    def cached store, key, &block
      # Get cached result
      #
      results = store[key]
      return results if results

      # The block is called directly; wrapping it with lambda(&block) is
      # rejected by current Rubies.
      #
      results = block.call

      # Store results
      #
      store[key] = results

      results
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Helper methods for measuring, benchmarking, logging.
|
|
2
|
+
#
|
|
3
|
+
module Helpers
  module Measuring

    # Runs the block and returns how long it took, in seconds (Float).
    #
    # name and performed_on are only used by the (currently disabled)
    # performance log line below.
    #
    def log_performance(name, performed_on = '', &block)
      time_begin = Time.now.to_f

      # Called directly; lambda(&block) is rejected by current Rubies.
      block.call

      duration = Time.now.to_f - time_begin

      # PerformanceLog.info("#{'%30s' % name}: #{'%2.10f' % duration} #{performed_on}")
      duration
    end

    # Calls the block with the given arguments.
    #
    # Returns a duration in seconds.
    #
    def timed(*args, &block)
      time_begin = Time.now.to_f

      block.call(*args)

      Time.now.to_f - time_begin
    end

    # Profiles the block with ruby-prof, writes an HTML graph report to
    # log/profiler.html and opens it using the `open` command.
    #
    # mode names a RubyProf measure mode constant, e.g. :cpu_time for
    # RubyProf::CPU_TIME.
    #
    def profiled_html(mode = :cpu_time, &block)
      # Loaded lazily so that ruby-prof is only needed when profiling.
      require 'ruby-prof'

      RubyProf.measure_mode = RubyProf.const_get(mode.to_s.upcase)

      result = RubyProf.profile(&block)

      printer = RubyProf::GraphHtmlPrinter.new(result)
      File.open('log/profiler.html', 'w') do |f|
        printer.print(f)
      end

      system 'open log/profiler.html'
    end

  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Helpers
  module Search

    # Maps a result count to a symbol describing its magnitude:
    #
    #   0       => :none
    #   1       => :one
    #   2..7    => :few
    #   8..15   => :some
    #   16..25  => :several
    #   26..50  => :many
    #   51..100 => :lots
    #
    # Anything else (including counts above 100) yields :too_many.
    #
    def status_class_for(results_count)
      case results_count
      when 0         then :none
      when 1         then :one
      when (2..7)    then :few
      when (8..15)   then :some
      when (16..25)  then :several
      when (26..50)  then :many
      when (51..100) then :lots
      else                :too_many
      end
    end

  end

end
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Index

  # This is the ACTUAL index.
  #
  # Handles full index, partial index, weights index, and similarity index.
  #
  class Bundle

    attr_reader   :name, :category, :type
    attr_accessor :index, :weights, :similarity
    attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy

    delegate :[], :[]=, :clear, :to => :index

    # Creates a bundle with empty index, weights and similarity hashes.
    #
    # name, category and type identify the bundle and determine its cache
    # file paths; the strategies are handed to the respective generators.
    #
    def initialize name, category, type, partial_strategy, weights_strategy, similarity_strategy
      @index      = {}
      @weights    = {}
      @similarity = {}

      @name     = name
      @category = category
      @type     = type

      @partial_strategy    = partial_strategy
      @weights_strategy    = weights_strategy
      @similarity_strategy = similarity_strategy
    end

    # Get the ids for the text.
    #
    # Returns an empty array if the text is not in the index.
    #
    def ids text
      @index[text] || []
    end
    # Get a weight for the text.
    #
    def weight text
      @weights[text]
    end
    # Get a list of similar texts.
    #
    # The text is encoded by the similarity strategy; an empty array is
    # returned if there is no code or nothing is stored for the code.
    #
    def similar text
      code = similarity_strategy.encoded text
      code && @similarity[code] || []
    end

    # Identifier for this bundle.
    #
    def identifier
      "#{name}:#{type.name}:#{category.name}"
    end

    # Point to category.
    #
    def search_index_root
      File.join SEARCH_ROOT, 'index'
      # category.search_index_root
    end

    # Size of the file at path in bytes; 0 if it is missing or empty.
    #
    # Asks the file system directly instead of shelling out, so paths
    # containing spaces or shell metacharacters are handled safely.
    #
    def size_of path
      File.size?(path) || 0
    end
    # Check if the cache files are there and do not have size 0.
    #
    def caches_ok?
      cache_ok?(index_cache_path) &&
      cache_ok?(similarity_cache_path) &&
      cache_ok?(weights_cache_path)
    end
    # Is the cache ok? I.e. does the file exist with a size larger than zero.
    #
    def cache_ok? path
      size_of(path) > 0
    end
    # Raises an appropriate error message.
    #
    def raise_cache_missing what
      raise "#{what} cache for #{identifier} missing."
    end
    # Is the cache small? I.e. smaller than 16 bytes.
    #
    def cache_small? path
      size_of(path) < 16
    end
    # Prints a warning about a small cache file.
    #
    def warn_cache_small what
      puts "#{what} cache for #{identifier} smaller than 16 bytes."
    end
    # Check all index files and raise if necessary.
    #
    # Small index and weights caches only produce a warning; a missing or
    # empty cache file of any kind raises.
    #
    def raise_unless_cache_exists
      warn_cache_small :index      if cache_small?(index_cache_path)
      # warn_cache_small :similarity if cache_small?(similarity_cache_path)
      warn_cache_small :weights    if cache_small?(weights_cache_path)

      raise_cache_missing :index      unless cache_ok?(index_cache_path)
      raise_cache_missing :similarity unless cache_ok?(similarity_cache_path)
      raise_cache_missing :weights    unless cache_ok?(weights_cache_path)
    end

    # Copies the indexes to the "backup" directory.
    #
    def backup
      FileUtils.mkdir backup_path unless Dir.exist?(backup_path)
      FileUtils.cp index_cache_path,      backup_path, :verbose => true
      FileUtils.cp similarity_cache_path, backup_path, :verbose => true
      FileUtils.cp weights_cache_path,    backup_path, :verbose => true
    end
    # The "backup" directory next to the cache files.
    #
    def backup_path
      File.join File.dirname(index_cache_path), 'backup'
    end

    # Restores the indexes from the "backup" directory.
    #
    def restore
      FileUtils.cp backup_file_path_of(index_cache_path),      index_cache_path,      :verbose => true
      FileUtils.cp backup_file_path_of(similarity_cache_path), similarity_cache_path, :verbose => true
      FileUtils.cp backup_file_path_of(weights_cache_path),    weights_cache_path,    :verbose => true
    end
    # Path of the backup copy of the file at path.
    #
    def backup_file_path_of path
      dir, name = File.split path
      File.join dir, 'backup', name
    end

    # Delete the file at path.
    #
    # Removes recursively and ignores missing files, without going through
    # a shell.
    #
    def delete path
      FileUtils.rm_rf path
    end
    # Delete all index files.
    #
    def delete_all
      delete index_cache_path
      delete similarity_cache_path
      delete weights_cache_path
    end

    # Create directory and parent directories.
    #
    def create_directory
      FileUtils.mkdir_p cache_directory
    end
    # TODO Move to config. Duplicate Code in field.rb.
    #
    def cache_directory
      File.join search_index_root, SEARCH_ENVIRONMENT, type.name.to_s
    end

    # Generates a cache path.
    #
    def cache_path text
      File.join cache_directory, "#{name}_#{text}.dump"
    end
    def index_cache_path
      cache_path "#{category.name}_index"
    end
    def similarity_cache_path
      cache_path "#{category.name}_similarity"
    end
    def weights_cache_path
      cache_path "#{category.name}_weights"
    end

    # Loads all indexes into this category.
    #
    def load
      load_index
      load_similarity
      load_weights
    end
    # Loads the marshalled dump at path, if the file exists, and assigns it
    # through the writer named by index_method_name.
    #
    # The file is opened in block form so that the handle is closed again.
    #
    # NOTE(review): Marshal.load must only be used on trusted data; this
    # presumably only ever reads dumps written by this application — confirm.
    #
    def load_the index_method_name, path
      return unless File.exist? path
      loaded = File.open(path, 'r:binary') { |file| Marshal.load file }
      send "#{index_method_name}=", loaded
    end
    def load_index
      puts "#{Time.now}: Loading the index for #{identifier} from the cache."
      load_the :index, index_cache_path
    end
    def load_similarity
      puts "#{Time.now}: Loading the similarity for #{identifier} from the cache."
      load_the :similarity, similarity_cache_path
    end
    def load_weights
      puts "#{Time.now}: Loading the weights for #{identifier} from the cache."
      load_the :weights, weights_cache_path
    end

    # TODO Decide on the fate of this.
    #
    # # Generates similar index entries. If you search for bla, you will also find the blarf and vice versa.
    # #
    # # Examples:
    # #   title.generate_similar_from { :bla => :blarf }
    # #
    # # Note: Be careful with this, as it uses up a lot of memory.
    # #
    # def generate_similar_from mapping
    #   mapping.each_pair do |one, other|
    #     one_ids = self.index[one]
    #     other_ids = self.index[other]
    #
    #     self.index[one] += other_ids || [] if one_ids
    #     self.index[other] += one_ids || [] if other_ids
    #   end
    # end

    # Generation
    #

    # This method
    # * loads the base index from the index file
    # * generates the derived (weights and similarity) indexes in memory
    #
    def generate_caches_from_db
      cache_from_db_generation_message
      load_from_index_file
      generate_caches_from_memory
    end
    def cache_from_db_generation_message
      puts "#{Time.now}: Generating caches from db for #{identifier}."
    end
    # Generates derived indexes from the index.
    #
    # Note: assumes that there is something in the index
    #
    def generate_caches_from_memory
      cache_from_memory_generation_message
      generate_derived
    end
    def cache_from_memory_generation_message
      puts "#{Time.now}: Generating derived caches from memory for #{identifier}."
    end

    # Generates the weights and similarity from the main index.
    #
    def generate_derived
      generate_weights
      generate_similarity
    end

    # Clears the index and loads the data from the index file.
    #
    def load_from_index_file # TODO Load from index_file.
      clear
      retrieve
    end
    # Retrieves the data into the index.
    #
    # Each line of the index file is expected to have the form "id,token";
    # only the first comma separates, so the token itself may contain commas.
    #
    # TODO Beautify.
    #
    def retrieve
      # TODO Make r:binary configurable!
      #
      File.open(search_index_file_name, 'r:binary') do |file|
        file.each_line do |line|
          indexed_id, token = line.split ',', 2
          token.chomp!
          token = token.to_sym

          initialize_index_for token
          index[token] << indexed_id.to_i
        end
      end
    end
    # Makes sure there is an ids array for the token.
    #
    def initialize_index_for token
      index[token] ||= []
    end
    # TODO Duplicate code!
    #
    # TODO Use config object?
    #
    def search_index_file_name
      File.join cache_directory, "#{type.name}_#{category.name}_index.txt"
    end

    # Generators.
    #
    # TODO Move somewhere more fitting.
    #

    # Generates a new index (writes its index) using the
    # given partial caching strategy.
    #
    def generate_partial
      generator  = Cacher::PartialGenerator.new self.index
      self.index = generator.generate self.partial_strategy
    end
    # Replaces the index with the partial index generated from the given
    # full index. Returns the bundle itself.
    #
    def generate_partial_from full_index
      self.index = full_index
      self.generate_partial
      self
    end
    # Generates a new similarity index (writes its index) using the
    # given similarity caching strategy.
    #
    def generate_similarity
      generator       = Cacher::SimilarityGenerator.new self.index
      self.similarity = generator.generate self.similarity_strategy
    end
    # Generates a new weights index (writes its index) using the
    # given weight caching strategy.
    #
    def generate_weights
      generator    = Cacher::WeightsGenerator.new self.index
      self.weights = generator.generate self.weights_strategy
    end

    # Saves the index in a dump file.
    #
    def dump
      dump_index
      dump_similarity
      dump_weights
    end
    def dump_index
      index.dump_to index_cache_path
    end
    def dump_similarity
      similarity.dump_to similarity_cache_path
    end
    def dump_weights
      weights.dump_to weights_cache_path
    end

  end
end
|