picky 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
@@ -0,0 +1,45 @@
1
+ # The Array class we all know and love.
2
+ #
3
class Array

  # Cluster-uniqs equal neighborly elements, i.e. collapses every run of
  # consecutive equal elements into a single element.
  #
  #   [1, 1, 2, 2, 1].clustered_uniq # => [1, 2, 1]
  #
  # Returns a copy.
  #
  def clustered_uniq
    inject([]) do |result, element|
      # The emptiness check matters: comparing against result.last alone
      # would drop a leading nil, since [].last is nil as well.
      result << element if result.empty? || element != result.last
      result
    end
  end

  # Replaces each element, in place, with what the block returns for
  # (element, index).
  #
  # Returns self.
  #
  def map_with_index!
    each_with_index do |element, index| self[index] = yield(element, index); end
  end

  # Non-destructive variant of map_with_index!.
  #
  # Returns a mapped copy, the receiver is left untouched.
  #
  def map_with_index &block
    dup.map_with_index!(&block)
  end

  # Accesses a random element of this array.
  #
  def random
    self[Kernel.rand(self.length)]
  end

  # Sort the array, in place, using distance from levenshtein: elements
  # whose to_s is closest to from.to_s come first.
  #
  # Will raise if encounters not to_s-able element.
  #
  # Note: Text::Levenshtein is not defined here, it must already be loaded
  # (presumably from the text gem - confirm against the loader).
  #
  def sort_by_levenshtein! from
    from = from.to_s
    sort! do |this, that|
      Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
    end
  end

end
@@ -0,0 +1,11 @@
1
+ # Extensions for the Hash.
2
+ #
3
class Hash

  # Writes a Marshal dump of this hash to the file at the given path.
  #
  # The file is opened for writing in binary mode and closed again when the
  # block is left.
  #
  def dump_to path
    File.open path, 'w:binary' do |out_file|
      Marshal.dump self, out_file
    end
  end

end
@@ -0,0 +1,15 @@
1
+ # The original Module class.
2
+ #
3
class Module

  # Defines, for each given method name, an instance method that forwards
  # the call - arguments and block included - to every element of a
  # collection.
  #
  # The last argument must be an options hash with a :to key. Its value is
  # interpolated into the generated method body as the receiver of #each.
  #
  #   each_delegate :load, :dump, :to => :bundles
  #
  # Raises an ArgumentError if no such target is given.
  #
  def each_delegate *methods
    options = methods.pop
    to = options[:to] if options.is_a?(Hash)
    unless to
      raise ArgumentError, "Multi delegation needs a target. Supply an options hash with a :to key as the last argument (e.g. each_delegate :something, :to => :an_array_reader)."
    end
    methods.each do |method|
      # Generated from a string so that the target is resolved in the
      # instance each time the delegating method is called.
      module_eval("def #{method}(*args, &block)\n#{to}.each{ |t| t.__send__(#{method.inspect}, *args, &block) }\nend\n", "(__DELEGATION__)", 1)
    end
  end

end
@@ -0,0 +1,18 @@
1
+ # Extending the Symbol class.
2
+ #
3
class Symbol

  # Returns this symbol followed by its ever shorter prefixes, down to the
  # given length.
  #
  #   :keys.subtokens    # => [:keys, :key, :ke, :k]
  #   :keys.subtokens(2) # => [:keys, :key, :ke]
  #
  # A down_to_length larger than the symbol's size yields just [self].
  #
  def subtokens down_to_length = 1
    text = to_s
    size = text.size
    down_to_length = size if size < down_to_length

    # Prefixes are sliced rather than chopped off destructively, so the
    # string returned by to_s is never modified.
    (size - 1).downto(down_to_length).inject([self]) do |result, length|
      result << text[0, length].to_sym
    end
  end

end
@@ -0,0 +1,156 @@
1
+ require 'fileutils'
2
+
3
module Picky

  # Raised when a generator is requested for an identifier nothing is
  # registered under.
  #
  # Derives from StandardError so that a plain `rescue` handles it too.
  #
  class NoGeneratorException < StandardError; end

  # This is a very simple project generator.
  # Not at all like Padrino's or Rails'.
  # (No diss, just by way of a faster explanation)
  #
  # Basically copies a prototype project into a newly generated directory.
  #
  class Generator

    # Maps a generator identifier (a symbol) to the class handling it.
    #
    attr_reader :types

    def initialize
      @types = {
        :project => Project
      }
    end

    # Run the generators with this command.
    #
    # This will "route" the commands to the right specific generator: the
    # first element of args identifies the generator, the remaining ones
    # are handed to it.
    #
    def generate args
      generator = generator_for(*args)
      generator.generate
    end

    # Returns a new generator instance for the identifier, initialized with
    # the remaining arguments.
    #
    # Raises a NoGeneratorException if the identifier is unknown.
    #
    def generator_for identifier, *args
      generator_class = types[identifier.to_sym]
      raise NoGeneratorException unless generator_class
      generator_for_class generator_class, *args
    end

    # Instantiates the given generator class with the arguments.
    #
    def generator_for_class klass, *args
      klass.new(*args)
    end

    # Generates a project by copying the prototype project into a directory
    # named after the project, located in the current working directory.
    #
    class Project

      attr_reader :name, :prototype_project_basedir

      # name is the project (and target directory) name. Further arguments
      # are accepted, but ignored.
      #
      def initialize name, *args
        @name = name
        @prototype_project_basedir = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'prototype_project'))
      end

      # Creates the target directory and copies the prototype files into it,
      # reporting each step.
      #
      def generate
        exclaim "Setting up Picky project \"#{name}\"."
        create_target_directory
        copy_all_files
        exclaim "\"#{name}\" is a great project name! Have fun :)"
      end

      # Creates the target directory, unless something already exists there.
      #
      def create_target_directory
        if File.exist?(target_directory)
          exists target_directory
        else
          FileUtils.mkdir target_directory
          created target_directory
        end
      end

      # Copies every prototype entry, except textile files.
      #
      def copy_all_files
        all_prototype_files.each do |filename|
          next if filename =~ /\.textile\z/
          copy_single_file filename
        end
      end

      # Maps a path inside the prototype project to the corresponding path
      # inside the target directory.
      #
      def target_filename_for filename
        # A literal (non-regexp) pattern and a block replacement make sure
        # that special characters in either path are taken verbatim.
        filename.sub(prototype_project_basedir) { target_directory }
      end

      # Copies a single prototype entry, skipping it if its target exists.
      #
      def copy_single_file filename
        target = target_filename_for filename
        if File.exist? target
          exists target
        else
          smart_copy filename, target
        end
      end

      # Well, "smart" ;)
      #
      # Directories are created, files are copied (after making sure their
      # parent directory exists).
      #
      # Errors other than "already exists" are passed on to the caller.
      #
      def smart_copy filename, target
        if File.directory? filename
          FileUtils.mkdir_p target
        else
          FileUtils.mkdir_p File.dirname(target)
          FileUtils.copy_file filename, target
        end
        created target
      rescue Errno::EEXIST
        exists target
      end

      # All entries (files and directories) below the prototype project.
      #
      def all_prototype_files
        Dir[File.join(prototype_project_basedir, '**', '*')]
      end

      # The absolute path of the directory the project is generated into.
      #
      def target_directory
        File.expand_path File.join(Dir.pwd, name)
      end

      # Reports a created entry (with "created" in green).
      #
      def created entry
        exclaim "#{entry} \x1b[32mcreated\x1b[m."
      end

      # Reports a skipped, already existing entry (with "exists" in red).
      #
      def exists entry
        exclaim "#{entry} \x1b[31mexists\x1b[m, skipping."
      end

      # Prints the message to standard output.
      #
      def exclaim something
        puts something
      end

    end

  end

end
@@ -0,0 +1,23 @@
1
+ #
2
+ #
3
module Helpers
  module Cache
    # This is a simple cache.
    # The store needs to be able to answer to [] and []=.
    #
    # Returns what the store holds for the key. If that is nil or false,
    # the block is called and its result is stored under the key and
    # returned.
    #
    # Note: nil and false results are stored, but never served from the
    # store - the block is called again for them.
    #
    def cached store, key, &block
      # Get cached result
      #
      results = store[key]
      return results if results

      # The block is called directly: wrapping it with lambda(&block) is
      # not accepted by current Rubies anymore.
      results = block.call

      # Store results
      #
      store[key] = results

      results
    end
  end
end
@@ -0,0 +1,11 @@
1
module Helpers
  module GC
    # Runs the block with the garbage collector disabled.
    #
    # Afterwards - also if the block raises - the garbage collector is
    # enabled again and a collection is started.
    #
    # Returns the result of the block.
    #
    def gc_disabled &block
      ::GC.disable
      block.call
    ensure
      ::GC.enable
      ::GC.start
    end
    alias disabled gc_disabled
  end
end
@@ -0,0 +1,45 @@
1
+ # Helper methods for measuring, benchmarking, logging.
2
+ #
3
module Helpers
  module Measuring

    # Calls the block and returns how long it took, in seconds.
    #
    # name and performed_on are only used by the log statement, which is
    # currently commented out.
    #
    def log_performance(name, performed_on = '', &block)
      time_begin = Time.now.to_f

      block.call

      duration = Time.now.to_f - time_begin

      # PerformanceLog.info("#{'%30s' % name}: #{'%2.10f' % duration} #{performed_on}")
      duration
    end

    # Calls the block with the given arguments.
    #
    # Returns a duration in seconds.
    #
    def timed(*args, &block)
      time_begin = Time.now.to_f

      block.call(*args)

      Time.now.to_f - time_begin
    end

    # Profiles the block with ruby-prof, writes the result as a html graph
    # to log/profiler.html and tries to open that file with `open`.
    #
    # mode names a RubyProf measure mode constant in lower case,
    # e.g. :cpu_time for RubyProf::CPU_TIME.
    #
    def profiled_html(mode = :cpu_time, &block)
      require 'ruby-prof'

      RubyProf.measure_mode = RubyProf.const_get(mode.to_s.upcase)

      result = RubyProf.profile(&block)

      printer = RubyProf::GraphHtmlPrinter.new(result)
      File.open('log/profiler.html', 'w') do |f|
        printer.print(f)
      end

      system 'open log/profiler.html'
    end

  end
end
@@ -0,0 +1,27 @@
1
module Helpers
  module Search

    # Maps a results count to a symbol describing its magnitude.
    #
    # Whatever is covered by none of the cases below - e.g. more than
    # 100 - is classified as :too_many.
    #
    def status_class_for(results_count)
      case results_count
      when 0       then :none
      when 1       then :one
      when 2..7    then :few
      when 8..15   then :some
      when 16..25  then :several
      when 26..50  then :many
      when 51..100 then :lots
      else              :too_many
      end
    end

  end

end
@@ -0,0 +1,328 @@
1
+ # encoding: utf-8
2
+ #
3
require 'forwardable'

module Index

  # This is the ACTUAL index.
  #
  # Handles full index, partial index, weights index, and similarity index.
  #
  class Bundle

    extend Forwardable

    attr_reader :name, :category, :type
    attr_accessor :index, :weights, :similarity
    attr_accessor :partial_strategy, :weights_strategy, :similarity_strategy

    # Forward hash-like access to the index.
    #
    def_delegators :index, :[], :[]=, :clear

    # Starts out with empty index, weights and similarity hashes.
    #
    # name, category.name and type.name are used for the identifier and
    # for the cache file paths.
    #
    def initialize name, category, type, partial_strategy, weights_strategy, similarity_strategy
      @index      = {}
      @weights    = {}
      @similarity = {}

      @name     = name
      @category = category
      @type     = type

      @partial_strategy    = partial_strategy
      @weights_strategy    = weights_strategy
      @similarity_strategy = similarity_strategy
    end

    # Get the ids for the text.
    #
    # Returns an empty array if there are none.
    #
    def ids text
      @index[text] || []
    end
    # Get a weight for the text.
    #
    def weight text
      @weights[text]
    end
    # Get a list of similar texts.
    #
    # The text is encoded by the similarity strategy. Returns an empty
    # array if there is no code, or nothing stored for the code.
    #
    def similar text
      code = similarity_strategy.encoded text
      code && @similarity[code] || []
    end

    # Identifier for this bundle.
    #
    def identifier
      "#{name}:#{type.name}:#{category.name}"
    end

    # The directory all index files are located under.
    #
    def search_index_root
      File.join SEARCH_ROOT, 'index'
      # category.search_index_root
    end

    # Size of the file at path in bytes, 0 if it does not exist.
    #
    def size_of path
      # File.size? is nil for a missing or empty file.
      File.size?(path).to_i
    end
    # Check if the cache files are there and do not have size 0.
    #
    def caches_ok?
      cache_ok?(index_cache_path) &&
      cache_ok?(similarity_cache_path) &&
      cache_ok?(weights_cache_path)
    end
    # Is the cache ok? I.e. larger than zero in size.
    #
    def cache_ok? path
      size_of(path) > 0
    end
    # Raises an appropriate error message.
    #
    def raise_cache_missing what
      raise "#{what} cache for #{identifier} missing."
    end
    # Is the cache small? I.e. smaller than 16 bytes.
    #
    def cache_small? path
      size_of(path) < 16
    end
    # Prints a warning about a small cache.
    #
    def warn_cache_small what
      puts "#{what} cache for #{identifier} smaller than 16 bytes."
    end
    # Check all index files and raise if necessary.
    #
    # Small index and weights caches only cause a warning.
    #
    def raise_unless_cache_exists
      warn_cache_small :index      if cache_small?(index_cache_path)
      # warn_cache_small :similarity if cache_small?(similarity_cache_path)
      warn_cache_small :weights    if cache_small?(weights_cache_path)

      raise_cache_missing :index      unless cache_ok?(index_cache_path)
      raise_cache_missing :similarity unless cache_ok?(similarity_cache_path)
      raise_cache_missing :weights    unless cache_ok?(weights_cache_path)
    end

    # Copies the indexes to the "backup" directory.
    #
    def backup
      FileUtils.mkdir backup_path unless File.directory?(backup_path)
      FileUtils.cp index_cache_path,      backup_path, :verbose => true
      FileUtils.cp similarity_cache_path, backup_path, :verbose => true
      FileUtils.cp weights_cache_path,    backup_path, :verbose => true
    end
    # The "backup" directory, located next to the cache files.
    #
    def backup_path
      File.join File.dirname(index_cache_path), 'backup'
    end

    # Restores the indexes from the "backup" directory.
    #
    def restore
      FileUtils.cp backup_file_path_of(index_cache_path),      index_cache_path,      :verbose => true
      FileUtils.cp backup_file_path_of(similarity_cache_path), similarity_cache_path, :verbose => true
      FileUtils.cp backup_file_path_of(weights_cache_path),    weights_cache_path,    :verbose => true
    end
    # The path of the backup copy of the file at path.
    #
    def backup_file_path_of path
      dir, name = File.split path
      File.join dir, 'backup', name
    end

    # Delete the file at path.
    #
    # A missing file is not an error.
    #
    def delete path
      FileUtils.rm_rf path
    end
    # Delete all index files.
    #
    def delete_all
      delete index_cache_path
      delete similarity_cache_path
      delete weights_cache_path
    end

    # Create directory and parent directories.
    #
    def create_directory
      FileUtils.mkdir_p cache_directory
    end
    # TODO Move to config. Duplicate Code in field.rb.
    #
    def cache_directory
      File.join search_index_root, SEARCH_ENVIRONMENT, type.name.to_s
    end

    # Generates a cache path.
    #
    def cache_path text
      File.join cache_directory, "#{name}_#{text}.dump"
    end
    def index_cache_path
      cache_path "#{category.name}_index"
    end
    def similarity_cache_path
      cache_path "#{category.name}_similarity"
    end
    def weights_cache_path
      cache_path "#{category.name}_weights"
    end

    # Loads all indexes into this category.
    #
    def load
      load_index
      load_similarity
      load_weights
    end
    # Loads the dump at path and assigns it using the writer for the given
    # name (e.g. :index). Does nothing if there is no file at path.
    #
    # Note: Marshal.load must only be used on trusted files, i.e. on dumps
    # written by this application itself.
    #
    def load_the index_method_name, path
      return unless File.exist? path
      # The block form closes the file again after loading.
      loaded = File.open(path, 'r:binary') { |file| Marshal.load file }
      self.send "#{index_method_name}=", loaded
    end
    def load_index
      puts "#{Time.now}: Loading the index for #{identifier} from the cache."
      load_the :index, index_cache_path
    end
    def load_similarity
      puts "#{Time.now}: Loading the similarity for #{identifier} from the cache."
      load_the :similarity, similarity_cache_path
    end
    def load_weights
      puts "#{Time.now}: Loading the weights for #{identifier} from the cache."
      load_the :weights, weights_cache_path
    end

    # TODO Decide on the fate of this.
    #
    # # Generates similar index entries. If you search for bla, you will also find the blarf and vice versa.
    # #
    # # Examples:
    # #   title.generate_similar_from { :bla => :blarf }
    # #
    # # Note: Be careful with this, as it uses up a lot of memory.
    # #
    # def generate_similar_from mapping
    #   mapping.each_pair do |one, other|
    #     one_ids   = self.index[one]
    #     other_ids = self.index[other]
    #
    #     self.index[one]   += other_ids || [] if one_ids
    #     self.index[other] += one_ids   || [] if other_ids
    #   end
    # end

    # Generation
    #

    # This method
    # * loads the base index from the index file
    # * generates the derived indexes (weights, similarity)
    #
    # Note: nothing is dumped here, see #dump.
    #
    def generate_caches_from_db
      cache_from_db_generation_message
      load_from_index_file
      generate_caches_from_memory
    end
    def cache_from_db_generation_message
      puts "#{Time.now}: Generating caches from db for #{identifier}."
    end
    # Generates derived indexes from the index.
    #
    # Note: assumes that there is something in the index
    #
    def generate_caches_from_memory
      cache_from_memory_generation_message
      generate_derived
    end
    def cache_from_memory_generation_message
      puts "#{Time.now}: Generating derived caches from memory for #{identifier}."
    end

    # Generates the weights and similarity from the main index.
    #
    def generate_derived
      generate_weights
      generate_similarity
    end

    # Clears the index and loads the data from the index file.
    #
    def load_from_index_file # TODO Load from index_file.
      clear
      retrieve
    end
    # Retrieves the data into the index.
    #
    # Each line of the index file is expected to have the form "id,token".
    # Only the first comma separates, so tokens may contain commas.
    #
    # TODO Beautify.
    #
    def retrieve
      # TODO Make r:binary configurable!
      #
      File.open(search_index_file_name, 'r:binary') do |file|
        file.each_line do |line|
          indexed_id, token = line.split ',', 2
          token = token.chomp.to_sym

          initialize_index_for token
          index[token] << indexed_id.to_i
        end
      end
    end
    # Makes sure there is an ids array for the token.
    #
    def initialize_index_for token
      index[token] ||= []
    end
    # TODO Duplicate code!
    #
    # TODO Use config object?
    #
    def search_index_file_name
      File.join cache_directory, "#{type.name}_#{category.name}_index.txt"
    end

    # Generators.
    #
    # TODO Move somewhere more fitting.
    #

    # Generates a new index (writes its index) using the
    # given partial caching strategy.
    #
    def generate_partial
      generator = Cacher::PartialGenerator.new self.index
      self.index = generator.generate self.partial_strategy
    end
    # Uses the given full index as a base for generating the partial index.
    #
    # Returns self.
    #
    def generate_partial_from full_index
      self.index = full_index
      self.generate_partial
      self
    end
    # Generates a new similarity index (writes its index) using the
    # given similarity caching strategy.
    #
    def generate_similarity
      generator = Cacher::SimilarityGenerator.new self.index
      self.similarity = generator.generate self.similarity_strategy
    end
    # Generates a new weights index (writes its index) using the
    # given weight caching strategy.
    #
    def generate_weights
      generator = Cacher::WeightsGenerator.new self.index
      self.weights = generator.generate self.weights_strategy
    end

    # Saves the index in a dump file.
    #
    # Note: relies on the hashes answering to dump_to.
    #
    def dump
      dump_index
      dump_similarity
      dump_weights
    end
    def dump_index
      index.dump_to index_cache_path
    end
    def dump_similarity
      similarity.dump_to similarity_cache_path
    end
    def dump_weights
      weights.dump_to weights_cache_path
    end

  end
end