picky 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,7 +4,7 @@ module Cacher
4
4
  #
5
5
  class PartialGenerator < Generator
6
6
 
7
- # Generate a similarity index based on the given index.
7
+ # Generate a partial index based on the given index.
8
8
  #
9
9
  def generate strategy = Partial::Subtoken.new(:down_to => 1)
10
10
  strategy.generate_from self.index
@@ -2,16 +2,22 @@
2
2
  #
3
3
  class Hash
4
4
 
5
- # Dumps binary self to the path given.
5
+ # Dumps self as JSON to the path given. The path is given without extension; ".json" is appended.
6
6
  #
7
- # TODO Still used? If yes, spec!
8
- #
9
- def dump_to path
10
- File.open(path, 'w:binary') do |out_file|
7
+ def dump_to_json path
8
+ File.open("#{path}.json", 'w') do |out_file|
11
9
  Yajl::Encoder.encode self, out_file
12
10
  end
13
11
  end
14
12
 
13
+ # Dumps marshalled (binary) self to the path given. The path is given without extension; ".dump" is appended.
14
+ #
15
+ def dump_to_marshalled path
16
+ File.open("#{path}.dump", 'w:binary') do |out_file|
17
+ Marshal.dump self, out_file
18
+ end
19
+ end
20
+
15
21
  # Use yajl's encoding.
16
22
  #
17
23
  def to_json options = {}
@@ -40,7 +40,7 @@ module Picky
40
40
  def generator_for identifier, *args
41
41
  generator_class = types[identifier.to_sym]
42
42
  raise NoGeneratorException unless generator_class
43
- generator_for_class generator_class, *args
43
+ generator_for_class generator_class, identifier, *args
44
44
  end
45
45
 
46
46
  #
@@ -56,11 +56,11 @@ module Picky
56
56
  #
57
57
  class Project
58
58
 
59
- attr_reader :name, :prototype_project_basedir
59
+ attr_reader :name, :project_prototype_basedir
60
60
 
61
- def initialize name, *args
61
+ def initialize identifier, name, *args
62
62
  @name = name
63
- @prototype_project_basedir = File.expand_path '../../../prototype_project', __FILE__
63
+ @project_prototype_basedir = File.expand_path '../../../project_prototype', __FILE__
64
64
  end
65
65
 
66
66
  #
@@ -70,12 +70,14 @@ module Picky
70
70
  create_target_directory
71
71
  copy_all_files
72
72
  exclaim "\"#{name}\" is a great project name! Have fun :)\n"
73
- exclaim "Next steps:"
74
73
  exclaim ""
75
- exclaim "cd #{name}"
76
- exclaim "cat Gemfile # <- Do you need the mysql gem, for example?"
77
- exclaim "bundle install"
78
- exclaim "rake # <- shows you where Picky needs input from you."
74
+ exclaim "Next steps:"
75
+ exclaim "1. cd #{name}"
76
+ exclaim "2. bundle install"
77
+ exclaim "3. rake index"
78
+ exclaim "4. rake start"
79
+ exclaim "5. rake # (optional) shows you where Picky needs input from you"
80
+ exclaim " # if you want to define your own search."
79
81
  end
80
82
 
81
83
  #
@@ -101,7 +103,7 @@ module Picky
101
103
  #
102
104
  #
103
105
  def target_filename_for filename
104
- filename.gsub(%r{#{prototype_project_basedir}}, target_directory)
106
+ filename.gsub(%r{#{project_prototype_basedir}}, target_directory)
105
107
  end
106
108
  #
107
109
  #
@@ -145,7 +147,7 @@ module Picky
145
147
  #
146
148
  #
147
149
  def all_prototype_files
148
- Dir[File.join(prototype_project_basedir, '**', '*')]
150
+ Dir[File.join(project_prototype_basedir, '**', '*')]
149
151
  end
150
152
 
151
153
  #
@@ -152,7 +152,7 @@ module Index
152
152
  # Generates a cache path.
153
153
  #
154
154
  def cache_path text
155
- File.join cache_directory, "#{name}_#{text}.json"
155
+ File.join cache_directory, "#{name}_#{text}"
156
156
  end
157
157
  def index_cache_path
158
158
  cache_path "#{category.name}_index"
@@ -171,20 +171,23 @@ module Index
171
171
  load_similarity
172
172
  load_weights
173
173
  end
174
- def load_the index_method_name, path
175
- self.send "#{index_method_name}=", Yajl::Parser.parse(File.open(path, 'r'), :symbolize_keys => true)
174
+ def load_the_json path
175
+ Yajl::Parser.parse File.open("#{path}.json", 'r'), :symbolize_keys => true
176
+ end
177
+ def load_the_marshalled path
178
+ Marshal.load File.open("#{path}.dump", 'r:binary')
176
179
  end
177
180
  def load_index
178
181
  timed_exclaim "Loading the index for #{identifier} from the cache."
179
- load_the :index, index_cache_path
182
+ self.index = load_the_json index_cache_path
180
183
  end
181
184
  def load_similarity
182
185
  timed_exclaim "Loading the similarity for #{identifier} from the cache."
183
- load_the :similarity, similarity_cache_path
186
+ self.similarity = load_the_marshalled similarity_cache_path
184
187
  end
185
188
  def load_weights
186
189
  timed_exclaim "Loading the weights for #{identifier} from the cache."
187
- load_the :weights, weights_cache_path
190
+ self.weights = load_the_json weights_cache_path
188
191
  end
189
192
 
190
193
  # Generation
@@ -195,16 +198,10 @@ module Index
195
198
  # * generates derived indexes
196
199
  # * dumps all the indexes into files
197
200
  #
198
- # TODO Rename to Source!!!
199
- #
200
- def generate_caches_from_db
201
- cache_from_db_generation_message
201
+ def generate_caches_from_source
202
202
  load_from_index_file
203
203
  generate_caches_from_memory
204
204
  end
205
- def cache_from_db_generation_message
206
- timed_exclaim "CACHE FROM SOURCE #{identifier}."
207
- end
208
205
  # Generates derived indexes from the index and dumps.
209
206
  #
210
207
  # Note: assumes that there is something in the index
@@ -227,9 +224,13 @@ module Index
227
224
  # Load the data from the db.
228
225
  #
229
226
  def load_from_index_file
227
+ load_from_index_generation_message
230
228
  clear
231
229
  retrieve
232
230
  end
231
+ def load_from_index_generation_message
232
+ timed_exclaim "LOAD INDEX #{identifier}."
233
+ end
233
234
  # Retrieves the data into the index.
234
235
  #
235
236
  # TODO Beautify.
@@ -272,6 +273,7 @@ module Index
272
273
  self.index = generator.generate self.partial_strategy
273
274
  end
274
275
  def generate_partial_from exact_index
276
+ timed_exclaim "PARTIAL GENERATE #{identifier}."
275
277
  self.index = exact_index
276
278
  self.generate_partial
277
279
  self
@@ -299,13 +301,20 @@ module Index
299
301
  dump_weights
300
302
  end
301
303
  def dump_index
302
- index.dump_to index_cache_path
304
+ timed_exclaim "DUMP INDEX #{identifier}."
305
+ index.dump_to_json index_cache_path
303
306
  end
307
+ # Note: We marshal the similarity, as the
308
+ # Yajl json lib cannot load symbolized
309
+ # values, just keys.
310
+ #
304
311
  def dump_similarity
305
- similarity.dump_to similarity_cache_path
312
+ timed_exclaim "DUMP SIMILARITY #{identifier}."
313
+ similarity.dump_to_marshalled similarity_cache_path
306
314
  end
307
315
  def dump_weights
308
- weights.dump_to weights_cache_path
316
+ timed_exclaim "DUMP WEIGHTS #{identifier}."
317
+ weights.dump_to_json weights_cache_path
309
318
  end
310
319
 
311
320
  end
@@ -40,17 +40,14 @@ module Index
40
40
  # Generates all caches for this category.
41
41
  #
42
42
  def generate_caches
43
- timed_exclaim "LOAD #{identifier}."
44
- generate_caches_from_db
45
- timed_exclaim "PARTIAL #{identifier}."
43
+ generate_caches_from_source
46
44
  generate_partial
47
- timed_exclaim "CACHE #{identifier}."
48
45
  generate_caches_from_memory
49
- timed_exclaim "DUMP #{identifier}."
50
46
  dump_caches
47
+ timed_exclaim "CACHE FINISHED #{identifier}."
51
48
  end
52
- def generate_caches_from_db
53
- exact.generate_caches_from_db
49
+ def generate_caches_from_source
50
+ exact.generate_caches_from_source
54
51
  end
55
52
  def generate_partial
56
53
  partial.generate_partial_from exact.index
@@ -61,7 +61,7 @@ module Indexers
61
61
  end
62
62
 
63
63
  def indexing_message
64
- timed_exclaim "INDEX #{@type.name}, #{@field.name}" #:#{@field.indexed_name}." # TODO field.identifier
64
+ timed_exclaim "INDEX #{@type.name} #{@field.name}" #:#{@field.indexed_name}." # TODO field.identifier
65
65
  end
66
66
 
67
67
  end
data/lib/picky/indexes.rb CHANGED
@@ -16,7 +16,7 @@ module Indexes
16
16
 
17
17
  # Run in parallel.
18
18
  #
19
- timed_exclaim "Indexing using #{Cores.max_processors} processors."
19
+ timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS."
20
20
  Cores.forked self.fields, :randomly => true do |field|
21
21
  # Reestablish DB connection.
22
22
  #
@@ -24,6 +24,7 @@ module Indexes
24
24
  field.index
25
25
  field.cache
26
26
  end
27
+ timed_exclaim "INDEXING FINISHED."
27
28
  end
28
29
  def self.index_solr
29
30
  configuration.index_solr
data/lib/picky/signals.rb CHANGED
@@ -1,4 +1,4 @@
1
- # TODO Cleanup and move to prototype_project.
1
+ # TODO Cleanup and move to project_prototype.
2
2
  #
3
3
  # Signal.trap 'USR1' do
4
4
  # Indexes.reload
@@ -2,7 +2,7 @@ source :gemcutter
2
2
 
3
3
  # Gems required by Picky.
4
4
  #
5
- gem 'picky', '~> 0.2.0'
5
+ gem 'picky', '~> 0.3.0'
6
6
  gem 'bundler', '>= 0.9.26'
7
7
  gem 'rack', '~> 1.2.1'
8
8
  gem 'rack-mount', '~> 0.6.9'
@@ -26,4 +26,4 @@ gem 'unicorn'
26
26
 
27
27
  # Optional. Use your preferred database adapter.
28
28
  #
29
- gem 'mysql'
29
+ # gem 'mysql'
File without changes
File without changes
@@ -7,7 +7,7 @@ class PickySearch < Application
7
7
  #
8
8
  # Check the Wiki http://github.com/floere/picky/wiki for more options.
9
9
  #
10
- # Ask me if you have questions or specific requests.
10
+ # Ask me or the Google group if you have questions or specific requests.
11
11
  #
12
12
 
13
13
  indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
@@ -15,7 +15,9 @@ class PickySearch < Application
15
15
  indexing.splits_text_on(/[\s\/\-\"\&\.]/)
16
16
 
17
17
  books_index = index :books,
18
- Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
18
+ Sources::CSV.new(:title, :author, :isbn, :year, :publisher, :subjects, :file => 'app/library.csv'),
19
+ # Use a database as source:
20
+ # Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
19
21
  field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title words are indexed.
20
22
  field(:author),
21
23
  field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN does not make much sense.
@@ -9,5 +9,5 @@ adapter: mysql
9
9
  host: localhost
10
10
  username: root
11
11
  password:
12
- database: books_database # Needs to contain the DB source in app/application.rb.
12
+ database: your_database # Load this configuration and use a DB source in app/application.rb.
13
13
  encoding: utf8