picky 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ module Cacher
4
4
  #
5
5
  class PartialGenerator < Generator
6
6
 
7
- # Generate a similarity index based on the given index.
7
+ # Generate a partial index based on the given index.
8
8
  #
9
9
  def generate strategy = Partial::Subtoken.new(:down_to => 1)
10
10
  strategy.generate_from self.index
@@ -2,16 +2,22 @@
2
2
  #
3
3
  class Hash
4
4
 
5
- # Dumps binary self to the path given.
5
+ # Dumps jsonized self to the path given. Minus extension.
6
6
  #
7
- # TODO Still used? If yes, spec!
8
- #
9
- def dump_to path
10
- File.open(path, 'w:binary') do |out_file|
7
+ def dump_to_json path
8
+ File.open("#{path}.json", 'w') do |out_file|
11
9
  Yajl::Encoder.encode self, out_file
12
10
  end
13
11
  end
14
12
 
13
+ # Dumps binary self to the path given. Minus extension.
14
+ #
15
+ def dump_to_marshalled path
16
+ File.open("#{path}.dump", 'w:binary') do |out_file|
17
+ Marshal.dump self, out_file
18
+ end
19
+ end
20
+
15
21
  # Use yajl's encoding.
16
22
  #
17
23
  def to_json options = {}
@@ -40,7 +40,7 @@ module Picky
40
40
  def generator_for identifier, *args
41
41
  generator_class = types[identifier.to_sym]
42
42
  raise NoGeneratorException unless generator_class
43
- generator_for_class generator_class, *args
43
+ generator_for_class generator_class, identifier, *args
44
44
  end
45
45
 
46
46
  #
@@ -56,11 +56,11 @@ module Picky
56
56
  #
57
57
  class Project
58
58
 
59
- attr_reader :name, :prototype_project_basedir
59
+ attr_reader :name, :project_prototype_basedir
60
60
 
61
- def initialize name, *args
61
+ def initialize identifier, name, *args
62
62
  @name = name
63
- @prototype_project_basedir = File.expand_path '../../../prototype_project', __FILE__
63
+ @project_prototype_basedir = File.expand_path '../../../project_prototype', __FILE__
64
64
  end
65
65
 
66
66
  #
@@ -70,12 +70,14 @@ module Picky
70
70
  create_target_directory
71
71
  copy_all_files
72
72
  exclaim "\"#{name}\" is a great project name! Have fun :)\n"
73
- exclaim "Next steps:"
74
73
  exclaim ""
75
- exclaim "cd #{name}"
76
- exclaim "cat Gemfile # <- Do you need the mysql gem, for example?"
77
- exclaim "bundle install"
78
- exclaim "rake # <- shows you where Picky needs input from you."
74
+ exclaim "Next steps:"
75
+ exclaim "1. cd #{name}"
76
+ exclaim "2. bundle install"
77
+ exclaim "3. rake index"
78
+ exclaim "4. rake start"
79
+ exclaim "5. rake # (optional) shows you where Picky needs input from you"
80
+ exclaim " # if you want to define your own search."
79
81
  end
80
82
 
81
83
  #
@@ -101,7 +103,7 @@ module Picky
101
103
  #
102
104
  #
103
105
  def target_filename_for filename
104
- filename.gsub(%r{#{prototype_project_basedir}}, target_directory)
106
+ filename.gsub(%r{#{project_prototype_basedir}}, target_directory)
105
107
  end
106
108
  #
107
109
  #
@@ -145,7 +147,7 @@ module Picky
145
147
  #
146
148
  #
147
149
  def all_prototype_files
148
- Dir[File.join(prototype_project_basedir, '**', '*')]
150
+ Dir[File.join(project_prototype_basedir, '**', '*')]
149
151
  end
150
152
 
151
153
  #
@@ -152,7 +152,7 @@ module Index
152
152
  # Generates a cache path.
153
153
  #
154
154
  def cache_path text
155
- File.join cache_directory, "#{name}_#{text}.json"
155
+ File.join cache_directory, "#{name}_#{text}"
156
156
  end
157
157
  def index_cache_path
158
158
  cache_path "#{category.name}_index"
@@ -171,20 +171,23 @@ module Index
171
171
  load_similarity
172
172
  load_weights
173
173
  end
174
- def load_the index_method_name, path
175
- self.send "#{index_method_name}=", Yajl::Parser.parse(File.open(path, 'r'), :symbolize_keys => true)
174
+ def load_the_json path
175
+ Yajl::Parser.parse File.open("#{path}.json", 'r'), :symbolize_keys => true
176
+ end
177
+ def load_the_marshalled path
178
+ Marshal.load File.open("#{path}.dump", 'r:binary')
176
179
  end
177
180
  def load_index
178
181
  timed_exclaim "Loading the index for #{identifier} from the cache."
179
- load_the :index, index_cache_path
182
+ self.index = load_the_json index_cache_path
180
183
  end
181
184
  def load_similarity
182
185
  timed_exclaim "Loading the similarity for #{identifier} from the cache."
183
- load_the :similarity, similarity_cache_path
186
+ self.similarity = load_the_marshalled similarity_cache_path
184
187
  end
185
188
  def load_weights
186
189
  timed_exclaim "Loading the weights for #{identifier} from the cache."
187
- load_the :weights, weights_cache_path
190
+ self.weights = load_the_json weights_cache_path
188
191
  end
189
192
 
190
193
  # Generation
@@ -195,16 +198,10 @@ module Index
195
198
  # * generates derived indexes
196
199
  # * dumps all the indexes into files
197
200
  #
198
- # TODO Rename to Source!!!
199
- #
200
- def generate_caches_from_db
201
- cache_from_db_generation_message
201
+ def generate_caches_from_source
202
202
  load_from_index_file
203
203
  generate_caches_from_memory
204
204
  end
205
- def cache_from_db_generation_message
206
- timed_exclaim "CACHE FROM SOURCE #{identifier}."
207
- end
208
205
  # Generates derived indexes from the index and dumps.
209
206
  #
210
207
  # Note: assumes that there is something in the index
@@ -227,9 +224,13 @@ module Index
227
224
  # Load the data from the db.
228
225
  #
229
226
  def load_from_index_file
227
+ load_from_index_generation_message
230
228
  clear
231
229
  retrieve
232
230
  end
231
+ def load_from_index_generation_message
232
+ timed_exclaim "LOAD INDEX #{identifier}."
233
+ end
233
234
  # Retrieves the data into the index.
234
235
  #
235
236
  # TODO Beautify.
@@ -272,6 +273,7 @@ module Index
272
273
  self.index = generator.generate self.partial_strategy
273
274
  end
274
275
  def generate_partial_from exact_index
276
+ timed_exclaim "PARTIAL GENERATE #{identifier}."
275
277
  self.index = exact_index
276
278
  self.generate_partial
277
279
  self
@@ -299,13 +301,20 @@ module Index
299
301
  dump_weights
300
302
  end
301
303
  def dump_index
302
- index.dump_to index_cache_path
304
+ timed_exclaim "DUMP INDEX #{identifier}."
305
+ index.dump_to_json index_cache_path
303
306
  end
307
+ # Note: We marshal the similarity, as the
308
+ # Yajl json lib cannot load symbolized
309
+ # values, just keys.
310
+ #
304
311
  def dump_similarity
305
- similarity.dump_to similarity_cache_path
312
+ timed_exclaim "DUMP SIMILARITY #{identifier}."
313
+ similarity.dump_to_marshalled similarity_cache_path
306
314
  end
307
315
  def dump_weights
308
- weights.dump_to weights_cache_path
316
+ timed_exclaim "DUMP WEIGHTS #{identifier}."
317
+ weights.dump_to_json weights_cache_path
309
318
  end
310
319
 
311
320
  end
@@ -40,17 +40,14 @@ module Index
40
40
  # Generates all caches for this category.
41
41
  #
42
42
  def generate_caches
43
- timed_exclaim "LOAD #{identifier}."
44
- generate_caches_from_db
45
- timed_exclaim "PARTIAL #{identifier}."
43
+ generate_caches_from_source
46
44
  generate_partial
47
- timed_exclaim "CACHE #{identifier}."
48
45
  generate_caches_from_memory
49
- timed_exclaim "DUMP #{identifier}."
50
46
  dump_caches
47
+ timed_exclaim "CACHE FINISHED #{identifier}."
51
48
  end
52
- def generate_caches_from_db
53
- exact.generate_caches_from_db
49
+ def generate_caches_from_source
50
+ exact.generate_caches_from_source
54
51
  end
55
52
  def generate_partial
56
53
  partial.generate_partial_from exact.index
@@ -61,7 +61,7 @@ module Indexers
61
61
  end
62
62
 
63
63
  def indexing_message
64
- timed_exclaim "INDEX #{@type.name}, #{@field.name}" #:#{@field.indexed_name}." # TODO field.identifier
64
+ timed_exclaim "INDEX #{@type.name} #{@field.name}" #:#{@field.indexed_name}." # TODO field.identifier
65
65
  end
66
66
 
67
67
  end
data/lib/picky/indexes.rb CHANGED
@@ -16,7 +16,7 @@ module Indexes
16
16
 
17
17
  # Run in parallel.
18
18
  #
19
- timed_exclaim "Indexing using #{Cores.max_processors} processors."
19
+ timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS."
20
20
  Cores.forked self.fields, :randomly => true do |field|
21
21
  # Reestablish DB connection.
22
22
  #
@@ -24,6 +24,7 @@ module Indexes
24
24
  field.index
25
25
  field.cache
26
26
  end
27
+ timed_exclaim "INDEXING FINISHED."
27
28
  end
28
29
  def self.index_solr
29
30
  configuration.index_solr
data/lib/picky/signals.rb CHANGED
@@ -1,4 +1,4 @@
1
- # TODO Cleanup and move to prototype_project.
1
+ # TODO Cleanup and move to project_prototype.
2
2
  #
3
3
  # Signal.trap 'USR1' do
4
4
  # Indexes.reload
@@ -2,7 +2,7 @@ source :gemcutter
2
2
 
3
3
  # Gems required by Picky.
4
4
  #
5
- gem 'picky', '~> 0.2.0'
5
+ gem 'picky', '~> 0.3.0'
6
6
  gem 'bundler', '>= 0.9.26'
7
7
  gem 'rack', '~> 1.2.1'
8
8
  gem 'rack-mount', '~> 0.6.9'
@@ -26,4 +26,4 @@ gem 'unicorn'
26
26
 
27
27
  # Optional. Use your preferred database adapter.
28
28
  #
29
- gem 'mysql'
29
+ # gem 'mysql'
File without changes
File without changes
@@ -7,7 +7,7 @@ class PickySearch < Application
7
7
  #
8
8
  # Check the Wiki http://github.com/floere/picky/wiki for more options.
9
9
  #
10
- # Ask me if you have questions or specific requests.
10
+ # Ask me or the google group if you have questions or specific requests.
11
11
  #
12
12
 
13
13
  indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
@@ -15,7 +15,9 @@ class PickySearch < Application
15
15
  indexing.splits_text_on(/[\s\/\-\"\&\.]/)
16
16
 
17
17
  books_index = index :books,
18
- Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
18
+ Sources::CSV.new(:title, :author, :isbn, :year, :publisher, :subjects, :file => 'app/library.csv'),
19
+ # Use a database as source:
20
+ # Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
19
21
  field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
20
22
  field(:author),
21
23
  field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
@@ -9,5 +9,5 @@ adapter: mysql
9
9
  host: localhost
10
10
  username: root
11
11
  password:
12
- database: books_database # Needs to contain the DB source in app/application.rb.
12
+ database: your_database # Load this configuration and use a DB source in app/application.rb.
13
13
  encoding: utf8