picky 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bundling.rb +0 -2
- data/lib/deployment.rb +2 -3
- data/lib/picky/application.rb +2 -2
- data/lib/picky/cacher/generator.rb +6 -8
- data/lib/picky/cacher/partial_generator.rb +2 -2
- data/lib/picky/configuration/field.rb +1 -1
- data/lib/picky/configuration/indexes.rb +1 -1
- data/lib/picky/configuration/queries.rb +1 -1
- data/lib/picky/configuration/type.rb +2 -5
- data/lib/picky/index/bundle.rb +6 -25
- data/lib/picky/indexers/solr.rb +5 -5
- data/lib/picky/indexes.rb +4 -1
- data/lib/picky/initializers/ext.rb +0 -2
- data/lib/picky/loader.rb +17 -58
- data/lib/picky/query/base.rb +6 -9
- data/lib/picky/query/token.rb +3 -6
- data/lib/picky/query/tokens.rb +12 -24
- data/lib/picky/query/weights.rb +2 -3
- data/lib/picky/tokenizers/base.rb +14 -17
- data/lib/picky/tokenizers/index.rb +1 -1
- data/lib/picky/tokenizers/query.rb +1 -1
- data/lib/tasks/server.rake +1 -1
- data/prototype_project/app/application.rb +1 -1
- data/prototype_project/app/application.ru +11 -1
- data/spec/lib/index/bundle_spec.rb +1 -1
- data/spec/lib/results/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +4 -4
- metadata +3 -8
- data/lib/picky/configuration/configuration.rb +0 -13
- data/spec/lib/configuration/configuration_spec.rb +0 -38
- data/spec/lib/configuration_spec.rb +0 -8
data/lib/bundling.rb
CHANGED
data/lib/deployment.rb
CHANGED
@@ -124,10 +124,9 @@ module Picky
|
|
124
124
|
run "rm -rf #{current_path}/log; ln -sf #{shared_path}/log #{current_path}/log"
|
125
125
|
run "rm -rf #{current_path}/index; ln -sf #{shared_path}/index #{current_path}/index"
|
126
126
|
# link database-config files
|
127
|
-
run "ln -sf #{shared_path}/
|
128
|
-
run "ln -sf #{shared_path}/config/source.yml #{current_path}/config/db/source.yml"
|
127
|
+
run "ln -sf #{shared_path}/app/db.yml #{current_path}/app/db.yml"
|
129
128
|
# link unicorn.ru
|
130
|
-
run "ln -sf #{shared_path}/
|
129
|
+
run "ln -sf #{shared_path}/app/unicorn.ru #{current_path}/app/unicorn.ru"
|
131
130
|
end
|
132
131
|
|
133
132
|
namespace :rollback do
|
data/lib/picky/application.rb
CHANGED
@@ -9,7 +9,7 @@ class Application
|
|
9
9
|
routing.call env
|
10
10
|
end
|
11
11
|
|
12
|
-
#
|
12
|
+
#
|
13
13
|
#
|
14
14
|
def self.indexes &block
|
15
15
|
indexes_configuration.instance_eval &block
|
@@ -24,7 +24,7 @@ class Application
|
|
24
24
|
@indexes = Configuration::Indexes.new # Is instance a problem?
|
25
25
|
end
|
26
26
|
|
27
|
-
#
|
27
|
+
#
|
28
28
|
#
|
29
29
|
def self.queries &block
|
30
30
|
queries_configuration.instance_eval &block
|
@@ -1,17 +1,15 @@
|
|
1
1
|
module Cacher
|
2
|
-
|
3
|
-
# A cache generator holds an index
|
4
|
-
#
|
5
|
-
# TODO Rename to index_type.
|
2
|
+
|
3
|
+
# A cache generator holds an index.
|
6
4
|
#
|
7
5
|
class Generator
|
8
|
-
|
6
|
+
|
9
7
|
attr_reader :index
|
10
|
-
|
8
|
+
|
11
9
|
def initialize index
|
12
10
|
@index = index
|
13
11
|
end
|
14
|
-
|
12
|
+
|
15
13
|
end
|
16
|
-
|
14
|
+
|
17
15
|
end
|
@@ -3,7 +3,7 @@ module Cacher
|
|
3
3
|
# The partial generator uses a subtoken(downto:1) generator as default.
|
4
4
|
#
|
5
5
|
class PartialGenerator < Generator
|
6
|
-
|
6
|
+
|
7
7
|
# Generate a similarity index based on the given index.
|
8
8
|
#
|
9
9
|
def generate strategy = Partial::Subtoken.new(:down_to => 1)
|
@@ -11,5 +11,5 @@ module Cacher
|
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
end
|
@@ -58,7 +58,7 @@ module Configuration
|
|
58
58
|
@indexer || @indexer = @indexer_class.new(type, self)
|
59
59
|
end
|
60
60
|
def tokenizer
|
61
|
-
@tokenizer || @tokenizer = @tokenizer_class.new
|
61
|
+
@tokenizer || @tokenizer = @tokenizer_class.new
|
62
62
|
end
|
63
63
|
def virtual?
|
64
64
|
!!virtual
|
@@ -16,7 +16,7 @@ module Configuration
|
|
16
16
|
|
17
17
|
# Delegates
|
18
18
|
#
|
19
|
-
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :
|
19
|
+
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
|
20
20
|
|
21
21
|
#
|
22
22
|
#
|
@@ -25,7 +25,7 @@ module Configuration
|
|
25
25
|
def maximum_tokens amount
|
26
26
|
Query::Tokens.maximum = amount
|
27
27
|
end
|
28
|
-
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :
|
28
|
+
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
|
29
29
|
|
30
30
|
end
|
31
31
|
|
@@ -12,12 +12,12 @@ module Configuration
|
|
12
12
|
fields << options
|
13
13
|
options = {}
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
@name = name
|
17
17
|
@source = source
|
18
18
|
# dup, if field is reused. TODO Rewrite.
|
19
19
|
@fields = fields.map { |field| field = field.dup; field.type = self; field }
|
20
|
-
|
20
|
+
|
21
21
|
@after_indexing = options[:after_indexing]
|
22
22
|
@result_type = options[:result_type] || name
|
23
23
|
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
|
@@ -27,9 +27,6 @@ module Configuration
|
|
27
27
|
categories = fields.map { |field| field.generate }
|
28
28
|
Index::Type.new name, result_type, ignore_unassigned_tokens, *categories
|
29
29
|
end
|
30
|
-
def table_name
|
31
|
-
self # FIXME UGH, Remove anyway
|
32
|
-
end
|
33
30
|
def take_snapshot
|
34
31
|
source.take_snapshot self
|
35
32
|
end
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -162,7 +162,7 @@ module Index
|
|
162
162
|
def weights_cache_path
|
163
163
|
cache_path "#{category.name}_weights"
|
164
164
|
end
|
165
|
-
|
165
|
+
|
166
166
|
# Loads all indexes into this category.
|
167
167
|
#
|
168
168
|
def load
|
@@ -185,29 +185,10 @@ module Index
|
|
185
185
|
puts "#{Time.now}: Loading the weights for #{identifier} from the cache."
|
186
186
|
load_the :weights, weights_cache_path
|
187
187
|
end
|
188
|
-
|
189
|
-
# TODO Decide on the fate of this.
|
190
|
-
#
|
191
|
-
# # Generates similar index entries. If you search for bla, you will also find the blarf and vice versa.
|
192
|
-
# #
|
193
|
-
# # Examples:
|
194
|
-
# # title.generate_similar_from { :bla => :blarf }
|
195
|
-
# #
|
196
|
-
# # Note: Be careful with this, as it uses up a lot of memory.
|
197
|
-
# #
|
198
|
-
# def generate_similar_from mapping
|
199
|
-
# mapping.each_pair do |one, other|
|
200
|
-
# one_ids = self.index[one]
|
201
|
-
# other_ids = self.index[other]
|
202
|
-
#
|
203
|
-
# self.index[one] += other_ids || [] if one_ids
|
204
|
-
# self.index[other] += one_ids || [] if other_ids
|
205
|
-
# end
|
206
|
-
# end
|
207
|
-
|
188
|
+
|
208
189
|
# Generation
|
209
190
|
#
|
210
|
-
|
191
|
+
|
211
192
|
# This method
|
212
193
|
# * loads the base index from the db
|
213
194
|
# * generates derived indexes
|
@@ -232,17 +213,17 @@ module Index
|
|
232
213
|
def cache_from_memory_generation_message
|
233
214
|
puts "#{Time.now}: Generating derived caches from memory for #{identifier}."
|
234
215
|
end
|
235
|
-
|
216
|
+
|
236
217
|
# Generates the weights and similarity from the main index.
|
237
218
|
#
|
238
219
|
def generate_derived
|
239
220
|
generate_weights
|
240
221
|
generate_similarity
|
241
222
|
end
|
242
|
-
|
223
|
+
|
243
224
|
# Load the data from the db.
|
244
225
|
#
|
245
|
-
def load_from_index_file
|
226
|
+
def load_from_index_file
|
246
227
|
clear
|
247
228
|
retrieve
|
248
229
|
end
|
data/lib/picky/indexers/solr.rb
CHANGED
@@ -28,16 +28,16 @@ module Indexers
|
|
28
28
|
#
|
29
29
|
DB.connect
|
30
30
|
results = DB.connection.execute statement
|
31
|
-
|
31
|
+
|
32
32
|
return unless results # TODO check
|
33
|
-
|
33
|
+
|
34
34
|
type_name = @type.name.to_s
|
35
|
-
|
35
|
+
|
36
36
|
solr.delete_by_query "type:#{type_name}"
|
37
37
|
solr.commit
|
38
|
-
|
38
|
+
|
39
39
|
documents = []
|
40
|
-
|
40
|
+
|
41
41
|
results.each do |indexed_id, *values|
|
42
42
|
values.each &:downcase!
|
43
43
|
documents << hashed(values).merge(:id => indexed_id, :type => type_name)
|
data/lib/picky/indexes.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# TODO I should really do a Types collector class which does all this!
|
2
|
+
#
|
1
3
|
module Indexes
|
2
4
|
|
3
5
|
mattr_accessor :configuration, :types, :type_mapping
|
@@ -17,6 +19,7 @@ module Indexes
|
|
17
19
|
puts "Indexing using #{Cores.max_processors} processors."
|
18
20
|
Cores.forked self.fields, :randomly => true do |field|
|
19
21
|
# Reestablish DB connection.
|
22
|
+
#
|
20
23
|
DB.connect # TODO Rewrite!
|
21
24
|
field.index
|
22
25
|
field.cache
|
@@ -142,7 +145,7 @@ module Indexes
|
|
142
145
|
#
|
143
146
|
#
|
144
147
|
def self.clear
|
145
|
-
self.types = []
|
148
|
+
self.types = [] # TODO self.types = Types.new
|
146
149
|
end
|
147
150
|
|
148
151
|
|
data/lib/picky/loader.rb
CHANGED
@@ -41,21 +41,13 @@ module Loader
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
-
# def self.add_lib_dir
|
45
|
-
# lib_dir = File.join(SEARCH_ROOT, 'lib')
|
46
|
-
# $:.unshift lib_dir unless $:.include?(lib_dir)
|
47
|
-
# end
|
48
|
-
|
49
44
|
# Load the user's application.
|
50
45
|
#
|
51
46
|
def self.load_application
|
52
|
-
# DB.connect # FIXME only needed when indexing.
|
53
47
|
# Load the user's application.
|
54
48
|
#
|
55
49
|
exclaim 'Loading Application.'
|
56
50
|
|
57
|
-
load_all_user_in 'app/initializers'
|
58
|
-
|
59
51
|
# Add lib dir to load path.
|
60
52
|
#
|
61
53
|
# add_lib_dir
|
@@ -63,6 +55,7 @@ module Loader
|
|
63
55
|
# Picky autoloading.
|
64
56
|
#
|
65
57
|
begin
|
58
|
+
load_all_user_in 'lib/initializers'
|
66
59
|
load_all_user_in 'lib/tokenizers'
|
67
60
|
load_all_user_in 'lib/indexers'
|
68
61
|
load_all_user_in 'lib/query'
|
@@ -72,21 +65,14 @@ module Loader
|
|
72
65
|
retry
|
73
66
|
end
|
74
67
|
|
75
|
-
#
|
68
|
+
# Load the user's config.
|
76
69
|
#
|
77
70
|
load_user 'app/logging'
|
78
|
-
# load_user 'app/config'
|
79
|
-
# Configuration.apply
|
80
|
-
|
81
|
-
# Require the user's application.
|
82
|
-
#
|
83
71
|
load_user 'app/application'
|
84
72
|
|
85
|
-
#
|
73
|
+
# TODO Rewrite
|
86
74
|
#
|
87
|
-
|
88
|
-
#
|
89
|
-
Query::Qualifiers.instance.prepare # TODO Rewrite
|
75
|
+
Query::Qualifiers.instance.prepare
|
90
76
|
|
91
77
|
exclaim "Application loaded."
|
92
78
|
end
|
@@ -94,19 +80,9 @@ module Loader
|
|
94
80
|
# Loads the framework.
|
95
81
|
#
|
96
82
|
def self.load_framework
|
97
|
-
#
|
98
|
-
|
99
|
-
# exclaim 'Compiling C code.'
|
100
|
-
require_relative 'initializers/ext'
|
101
|
-
|
102
|
-
require 'rack_fast_escape'
|
103
|
-
# exclaim 'Loaded rack_fast_escape.'
|
104
|
-
require 'text'
|
105
|
-
# exclaim 'Loaded text.'
|
106
|
-
|
107
|
-
# Extend path with lib
|
83
|
+
# Compile C-Code. TODO Remove as soon as stable. Remove also mentioned file.
|
108
84
|
#
|
109
|
-
|
85
|
+
require_relative 'initializers/ext'
|
110
86
|
|
111
87
|
# Load extensions.
|
112
88
|
#
|
@@ -114,7 +90,6 @@ module Loader
|
|
114
90
|
load_relative 'extensions/symbol'
|
115
91
|
load_relative 'extensions/module'
|
116
92
|
load_relative 'extensions/hash'
|
117
|
-
# exclaim "Loaded extensions."
|
118
93
|
|
119
94
|
# Load harakiri.
|
120
95
|
#
|
@@ -126,23 +101,12 @@ module Loader
|
|
126
101
|
load_relative 'helpers/cache'
|
127
102
|
load_relative 'helpers/measuring'
|
128
103
|
load_relative 'helpers/search'
|
129
|
-
# exclaim "Loaded helpers."
|
130
104
|
|
131
105
|
# Signal handling
|
132
106
|
#
|
133
107
|
load_relative 'signals'
|
134
|
-
# exclaim "Loaded signals handling."
|
135
108
|
|
136
|
-
#
|
137
|
-
#
|
138
|
-
Dir['plugins/*'].each do |directory|
|
139
|
-
extend_load_path directory
|
140
|
-
extend_load_path directory, 'lib'
|
141
|
-
load "#{directory.gsub!(/plugins\//, '')}.rb"
|
142
|
-
end
|
143
|
-
# exclaim "Loaded plugins."
|
144
|
-
|
145
|
-
# Require the necessary libs. Referenced modules first.
|
109
|
+
# Various.
|
146
110
|
#
|
147
111
|
load_relative 'loggers/search'
|
148
112
|
load_relative 'umlaut_substituter'
|
@@ -178,8 +142,6 @@ module Loader
|
|
178
142
|
|
179
143
|
# Convenience accessors for generators.
|
180
144
|
#
|
181
|
-
# TODO Just remove from under Cacher?
|
182
|
-
#
|
183
145
|
load_relative 'cacher/convenience'
|
184
146
|
|
185
147
|
# Index generators.
|
@@ -213,23 +175,24 @@ module Loader
|
|
213
175
|
#
|
214
176
|
load_relative 'query/combination'
|
215
177
|
load_relative 'query/combinations'
|
216
|
-
|
178
|
+
|
217
179
|
load_relative 'query/allocation'
|
218
180
|
load_relative 'query/allocations'
|
219
|
-
|
181
|
+
|
220
182
|
load_relative 'query/qualifiers'
|
221
183
|
load_relative 'query/weigher'
|
222
184
|
load_relative 'query/combinator'
|
223
|
-
|
185
|
+
|
224
186
|
load_relative 'query/weights'
|
225
|
-
|
187
|
+
|
226
188
|
# Query.
|
227
189
|
#
|
228
190
|
load_relative 'query/base'
|
229
191
|
load_relative 'query/live'
|
230
192
|
load_relative 'query/full'
|
231
|
-
|
232
|
-
|
193
|
+
#
|
194
|
+
load_relative 'query/solr' # TODO
|
195
|
+
|
233
196
|
# Results.
|
234
197
|
#
|
235
198
|
load_relative 'results/base'
|
@@ -254,7 +217,6 @@ module Loader
|
|
254
217
|
load_relative 'configuration/field'
|
255
218
|
load_relative 'configuration/type'
|
256
219
|
load_relative 'configuration/indexes'
|
257
|
-
load_relative 'configuration/configuration'
|
258
220
|
|
259
221
|
# ... in Application.
|
260
222
|
#
|
@@ -274,14 +236,11 @@ module Loader
|
|
274
236
|
#
|
275
237
|
load_relative 'generator'
|
276
238
|
end
|
277
|
-
|
239
|
+
|
240
|
+
# Silenceable puts.
|
241
|
+
#
|
278
242
|
def self.exclaim text
|
279
243
|
puts text
|
280
244
|
end
|
281
245
|
|
282
|
-
def self.extend_load_path *dirs
|
283
|
-
dir = File.join(SEARCH_ROOT, *dirs)
|
284
|
-
$:.unshift dir unless $:.include? dir
|
285
|
-
end
|
286
|
-
|
287
246
|
end
|
data/lib/picky/query/base.rb
CHANGED
@@ -67,28 +67,25 @@ module Query
|
|
67
67
|
# Get the allocations.
|
68
68
|
#
|
69
69
|
allocations = @weigher.allocations_for tokens
|
70
|
-
|
70
|
+
|
71
71
|
# Callbacks.
|
72
72
|
#
|
73
73
|
reduce allocations
|
74
74
|
remove_from allocations
|
75
|
-
|
76
|
-
# TODO allocations#calculate # or better name
|
77
|
-
#
|
78
|
-
|
75
|
+
|
79
76
|
# Remove double allocations.
|
80
77
|
#
|
81
78
|
allocations.uniq
|
82
|
-
|
83
|
-
# Score the allocations.
|
79
|
+
|
80
|
+
# Score the allocations using weights as bias.
|
84
81
|
#
|
85
82
|
allocations.calculate_score weights
|
86
|
-
|
83
|
+
|
87
84
|
# Sort the allocations.
|
88
85
|
# (allocations are sorted according to score, highest to lowest)
|
89
86
|
#
|
90
87
|
allocations.sort
|
91
|
-
|
88
|
+
|
92
89
|
# Return the allocations.
|
93
90
|
#
|
94
91
|
allocations
|
data/lib/picky/query/token.rb
CHANGED
@@ -69,7 +69,7 @@ module Query
|
|
69
69
|
# If the text ends with *, partialize it. If with ", don't.
|
70
70
|
#
|
71
71
|
@@no_partial = /\"$/
|
72
|
-
@@partial =
|
72
|
+
@@partial = /\*$/
|
73
73
|
def partialize
|
74
74
|
self.partial = false and return if @text =~ @@no_partial
|
75
75
|
self.partial = true if @text =~ @@partial
|
@@ -78,7 +78,7 @@ module Query
|
|
78
78
|
# If the text ends with ~ similarize it. If with ", don't.
|
79
79
|
#
|
80
80
|
@@no_similar = /\"$/
|
81
|
-
@@similar =
|
81
|
+
@@similar = /\~$/
|
82
82
|
def similarize
|
83
83
|
self.similar = false and return if @text =~ @@no_similar
|
84
84
|
self.similar = true if @text =~ @@similar
|
@@ -94,10 +94,7 @@ module Query
|
|
94
94
|
def remove_illegals
|
95
95
|
@text.gsub! @@illegals, '' unless @text.blank?
|
96
96
|
end
|
97
|
-
|
98
|
-
# TODO Think about these, remove illegals and normalize...
|
99
|
-
#
|
100
|
-
|
97
|
+
|
101
98
|
# Visitor for tokenizer.
|
102
99
|
#
|
103
100
|
# TODO Rewrite!!!
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -1,32 +1,32 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
module Query
|
4
|
-
|
4
|
+
|
5
5
|
# This class primarily handles switching through similar token constellations.
|
6
6
|
#
|
7
7
|
class Tokens
|
8
|
-
|
8
|
+
|
9
9
|
#
|
10
10
|
#
|
11
11
|
cattr_accessor :maximum
|
12
12
|
self.maximum = 5
|
13
|
-
|
13
|
+
|
14
14
|
# Basically delegates to its internal tokens array.
|
15
15
|
#
|
16
16
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
17
|
-
|
17
|
+
|
18
18
|
#
|
19
19
|
#
|
20
20
|
def initialize tokens = []
|
21
21
|
@tokens = tokens
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
#
|
25
25
|
#
|
26
26
|
def tokenize_with tokenizer
|
27
27
|
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Generates an array in the form of
|
31
31
|
# [
|
32
32
|
# [combination], # of token 1
|
@@ -52,7 +52,7 @@ module Query
|
|
52
52
|
|
53
53
|
# Caps the tokens to the maximum.
|
54
54
|
#
|
55
|
-
#
|
55
|
+
# Note: We could parametrize this if necessary.
|
56
56
|
#
|
57
57
|
def cap
|
58
58
|
@tokens.slice!(@@maximum..-1) if cap?
|
@@ -60,43 +60,31 @@ module Query
|
|
60
60
|
def cap?
|
61
61
|
@tokens.size > @@maximum
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
# Rejects blank tokens.
|
65
65
|
#
|
66
66
|
def reject
|
67
67
|
@tokens.reject! &:blank?
|
68
68
|
end
|
69
|
-
|
70
|
-
# Switches the tokens
|
71
|
-
#
|
72
|
-
# TODO
|
73
|
-
#
|
74
|
-
def next_similar
|
75
|
-
@tokens.first.next_similar unless empty?
|
76
|
-
end
|
77
|
-
|
69
|
+
|
78
70
|
# Returns a solr query.
|
79
71
|
#
|
80
72
|
def to_solr_query
|
81
73
|
@tokens.map(&:to_solr).join ' '
|
82
74
|
end
|
83
|
-
|
75
|
+
|
84
76
|
#
|
85
77
|
#
|
86
78
|
def originals
|
87
79
|
@tokens.map(&:original)
|
88
80
|
end
|
89
|
-
|
81
|
+
|
90
82
|
# Just join the token original texts.
|
91
83
|
#
|
92
84
|
def to_s
|
93
85
|
originals.join ' '
|
94
86
|
end
|
95
87
|
|
96
|
-
# def to_a
|
97
|
-
# @tokens
|
98
|
-
# end
|
99
|
-
|
100
88
|
end
|
101
|
-
|
89
|
+
|
102
90
|
end
|
data/lib/picky/query/weights.rb
CHANGED
@@ -19,8 +19,6 @@ module Query
|
|
19
19
|
|
20
20
|
# Get the weight of an allocation.
|
21
21
|
#
|
22
|
-
# TODO Add a block to evaluate?
|
23
|
-
#
|
24
22
|
def weight_for clustered
|
25
23
|
@weights[clustered] || 0
|
26
24
|
end
|
@@ -43,7 +41,8 @@ module Query
|
|
43
41
|
#
|
44
42
|
categories = combinations.map { |combination| combination.bundle.category }.clustered_uniq
|
45
43
|
|
46
|
-
#
|
44
|
+
# Note: Caching will not be necessary anymore if the
|
45
|
+
# mapping is not necessary anymore.
|
47
46
|
#
|
48
47
|
cached @weights_cache, categories do
|
49
48
|
categories.map! &:name
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Tokenizers
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
|
3
|
+
class Base
|
4
|
+
|
6
5
|
# Stopwords.
|
7
6
|
#
|
8
7
|
def self.stopwords regexp
|
@@ -19,7 +18,7 @@ module Tokenizers
|
|
19
18
|
end
|
20
19
|
end
|
21
20
|
def remove_stopwords text; end
|
22
|
-
|
21
|
+
|
23
22
|
# Contraction.
|
24
23
|
#
|
25
24
|
def self.contract_expressions what, to_what
|
@@ -28,7 +27,7 @@ module Tokenizers
|
|
28
27
|
end
|
29
28
|
end
|
30
29
|
def contract text; end
|
31
|
-
|
30
|
+
|
32
31
|
# Illegals.
|
33
32
|
#
|
34
33
|
# TODO Should there be a legal?
|
@@ -39,7 +38,7 @@ module Tokenizers
|
|
39
38
|
end
|
40
39
|
end
|
41
40
|
def remove_illegals text; end
|
42
|
-
|
41
|
+
|
43
42
|
# Splitting.
|
44
43
|
#
|
45
44
|
def self.split_text_on regexp
|
@@ -48,7 +47,7 @@ module Tokenizers
|
|
48
47
|
end
|
49
48
|
end
|
50
49
|
def split text; end
|
51
|
-
|
50
|
+
|
52
51
|
# Normalizing.
|
53
52
|
#
|
54
53
|
def self.normalize_words regexp_replaces
|
@@ -63,18 +62,16 @@ module Tokenizers
|
|
63
62
|
end
|
64
63
|
end
|
65
64
|
def normalize_with_patterns text; end
|
66
|
-
|
65
|
+
|
67
66
|
# Illegal after normalizing.
|
68
67
|
#
|
69
|
-
|
70
|
-
#
|
71
|
-
def self.illegal_characters_after regexp
|
68
|
+
def self.illegal_characters_after_splitting regexp
|
72
69
|
define_method :remove_after_normalizing_illegals do |text|
|
73
70
|
text.gsub! regexp, ''
|
74
71
|
end
|
75
72
|
end
|
76
73
|
def remove_after_normalizing_illegals text; end
|
77
|
-
|
74
|
+
|
78
75
|
# Returns a number of tokens, generated from the given text.
|
79
76
|
#
|
80
77
|
# Note:
|
@@ -88,10 +85,10 @@ module Tokenizers
|
|
88
85
|
tokens = tokens_for words # creating tokens / strings
|
89
86
|
process tokens # processing tokens / strings
|
90
87
|
end
|
91
|
-
|
88
|
+
|
92
89
|
# Hooks.
|
93
90
|
#
|
94
|
-
|
91
|
+
|
95
92
|
# Preprocessing.
|
96
93
|
#
|
97
94
|
def preprocess text; end
|
@@ -104,7 +101,7 @@ module Tokenizers
|
|
104
101
|
reject tokens # Reject any tokens that don't meet criteria
|
105
102
|
tokens
|
106
103
|
end
|
107
|
-
|
104
|
+
|
108
105
|
# Rejects blank tokens.
|
109
106
|
#
|
110
107
|
def reject tokens
|
@@ -125,6 +122,6 @@ module Tokenizers
|
|
125
122
|
def empty_tokens
|
126
123
|
::Query::Tokens.new
|
127
124
|
end
|
128
|
-
|
125
|
+
|
129
126
|
end
|
130
127
|
end
|
data/lib/tasks/server.rake
CHANGED
@@ -21,7 +21,7 @@ namespace :server do
|
|
21
21
|
:port => 4000,
|
22
22
|
:daemonize => false
|
23
23
|
}
|
24
|
-
# TODO Move port!
|
24
|
+
# TODO Move port configuration!
|
25
25
|
port = SEARCH_ENVIRONMENT == 'production' ? 6000 : 4000
|
26
26
|
`export SEARCH_ENV=#{SEARCH_ENVIRONMENT}; unicorn -p #{config[SEARCH_ENVIRONMENT][:port]} -c #{File.join(SEARCH_ROOT, 'app/unicorn.ru')} #{config[SEARCH_ENVIRONMENT][:daemonize] ? '-D' : ''} #{File.join(SEARCH_ROOT, 'app/application.ru')}`
|
27
27
|
end
|
@@ -11,7 +11,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
11
11
|
# Note: Much more is possible, but let's start out easy.
|
12
12
|
#
|
13
13
|
# Ask me if you have questions!
|
14
|
-
#
|
14
|
+
#
|
15
15
|
|
16
16
|
indexes do
|
17
17
|
illegal_characters(/[^äöüa-zA-Z0-9\s\/\-\"\&\.]/)
|
@@ -5,7 +5,17 @@
|
|
5
5
|
#
|
6
6
|
require 'picky'
|
7
7
|
|
8
|
-
# Load your application. This requires the files in
|
8
|
+
# Load your application. This requires the following files in
|
9
|
+
#
|
10
|
+
# * /lib/initializers/*.rb
|
11
|
+
# * /lib/tokenizers/*.rb
|
12
|
+
# * /lib/indexers/*.rb
|
13
|
+
# * /lib/query/*.rb
|
14
|
+
#
|
15
|
+
# * /app/logging.rb
|
16
|
+
# * /app/application.rb
|
17
|
+
#
|
18
|
+
# to be required (in that order).
|
9
19
|
#
|
10
20
|
Loader.load_application
|
11
21
|
|
@@ -74,9 +74,9 @@ describe Results do
|
|
74
74
|
@allocations = stub :allocations
|
75
75
|
@results.stub! :allocations => @allocations
|
76
76
|
end
|
77
|
-
it 'should
|
77
|
+
it 'should process' do
|
78
78
|
@allocations.should_receive(:process!).once.with(20, 0).ordered
|
79
|
-
|
79
|
+
|
80
80
|
@results.prepare!
|
81
81
|
end
|
82
82
|
end
|
@@ -2,8 +2,6 @@
|
|
2
2
|
#
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
|
-
# TODO CLEAN UP.
|
6
|
-
#
|
7
5
|
describe Tokenizers::Index do
|
8
6
|
|
9
7
|
before(:each) do
|
@@ -21,11 +19,11 @@ describe Tokenizers::Index do
|
|
21
19
|
t1 = stub(:token, :to_s => '')
|
22
20
|
t2 = stub(:token, :to_s => 'not blank')
|
23
21
|
t3 = stub(:token, :to_s => '')
|
24
|
-
|
22
|
+
|
25
23
|
@tokenizer.reject([t1, t2, t3]).should == [t2]
|
26
24
|
end
|
27
25
|
end
|
28
|
-
|
26
|
+
|
29
27
|
describe "tokenize" do
|
30
28
|
describe "normalizing" do
|
31
29
|
def self.it_should_normalize_token(text, expected)
|
@@ -34,6 +32,7 @@ describe Tokenizers::Index do
|
|
34
32
|
end
|
35
33
|
end
|
36
34
|
# defaults
|
35
|
+
#
|
37
36
|
it_should_normalize_token 'it_should_not_normalize_by_default', :it_should_not_normalize_by_default
|
38
37
|
end
|
39
38
|
describe "tokenizing" do
|
@@ -43,6 +42,7 @@ describe Tokenizers::Index do
|
|
43
42
|
end
|
44
43
|
end
|
45
44
|
# defaults
|
45
|
+
#
|
46
46
|
it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
|
47
47
|
it_should_tokenize_token 'und', [:und]
|
48
48
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-10-02 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -194,7 +194,6 @@ files:
|
|
194
194
|
- lib/picky/cacher/weights/logarithmic.rb
|
195
195
|
- lib/picky/cacher/weights/strategy.rb
|
196
196
|
- lib/picky/cacher/weights_generator.rb
|
197
|
-
- lib/picky/configuration/configuration.rb
|
198
197
|
- lib/picky/configuration/field.rb
|
199
198
|
- lib/picky/configuration/indexes.rb
|
200
199
|
- lib/picky/configuration/queries.rb
|
@@ -289,9 +288,7 @@ files:
|
|
289
288
|
- spec/lib/cacher/similarity_generator_spec.rb
|
290
289
|
- spec/lib/cacher/weights/logarithmic_spec.rb
|
291
290
|
- spec/lib/cacher/weights_generator_spec.rb
|
292
|
-
- spec/lib/configuration/configuration_spec.rb
|
293
291
|
- spec/lib/configuration/type_spec.rb
|
294
|
-
- spec/lib/configuration_spec.rb
|
295
292
|
- spec/lib/cores_spec.rb
|
296
293
|
- spec/lib/extensions/array_spec.rb
|
297
294
|
- spec/lib/extensions/hash_spec.rb
|
@@ -375,9 +372,7 @@ test_files:
|
|
375
372
|
- spec/lib/cacher/similarity_generator_spec.rb
|
376
373
|
- spec/lib/cacher/weights/logarithmic_spec.rb
|
377
374
|
- spec/lib/cacher/weights_generator_spec.rb
|
378
|
-
- spec/lib/configuration/configuration_spec.rb
|
379
375
|
- spec/lib/configuration/type_spec.rb
|
380
|
-
- spec/lib/configuration_spec.rb
|
381
376
|
- spec/lib/cores_spec.rb
|
382
377
|
- spec/lib/extensions/array_spec.rb
|
383
378
|
- spec/lib/extensions/hash_spec.rb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe Configuration do
|
5
|
-
|
6
|
-
# describe 'field' do
|
7
|
-
# it 'should define a new type' do
|
8
|
-
# Configuration::Field.should_receive(:new).once.with :some_name, :some_options
|
9
|
-
#
|
10
|
-
# Configuration.field :some_name, :some_options
|
11
|
-
# end
|
12
|
-
# it 'should respect the default' do
|
13
|
-
# Configuration::Field.should_receive(:new).once.with :some_name, {}
|
14
|
-
#
|
15
|
-
# Configuration.field :some_name
|
16
|
-
# end
|
17
|
-
# end
|
18
|
-
#
|
19
|
-
# describe 'type' do
|
20
|
-
# it 'should define a new type' do
|
21
|
-
# Configuration::Type.should_receive(:new).once.with :some_name, :some_field, :some_other_field
|
22
|
-
#
|
23
|
-
# Configuration.type :some_name, :some_field, :some_other_field
|
24
|
-
# end
|
25
|
-
# end
|
26
|
-
#
|
27
|
-
# describe 'indexes' do
|
28
|
-
# it 'should define the indexes and save' do
|
29
|
-
# indexes = mock :indexes
|
30
|
-
#
|
31
|
-
# Configuration::Indexes.should_receive(:new).once.with(:some_types).and_return indexes
|
32
|
-
# indexes.should_receive(:save).once.with
|
33
|
-
#
|
34
|
-
# Configuration.indexes :some_types
|
35
|
-
# end
|
36
|
-
# end
|
37
|
-
|
38
|
-
end
|