picky 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bundling.rb +0 -2
- data/lib/deployment.rb +2 -3
- data/lib/picky/application.rb +2 -2
- data/lib/picky/cacher/generator.rb +6 -8
- data/lib/picky/cacher/partial_generator.rb +2 -2
- data/lib/picky/configuration/field.rb +1 -1
- data/lib/picky/configuration/indexes.rb +1 -1
- data/lib/picky/configuration/queries.rb +1 -1
- data/lib/picky/configuration/type.rb +2 -5
- data/lib/picky/index/bundle.rb +6 -25
- data/lib/picky/indexers/solr.rb +5 -5
- data/lib/picky/indexes.rb +4 -1
- data/lib/picky/initializers/ext.rb +0 -2
- data/lib/picky/loader.rb +17 -58
- data/lib/picky/query/base.rb +6 -9
- data/lib/picky/query/token.rb +3 -6
- data/lib/picky/query/tokens.rb +12 -24
- data/lib/picky/query/weights.rb +2 -3
- data/lib/picky/tokenizers/base.rb +14 -17
- data/lib/picky/tokenizers/index.rb +1 -1
- data/lib/picky/tokenizers/query.rb +1 -1
- data/lib/tasks/server.rake +1 -1
- data/prototype_project/app/application.rb +1 -1
- data/prototype_project/app/application.ru +11 -1
- data/spec/lib/index/bundle_spec.rb +1 -1
- data/spec/lib/results/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +4 -4
- metadata +3 -8
- data/lib/picky/configuration/configuration.rb +0 -13
- data/spec/lib/configuration/configuration_spec.rb +0 -38
- data/spec/lib/configuration_spec.rb +0 -8
data/lib/bundling.rb
CHANGED
data/lib/deployment.rb
CHANGED
@@ -124,10 +124,9 @@ module Picky
|
|
124
124
|
run "rm -rf #{current_path}/log; ln -sf #{shared_path}/log #{current_path}/log"
|
125
125
|
run "rm -rf #{current_path}/index; ln -sf #{shared_path}/index #{current_path}/index"
|
126
126
|
# link database-config files
|
127
|
-
run "ln -sf #{shared_path}/
|
128
|
-
run "ln -sf #{shared_path}/config/source.yml #{current_path}/config/db/source.yml"
|
127
|
+
run "ln -sf #{shared_path}/app/db.yml #{current_path}/app/db.yml"
|
129
128
|
# link unicorn.ru
|
130
|
-
run "ln -sf #{shared_path}/
|
129
|
+
run "ln -sf #{shared_path}/app/unicorn.ru #{current_path}/app/unicorn.ru"
|
131
130
|
end
|
132
131
|
|
133
132
|
namespace :rollback do
|
data/lib/picky/application.rb
CHANGED
@@ -9,7 +9,7 @@ class Application
|
|
9
9
|
routing.call env
|
10
10
|
end
|
11
11
|
|
12
|
-
#
|
12
|
+
#
|
13
13
|
#
|
14
14
|
def self.indexes &block
|
15
15
|
indexes_configuration.instance_eval &block
|
@@ -24,7 +24,7 @@ class Application
|
|
24
24
|
@indexes = Configuration::Indexes.new # Is instance a problem?
|
25
25
|
end
|
26
26
|
|
27
|
-
#
|
27
|
+
#
|
28
28
|
#
|
29
29
|
def self.queries &block
|
30
30
|
queries_configuration.instance_eval &block
|
@@ -1,17 +1,15 @@
|
|
1
1
|
module Cacher
|
2
|
-
|
3
|
-
# A cache generator holds an index
|
4
|
-
#
|
5
|
-
# TODO Rename to index_type.
|
2
|
+
|
3
|
+
# A cache generator holds an index.
|
6
4
|
#
|
7
5
|
class Generator
|
8
|
-
|
6
|
+
|
9
7
|
attr_reader :index
|
10
|
-
|
8
|
+
|
11
9
|
def initialize index
|
12
10
|
@index = index
|
13
11
|
end
|
14
|
-
|
12
|
+
|
15
13
|
end
|
16
|
-
|
14
|
+
|
17
15
|
end
|
@@ -3,7 +3,7 @@ module Cacher
|
|
3
3
|
# The partial generator uses a subtoken(downto:1) generator as default.
|
4
4
|
#
|
5
5
|
class PartialGenerator < Generator
|
6
|
-
|
6
|
+
|
7
7
|
# Generate a similarity index based on the given index.
|
8
8
|
#
|
9
9
|
def generate strategy = Partial::Subtoken.new(:down_to => 1)
|
@@ -11,5 +11,5 @@ module Cacher
|
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
end
|
@@ -58,7 +58,7 @@ module Configuration
|
|
58
58
|
@indexer || @indexer = @indexer_class.new(type, self)
|
59
59
|
end
|
60
60
|
def tokenizer
|
61
|
-
@tokenizer || @tokenizer = @tokenizer_class.new
|
61
|
+
@tokenizer || @tokenizer = @tokenizer_class.new
|
62
62
|
end
|
63
63
|
def virtual?
|
64
64
|
!!virtual
|
@@ -16,7 +16,7 @@ module Configuration
|
|
16
16
|
|
17
17
|
# Delegates
|
18
18
|
#
|
19
|
-
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after, :to => :default_index
|
19
|
+
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
|
20
20
|
|
21
21
|
#
|
22
22
|
#
|
@@ -25,7 +25,7 @@ module Configuration
|
|
25
25
|
def maximum_tokens amount
|
26
26
|
Query::Tokens.maximum = amount
|
27
27
|
end
|
28
|
-
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after, :to => :default_index
|
28
|
+
delegate :illegal_characters, :contract_expressions, :stopwords, :split_text_on, :normalize_words, :illegal_characters_after_splitting, :to => :default_index
|
29
29
|
|
30
30
|
end
|
31
31
|
|
@@ -12,12 +12,12 @@ module Configuration
|
|
12
12
|
fields << options
|
13
13
|
options = {}
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
@name = name
|
17
17
|
@source = source
|
18
18
|
# dup, if field is reused. TODO Rewrite.
|
19
19
|
@fields = fields.map { |field| field = field.dup; field.type = self; field }
|
20
|
-
|
20
|
+
|
21
21
|
@after_indexing = options[:after_indexing]
|
22
22
|
@result_type = options[:result_type] || name
|
23
23
|
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
|
@@ -27,9 +27,6 @@ module Configuration
|
|
27
27
|
categories = fields.map { |field| field.generate }
|
28
28
|
Index::Type.new name, result_type, ignore_unassigned_tokens, *categories
|
29
29
|
end
|
30
|
-
def table_name
|
31
|
-
self # FIXME UGH, Remove anyway
|
32
|
-
end
|
33
30
|
def take_snapshot
|
34
31
|
source.take_snapshot self
|
35
32
|
end
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -162,7 +162,7 @@ module Index
|
|
162
162
|
def weights_cache_path
|
163
163
|
cache_path "#{category.name}_weights"
|
164
164
|
end
|
165
|
-
|
165
|
+
|
166
166
|
# Loads all indexes into this category.
|
167
167
|
#
|
168
168
|
def load
|
@@ -185,29 +185,10 @@ module Index
|
|
185
185
|
puts "#{Time.now}: Loading the weights for #{identifier} from the cache."
|
186
186
|
load_the :weights, weights_cache_path
|
187
187
|
end
|
188
|
-
|
189
|
-
# TODO Decide on the fate of this.
|
190
|
-
#
|
191
|
-
# # Generates similar index entries. If you search for bla, you will also find the blarf and vice versa.
|
192
|
-
# #
|
193
|
-
# # Examples:
|
194
|
-
# # title.generate_similar_from { :bla => :blarf }
|
195
|
-
# #
|
196
|
-
# # Note: Be careful with this, as it uses up a lot of memory.
|
197
|
-
# #
|
198
|
-
# def generate_similar_from mapping
|
199
|
-
# mapping.each_pair do |one, other|
|
200
|
-
# one_ids = self.index[one]
|
201
|
-
# other_ids = self.index[other]
|
202
|
-
#
|
203
|
-
# self.index[one] += other_ids || [] if one_ids
|
204
|
-
# self.index[other] += one_ids || [] if other_ids
|
205
|
-
# end
|
206
|
-
# end
|
207
|
-
|
188
|
+
|
208
189
|
# Generation
|
209
190
|
#
|
210
|
-
|
191
|
+
|
211
192
|
# This method
|
212
193
|
# * loads the base index from the db
|
213
194
|
# * generates derived indexes
|
@@ -232,17 +213,17 @@ module Index
|
|
232
213
|
def cache_from_memory_generation_message
|
233
214
|
puts "#{Time.now}: Generating derived caches from memory for #{identifier}."
|
234
215
|
end
|
235
|
-
|
216
|
+
|
236
217
|
# Generates the weights and similarity from the main index.
|
237
218
|
#
|
238
219
|
def generate_derived
|
239
220
|
generate_weights
|
240
221
|
generate_similarity
|
241
222
|
end
|
242
|
-
|
223
|
+
|
243
224
|
# Load the data from the db.
|
244
225
|
#
|
245
|
-
def load_from_index_file
|
226
|
+
def load_from_index_file
|
246
227
|
clear
|
247
228
|
retrieve
|
248
229
|
end
|
data/lib/picky/indexers/solr.rb
CHANGED
@@ -28,16 +28,16 @@ module Indexers
|
|
28
28
|
#
|
29
29
|
DB.connect
|
30
30
|
results = DB.connection.execute statement
|
31
|
-
|
31
|
+
|
32
32
|
return unless results # TODO check
|
33
|
-
|
33
|
+
|
34
34
|
type_name = @type.name.to_s
|
35
|
-
|
35
|
+
|
36
36
|
solr.delete_by_query "type:#{type_name}"
|
37
37
|
solr.commit
|
38
|
-
|
38
|
+
|
39
39
|
documents = []
|
40
|
-
|
40
|
+
|
41
41
|
results.each do |indexed_id, *values|
|
42
42
|
values.each &:downcase!
|
43
43
|
documents << hashed(values).merge(:id => indexed_id, :type => type_name)
|
data/lib/picky/indexes.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# TODO I should really do a Types collector class which does all this!
|
2
|
+
#
|
1
3
|
module Indexes
|
2
4
|
|
3
5
|
mattr_accessor :configuration, :types, :type_mapping
|
@@ -17,6 +19,7 @@ module Indexes
|
|
17
19
|
puts "Indexing using #{Cores.max_processors} processors."
|
18
20
|
Cores.forked self.fields, :randomly => true do |field|
|
19
21
|
# Reestablish DB connection.
|
22
|
+
#
|
20
23
|
DB.connect # TODO Rewrite!
|
21
24
|
field.index
|
22
25
|
field.cache
|
@@ -142,7 +145,7 @@ module Indexes
|
|
142
145
|
#
|
143
146
|
#
|
144
147
|
def self.clear
|
145
|
-
self.types = []
|
148
|
+
self.types = [] # TODO self.types = Types.new
|
146
149
|
end
|
147
150
|
|
148
151
|
|
data/lib/picky/loader.rb
CHANGED
@@ -41,21 +41,13 @@ module Loader
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
-
# def self.add_lib_dir
|
45
|
-
# lib_dir = File.join(SEARCH_ROOT, 'lib')
|
46
|
-
# $:.unshift lib_dir unless $:.include?(lib_dir)
|
47
|
-
# end
|
48
|
-
|
49
44
|
# Load the user's application.
|
50
45
|
#
|
51
46
|
def self.load_application
|
52
|
-
# DB.connect # FIXME only needed when indexing.
|
53
47
|
# Load the user's application.
|
54
48
|
#
|
55
49
|
exclaim 'Loading Application.'
|
56
50
|
|
57
|
-
load_all_user_in 'app/initializers'
|
58
|
-
|
59
51
|
# Add lib dir to load path.
|
60
52
|
#
|
61
53
|
# add_lib_dir
|
@@ -63,6 +55,7 @@ module Loader
|
|
63
55
|
# Picky autoloading.
|
64
56
|
#
|
65
57
|
begin
|
58
|
+
load_all_user_in 'lib/initializers'
|
66
59
|
load_all_user_in 'lib/tokenizers'
|
67
60
|
load_all_user_in 'lib/indexers'
|
68
61
|
load_all_user_in 'lib/query'
|
@@ -72,21 +65,14 @@ module Loader
|
|
72
65
|
retry
|
73
66
|
end
|
74
67
|
|
75
|
-
#
|
68
|
+
# Load the user's config.
|
76
69
|
#
|
77
70
|
load_user 'app/logging'
|
78
|
-
# load_user 'app/config'
|
79
|
-
# Configuration.apply
|
80
|
-
|
81
|
-
# Require the user's application.
|
82
|
-
#
|
83
71
|
load_user 'app/application'
|
84
72
|
|
85
|
-
#
|
73
|
+
# TODO Rewrite
|
86
74
|
#
|
87
|
-
|
88
|
-
#
|
89
|
-
Query::Qualifiers.instance.prepare # TODO Rewrite
|
75
|
+
Query::Qualifiers.instance.prepare
|
90
76
|
|
91
77
|
exclaim "Application loaded."
|
92
78
|
end
|
@@ -94,19 +80,9 @@ module Loader
|
|
94
80
|
# Loads the framework.
|
95
81
|
#
|
96
82
|
def self.load_framework
|
97
|
-
#
|
98
|
-
|
99
|
-
# exclaim 'Compiling C code.'
|
100
|
-
require_relative 'initializers/ext'
|
101
|
-
|
102
|
-
require 'rack_fast_escape'
|
103
|
-
# exclaim 'Loaded rack_fast_escape.'
|
104
|
-
require 'text'
|
105
|
-
# exclaim 'Loaded text.'
|
106
|
-
|
107
|
-
# Extend path with lib
|
83
|
+
# Compile C-Code. TODO Remove as soon as stable. Remove also mentioned file.
|
108
84
|
#
|
109
|
-
|
85
|
+
require_relative 'initializers/ext'
|
110
86
|
|
111
87
|
# Load extensions.
|
112
88
|
#
|
@@ -114,7 +90,6 @@ module Loader
|
|
114
90
|
load_relative 'extensions/symbol'
|
115
91
|
load_relative 'extensions/module'
|
116
92
|
load_relative 'extensions/hash'
|
117
|
-
# exclaim "Loaded extensions."
|
118
93
|
|
119
94
|
# Load harakiri.
|
120
95
|
#
|
@@ -126,23 +101,12 @@ module Loader
|
|
126
101
|
load_relative 'helpers/cache'
|
127
102
|
load_relative 'helpers/measuring'
|
128
103
|
load_relative 'helpers/search'
|
129
|
-
# exclaim "Loaded helpers."
|
130
104
|
|
131
105
|
# Signal handling
|
132
106
|
#
|
133
107
|
load_relative 'signals'
|
134
|
-
# exclaim "Loaded signals handling."
|
135
108
|
|
136
|
-
#
|
137
|
-
#
|
138
|
-
Dir['plugins/*'].each do |directory|
|
139
|
-
extend_load_path directory
|
140
|
-
extend_load_path directory, 'lib'
|
141
|
-
load "#{directory.gsub!(/plugins\//, '')}.rb"
|
142
|
-
end
|
143
|
-
# exclaim "Loaded plugins."
|
144
|
-
|
145
|
-
# Require the necessary libs. Referenced modules first.
|
109
|
+
# Various.
|
146
110
|
#
|
147
111
|
load_relative 'loggers/search'
|
148
112
|
load_relative 'umlaut_substituter'
|
@@ -178,8 +142,6 @@ module Loader
|
|
178
142
|
|
179
143
|
# Convenience accessors for generators.
|
180
144
|
#
|
181
|
-
# TODO Just remove from under Cacher?
|
182
|
-
#
|
183
145
|
load_relative 'cacher/convenience'
|
184
146
|
|
185
147
|
# Index generators.
|
@@ -213,23 +175,24 @@ module Loader
|
|
213
175
|
#
|
214
176
|
load_relative 'query/combination'
|
215
177
|
load_relative 'query/combinations'
|
216
|
-
|
178
|
+
|
217
179
|
load_relative 'query/allocation'
|
218
180
|
load_relative 'query/allocations'
|
219
|
-
|
181
|
+
|
220
182
|
load_relative 'query/qualifiers'
|
221
183
|
load_relative 'query/weigher'
|
222
184
|
load_relative 'query/combinator'
|
223
|
-
|
185
|
+
|
224
186
|
load_relative 'query/weights'
|
225
|
-
|
187
|
+
|
226
188
|
# Query.
|
227
189
|
#
|
228
190
|
load_relative 'query/base'
|
229
191
|
load_relative 'query/live'
|
230
192
|
load_relative 'query/full'
|
231
|
-
|
232
|
-
|
193
|
+
#
|
194
|
+
load_relative 'query/solr' # TODO
|
195
|
+
|
233
196
|
# Results.
|
234
197
|
#
|
235
198
|
load_relative 'results/base'
|
@@ -254,7 +217,6 @@ module Loader
|
|
254
217
|
load_relative 'configuration/field'
|
255
218
|
load_relative 'configuration/type'
|
256
219
|
load_relative 'configuration/indexes'
|
257
|
-
load_relative 'configuration/configuration'
|
258
220
|
|
259
221
|
# ... in Application.
|
260
222
|
#
|
@@ -274,14 +236,11 @@ module Loader
|
|
274
236
|
#
|
275
237
|
load_relative 'generator'
|
276
238
|
end
|
277
|
-
|
239
|
+
|
240
|
+
# Silenceable puts.
|
241
|
+
#
|
278
242
|
def self.exclaim text
|
279
243
|
puts text
|
280
244
|
end
|
281
245
|
|
282
|
-
def self.extend_load_path *dirs
|
283
|
-
dir = File.join(SEARCH_ROOT, *dirs)
|
284
|
-
$:.unshift dir unless $:.include? dir
|
285
|
-
end
|
286
|
-
|
287
246
|
end
|
data/lib/picky/query/base.rb
CHANGED
@@ -67,28 +67,25 @@ module Query
|
|
67
67
|
# Get the allocations.
|
68
68
|
#
|
69
69
|
allocations = @weigher.allocations_for tokens
|
70
|
-
|
70
|
+
|
71
71
|
# Callbacks.
|
72
72
|
#
|
73
73
|
reduce allocations
|
74
74
|
remove_from allocations
|
75
|
-
|
76
|
-
# TODO allocations#calculate # or better name
|
77
|
-
#
|
78
|
-
|
75
|
+
|
79
76
|
# Remove double allocations.
|
80
77
|
#
|
81
78
|
allocations.uniq
|
82
|
-
|
83
|
-
# Score the allocations.
|
79
|
+
|
80
|
+
# Score the allocations using weights as bias.
|
84
81
|
#
|
85
82
|
allocations.calculate_score weights
|
86
|
-
|
83
|
+
|
87
84
|
# Sort the allocations.
|
88
85
|
# (allocations are sorted according to score, highest to lowest)
|
89
86
|
#
|
90
87
|
allocations.sort
|
91
|
-
|
88
|
+
|
92
89
|
# Return the allocations.
|
93
90
|
#
|
94
91
|
allocations
|
data/lib/picky/query/token.rb
CHANGED
@@ -69,7 +69,7 @@ module Query
|
|
69
69
|
# If the text ends with *, partialize it. If with ", don't.
|
70
70
|
#
|
71
71
|
@@no_partial = /\"$/
|
72
|
-
@@partial =
|
72
|
+
@@partial = /\*$/
|
73
73
|
def partialize
|
74
74
|
self.partial = false and return if @text =~ @@no_partial
|
75
75
|
self.partial = true if @text =~ @@partial
|
@@ -78,7 +78,7 @@ module Query
|
|
78
78
|
# If the text ends with ~ similarize it. If with ", don't.
|
79
79
|
#
|
80
80
|
@@no_similar = /\"$/
|
81
|
-
@@similar =
|
81
|
+
@@similar = /\~$/
|
82
82
|
def similarize
|
83
83
|
self.similar = false and return if @text =~ @@no_similar
|
84
84
|
self.similar = true if @text =~ @@similar
|
@@ -94,10 +94,7 @@ module Query
|
|
94
94
|
def remove_illegals
|
95
95
|
@text.gsub! @@illegals, '' unless @text.blank?
|
96
96
|
end
|
97
|
-
|
98
|
-
# TODO Think about these, remove illegals and normalize...
|
99
|
-
#
|
100
|
-
|
97
|
+
|
101
98
|
# Visitor for tokenizer.
|
102
99
|
#
|
103
100
|
# TODO Rewrite!!!
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -1,32 +1,32 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
module Query
|
4
|
-
|
4
|
+
|
5
5
|
# This class primarily handles switching through similar token constellations.
|
6
6
|
#
|
7
7
|
class Tokens
|
8
|
-
|
8
|
+
|
9
9
|
#
|
10
10
|
#
|
11
11
|
cattr_accessor :maximum
|
12
12
|
self.maximum = 5
|
13
|
-
|
13
|
+
|
14
14
|
# Basically delegates to its internal tokens array.
|
15
15
|
#
|
16
16
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
17
|
-
|
17
|
+
|
18
18
|
#
|
19
19
|
#
|
20
20
|
def initialize tokens = []
|
21
21
|
@tokens = tokens
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
#
|
25
25
|
#
|
26
26
|
def tokenize_with tokenizer
|
27
27
|
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Generates an array in the form of
|
31
31
|
# [
|
32
32
|
# [combination], # of token 1
|
@@ -52,7 +52,7 @@ module Query
|
|
52
52
|
|
53
53
|
# Caps the tokens to the maximum.
|
54
54
|
#
|
55
|
-
#
|
55
|
+
# Note: We could parametrize this if necessary.
|
56
56
|
#
|
57
57
|
def cap
|
58
58
|
@tokens.slice!(@@maximum..-1) if cap?
|
@@ -60,43 +60,31 @@ module Query
|
|
60
60
|
def cap?
|
61
61
|
@tokens.size > @@maximum
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
# Rejects blank tokens.
|
65
65
|
#
|
66
66
|
def reject
|
67
67
|
@tokens.reject! &:blank?
|
68
68
|
end
|
69
|
-
|
70
|
-
# Switches the tokens
|
71
|
-
#
|
72
|
-
# TODO
|
73
|
-
#
|
74
|
-
def next_similar
|
75
|
-
@tokens.first.next_similar unless empty?
|
76
|
-
end
|
77
|
-
|
69
|
+
|
78
70
|
# Returns a solr query.
|
79
71
|
#
|
80
72
|
def to_solr_query
|
81
73
|
@tokens.map(&:to_solr).join ' '
|
82
74
|
end
|
83
|
-
|
75
|
+
|
84
76
|
#
|
85
77
|
#
|
86
78
|
def originals
|
87
79
|
@tokens.map(&:original)
|
88
80
|
end
|
89
|
-
|
81
|
+
|
90
82
|
# Just join the token original texts.
|
91
83
|
#
|
92
84
|
def to_s
|
93
85
|
originals.join ' '
|
94
86
|
end
|
95
87
|
|
96
|
-
# def to_a
|
97
|
-
# @tokens
|
98
|
-
# end
|
99
|
-
|
100
88
|
end
|
101
|
-
|
89
|
+
|
102
90
|
end
|
data/lib/picky/query/weights.rb
CHANGED
@@ -19,8 +19,6 @@ module Query
|
|
19
19
|
|
20
20
|
# Get the weight of an allocation.
|
21
21
|
#
|
22
|
-
# TODO Add a block to evaluate?
|
23
|
-
#
|
24
22
|
def weight_for clustered
|
25
23
|
@weights[clustered] || 0
|
26
24
|
end
|
@@ -43,7 +41,8 @@ module Query
|
|
43
41
|
#
|
44
42
|
categories = combinations.map { |combination| combination.bundle.category }.clustered_uniq
|
45
43
|
|
46
|
-
#
|
44
|
+
# Note: Caching will not be necessary anymore if the
|
45
|
+
# mapping is not necessary anymore.
|
47
46
|
#
|
48
47
|
cached @weights_cache, categories do
|
49
48
|
categories.map! &:name
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Tokenizers
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
|
3
|
+
class Base
|
4
|
+
|
6
5
|
# Stopwords.
|
7
6
|
#
|
8
7
|
def self.stopwords regexp
|
@@ -19,7 +18,7 @@ module Tokenizers
|
|
19
18
|
end
|
20
19
|
end
|
21
20
|
def remove_stopwords text; end
|
22
|
-
|
21
|
+
|
23
22
|
# Contraction.
|
24
23
|
#
|
25
24
|
def self.contract_expressions what, to_what
|
@@ -28,7 +27,7 @@ module Tokenizers
|
|
28
27
|
end
|
29
28
|
end
|
30
29
|
def contract text; end
|
31
|
-
|
30
|
+
|
32
31
|
# Illegals.
|
33
32
|
#
|
34
33
|
# TODO Should there be a legal?
|
@@ -39,7 +38,7 @@ module Tokenizers
|
|
39
38
|
end
|
40
39
|
end
|
41
40
|
def remove_illegals text; end
|
42
|
-
|
41
|
+
|
43
42
|
# Splitting.
|
44
43
|
#
|
45
44
|
def self.split_text_on regexp
|
@@ -48,7 +47,7 @@ module Tokenizers
|
|
48
47
|
end
|
49
48
|
end
|
50
49
|
def split text; end
|
51
|
-
|
50
|
+
|
52
51
|
# Normalizing.
|
53
52
|
#
|
54
53
|
def self.normalize_words regexp_replaces
|
@@ -63,18 +62,16 @@ module Tokenizers
|
|
63
62
|
end
|
64
63
|
end
|
65
64
|
def normalize_with_patterns text; end
|
66
|
-
|
65
|
+
|
67
66
|
# Illegal after normalizing.
|
68
67
|
#
|
69
|
-
|
70
|
-
#
|
71
|
-
def self.illegal_characters_after regexp
|
68
|
+
def self.illegal_characters_after_splitting regexp
|
72
69
|
define_method :remove_after_normalizing_illegals do |text|
|
73
70
|
text.gsub! regexp, ''
|
74
71
|
end
|
75
72
|
end
|
76
73
|
def remove_after_normalizing_illegals text; end
|
77
|
-
|
74
|
+
|
78
75
|
# Returns a number of tokens, generated from the given text.
|
79
76
|
#
|
80
77
|
# Note:
|
@@ -88,10 +85,10 @@ module Tokenizers
|
|
88
85
|
tokens = tokens_for words # creating tokens / strings
|
89
86
|
process tokens # processing tokens / strings
|
90
87
|
end
|
91
|
-
|
88
|
+
|
92
89
|
# Hooks.
|
93
90
|
#
|
94
|
-
|
91
|
+
|
95
92
|
# Preprocessing.
|
96
93
|
#
|
97
94
|
def preprocess text; end
|
@@ -104,7 +101,7 @@ module Tokenizers
|
|
104
101
|
reject tokens # Reject any tokens that don't meet criteria
|
105
102
|
tokens
|
106
103
|
end
|
107
|
-
|
104
|
+
|
108
105
|
# Rejects blank tokens.
|
109
106
|
#
|
110
107
|
def reject tokens
|
@@ -125,6 +122,6 @@ module Tokenizers
|
|
125
122
|
def empty_tokens
|
126
123
|
::Query::Tokens.new
|
127
124
|
end
|
128
|
-
|
125
|
+
|
129
126
|
end
|
130
127
|
end
|
data/lib/tasks/server.rake
CHANGED
@@ -21,7 +21,7 @@ namespace :server do
|
|
21
21
|
:port => 4000,
|
22
22
|
:daemonize => false
|
23
23
|
}
|
24
|
-
# TODO Move port!
|
24
|
+
# TODO Move port configuration!
|
25
25
|
port = SEARCH_ENVIRONMENT == 'production' ? 6000 : 4000
|
26
26
|
`export SEARCH_ENV=#{SEARCH_ENVIRONMENT}; unicorn -p #{config[SEARCH_ENVIRONMENT][:port]} -c #{File.join(SEARCH_ROOT, 'app/unicorn.ru')} #{config[SEARCH_ENVIRONMENT][:daemonize] ? '-D' : ''} #{File.join(SEARCH_ROOT, 'app/application.ru')}`
|
27
27
|
end
|
@@ -11,7 +11,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
11
11
|
# Note: Much more is possible, but let's start out easy.
|
12
12
|
#
|
13
13
|
# Ask me if you have questions!
|
14
|
-
#
|
14
|
+
#
|
15
15
|
|
16
16
|
indexes do
|
17
17
|
illegal_characters(/[^äöüa-zA-Z0-9\s\/\-\"\&\.]/)
|
@@ -5,7 +5,17 @@
|
|
5
5
|
#
|
6
6
|
require 'picky'
|
7
7
|
|
8
|
-
# Load your application. This requires the files in
|
8
|
+
# Load your application. This requires the following files in
|
9
|
+
#
|
10
|
+
# * /lib/initializers/*.rb
|
11
|
+
# * /lib/tokenizers/*.rb
|
12
|
+
# * /lib/indexers/*.rb
|
13
|
+
# * /lib/query/*.rb
|
14
|
+
#
|
15
|
+
# * /app/logging.rb
|
16
|
+
# * /app/application.rb
|
17
|
+
#
|
18
|
+
# to be required (in that order).
|
9
19
|
#
|
10
20
|
Loader.load_application
|
11
21
|
|
@@ -74,9 +74,9 @@ describe Results do
|
|
74
74
|
@allocations = stub :allocations
|
75
75
|
@results.stub! :allocations => @allocations
|
76
76
|
end
|
77
|
-
it 'should
|
77
|
+
it 'should process' do
|
78
78
|
@allocations.should_receive(:process!).once.with(20, 0).ordered
|
79
|
-
|
79
|
+
|
80
80
|
@results.prepare!
|
81
81
|
end
|
82
82
|
end
|
@@ -2,8 +2,6 @@
|
|
2
2
|
#
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
|
-
# TODO CLEAN UP.
|
6
|
-
#
|
7
5
|
describe Tokenizers::Index do
|
8
6
|
|
9
7
|
before(:each) do
|
@@ -21,11 +19,11 @@ describe Tokenizers::Index do
|
|
21
19
|
t1 = stub(:token, :to_s => '')
|
22
20
|
t2 = stub(:token, :to_s => 'not blank')
|
23
21
|
t3 = stub(:token, :to_s => '')
|
24
|
-
|
22
|
+
|
25
23
|
@tokenizer.reject([t1, t2, t3]).should == [t2]
|
26
24
|
end
|
27
25
|
end
|
28
|
-
|
26
|
+
|
29
27
|
describe "tokenize" do
|
30
28
|
describe "normalizing" do
|
31
29
|
def self.it_should_normalize_token(text, expected)
|
@@ -34,6 +32,7 @@ describe Tokenizers::Index do
|
|
34
32
|
end
|
35
33
|
end
|
36
34
|
# defaults
|
35
|
+
#
|
37
36
|
it_should_normalize_token 'it_should_not_normalize_by_default', :it_should_not_normalize_by_default
|
38
37
|
end
|
39
38
|
describe "tokenizing" do
|
@@ -43,6 +42,7 @@ describe Tokenizers::Index do
|
|
43
42
|
end
|
44
43
|
end
|
45
44
|
# defaults
|
45
|
+
#
|
46
46
|
it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
|
47
47
|
it_should_tokenize_token 'und', [:und]
|
48
48
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 3
|
9
|
-
version: 0.0.3
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-10-02 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -194,7 +194,6 @@ files:
|
|
194
194
|
- lib/picky/cacher/weights/logarithmic.rb
|
195
195
|
- lib/picky/cacher/weights/strategy.rb
|
196
196
|
- lib/picky/cacher/weights_generator.rb
|
197
|
-
- lib/picky/configuration/configuration.rb
|
198
197
|
- lib/picky/configuration/field.rb
|
199
198
|
- lib/picky/configuration/indexes.rb
|
200
199
|
- lib/picky/configuration/queries.rb
|
@@ -289,9 +288,7 @@ files:
|
|
289
288
|
- spec/lib/cacher/similarity_generator_spec.rb
|
290
289
|
- spec/lib/cacher/weights/logarithmic_spec.rb
|
291
290
|
- spec/lib/cacher/weights_generator_spec.rb
|
292
|
-
- spec/lib/configuration/configuration_spec.rb
|
293
291
|
- spec/lib/configuration/type_spec.rb
|
294
|
-
- spec/lib/configuration_spec.rb
|
295
292
|
- spec/lib/cores_spec.rb
|
296
293
|
- spec/lib/extensions/array_spec.rb
|
297
294
|
- spec/lib/extensions/hash_spec.rb
|
@@ -375,9 +372,7 @@ test_files:
|
|
375
372
|
- spec/lib/cacher/similarity_generator_spec.rb
|
376
373
|
- spec/lib/cacher/weights/logarithmic_spec.rb
|
377
374
|
- spec/lib/cacher/weights_generator_spec.rb
|
378
|
-
- spec/lib/configuration/configuration_spec.rb
|
379
375
|
- spec/lib/configuration/type_spec.rb
|
380
|
-
- spec/lib/configuration_spec.rb
|
381
376
|
- spec/lib/cores_spec.rb
|
382
377
|
- spec/lib/extensions/array_spec.rb
|
383
378
|
- spec/lib/extensions/hash_spec.rb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe Configuration do
|
5
|
-
|
6
|
-
# describe 'field' do
|
7
|
-
# it 'should define a new type' do
|
8
|
-
# Configuration::Field.should_receive(:new).once.with :some_name, :some_options
|
9
|
-
#
|
10
|
-
# Configuration.field :some_name, :some_options
|
11
|
-
# end
|
12
|
-
# it 'should respect the default' do
|
13
|
-
# Configuration::Field.should_receive(:new).once.with :some_name, {}
|
14
|
-
#
|
15
|
-
# Configuration.field :some_name
|
16
|
-
# end
|
17
|
-
# end
|
18
|
-
#
|
19
|
-
# describe 'type' do
|
20
|
-
# it 'should define a new type' do
|
21
|
-
# Configuration::Type.should_receive(:new).once.with :some_name, :some_field, :some_other_field
|
22
|
-
#
|
23
|
-
# Configuration.type :some_name, :some_field, :some_other_field
|
24
|
-
# end
|
25
|
-
# end
|
26
|
-
#
|
27
|
-
# describe 'indexes' do
|
28
|
-
# it 'should define the indexes and save' do
|
29
|
-
# indexes = mock :indexes
|
30
|
-
#
|
31
|
-
# Configuration::Indexes.should_receive(:new).once.with(:some_types).and_return indexes
|
32
|
-
# indexes.should_receive(:save).once.with
|
33
|
-
#
|
34
|
-
# Configuration.indexes :some_types
|
35
|
-
# end
|
36
|
-
# end
|
37
|
-
|
38
|
-
end
|