picky 1.5.2 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
@@ -3,35 +3,35 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Indexing # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
module Bundle
|
8
|
-
|
8
|
+
|
9
9
|
# This is the indexing bundle.
|
10
10
|
# It does all menial tasks that have nothing to do
|
11
11
|
# with the actual index running etc.
|
12
12
|
#
|
13
13
|
class Base < SuperBase
|
14
|
-
|
14
|
+
|
15
15
|
attr_accessor :partial_strategy, :weights_strategy
|
16
|
-
|
16
|
+
|
17
17
|
# Path is in which directory the cache is located.
|
18
18
|
#
|
19
19
|
def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
|
20
20
|
super name, configuration, similarity_strategy
|
21
|
-
|
21
|
+
|
22
22
|
@partial_strategy = partial_strategy
|
23
23
|
@weights_strategy = weights_strategy
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
# Sets up a piece of the index for the given token.
|
27
27
|
#
|
28
28
|
def initialize_index_for token
|
29
29
|
index[token] ||= []
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
# Generation
|
33
33
|
#
|
34
|
-
|
34
|
+
|
35
35
|
# This method
|
36
36
|
# * loads the base index from the db
|
37
37
|
# * generates derived indexes
|
@@ -50,16 +50,16 @@ module Internals
|
|
50
50
|
generate_derived
|
51
51
|
end
|
52
52
|
def cache_from_memory_generation_message
|
53
|
-
timed_exclaim "
|
53
|
+
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
# Generates the weights and similarity from the main index.
|
57
57
|
#
|
58
58
|
def generate_derived
|
59
59
|
generate_weights
|
60
60
|
generate_similarity
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
63
|
# Load the data from the db.
|
64
64
|
#
|
65
65
|
def load_from_index_file
|
@@ -68,7 +68,7 @@ module Internals
|
|
68
68
|
retrieve
|
69
69
|
end
|
70
70
|
def load_from_index_generation_message
|
71
|
-
timed_exclaim "
|
71
|
+
timed_exclaim %Q{"#{identifier}": Loading index.}
|
72
72
|
end
|
73
73
|
# Retrieves the prepared index data into the index.
|
74
74
|
#
|
@@ -83,7 +83,7 @@ module Internals
|
|
83
83
|
index[token] << id.send(key_format) # TODO Rewrite. Move this into the specific indexing.
|
84
84
|
end
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
# Generates a new index (writes its index) using the
|
88
88
|
# partial caching strategy of this bundle.
|
89
89
|
#
|
@@ -94,7 +94,7 @@ module Internals
|
|
94
94
|
# Generate a partial index from the given exact index.
|
95
95
|
#
|
96
96
|
def generate_partial_from exact_index
|
97
|
-
timed_exclaim "
|
97
|
+
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
98
98
|
self.index = exact_index
|
99
99
|
self.generate_partial
|
100
100
|
self
|
@@ -125,28 +125,28 @@ module Internals
|
|
125
125
|
# Dumps the core index.
|
126
126
|
#
|
127
127
|
def dump_index
|
128
|
-
timed_exclaim "
|
128
|
+
timed_exclaim %Q{"#{identifier}": Dumping index.}
|
129
129
|
backend.dump_index index
|
130
130
|
end
|
131
131
|
# Dumps the weights index.
|
132
132
|
#
|
133
133
|
def dump_weights
|
134
|
-
timed_exclaim "
|
134
|
+
timed_exclaim %Q{"#{identifier}": Dumping weights of index.}
|
135
135
|
backend.dump_weights weights
|
136
136
|
end
|
137
137
|
# Dumps the similarity index.
|
138
138
|
#
|
139
139
|
def dump_similarity
|
140
|
-
timed_exclaim "
|
140
|
+
timed_exclaim %Q{"#{identifier}": Dumping similarity of index.}
|
141
141
|
backend.dump_similarity similarity
|
142
142
|
end
|
143
143
|
# Dumps the similarity index.
|
144
144
|
#
|
145
145
|
def dump_configuration
|
146
|
-
timed_exclaim "
|
146
|
+
timed_exclaim %Q{"#{identifier}": Dumping configuration for index.}
|
147
147
|
backend.dump_configuration configuration
|
148
148
|
end
|
149
|
-
|
149
|
+
|
150
150
|
# Alerts the user if an index is missing.
|
151
151
|
#
|
152
152
|
def raise_unless_cache_exists
|
@@ -171,18 +171,18 @@ module Internals
|
|
171
171
|
raise_unless_similarity_ok
|
172
172
|
end
|
173
173
|
end
|
174
|
-
|
174
|
+
|
175
175
|
# Outputs a warning for the given cache.
|
176
176
|
#
|
177
177
|
def warn_cache_small what
|
178
|
-
|
178
|
+
warn "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
179
179
|
end
|
180
180
|
# Raises an appropriate error message for the given cache.
|
181
181
|
#
|
182
182
|
def raise_cache_missing what
|
183
183
|
raise "#{what} cache for #{identifier} missing."
|
184
184
|
end
|
185
|
-
|
185
|
+
|
186
186
|
# Warns the user if the similarity index is small.
|
187
187
|
#
|
188
188
|
def warn_if_similarity_small
|
@@ -193,10 +193,10 @@ module Internals
|
|
193
193
|
def raise_unless_similarity_ok
|
194
194
|
raise_cache_missing :similarity unless backend.similarity_cache_ok?
|
195
195
|
end
|
196
|
-
|
196
|
+
|
197
197
|
# TODO Spec on down.
|
198
198
|
#
|
199
|
-
|
199
|
+
|
200
200
|
# Warns the user if the core or weights indexes are small.
|
201
201
|
#
|
202
202
|
def warn_if_index_small
|
@@ -209,11 +209,11 @@ module Internals
|
|
209
209
|
raise_cache_missing :index unless backend.index_cache_ok?
|
210
210
|
raise_cache_missing :weights unless backend.weights_cache_ok?
|
211
211
|
end
|
212
|
-
|
212
|
+
|
213
213
|
end
|
214
|
-
|
214
|
+
|
215
215
|
end
|
216
|
-
|
216
|
+
|
217
217
|
end
|
218
|
-
|
218
|
+
|
219
219
|
end
|
@@ -3,23 +3,30 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Indexing # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
module Bundle
|
8
|
-
|
8
|
+
|
9
9
|
# The memory version dumps its generated indexes to disk
|
10
10
|
# (mostly JSON) to load them into memory on startup.
|
11
11
|
#
|
12
12
|
class Memory < Base
|
13
|
-
|
13
|
+
|
14
14
|
# We're using files for the memory backend.
|
15
15
|
# E.g. dump writes files.
|
16
16
|
#
|
17
17
|
alias backend files
|
18
|
-
|
18
|
+
|
19
|
+
def to_s
|
20
|
+
<<-MEMORY
|
21
|
+
Memory
|
22
|
+
#{@backend.indented_to_s}
|
23
|
+
MEMORY
|
24
|
+
end
|
25
|
+
|
19
26
|
end
|
20
|
-
|
27
|
+
|
21
28
|
end
|
22
|
-
|
29
|
+
|
23
30
|
end
|
24
|
-
|
31
|
+
|
25
32
|
end
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Indexing
|
4
|
-
|
4
|
+
|
5
5
|
class Categories
|
6
|
-
|
6
|
+
|
7
7
|
attr_reader :categories
|
8
|
-
|
8
|
+
|
9
9
|
each_delegate :index,
|
10
10
|
:cache,
|
11
11
|
:generate_caches,
|
@@ -15,28 +15,32 @@ module Internals
|
|
15
15
|
:clear_caches,
|
16
16
|
:create_directory_structure,
|
17
17
|
:to => :categories
|
18
|
-
|
18
|
+
|
19
19
|
def initialize
|
20
20
|
@categories = []
|
21
21
|
end
|
22
|
-
|
22
|
+
|
23
|
+
def to_s
|
24
|
+
categories.indented_to_s
|
25
|
+
end
|
26
|
+
|
23
27
|
def << category
|
24
28
|
categories << category
|
25
29
|
end
|
26
|
-
|
30
|
+
|
27
31
|
def find category_name
|
28
32
|
category_name = category_name.to_sym
|
29
|
-
|
33
|
+
|
30
34
|
categories.each do |category|
|
31
35
|
next unless category.name == category_name
|
32
36
|
return category
|
33
37
|
end
|
34
|
-
|
38
|
+
|
35
39
|
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
36
40
|
end
|
37
|
-
|
41
|
+
|
38
42
|
end
|
39
|
-
|
43
|
+
|
40
44
|
end
|
41
|
-
|
45
|
+
|
42
46
|
end
|
@@ -1,11 +1,14 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Indexing
|
4
|
-
|
4
|
+
|
5
5
|
class Category
|
6
|
-
|
6
|
+
|
7
7
|
attr_reader :exact, :partial, :name, :configuration, :indexer
|
8
|
-
|
8
|
+
|
9
|
+
delegate :identifier, :prepare_index_directory, :to => :configuration
|
10
|
+
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
11
|
+
|
9
12
|
# Mandatory params:
|
10
13
|
# * name: Category name to use as identifier and file names.
|
11
14
|
# * index: Index to which this category is attached to.
|
@@ -17,38 +20,45 @@ module Internals
|
|
17
20
|
#
|
18
21
|
# Advanced Options (TODO):
|
19
22
|
#
|
20
|
-
# * weights:
|
21
|
-
# * tokenizer:
|
23
|
+
# * weights:
|
24
|
+
# * tokenizer:
|
22
25
|
#
|
23
26
|
def initialize name, index, options = {}
|
24
27
|
@name = name
|
25
28
|
@from = options[:from]
|
26
|
-
|
29
|
+
|
27
30
|
# Now we have enough info to combine the index and the category.
|
28
31
|
#
|
29
32
|
@configuration = Configuration::Index.new index, self
|
30
|
-
|
33
|
+
|
31
34
|
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
32
35
|
@indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
|
33
|
-
|
36
|
+
|
34
37
|
# TODO Push into Bundle. At least the weights.
|
35
38
|
#
|
36
39
|
partial = options[:partial] || Generators::Partial::Default
|
37
40
|
weights = options[:weights] || Generators::Weights::Default
|
38
41
|
similarity = options[:similarity] || Generators::Similarity::Default
|
39
|
-
|
42
|
+
|
40
43
|
bundle_class = options[:indexing_bundle_class] || Bundle::Memory
|
41
44
|
@exact = bundle_class.new(:exact, configuration, similarity, Generators::Partial::None.new, weights)
|
42
45
|
@partial = bundle_class.new(:partial, configuration, Generators::Similarity::None.new, partial, weights)
|
43
46
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
47
|
+
|
48
|
+
def to_s
|
49
|
+
<<-CATEGORY
|
50
|
+
Category(#{name} from #{from}):
|
51
|
+
Exact:
|
52
|
+
#{exact.indented_to_s(4)}
|
53
|
+
Partial:
|
54
|
+
#{partial.indented_to_s(4)}
|
55
|
+
CATEGORY
|
56
|
+
end
|
57
|
+
|
48
58
|
def from
|
49
59
|
@from || name
|
50
60
|
end
|
51
|
-
|
61
|
+
|
52
62
|
# TODO Spec.
|
53
63
|
#
|
54
64
|
def backup_caches
|
@@ -71,12 +81,12 @@ module Internals
|
|
71
81
|
exact.delete
|
72
82
|
partial.delete
|
73
83
|
end
|
74
|
-
|
84
|
+
|
75
85
|
def index
|
76
86
|
prepare_index_directory
|
77
87
|
indexer.index
|
78
88
|
end
|
79
|
-
|
89
|
+
|
80
90
|
# Generates all caches for this category.
|
81
91
|
#
|
82
92
|
def cache
|
@@ -97,7 +107,7 @@ module Internals
|
|
97
107
|
generate_partial
|
98
108
|
generate_caches_from_memory
|
99
109
|
dump_caches
|
100
|
-
timed_exclaim "
|
110
|
+
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
101
111
|
end
|
102
112
|
def generate_caches_from_source
|
103
113
|
exact.generate_caches_from_source
|
@@ -112,9 +122,9 @@ module Internals
|
|
112
122
|
exact.dump
|
113
123
|
partial.dump
|
114
124
|
end
|
115
|
-
|
125
|
+
|
116
126
|
end
|
117
|
-
|
127
|
+
|
118
128
|
end
|
119
|
-
|
129
|
+
|
120
130
|
end
|
@@ -3,16 +3,16 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Indexing
|
6
|
-
|
6
|
+
|
7
7
|
class Index
|
8
|
-
|
8
|
+
|
9
9
|
attr_reader :name, :source, :categories, :after_indexing
|
10
|
-
|
10
|
+
|
11
11
|
# Delegators for indexing.
|
12
12
|
#
|
13
13
|
delegate :connect_backend,
|
14
14
|
:to => :source
|
15
|
-
|
15
|
+
|
16
16
|
delegate :index,
|
17
17
|
:cache,
|
18
18
|
:generate_caches,
|
@@ -22,27 +22,27 @@ module Internals
|
|
22
22
|
:clear_caches,
|
23
23
|
:create_directory_structure,
|
24
24
|
:to => :categories
|
25
|
-
|
25
|
+
|
26
26
|
def initialize name, source, options = {}
|
27
27
|
@name = name
|
28
28
|
@source = source
|
29
|
-
|
29
|
+
|
30
30
|
@after_indexing = options[:after_indexing]
|
31
31
|
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
32
|
-
|
32
|
+
|
33
33
|
@categories = Categories.new
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
# TODO Spec. Doc.
|
37
37
|
#
|
38
38
|
def define_category category_name, options = {}
|
39
39
|
options = default_category_options.merge options
|
40
|
-
|
40
|
+
|
41
41
|
new_category = Category.new category_name, self, options
|
42
42
|
categories << new_category
|
43
43
|
new_category
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
# By default, the category uses
|
47
47
|
# * the index's source.
|
48
48
|
# * the index's bundle type.
|
@@ -53,15 +53,23 @@ module Internals
|
|
53
53
|
:indexing_bundle_class => @bundle_class
|
54
54
|
}
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
# Indexing.
|
58
58
|
#
|
59
59
|
def take_snapshot
|
60
60
|
source.take_snapshot self
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
|
+
def to_s
|
64
|
+
<<-INDEX
|
65
|
+
Indexing(#{name}):
|
66
|
+
#{"source: #{source}".indented_to_s}
|
67
|
+
#{"Categories:\n#{categories.indented_to_s}".indented_to_s}
|
68
|
+
INDEX
|
69
|
+
end
|
70
|
+
|
63
71
|
end
|
64
|
-
|
72
|
+
|
65
73
|
end
|
66
|
-
|
74
|
+
|
67
75
|
end
|
@@ -58,21 +58,6 @@ module Internals
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
# Returns a random id from the allocations.
|
62
|
-
#
|
63
|
-
# Note: This is an ok algorithm for small id sets.
|
64
|
-
#
|
65
|
-
# But still TODO try for a faster one.
|
66
|
-
#
|
67
|
-
# TODO For the 1 amount random case this needs to be improved.
|
68
|
-
#
|
69
|
-
def random_ids amount = 1
|
70
|
-
return [] if @allocations.empty?
|
71
|
-
ids = @allocations.first.ids
|
72
|
-
indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
|
73
|
-
indexes.first(amount).map { |id| ids[id] }
|
74
|
-
end
|
75
|
-
|
76
61
|
# This is the main method of this class that will replace ids and count.
|
77
62
|
#
|
78
63
|
# What it does is calculate the ids and counts of its allocations
|
@@ -51,8 +51,6 @@ module Internals
|
|
51
51
|
# ids that have an associated identifier that is nil.
|
52
52
|
#
|
53
53
|
def keep identifiers = []
|
54
|
-
# TODO Rewrite to use the category!!!
|
55
|
-
#
|
56
54
|
@combinations.reject! { |combination| !combination.in?(identifiers) }
|
57
55
|
end
|
58
56
|
|
@@ -64,8 +62,6 @@ module Internals
|
|
64
62
|
# ids that have an associated identifier that is nil.
|
65
63
|
#
|
66
64
|
def remove identifiers = []
|
67
|
-
# TODO Rewrite to use the category!!!
|
68
|
-
#
|
69
65
|
@combinations.reject! { |combination| combination.in?(identifiers) }
|
70
66
|
end
|
71
67
|
|
@@ -19,7 +19,7 @@ module Internals
|
|
19
19
|
def initialize combinations
|
20
20
|
super combinations
|
21
21
|
|
22
|
-
@@redis ||= ::Redis.new
|
22
|
+
@@redis ||= ::Redis.new :db => 15
|
23
23
|
end
|
24
24
|
|
25
25
|
# Returns the result ids for the allocation.
|
@@ -32,22 +32,33 @@ module Internals
|
|
32
32
|
end
|
33
33
|
|
34
34
|
result_id = generate_intermediate_result_id
|
35
|
-
|
36
|
-
#
|
35
|
+
|
36
|
+
# Intersect and store.
|
37
37
|
#
|
38
|
-
|
39
38
|
@@redis.zinterstore result_id, identifiers
|
40
39
|
|
41
|
-
|
40
|
+
# Get the stored result.
|
41
|
+
#
|
42
|
+
results = @@redis.zrange result_id, offset, (offset + amount)
|
43
|
+
|
44
|
+
# Delete the stored result as it was only for temporary purposes.
|
45
|
+
#
|
46
|
+
@@redis.del result_id
|
47
|
+
|
48
|
+
results
|
42
49
|
end
|
43
50
|
|
44
51
|
# Generate a multiple host/process safe result id.
|
45
52
|
#
|
46
|
-
|
53
|
+
require 'socket'
|
54
|
+
@@host = Socket.gethostname
|
55
|
+
define_method :host do
|
56
|
+
@@host
|
57
|
+
end
|
58
|
+
# Use the host and pid (generated lazily in child processes) for the result.
|
47
59
|
#
|
48
60
|
def generate_intermediate_result_id
|
49
|
-
#
|
50
|
-
:"host:#{Process.pid}:picky:result"
|
61
|
+
:"#{host}:#{@pid ||= Process.pid}:picky:result"
|
51
62
|
end
|
52
63
|
|
53
64
|
end
|
@@ -52,8 +52,6 @@ module Internals
|
|
52
52
|
# Add the wrapped possible allocations to the ones we already have.
|
53
53
|
#
|
54
54
|
previous_allocations + expanded_combinations.map! do |expanded_combination|
|
55
|
-
# TODO Insert Redis here?
|
56
|
-
#
|
57
55
|
@combinations_type.new(expanded_combination).pack_into_allocation(index.result_identifier) # TODO Do not extract result_identifier. Remove pack_into_allocation.
|
58
56
|
end
|
59
57
|
end)
|
@@ -116,14 +114,13 @@ module Internals
|
|
116
114
|
# ones, but I guess I am a bit sentimental.
|
117
115
|
#
|
118
116
|
def expand_combinations_from possible_combinations
|
117
|
+
# If an element has size 0, this means one of the
|
118
|
+
# tokens could not be allocated.
|
119
|
+
#
|
119
120
|
return if possible_combinations.any?(&:empty?)
|
120
121
|
|
121
122
|
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
122
123
|
#
|
123
|
-
# TODO How does this work if an element has size 0? Since below we account for size 0.
|
124
|
-
# Should we even continue if an element has size 0?
|
125
|
-
# This means one of the tokens cannot be allocated.
|
126
|
-
#
|
127
124
|
single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
|
128
125
|
|
129
126
|
# Initialize a group multiplicator.
|
@@ -8,8 +8,6 @@ module Internals
|
|
8
8
|
#
|
9
9
|
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
|
10
10
|
#
|
11
|
-
# TODO Make partial / similarity char configurable.
|
12
|
-
#
|
13
11
|
class Token # :nodoc:all
|
14
12
|
|
15
13
|
attr_reader :text, :original
|
@@ -179,8 +177,6 @@ module Internals
|
|
179
177
|
|
180
178
|
# Internal identifier.
|
181
179
|
#
|
182
|
-
# TODO Uh.
|
183
|
-
#
|
184
180
|
def identifier
|
185
181
|
"#{similar?? :similarity : :index}:#{@text}"
|
186
182
|
end
|
@@ -7,14 +7,7 @@ module Query
|
|
7
7
|
#
|
8
8
|
#
|
9
9
|
def initialize weights = {}
|
10
|
-
|
11
|
-
@weights = prepare weights
|
12
|
-
end
|
13
|
-
|
14
|
-
# Get the category indexes for the given bonuses.
|
15
|
-
#
|
16
|
-
def prepare weights
|
17
|
-
weights
|
10
|
+
@weights = weights
|
18
11
|
end
|
19
12
|
|
20
13
|
# Get the weight of an allocation.
|
@@ -38,10 +31,8 @@ module Query
|
|
38
31
|
# Note: Cache this if more complicated weighings become necessary.
|
39
32
|
#
|
40
33
|
def score combinations
|
41
|
-
# TODO
|
34
|
+
# TODO Or hide: combinations#to_weights_key
|
42
35
|
#
|
43
|
-
# weight_for combinations.map(&:category).clustered_uniq_fast.map!(&:name)
|
44
|
-
|
45
36
|
# TODO combinations could cluster uniq as combinations are added (since combinations don't change).
|
46
37
|
#
|
47
38
|
weight_for combinations.map(&:category_name).clustered_uniq_fast
|
@@ -15,8 +15,8 @@ module Internals
|
|
15
15
|
# Takes instances of Query::Allocations as param.
|
16
16
|
#
|
17
17
|
def initialize offset = 0, allocations = Query::Allocations.new
|
18
|
-
@offset
|
19
|
-
@allocations = allocations
|
18
|
+
@offset = offset
|
19
|
+
@allocations = allocations
|
20
20
|
end
|
21
21
|
# Create new results and calculate the ids.
|
22
22
|
#
|
@@ -26,7 +26,7 @@ module Internals
|
|
26
26
|
results
|
27
27
|
end
|
28
28
|
|
29
|
-
#
|
29
|
+
# Returns a hash with the allocations, offset, duration and total.
|
30
30
|
#
|
31
31
|
def serialize
|
32
32
|
{ allocations: allocations.to_result,
|
@@ -85,13 +85,6 @@ module Internals
|
|
85
85
|
def ids amount = 20
|
86
86
|
allocations.ids amount
|
87
87
|
end
|
88
|
-
# Gets an amout of random ids from the allocations.
|
89
|
-
#
|
90
|
-
# Note: Basically delegates to the allocations.
|
91
|
-
#
|
92
|
-
def random_ids amount = 1
|
93
|
-
allocations.random_ids amount
|
94
|
-
end
|
95
88
|
|
96
89
|
# Human readable log.
|
97
90
|
#
|