picky 1.5.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
@@ -3,35 +3,35 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Indexing # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
module Bundle
|
8
|
-
|
8
|
+
|
9
9
|
# This is the indexing bundle.
|
10
10
|
# It does all menial tasks that have nothing to do
|
11
11
|
# with the actual index running etc.
|
12
12
|
#
|
13
13
|
class Base < SuperBase
|
14
|
-
|
14
|
+
|
15
15
|
attr_accessor :partial_strategy, :weights_strategy
|
16
|
-
|
16
|
+
|
17
17
|
# Path is in which directory the cache is located.
|
18
18
|
#
|
19
19
|
def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
|
20
20
|
super name, configuration, similarity_strategy
|
21
|
-
|
21
|
+
|
22
22
|
@partial_strategy = partial_strategy
|
23
23
|
@weights_strategy = weights_strategy
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
# Sets up a piece of the index for the given token.
|
27
27
|
#
|
28
28
|
def initialize_index_for token
|
29
29
|
index[token] ||= []
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
# Generation
|
33
33
|
#
|
34
|
-
|
34
|
+
|
35
35
|
# This method
|
36
36
|
# * loads the base index from the db
|
37
37
|
# * generates derived indexes
|
@@ -50,16 +50,16 @@ module Internals
|
|
50
50
|
generate_derived
|
51
51
|
end
|
52
52
|
def cache_from_memory_generation_message
|
53
|
-
timed_exclaim "
|
53
|
+
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
# Generates the weights and similarity from the main index.
|
57
57
|
#
|
58
58
|
def generate_derived
|
59
59
|
generate_weights
|
60
60
|
generate_similarity
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
63
|
# Load the data from the db.
|
64
64
|
#
|
65
65
|
def load_from_index_file
|
@@ -68,7 +68,7 @@ module Internals
|
|
68
68
|
retrieve
|
69
69
|
end
|
70
70
|
def load_from_index_generation_message
|
71
|
-
timed_exclaim "
|
71
|
+
timed_exclaim %Q{"#{identifier}": Loading index.}
|
72
72
|
end
|
73
73
|
# Retrieves the prepared index data into the index.
|
74
74
|
#
|
@@ -83,7 +83,7 @@ module Internals
|
|
83
83
|
index[token] << id.send(key_format) # TODO Rewrite. Move this into the specific indexing.
|
84
84
|
end
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
# Generates a new index (writes its index) using the
|
88
88
|
# partial caching strategy of this bundle.
|
89
89
|
#
|
@@ -94,7 +94,7 @@ module Internals
|
|
94
94
|
# Generate a partial index from the given exact index.
|
95
95
|
#
|
96
96
|
def generate_partial_from exact_index
|
97
|
-
timed_exclaim "
|
97
|
+
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
98
98
|
self.index = exact_index
|
99
99
|
self.generate_partial
|
100
100
|
self
|
@@ -125,28 +125,28 @@ module Internals
|
|
125
125
|
# Dumps the core index.
|
126
126
|
#
|
127
127
|
def dump_index
|
128
|
-
timed_exclaim "
|
128
|
+
timed_exclaim %Q{"#{identifier}": Dumping index.}
|
129
129
|
backend.dump_index index
|
130
130
|
end
|
131
131
|
# Dumps the weights index.
|
132
132
|
#
|
133
133
|
def dump_weights
|
134
|
-
timed_exclaim "
|
134
|
+
timed_exclaim %Q{"#{identifier}": Dumping weights of index.}
|
135
135
|
backend.dump_weights weights
|
136
136
|
end
|
137
137
|
# Dumps the similarity index.
|
138
138
|
#
|
139
139
|
def dump_similarity
|
140
|
-
timed_exclaim "
|
140
|
+
timed_exclaim %Q{"#{identifier}": Dumping similarity of index.}
|
141
141
|
backend.dump_similarity similarity
|
142
142
|
end
|
143
143
|
# Dumps the similarity index.
|
144
144
|
#
|
145
145
|
def dump_configuration
|
146
|
-
timed_exclaim "
|
146
|
+
timed_exclaim %Q{"#{identifier}": Dumping configuration for index.}
|
147
147
|
backend.dump_configuration configuration
|
148
148
|
end
|
149
|
-
|
149
|
+
|
150
150
|
# Alerts the user if an index is missing.
|
151
151
|
#
|
152
152
|
def raise_unless_cache_exists
|
@@ -171,18 +171,18 @@ module Internals
|
|
171
171
|
raise_unless_similarity_ok
|
172
172
|
end
|
173
173
|
end
|
174
|
-
|
174
|
+
|
175
175
|
# Outputs a warning for the given cache.
|
176
176
|
#
|
177
177
|
def warn_cache_small what
|
178
|
-
|
178
|
+
warn "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
179
179
|
end
|
180
180
|
# Raises an appropriate error message for the given cache.
|
181
181
|
#
|
182
182
|
def raise_cache_missing what
|
183
183
|
raise "#{what} cache for #{identifier} missing."
|
184
184
|
end
|
185
|
-
|
185
|
+
|
186
186
|
# Warns the user if the similarity index is small.
|
187
187
|
#
|
188
188
|
def warn_if_similarity_small
|
@@ -193,10 +193,10 @@ module Internals
|
|
193
193
|
def raise_unless_similarity_ok
|
194
194
|
raise_cache_missing :similarity unless backend.similarity_cache_ok?
|
195
195
|
end
|
196
|
-
|
196
|
+
|
197
197
|
# TODO Spec on down.
|
198
198
|
#
|
199
|
-
|
199
|
+
|
200
200
|
# Warns the user if the core or weights indexes are small.
|
201
201
|
#
|
202
202
|
def warn_if_index_small
|
@@ -209,11 +209,11 @@ module Internals
|
|
209
209
|
raise_cache_missing :index unless backend.index_cache_ok?
|
210
210
|
raise_cache_missing :weights unless backend.weights_cache_ok?
|
211
211
|
end
|
212
|
-
|
212
|
+
|
213
213
|
end
|
214
|
-
|
214
|
+
|
215
215
|
end
|
216
|
-
|
216
|
+
|
217
217
|
end
|
218
|
-
|
218
|
+
|
219
219
|
end
|
@@ -3,23 +3,30 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Indexing # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
module Bundle
|
8
|
-
|
8
|
+
|
9
9
|
# The memory version dumps its generated indexes to disk
|
10
10
|
# (mostly JSON) to load them into memory on startup.
|
11
11
|
#
|
12
12
|
class Memory < Base
|
13
|
-
|
13
|
+
|
14
14
|
# We're using files for the memory backend.
|
15
15
|
# E.g. dump writes files.
|
16
16
|
#
|
17
17
|
alias backend files
|
18
|
-
|
18
|
+
|
19
|
+
def to_s
|
20
|
+
<<-MEMORY
|
21
|
+
Memory
|
22
|
+
#{@backend.indented_to_s}
|
23
|
+
MEMORY
|
24
|
+
end
|
25
|
+
|
19
26
|
end
|
20
|
-
|
27
|
+
|
21
28
|
end
|
22
|
-
|
29
|
+
|
23
30
|
end
|
24
|
-
|
31
|
+
|
25
32
|
end
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Indexing
|
4
|
-
|
4
|
+
|
5
5
|
class Categories
|
6
|
-
|
6
|
+
|
7
7
|
attr_reader :categories
|
8
|
-
|
8
|
+
|
9
9
|
each_delegate :index,
|
10
10
|
:cache,
|
11
11
|
:generate_caches,
|
@@ -15,28 +15,32 @@ module Internals
|
|
15
15
|
:clear_caches,
|
16
16
|
:create_directory_structure,
|
17
17
|
:to => :categories
|
18
|
-
|
18
|
+
|
19
19
|
def initialize
|
20
20
|
@categories = []
|
21
21
|
end
|
22
|
-
|
22
|
+
|
23
|
+
def to_s
|
24
|
+
categories.indented_to_s
|
25
|
+
end
|
26
|
+
|
23
27
|
def << category
|
24
28
|
categories << category
|
25
29
|
end
|
26
|
-
|
30
|
+
|
27
31
|
def find category_name
|
28
32
|
category_name = category_name.to_sym
|
29
|
-
|
33
|
+
|
30
34
|
categories.each do |category|
|
31
35
|
next unless category.name == category_name
|
32
36
|
return category
|
33
37
|
end
|
34
|
-
|
38
|
+
|
35
39
|
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
36
40
|
end
|
37
|
-
|
41
|
+
|
38
42
|
end
|
39
|
-
|
43
|
+
|
40
44
|
end
|
41
|
-
|
45
|
+
|
42
46
|
end
|
@@ -1,11 +1,14 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Indexing
|
4
|
-
|
4
|
+
|
5
5
|
class Category
|
6
|
-
|
6
|
+
|
7
7
|
attr_reader :exact, :partial, :name, :configuration, :indexer
|
8
|
-
|
8
|
+
|
9
|
+
delegate :identifier, :prepare_index_directory, :to => :configuration
|
10
|
+
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
11
|
+
|
9
12
|
# Mandatory params:
|
10
13
|
# * name: Category name to use as identifier and file names.
|
11
14
|
# * index: Index to which this category is attached to.
|
@@ -17,38 +20,45 @@ module Internals
|
|
17
20
|
#
|
18
21
|
# Advanced Options (TODO):
|
19
22
|
#
|
20
|
-
# * weights:
|
21
|
-
# * tokenizer:
|
23
|
+
# * weights:
|
24
|
+
# * tokenizer:
|
22
25
|
#
|
23
26
|
def initialize name, index, options = {}
|
24
27
|
@name = name
|
25
28
|
@from = options[:from]
|
26
|
-
|
29
|
+
|
27
30
|
# Now we have enough info to combine the index and the category.
|
28
31
|
#
|
29
32
|
@configuration = Configuration::Index.new index, self
|
30
|
-
|
33
|
+
|
31
34
|
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
32
35
|
@indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
|
33
|
-
|
36
|
+
|
34
37
|
# TODO Push into Bundle. At least the weights.
|
35
38
|
#
|
36
39
|
partial = options[:partial] || Generators::Partial::Default
|
37
40
|
weights = options[:weights] || Generators::Weights::Default
|
38
41
|
similarity = options[:similarity] || Generators::Similarity::Default
|
39
|
-
|
42
|
+
|
40
43
|
bundle_class = options[:indexing_bundle_class] || Bundle::Memory
|
41
44
|
@exact = bundle_class.new(:exact, configuration, similarity, Generators::Partial::None.new, weights)
|
42
45
|
@partial = bundle_class.new(:partial, configuration, Generators::Similarity::None.new, partial, weights)
|
43
46
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
47
|
+
|
48
|
+
def to_s
|
49
|
+
<<-CATEGORY
|
50
|
+
Category(#{name} from #{from}):
|
51
|
+
Exact:
|
52
|
+
#{exact.indented_to_s(4)}
|
53
|
+
Partial:
|
54
|
+
#{partial.indented_to_s(4)}
|
55
|
+
CATEGORY
|
56
|
+
end
|
57
|
+
|
48
58
|
def from
|
49
59
|
@from || name
|
50
60
|
end
|
51
|
-
|
61
|
+
|
52
62
|
# TODO Spec.
|
53
63
|
#
|
54
64
|
def backup_caches
|
@@ -71,12 +81,12 @@ module Internals
|
|
71
81
|
exact.delete
|
72
82
|
partial.delete
|
73
83
|
end
|
74
|
-
|
84
|
+
|
75
85
|
def index
|
76
86
|
prepare_index_directory
|
77
87
|
indexer.index
|
78
88
|
end
|
79
|
-
|
89
|
+
|
80
90
|
# Generates all caches for this category.
|
81
91
|
#
|
82
92
|
def cache
|
@@ -97,7 +107,7 @@ module Internals
|
|
97
107
|
generate_partial
|
98
108
|
generate_caches_from_memory
|
99
109
|
dump_caches
|
100
|
-
timed_exclaim "
|
110
|
+
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
101
111
|
end
|
102
112
|
def generate_caches_from_source
|
103
113
|
exact.generate_caches_from_source
|
@@ -112,9 +122,9 @@ module Internals
|
|
112
122
|
exact.dump
|
113
123
|
partial.dump
|
114
124
|
end
|
115
|
-
|
125
|
+
|
116
126
|
end
|
117
|
-
|
127
|
+
|
118
128
|
end
|
119
|
-
|
129
|
+
|
120
130
|
end
|
@@ -3,16 +3,16 @@
|
|
3
3
|
module Internals
|
4
4
|
|
5
5
|
module Indexing
|
6
|
-
|
6
|
+
|
7
7
|
class Index
|
8
|
-
|
8
|
+
|
9
9
|
attr_reader :name, :source, :categories, :after_indexing
|
10
|
-
|
10
|
+
|
11
11
|
# Delegators for indexing.
|
12
12
|
#
|
13
13
|
delegate :connect_backend,
|
14
14
|
:to => :source
|
15
|
-
|
15
|
+
|
16
16
|
delegate :index,
|
17
17
|
:cache,
|
18
18
|
:generate_caches,
|
@@ -22,27 +22,27 @@ module Internals
|
|
22
22
|
:clear_caches,
|
23
23
|
:create_directory_structure,
|
24
24
|
:to => :categories
|
25
|
-
|
25
|
+
|
26
26
|
def initialize name, source, options = {}
|
27
27
|
@name = name
|
28
28
|
@source = source
|
29
|
-
|
29
|
+
|
30
30
|
@after_indexing = options[:after_indexing]
|
31
31
|
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
32
|
-
|
32
|
+
|
33
33
|
@categories = Categories.new
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
# TODO Spec. Doc.
|
37
37
|
#
|
38
38
|
def define_category category_name, options = {}
|
39
39
|
options = default_category_options.merge options
|
40
|
-
|
40
|
+
|
41
41
|
new_category = Category.new category_name, self, options
|
42
42
|
categories << new_category
|
43
43
|
new_category
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
# By default, the category uses
|
47
47
|
# * the index's source.
|
48
48
|
# * the index's bundle type.
|
@@ -53,15 +53,23 @@ module Internals
|
|
53
53
|
:indexing_bundle_class => @bundle_class
|
54
54
|
}
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
# Indexing.
|
58
58
|
#
|
59
59
|
def take_snapshot
|
60
60
|
source.take_snapshot self
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
|
+
def to_s
|
64
|
+
<<-INDEX
|
65
|
+
Indexing(#{name}):
|
66
|
+
#{"source: #{source}".indented_to_s}
|
67
|
+
#{"Categories:\n#{categories.indented_to_s}".indented_to_s}
|
68
|
+
INDEX
|
69
|
+
end
|
70
|
+
|
63
71
|
end
|
64
|
-
|
72
|
+
|
65
73
|
end
|
66
|
-
|
74
|
+
|
67
75
|
end
|
@@ -58,21 +58,6 @@ module Internals
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
# Returns a random id from the allocations.
|
62
|
-
#
|
63
|
-
# Note: This is an ok algorithm for small id sets.
|
64
|
-
#
|
65
|
-
# But still TODO try for a faster one.
|
66
|
-
#
|
67
|
-
# TODO For the 1 amount random case this needs to be improved.
|
68
|
-
#
|
69
|
-
def random_ids amount = 1
|
70
|
-
return [] if @allocations.empty?
|
71
|
-
ids = @allocations.first.ids
|
72
|
-
indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
|
73
|
-
indexes.first(amount).map { |id| ids[id] }
|
74
|
-
end
|
75
|
-
|
76
61
|
# This is the main method of this class that will replace ids and count.
|
77
62
|
#
|
78
63
|
# What it does is calculate the ids and counts of its allocations
|
@@ -51,8 +51,6 @@ module Internals
|
|
51
51
|
# ids that have an associated identifier that is nil.
|
52
52
|
#
|
53
53
|
def keep identifiers = []
|
54
|
-
# TODO Rewrite to use the category!!!
|
55
|
-
#
|
56
54
|
@combinations.reject! { |combination| !combination.in?(identifiers) }
|
57
55
|
end
|
58
56
|
|
@@ -64,8 +62,6 @@ module Internals
|
|
64
62
|
# ids that have an associated identifier that is nil.
|
65
63
|
#
|
66
64
|
def remove identifiers = []
|
67
|
-
# TODO Rewrite to use the category!!!
|
68
|
-
#
|
69
65
|
@combinations.reject! { |combination| combination.in?(identifiers) }
|
70
66
|
end
|
71
67
|
|
@@ -19,7 +19,7 @@ module Internals
|
|
19
19
|
def initialize combinations
|
20
20
|
super combinations
|
21
21
|
|
22
|
-
@@redis ||= ::Redis.new
|
22
|
+
@@redis ||= ::Redis.new :db => 15
|
23
23
|
end
|
24
24
|
|
25
25
|
# Returns the result ids for the allocation.
|
@@ -32,22 +32,33 @@ module Internals
|
|
32
32
|
end
|
33
33
|
|
34
34
|
result_id = generate_intermediate_result_id
|
35
|
-
|
36
|
-
#
|
35
|
+
|
36
|
+
# Intersect and store.
|
37
37
|
#
|
38
|
-
|
39
38
|
@@redis.zinterstore result_id, identifiers
|
40
39
|
|
41
|
-
|
40
|
+
# Get the stored result.
|
41
|
+
#
|
42
|
+
results = @@redis.zrange result_id, offset, (offset + amount)
|
43
|
+
|
44
|
+
# Delete the stored result as it was only for temporary purposes.
|
45
|
+
#
|
46
|
+
@@redis.del result_id
|
47
|
+
|
48
|
+
results
|
42
49
|
end
|
43
50
|
|
44
51
|
# Generate a multiple host/process safe result id.
|
45
52
|
#
|
46
|
-
|
53
|
+
require 'socket'
|
54
|
+
@@host = Socket.gethostname
|
55
|
+
define_method :host do
|
56
|
+
@@host
|
57
|
+
end
|
58
|
+
# Use the host and pid (generated lazily in child processes) for the result.
|
47
59
|
#
|
48
60
|
def generate_intermediate_result_id
|
49
|
-
#
|
50
|
-
:"host:#{Process.pid}:picky:result"
|
61
|
+
:"#{host}:#{@pid ||= Process.pid}:picky:result"
|
51
62
|
end
|
52
63
|
|
53
64
|
end
|
@@ -52,8 +52,6 @@ module Internals
|
|
52
52
|
# Add the wrapped possible allocations to the ones we already have.
|
53
53
|
#
|
54
54
|
previous_allocations + expanded_combinations.map! do |expanded_combination|
|
55
|
-
# TODO Insert Redis here?
|
56
|
-
#
|
57
55
|
@combinations_type.new(expanded_combination).pack_into_allocation(index.result_identifier) # TODO Do not extract result_identifier. Remove pack_into_allocation.
|
58
56
|
end
|
59
57
|
end)
|
@@ -116,14 +114,13 @@ module Internals
|
|
116
114
|
# ones, but I guess I am a bit sentimental.
|
117
115
|
#
|
118
116
|
def expand_combinations_from possible_combinations
|
117
|
+
# If an element has size 0, this means one of the
|
118
|
+
# tokens could not be allocated.
|
119
|
+
#
|
119
120
|
return if possible_combinations.any?(&:empty?)
|
120
121
|
|
121
122
|
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
122
123
|
#
|
123
|
-
# TODO How does this work if an element has size 0? Since below we account for size 0.
|
124
|
-
# Should we even continue if an element has size 0?
|
125
|
-
# This means one of the tokens cannot be allocated.
|
126
|
-
#
|
127
124
|
single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
|
128
125
|
|
129
126
|
# Initialize a group multiplicator.
|
@@ -8,8 +8,6 @@ module Internals
|
|
8
8
|
#
|
9
9
|
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
|
10
10
|
#
|
11
|
-
# TODO Make partial / similarity char configurable.
|
12
|
-
#
|
13
11
|
class Token # :nodoc:all
|
14
12
|
|
15
13
|
attr_reader :text, :original
|
@@ -179,8 +177,6 @@ module Internals
|
|
179
177
|
|
180
178
|
# Internal identifier.
|
181
179
|
#
|
182
|
-
# TODO Uh.
|
183
|
-
#
|
184
180
|
def identifier
|
185
181
|
"#{similar?? :similarity : :index}:#{@text}"
|
186
182
|
end
|
@@ -7,14 +7,7 @@ module Query
|
|
7
7
|
#
|
8
8
|
#
|
9
9
|
def initialize weights = {}
|
10
|
-
|
11
|
-
@weights = prepare weights
|
12
|
-
end
|
13
|
-
|
14
|
-
# Get the category indexes for the given bonuses.
|
15
|
-
#
|
16
|
-
def prepare weights
|
17
|
-
weights
|
10
|
+
@weights = weights
|
18
11
|
end
|
19
12
|
|
20
13
|
# Get the weight of an allocation.
|
@@ -38,10 +31,8 @@ module Query
|
|
38
31
|
# Note: Cache this if more complicated weighings become necessary.
|
39
32
|
#
|
40
33
|
def score combinations
|
41
|
-
# TODO
|
34
|
+
# TODO Or hide: combinations#to_weights_key
|
42
35
|
#
|
43
|
-
# weight_for combinations.map(&:category).clustered_uniq_fast.map!(&:name)
|
44
|
-
|
45
36
|
# TODO combinations could cluster uniq as combinations are added (since combinations don't change).
|
46
37
|
#
|
47
38
|
weight_for combinations.map(&:category_name).clustered_uniq_fast
|
@@ -15,8 +15,8 @@ module Internals
|
|
15
15
|
# Takes instances of Query::Allocations as param.
|
16
16
|
#
|
17
17
|
def initialize offset = 0, allocations = Query::Allocations.new
|
18
|
-
@offset
|
19
|
-
@allocations = allocations
|
18
|
+
@offset = offset
|
19
|
+
@allocations = allocations
|
20
20
|
end
|
21
21
|
# Create new results and calculate the ids.
|
22
22
|
#
|
@@ -26,7 +26,7 @@ module Internals
|
|
26
26
|
results
|
27
27
|
end
|
28
28
|
|
29
|
-
#
|
29
|
+
# Returns a hash with the allocations, offset, duration and total.
|
30
30
|
#
|
31
31
|
def serialize
|
32
32
|
{ allocations: allocations.to_result,
|
@@ -85,13 +85,6 @@ module Internals
|
|
85
85
|
def ids amount = 20
|
86
86
|
allocations.ids amount
|
87
87
|
end
|
88
|
-
# Gets an amout of random ids from the allocations.
|
89
|
-
#
|
90
|
-
# Note: Basically delegates to the allocations.
|
91
|
-
#
|
92
|
-
def random_ids amount = 1
|
93
|
-
allocations.random_ids amount
|
94
|
-
end
|
95
88
|
|
96
89
|
# Human readable log.
|
97
90
|
#
|