picky 3.5.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/bundle.rb +37 -4
- data/lib/picky/bundle_indexed.rb +12 -8
- data/lib/picky/bundle_indexing.rb +6 -26
- data/lib/picky/bundle_realtime.rb +26 -16
- data/lib/picky/category_indexing.rb +1 -3
- data/lib/picky/category_realtime.rb +1 -1
- data/lib/picky/character_substituters/west_european.rb +4 -4
- data/lib/picky/generators/partial/infix.rb +0 -47
- data/lib/picky/generators/partial/none.rb +0 -6
- data/lib/picky/generators/partial/substring.rb +0 -47
- data/lib/picky/generators/similarity/double_metaphone.rb +3 -3
- data/lib/picky/generators/similarity/metaphone.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +12 -24
- data/lib/picky/generators/similarity/soundex.rb +3 -3
- data/lib/picky/generators/weights/constant.rb +46 -0
- data/lib/picky/generators/weights/dynamic.rb +37 -0
- data/lib/picky/generators/weights/logarithmic.rb +0 -10
- data/lib/picky/generators/weights/runtime.rb +41 -0
- data/lib/picky/loader.rb +3 -3
- data/lib/picky/query/allocations.rb +2 -1
- data/lib/picky/query/tokens.rb +0 -10
- data/spec/lib/category_indexed_spec.rb +1 -1
- data/spec/lib/character_substituters/west_european_spec.rb +11 -13
- data/spec/lib/generators/partial/infix_spec.rb +161 -161
- data/spec/lib/generators/partial/none_spec.rb +3 -3
- data/spec/lib/generators/partial/postfix_spec.rb +109 -109
- data/spec/lib/generators/partial/substring_spec.rb +190 -190
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +38 -38
- data/spec/lib/generators/similarity/metaphone_spec.rb +38 -38
- data/spec/lib/generators/similarity/soundex_spec.rb +38 -38
- data/spec/lib/generators/weights/constant_spec.rb +37 -0
- data/spec/lib/generators/weights/dynamic_spec.rb +27 -0
- data/spec/lib/generators/weights/logarithmic_spec.rb +10 -15
- data/spec/lib/indexed/bundle_spec.rb +3 -2
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +8 -8
- data/spec/lib/indexing/bundle_spec.rb +5 -3
- data/spec/specific/dynamic_weights_spec.rb +44 -0
- metadata +36 -28
- data/lib/picky/generators/base.rb +0 -19
data/lib/picky/bundle.rb
CHANGED
@@ -66,8 +66,11 @@ module Picky
|
|
66
66
|
|
67
67
|
# Initial indexes.
|
68
68
|
#
|
69
|
+
# Note that if the weights strategy doesn't need to be saved,
|
70
|
+
# the strategy itself pretends to be an index.
|
71
|
+
#
|
69
72
|
@inverted = @backend_inverted.initial
|
70
|
-
@weights = @backend_weights.initial
|
73
|
+
@weights = @weights_strategy.saved?? @backend_weights.initial : @weights_strategy
|
71
74
|
@similarity = @backend_similarity.initial
|
72
75
|
@configuration = @backend_configuration.initial
|
73
76
|
|
@@ -77,6 +80,30 @@ module Picky
|
|
77
80
|
"#{category.identifier}:#{name}"
|
78
81
|
end
|
79
82
|
|
83
|
+
# "Empties" the index(es) by getting a new empty
|
84
|
+
# internal backend instance.
|
85
|
+
#
|
86
|
+
def empty
|
87
|
+
empty_inverted
|
88
|
+
empty_weights
|
89
|
+
empty_similarity
|
90
|
+
empty_configuration
|
91
|
+
end
|
92
|
+
def empty_inverted
|
93
|
+
@inverted = @backend_inverted.empty
|
94
|
+
end
|
95
|
+
def empty_weights
|
96
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
97
|
+
#
|
98
|
+
@weights = @weights_strategy.saved?? @backend_weights.empty : @weights_strategy
|
99
|
+
end
|
100
|
+
def empty_similarity
|
101
|
+
@similarity = @backend_similarity.empty
|
102
|
+
end
|
103
|
+
def empty_configuration
|
104
|
+
@configuration = @backend_configuration.empty
|
105
|
+
end
|
106
|
+
|
80
107
|
# Get a list of similar texts.
|
81
108
|
#
|
82
109
|
# Note: Does not return itself.
|
@@ -113,7 +140,9 @@ module Picky
|
|
113
140
|
#
|
114
141
|
def backup
|
115
142
|
@backend_inverted.backup if @backend_inverted.respond_to? :backup
|
116
|
-
|
143
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
144
|
+
#
|
145
|
+
@backend_weights.backup if @backend_weights.respond_to?(:backup) && @weights_strategy.saved? # Parenthesized: otherwise `&&` is swallowed into respond_to?'s argument.
|
117
146
|
@backend_similarity.backup if @backend_similarity.respond_to? :backup
|
118
147
|
@backend_configuration.backup if @backend_configuration.respond_to? :backup
|
119
148
|
end
|
@@ -122,7 +151,9 @@ module Picky
|
|
122
151
|
#
|
123
152
|
def restore
|
124
153
|
@backend_inverted.restore if @backend_inverted.respond_to? :restore
|
125
|
-
|
154
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
155
|
+
#
|
156
|
+
@backend_weights.restore if @backend_weights.respond_to?(:restore) && @weights_strategy.saved? # Parenthesized: otherwise `&&` is swallowed into respond_to?'s argument.
|
126
157
|
@backend_similarity.restore if @backend_similarity.respond_to? :restore
|
127
158
|
@backend_configuration.restore if @backend_configuration.respond_to? :restore
|
128
159
|
end
|
@@ -131,7 +162,9 @@ module Picky
|
|
131
162
|
#
|
132
163
|
def delete
|
133
164
|
@backend_inverted.delete if @backend_inverted.respond_to? :delete
|
134
|
-
|
165
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
166
|
+
#
|
167
|
+
@backend_weights.delete if @backend_weights.respond_to?(:delete) && @weights_strategy.saved? # Parenthesized: otherwise `&&` is swallowed into respond_to?'s argument.
|
135
168
|
@backend_similarity.delete if @backend_similarity.respond_to? :delete
|
136
169
|
@backend_configuration.delete if @backend_configuration.respond_to? :delete
|
137
170
|
end
|
data/lib/picky/bundle_indexed.rb
CHANGED
@@ -22,24 +22,24 @@ module Picky
|
|
22
22
|
#
|
23
23
|
# Returns a (potentially empty) array of ids.
|
24
24
|
#
|
25
|
-
def ids
|
26
|
-
@inverted[
|
25
|
+
def ids sym_or_string
|
26
|
+
@inverted[sym_or_string] || []
|
27
27
|
end
|
28
28
|
|
29
29
|
# Get a weight for the given symbol.
|
30
30
|
#
|
31
31
|
# Returns a number, or nil.
|
32
32
|
#
|
33
|
-
def weight
|
34
|
-
@weights[
|
33
|
+
def weight sym_or_string
|
34
|
+
@weights[sym_or_string]
|
35
35
|
end
|
36
36
|
|
37
37
|
# Get settings for this bundle.
|
38
38
|
#
|
39
39
|
# Returns an object.
|
40
40
|
#
|
41
|
-
def []
|
42
|
-
@configuration[
|
41
|
+
def [] sym_or_string
|
42
|
+
@configuration[sym_or_string]
|
43
43
|
end
|
44
44
|
|
45
45
|
# Loads all indexes.
|
@@ -62,7 +62,9 @@ module Picky
|
|
62
62
|
# Loads the weights index.
|
63
63
|
#
|
64
64
|
def load_weights
|
65
|
-
|
65
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
66
|
+
#
|
67
|
+
self.weights = @backend_weights.load if @weights_strategy.saved?
|
66
68
|
end
|
67
69
|
# Loads the similarity index.
|
68
70
|
#
|
@@ -92,7 +94,9 @@ module Picky
|
|
92
94
|
# Clears the weights index.
|
93
95
|
#
|
94
96
|
def clear_weights
|
95
|
-
|
97
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
98
|
+
#
|
99
|
+
weights.clear if @weights_strategy.saved?
|
96
100
|
end
|
97
101
|
# Clears the similarity index.
|
98
102
|
#
|
@@ -35,28 +35,6 @@ module Picky
|
|
35
35
|
delegate :clear,
|
36
36
|
:to => :inverted
|
37
37
|
|
38
|
-
# "Empties" the index(es) by getting a new empty
|
39
|
-
# internal backend instance.
|
40
|
-
#
|
41
|
-
def empty
|
42
|
-
empty_inverted
|
43
|
-
empty_weights
|
44
|
-
empty_similarity
|
45
|
-
empty_configuration
|
46
|
-
end
|
47
|
-
def empty_inverted
|
48
|
-
@inverted = @backend_inverted.empty
|
49
|
-
end
|
50
|
-
def empty_weights
|
51
|
-
@weights = @backend_weights.empty
|
52
|
-
end
|
53
|
-
def empty_similarity
|
54
|
-
@similarity = @backend_similarity.empty
|
55
|
-
end
|
56
|
-
def empty_configuration
|
57
|
-
@configuration = @backend_configuration.empty
|
58
|
-
end
|
59
|
-
|
60
38
|
# Saves the indexes in a dump file.
|
61
39
|
#
|
62
40
|
def dump
|
@@ -68,22 +46,24 @@ module Picky
|
|
68
46
|
# Dumps the core index.
|
69
47
|
#
|
70
48
|
def dump_inverted
|
71
|
-
@backend_inverted.dump
|
49
|
+
@backend_inverted.dump @inverted
|
72
50
|
end
|
73
51
|
# Dumps the weights index.
|
74
52
|
#
|
75
53
|
def dump_weights
|
76
|
-
|
54
|
+
# TODO THINK about this. Perhaps the strategies should implement the backend methods?
|
55
|
+
#
|
56
|
+
@backend_weights.dump @weights if @weights_strategy.saved?
|
77
57
|
end
|
78
58
|
# Dumps the similarity index.
|
79
59
|
#
|
80
60
|
def dump_similarity
|
81
|
-
@backend_similarity.dump
|
61
|
+
@backend_similarity.dump @similarity
|
82
62
|
end
|
83
63
|
# Dumps the configuration index.
|
84
64
|
#
|
85
65
|
def dump_configuration
|
86
|
-
@backend_configuration.dump
|
66
|
+
@backend_configuration.dump @configuration
|
87
67
|
end
|
88
68
|
|
89
69
|
end
|
@@ -32,39 +32,49 @@ module Picky
|
|
32
32
|
|
33
33
|
# Returns a reference to the array where the id has been added.
|
34
34
|
#
|
35
|
-
|
36
|
-
|
37
|
-
def add id, sym, where = :unshift
|
38
|
-
ary = @inverted[sym]
|
35
|
+
def add id, str_or_sym, where = :unshift
|
36
|
+
ary = @inverted[str_or_sym]
|
39
37
|
|
40
|
-
|
41
|
-
|
38
|
+
str_or_syms = @realtime_mapping[id]
|
39
|
+
str_or_syms = (@realtime_mapping[id] = []) unless str_or_syms # TODO Nicefy.
|
42
40
|
|
43
41
|
# Inverted.
|
44
42
|
#
|
45
|
-
ids = if
|
46
|
-
ids = @inverted[
|
43
|
+
ids = if str_or_syms.include? str_or_sym
|
44
|
+
ids = @inverted[str_or_sym]
|
47
45
|
ids.delete id
|
48
46
|
ids.send where, id
|
49
47
|
else
|
50
|
-
|
51
|
-
ids = @inverted[
|
48
|
+
str_or_syms << str_or_sym
|
49
|
+
ids = @inverted[str_or_sym] ||= []
|
52
50
|
ids.send where, id
|
53
51
|
end
|
54
52
|
|
55
53
|
# Weights.
|
56
54
|
#
|
57
|
-
@weights[
|
55
|
+
@weights[str_or_sym] = self.weights_strategy.weight_for ids.size
|
58
56
|
|
59
57
|
# Similarity.
|
60
58
|
#
|
61
|
-
|
59
|
+
add_similarity str_or_sym, where
|
60
|
+
|
61
|
+
# Return reference.
|
62
|
+
#
|
63
|
+
ids
|
64
|
+
end
|
65
|
+
|
66
|
+
# Add string/symbol to similarity index.
|
67
|
+
#
|
68
|
+
# TODO Probably where makes no sense here. Should have its own order.
|
69
|
+
#
|
70
|
+
def add_similarity str_or_sym, where = :unshift
|
71
|
+
if encoded = self.similarity_strategy.encoded(str_or_sym)
|
62
72
|
similarity = @similarity[encoded] ||= []
|
63
|
-
if similarity.include?
|
64
|
-
similarity.delete
|
65
|
-
similarity.send where,
|
73
|
+
if similarity.include? str_or_sym
|
74
|
+
similarity.delete str_or_sym # Not completely correct, as others will also be affected, but meh.
|
75
|
+
similarity.send where, str_or_sym #
|
66
76
|
else
|
67
|
-
similarity.send where,
|
77
|
+
similarity.send where, str_or_sym
|
68
78
|
end
|
69
79
|
end
|
70
80
|
end
|
@@ -65,13 +65,11 @@ module Picky
|
|
65
65
|
# If we have no explicit source, we'll check the index for one.
|
66
66
|
#
|
67
67
|
def source
|
68
|
-
|
68
|
+
extract_source || @index.source
|
69
69
|
end
|
70
70
|
# Extract the actual source if it is wrapped in a time
|
71
71
|
# capsule, i.e. a block/lambda.
|
72
72
|
#
|
73
|
-
# TODO Extract into module.
|
74
|
-
#
|
75
73
|
def extract_source
|
76
74
|
@source = @source.respond_to?(:call) ? @source.call : @source
|
77
75
|
end
|
@@ -37,7 +37,7 @@ module Picky
|
|
37
37
|
#
|
38
38
|
def add_tokenized_token id, text, where = :unshift
|
39
39
|
return unless text
|
40
|
-
id
|
40
|
+
id = id.send key_format # TODO Speed this up!
|
41
41
|
# text = text.to_sym if @symbols # TODO Symbols.
|
42
42
|
exact.add id, text, where
|
43
43
|
partial.add_partialized id, text, where
|
@@ -28,21 +28,21 @@ module Picky
|
|
28
28
|
# (See the associated spec for all examples)
|
29
29
|
#
|
30
30
|
def substitute text
|
31
|
-
trans = @chars.new(text).normalize
|
31
|
+
trans = @chars.new(text).normalize :kd
|
32
32
|
|
33
33
|
# Substitute special cases.
|
34
34
|
#
|
35
|
-
trans.gsub!
|
35
|
+
trans.gsub! 'ß', 'ss'
|
36
36
|
|
37
37
|
# Substitute umlauts (of A,O,U,a,o,u).
|
38
38
|
#
|
39
|
-
trans.gsub!
|
39
|
+
trans.gsub! /([AOUaou])\314\210/u, '\1e'
|
40
40
|
|
41
41
|
# Get rid of acutes, graves etc.
|
42
42
|
#
|
43
43
|
trans.unpack('U*').select { |cp|
|
44
44
|
cp < 0x0300 || cp > 0x035F
|
45
|
-
}.pack
|
45
|
+
}.pack 'U*'
|
46
46
|
end
|
47
47
|
|
48
48
|
def to_s # :nodoc:
|
@@ -41,53 +41,6 @@ module Picky
|
|
41
41
|
token.each_intoken min, max, &block
|
42
42
|
end
|
43
43
|
|
44
|
-
# Generates a partial index from the given inverted index.
|
45
|
-
#
|
46
|
-
def generate_from inverted
|
47
|
-
result = {}
|
48
|
-
|
49
|
-
# Generate for each key token the subtokens.
|
50
|
-
#
|
51
|
-
i = 0
|
52
|
-
j = 0
|
53
|
-
inverted.each_key do |token|
|
54
|
-
i += 1
|
55
|
-
if i == 5000
|
56
|
-
j += 1
|
57
|
-
timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
|
58
|
-
i = 0
|
59
|
-
end
|
60
|
-
generate_for token, inverted, result
|
61
|
-
end
|
62
|
-
|
63
|
-
# Remove duplicate ids.
|
64
|
-
#
|
65
|
-
# THINK If it is unique for a subtoken, it is
|
66
|
-
# unique for all derived longer tokens.
|
67
|
-
#
|
68
|
-
result.each_value &:uniq!
|
69
|
-
|
70
|
-
result
|
71
|
-
end
|
72
|
-
|
73
|
-
# To each shortened token of :test
|
74
|
-
# :test, :tes, :te, :t
|
75
|
-
# add all ids of :test
|
76
|
-
#
|
77
|
-
# "token" here means just text.
|
78
|
-
#
|
79
|
-
# THINK Could be improved by appending the aforegoing ids?
|
80
|
-
#
|
81
|
-
def generate_for token, inverted, result
|
82
|
-
each_partial token do |intoken|
|
83
|
-
if result[intoken]
|
84
|
-
result[intoken] += inverted[token] # unique
|
85
|
-
else
|
86
|
-
result[intoken] = inverted[token].dup
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
44
|
end
|
92
45
|
|
93
46
|
end
|
@@ -78,53 +78,6 @@ module Picky
|
|
78
78
|
@generator.each_subtoken token, &block
|
79
79
|
end
|
80
80
|
|
81
|
-
# Generates a partial index from the given inverted index.
|
82
|
-
#
|
83
|
-
def generate_from inverted
|
84
|
-
result = {}
|
85
|
-
|
86
|
-
# Generate for each key token the subtokens.
|
87
|
-
#
|
88
|
-
i = 0
|
89
|
-
j = 0
|
90
|
-
inverted.each_key do |token|
|
91
|
-
i += 1
|
92
|
-
if i == 5000
|
93
|
-
j += 1
|
94
|
-
timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
|
95
|
-
i = 0
|
96
|
-
end
|
97
|
-
generate_for token, inverted, result
|
98
|
-
end
|
99
|
-
|
100
|
-
# Remove duplicate ids.
|
101
|
-
#
|
102
|
-
# THINK If it is unique for a subtoken, it is
|
103
|
-
# unique for all derived longer tokens.
|
104
|
-
#
|
105
|
-
result.each_value &:uniq!
|
106
|
-
|
107
|
-
result
|
108
|
-
end
|
109
|
-
|
110
|
-
# To each shortened token of :test
|
111
|
-
# :test, :tes, :te, :t
|
112
|
-
# add all ids of :test
|
113
|
-
#
|
114
|
-
# "token" here means just text.
|
115
|
-
#
|
116
|
-
# THINK Could be improved by appending the aforegoing ids?
|
117
|
-
#
|
118
|
-
def generate_for token, inverted, result
|
119
|
-
each_partial token do |subtoken|
|
120
|
-
if result[subtoken]
|
121
|
-
result[subtoken] += inverted[token] # unique
|
122
|
-
else
|
123
|
-
result[subtoken] = inverted[token].dup
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
81
|
end
|
129
82
|
|
130
83
|
end
|
@@ -14,12 +14,12 @@ module Picky
|
|
14
14
|
#
|
15
15
|
class DoubleMetaphone < Phonetic
|
16
16
|
|
17
|
-
# Encodes the given symbol.
|
17
|
+
# Encodes the given string/symbol.
|
18
18
|
#
|
19
19
|
# Returns a symbol.
|
20
20
|
#
|
21
|
-
def encoded
|
22
|
-
codes = Text::Metaphone.double_metaphone
|
21
|
+
def encoded str_or_sym
|
22
|
+
codes = Text::Metaphone.double_metaphone str_or_sym.to_s
|
23
23
|
codes.first.intern unless codes.empty?
|
24
24
|
end
|
25
25
|
|
@@ -14,12 +14,12 @@ module Picky
|
|
14
14
|
#
|
15
15
|
class Metaphone < Phonetic
|
16
16
|
|
17
|
-
# Encodes the given symbol.
|
17
|
+
# Encodes the given string/symbol.
|
18
18
|
#
|
19
19
|
# Returns a symbol.
|
20
20
|
#
|
21
|
-
def encoded
|
22
|
-
code = Text::Metaphone.metaphone
|
21
|
+
def encoded str_or_sym
|
22
|
+
code = Text::Metaphone.metaphone str_or_sym.to_s
|
23
23
|
code.intern if code
|
24
24
|
end
|
25
25
|
|
@@ -23,20 +23,23 @@ module Picky
|
|
23
23
|
@amount = amount
|
24
24
|
end
|
25
25
|
|
26
|
-
# Generates an index for the given index (in exact index style).
|
27
|
-
#
|
28
|
-
# In the following form:
|
29
|
-
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
30
|
-
#
|
31
|
-
def generate_from inverted
|
32
|
-
|
33
|
-
|
34
|
-
end
|
26
|
+
# # Generates an index for the given index (in exact index style).
|
27
|
+
# #
|
28
|
+
# # In the following form:
|
29
|
+
# # [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
30
|
+
# #
|
31
|
+
# def generate_from inverted
|
32
|
+
# hash = hashify inverted.keys
|
33
|
+
# sort hash
|
34
|
+
# end
|
35
35
|
|
36
36
|
protected
|
37
37
|
|
38
38
|
# Sorts the index values in place.
|
39
39
|
#
|
40
|
+
# TODO Include this again. Sort at the end.
|
41
|
+
# Or sort when inserting in realtime.
|
42
|
+
#
|
40
43
|
def sort hash
|
41
44
|
hash.each_pair.each do |code, ary|
|
42
45
|
ary.sort_by_levenshtein! code
|
@@ -45,21 +48,6 @@ module Picky
|
|
45
48
|
hash
|
46
49
|
end
|
47
50
|
|
48
|
-
# Hashifies a list of symbols.
|
49
|
-
#
|
50
|
-
# Where:
|
51
|
-
# { encoded_sym => [syms] }
|
52
|
-
#
|
53
|
-
def hashify list
|
54
|
-
list.inject({}) do |total, element|
|
55
|
-
if code = encoded(element)
|
56
|
-
total[code] ||= []
|
57
|
-
total[code] << element
|
58
|
-
end
|
59
|
-
total
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
51
|
end
|
64
52
|
|
65
53
|
end
|
@@ -14,12 +14,12 @@ module Picky
|
|
14
14
|
#
|
15
15
|
class Soundex < Phonetic
|
16
16
|
|
17
|
-
# Encodes the given symbol.
|
17
|
+
# Encodes the given string/symbol.
|
18
18
|
#
|
19
19
|
# Returns a symbol.
|
20
20
|
#
|
21
|
-
def encoded
|
22
|
-
code = Text::Soundex.soundex
|
21
|
+
def encoded str_or_sym
|
22
|
+
code = Text::Soundex.soundex str_or_sym.to_s
|
23
23
|
code.intern if code
|
24
24
|
end
|
25
25
|
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Weights
|
6
|
+
|
7
|
+
# Uses a constant weight.
|
8
|
+
# Default is 0.0.
|
9
|
+
#
|
10
|
+
# Note: This is not saved.
|
11
|
+
#
|
12
|
+
# Examples:
|
13
|
+
# * Picky::Weights::Constant.new # Uses 0.0 as a constant weight.
|
14
|
+
# * Picky::Weights::Constant.new(3.14) # Uses 3.14 as a constant weight.
|
15
|
+
#
|
16
|
+
class Constant < Runtime
|
17
|
+
|
18
|
+
def initialize weight = 0.0
|
19
|
+
@weight = weight
|
20
|
+
end
|
21
|
+
|
22
|
+
# Always returns the constant weight,
|
23
|
+
# regardless of the given string/symbol.
|
24
|
+
#
|
25
|
+
def [] _
|
26
|
+
|
27
|
+
@weight
|
28
|
+
end
|
29
|
+
|
30
|
+
# Returns the constant weight,
|
31
|
+
# regardless of the given argument.
|
32
|
+
#
|
33
|
+
# Not really used, but is more
|
34
|
+
# correct this way.
|
35
|
+
#
|
36
|
+
def weight_for _
|
37
|
+
@weight
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Weights
|
6
|
+
|
7
|
+
# Uses a dynamic weight.
|
8
|
+
#
|
9
|
+
# Note: This is not saved.
|
10
|
+
#
|
11
|
+
# Examples:
|
12
|
+
# * Picky::Weights::Dynamic.new do |str_or_sym|
|
13
|
+
# str_or_sym.length
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
class Dynamic < Runtime
|
17
|
+
|
18
|
+
# Give it a block that takes a string/symbol
|
19
|
+
# and returns a weight.
|
20
|
+
#
|
21
|
+
def initialize &calculation
|
22
|
+
@calculation = calculation
|
23
|
+
end
|
24
|
+
|
25
|
+
# Calls the block to calculate the weight.
|
26
|
+
#
|
27
|
+
def [] str_or_sym
|
28
|
+
@calculation.call str_or_sym
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
@@ -11,16 +11,6 @@ module Picky
|
|
11
11
|
#
|
12
12
|
class Logarithmic < Strategy
|
13
13
|
|
14
|
-
# Generates a partial index from the given inverted index.
|
15
|
-
#
|
16
|
-
def generate_from inverted
|
17
|
-
inverted.inject({}) do |hash, (text, ids)|
|
18
|
-
weight = weight_for ids.size
|
19
|
-
hash[text] ||= weight.round(2) if weight
|
20
|
-
hash
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
14
|
# Sets the weight value.
|
25
15
|
#
|
26
16
|
# If the size is 0 or one, we would get -Infinity or 0.0.
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
module Weights
|
5
|
+
|
6
|
+
# Is used for runtime-only strategies.
|
7
|
+
#
|
8
|
+
# Note: Pretends to be a backend but
|
9
|
+
# does nothing at all.
|
10
|
+
#
|
11
|
+
# To override, implement:
|
12
|
+
# * weight_for(size) # During indextime. # Probably never used.
|
13
|
+
# * [] symbol_or_string # During runtime.
|
14
|
+
#
|
15
|
+
# TODO Find a better name.
|
16
|
+
#
|
17
|
+
class Runtime < Strategy
|
18
|
+
|
19
|
+
# It is not saved, by default.
|
20
|
+
#
|
21
|
+
def saved?
|
22
|
+
false
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns nil.
|
26
|
+
#
|
27
|
+
def weight_for _
|
28
|
+
# Nothing.
|
29
|
+
end
|
30
|
+
|
31
|
+
# Saves nothing by default.
|
32
|
+
#
|
33
|
+
def []= _, _
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|