picky 3.5.0 → 3.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/lib/picky/bundle.rb +37 -4
  2. data/lib/picky/bundle_indexed.rb +12 -8
  3. data/lib/picky/bundle_indexing.rb +6 -26
  4. data/lib/picky/bundle_realtime.rb +26 -16
  5. data/lib/picky/category_indexing.rb +1 -3
  6. data/lib/picky/category_realtime.rb +1 -1
  7. data/lib/picky/character_substituters/west_european.rb +4 -4
  8. data/lib/picky/generators/partial/infix.rb +0 -47
  9. data/lib/picky/generators/partial/none.rb +0 -6
  10. data/lib/picky/generators/partial/substring.rb +0 -47
  11. data/lib/picky/generators/similarity/double_metaphone.rb +3 -3
  12. data/lib/picky/generators/similarity/metaphone.rb +3 -3
  13. data/lib/picky/generators/similarity/phonetic.rb +12 -24
  14. data/lib/picky/generators/similarity/soundex.rb +3 -3
  15. data/lib/picky/generators/weights/constant.rb +46 -0
  16. data/lib/picky/generators/weights/dynamic.rb +37 -0
  17. data/lib/picky/generators/weights/logarithmic.rb +0 -10
  18. data/lib/picky/generators/weights/runtime.rb +41 -0
  19. data/lib/picky/loader.rb +3 -3
  20. data/lib/picky/query/allocations.rb +2 -1
  21. data/lib/picky/query/tokens.rb +0 -10
  22. data/spec/lib/category_indexed_spec.rb +1 -1
  23. data/spec/lib/character_substituters/west_european_spec.rb +11 -13
  24. data/spec/lib/generators/partial/infix_spec.rb +161 -161
  25. data/spec/lib/generators/partial/none_spec.rb +3 -3
  26. data/spec/lib/generators/partial/postfix_spec.rb +109 -109
  27. data/spec/lib/generators/partial/substring_spec.rb +190 -190
  28. data/spec/lib/generators/similarity/double_metaphone_spec.rb +38 -38
  29. data/spec/lib/generators/similarity/metaphone_spec.rb +38 -38
  30. data/spec/lib/generators/similarity/soundex_spec.rb +38 -38
  31. data/spec/lib/generators/weights/constant_spec.rb +37 -0
  32. data/spec/lib/generators/weights/dynamic_spec.rb +27 -0
  33. data/spec/lib/generators/weights/logarithmic_spec.rb +10 -15
  34. data/spec/lib/indexed/bundle_spec.rb +3 -2
  35. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +8 -8
  36. data/spec/lib/indexing/bundle_spec.rb +5 -3
  37. data/spec/specific/dynamic_weights_spec.rb +44 -0
  38. metadata +36 -28
  39. data/lib/picky/generators/base.rb +0 -19
data/lib/picky/bundle.rb CHANGED
@@ -66,8 +66,11 @@ module Picky
66
66
 
67
67
  # Initial indexes.
68
68
  #
69
+ # Note that if the weights strategy doesn't need to be saved,
70
+ # the strategy itself pretends to be an index.
71
+ #
69
72
  @inverted = @backend_inverted.initial
70
- @weights = @backend_weights.initial
73
+ @weights = @weights_strategy.saved?? @backend_weights.initial : @weights_strategy
71
74
  @similarity = @backend_similarity.initial
72
75
  @configuration = @backend_configuration.initial
73
76
 
@@ -77,6 +80,30 @@ module Picky
77
80
  "#{category.identifier}:#{name}"
78
81
  end
79
82
 
83
+ # "Empties" the index(es) by getting a new empty
84
+ # internal backend instance.
85
+ #
86
+ def empty
87
+ empty_inverted
88
+ empty_weights
89
+ empty_similarity
90
+ empty_configuration
91
+ end
92
+ def empty_inverted
93
+ @inverted = @backend_inverted.empty
94
+ end
95
+ def empty_weights
96
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
97
+ #
98
+ @weights = @weights_strategy.saved?? @backend_weights.empty : @weights_strategy
99
+ end
100
+ def empty_similarity
101
+ @similarity = @backend_similarity.empty
102
+ end
103
+ def empty_configuration
104
+ @configuration = @backend_configuration.empty
105
+ end
106
+
80
107
  # Get a list of similar texts.
81
108
  #
82
109
  # Note: Does not return itself.
@@ -113,7 +140,9 @@ module Picky
113
140
  #
114
141
  def backup
115
142
  @backend_inverted.backup if @backend_inverted.respond_to? :backup
116
- @backend_weights.backup if @backend_weights.respond_to? :backup
143
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
144
+ #
145
+ @backend_weights.backup if @backend_weights.respond_to? :backup && @weights_strategy.saved?
117
146
  @backend_similarity.backup if @backend_similarity.respond_to? :backup
118
147
  @backend_configuration.backup if @backend_configuration.respond_to? :backup
119
148
  end
@@ -122,7 +151,9 @@ module Picky
122
151
  #
123
152
  def restore
124
153
  @backend_inverted.restore if @backend_inverted.respond_to? :restore
125
- @backend_weights.restore if @backend_weights.respond_to? :restore
154
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
155
+ #
156
+ @backend_weights.restore if @backend_weights.respond_to? :restore && @weights_strategy.saved?
126
157
  @backend_similarity.restore if @backend_similarity.respond_to? :restore
127
158
  @backend_configuration.restore if @backend_configuration.respond_to? :restore
128
159
  end
@@ -131,7 +162,9 @@ module Picky
131
162
  #
132
163
  def delete
133
164
  @backend_inverted.delete if @backend_inverted.respond_to? :delete
134
- @backend_weights.delete if @backend_weights.respond_to? :delete
165
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
166
+ #
167
+ @backend_weights.delete if @backend_weights.respond_to? :delete && @weights_strategy.saved?
135
168
  @backend_similarity.delete if @backend_similarity.respond_to? :delete
136
169
  @backend_configuration.delete if @backend_configuration.respond_to? :delete
137
170
  end
@@ -22,24 +22,24 @@ module Picky
22
22
  #
23
23
  # Returns a (potentially empty) array of ids.
24
24
  #
25
- def ids sym
26
- @inverted[sym] || []
25
+ def ids sym_or_string
26
+ @inverted[sym_or_string] || []
27
27
  end
28
28
 
29
29
  # Get a weight for the given symbol.
30
30
  #
31
31
  # Returns a number, or nil.
32
32
  #
33
- def weight sym
34
- @weights[sym]
33
+ def weight sym_or_string
34
+ @weights[sym_or_string]
35
35
  end
36
36
 
37
37
  # Get settings for this bundle.
38
38
  #
39
39
  # Returns an object.
40
40
  #
41
- def [] sym
42
- @configuration[sym]
41
+ def [] sym_or_string
42
+ @configuration[sym_or_string]
43
43
  end
44
44
 
45
45
  # Loads all indexes.
@@ -62,7 +62,9 @@ module Picky
62
62
  # Loads the weights index.
63
63
  #
64
64
  def load_weights
65
- self.weights = @backend_weights.load
65
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
66
+ #
67
+ self.weights = @backend_weights.load if @weights_strategy.saved?
66
68
  end
67
69
  # Loads the similarity index.
68
70
  #
@@ -92,7 +94,9 @@ module Picky
92
94
  # Clears the weights index.
93
95
  #
94
96
  def clear_weights
95
- weights.clear
97
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
98
+ #
99
+ weights.clear if @weights_strategy.saved?
96
100
  end
97
101
  # Clears the similarity index.
98
102
  #
@@ -35,28 +35,6 @@ module Picky
35
35
  delegate :clear,
36
36
  :to => :inverted
37
37
 
38
- # "Empties" the index(es) by getting a new empty
39
- # internal backend instance.
40
- #
41
- def empty
42
- empty_inverted
43
- empty_weights
44
- empty_similarity
45
- empty_configuration
46
- end
47
- def empty_inverted
48
- @inverted = @backend_inverted.empty
49
- end
50
- def empty_weights
51
- @weights = @backend_weights.empty
52
- end
53
- def empty_similarity
54
- @similarity = @backend_similarity.empty
55
- end
56
- def empty_configuration
57
- @configuration = @backend_configuration.empty
58
- end
59
-
60
38
  # Saves the indexes in a dump file.
61
39
  #
62
40
  def dump
@@ -68,22 +46,24 @@ module Picky
68
46
  # Dumps the core index.
69
47
  #
70
48
  def dump_inverted
71
- @backend_inverted.dump self.inverted
49
+ @backend_inverted.dump @inverted
72
50
  end
73
51
  # Dumps the weights index.
74
52
  #
75
53
  def dump_weights
76
- @backend_weights.dump self.weights
54
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
55
+ #
56
+ @backend_weights.dump @weights if @weights_strategy.saved?
77
57
  end
78
58
  # Dumps the similarity index.
79
59
  #
80
60
  def dump_similarity
81
- @backend_similarity.dump self.similarity
61
+ @backend_similarity.dump @similarity
82
62
  end
83
63
  # Dumps the configuration index.
84
64
  #
85
65
  def dump_configuration
86
- @backend_configuration.dump self.configuration
66
+ @backend_configuration.dump @configuration
87
67
  end
88
68
 
89
69
  end
@@ -32,39 +32,49 @@ module Picky
32
32
 
33
33
  # Returns a reference to the array where the id has been added.
34
34
  #
35
- # TODO Rename sym.
36
- #
37
- def add id, sym, where = :unshift
38
- ary = @inverted[sym]
35
+ def add id, str_or_sym, where = :unshift
36
+ ary = @inverted[str_or_sym]
39
37
 
40
- syms = @realtime_mapping[id]
41
- syms = (@realtime_mapping[id] = []) unless syms # TODO Nicefy.
38
+ str_or_syms = @realtime_mapping[id]
39
+ str_or_syms = (@realtime_mapping[id] = []) unless str_or_syms # TODO Nicefy.
42
40
 
43
41
  # Inverted.
44
42
  #
45
- ids = if syms.include? sym
46
- ids = @inverted[sym]
43
+ ids = if str_or_syms.include? str_or_sym
44
+ ids = @inverted[str_or_sym]
47
45
  ids.delete id
48
46
  ids.send where, id
49
47
  else
50
- syms << sym
51
- ids = @inverted[sym] ||= []
48
+ str_or_syms << str_or_sym
49
+ ids = @inverted[str_or_sym] ||= []
52
50
  ids.send where, id
53
51
  end
54
52
 
55
53
  # Weights.
56
54
  #
57
- @weights[sym] = self.weights_strategy.weight_for ids.size
55
+ @weights[str_or_sym] = self.weights_strategy.weight_for ids.size
58
56
 
59
57
  # Similarity.
60
58
  #
61
- if encoded = self.similarity_strategy.encoded(sym)
59
+ add_similarity str_or_sym, where
60
+
61
+ # Return reference.
62
+ #
63
+ ids
64
+ end
65
+
66
+ # Add string/symbol to similarity index.
67
+ #
68
+ # TODO Probably where makes no sense here. Should have its own order.
69
+ #
70
+ def add_similarity str_or_sym, where = :unshift
71
+ if encoded = self.similarity_strategy.encoded(str_or_sym)
62
72
  similarity = @similarity[encoded] ||= []
63
- if similarity.include? sym
64
- similarity.delete sym # Not completely correct, as others will also be affected, but meh.
65
- similarity.send where, sym #
73
+ if similarity.include? str_or_sym
74
+ similarity.delete str_or_sym # Not completely correct, as others will also be affected, but meh.
75
+ similarity.send where, str_or_sym #
66
76
  else
67
- similarity.send where, sym
77
+ similarity.send where, str_or_sym
68
78
  end
69
79
  end
70
80
  end
@@ -65,13 +65,11 @@ module Picky
65
65
  # If we have no explicit source, we'll check the index for one.
66
66
  #
67
67
  def source
68
- (@source && extract_source) || @index.source
68
+ extract_source || @index.source
69
69
  end
70
70
  # Extract the actual source if it is wrapped in a time
71
71
  # capsule, i.e. a block/lambda.
72
72
  #
73
- # TODO Extract into module.
74
- #
75
73
  def extract_source
76
74
  @source = @source.respond_to?(:call) ? @source.call : @source
77
75
  end
@@ -37,7 +37,7 @@ module Picky
37
37
  #
38
38
  def add_tokenized_token id, text, where = :unshift
39
39
  return unless text
40
- id = id.send key_format # TODO Speed this up!
40
+ id = id.send key_format # TODO Speed this up!
41
41
  # text = text.to_sym if @symbols # TODO Symbols.
42
42
  exact.add id, text, where
43
43
  partial.add_partialized id, text, where
@@ -28,21 +28,21 @@ module Picky
28
28
  # (See the associated spec for all examples)
29
29
  #
30
30
  def substitute text
31
- trans = @chars.new(text).normalize(:kd)
31
+ trans = @chars.new(text).normalize :kd
32
32
 
33
33
  # Substitute special cases.
34
34
  #
35
- trans.gsub!('ß', 'ss')
35
+ trans.gsub! 'ß', 'ss'
36
36
 
37
37
  # Substitute umlauts (of A,O,U,a,o,u).
38
38
  #
39
- trans.gsub!(/([AOUaou])\314\210/u, '\1e')
39
+ trans.gsub! /([AOUaou])\314\210/u, '\1e'
40
40
 
41
41
  # Get rid of ecutes, graves etc.
42
42
  #
43
43
  trans.unpack('U*').select { |cp|
44
44
  cp < 0x0300 || cp > 0x035F
45
- }.pack('U*')
45
+ }.pack 'U*'
46
46
  end
47
47
 
48
48
  def to_s # :nodoc:
@@ -41,53 +41,6 @@ module Picky
41
41
  token.each_intoken min, max, &block
42
42
  end
43
43
 
44
- # Generates a partial index from the given inverted index.
45
- #
46
- def generate_from inverted
47
- result = {}
48
-
49
- # Generate for each key token the subtokens.
50
- #
51
- i = 0
52
- j = 0
53
- inverted.each_key do |token|
54
- i += 1
55
- if i == 5000
56
- j += 1
57
- timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
58
- i = 0
59
- end
60
- generate_for token, inverted, result
61
- end
62
-
63
- # Remove duplicate ids.
64
- #
65
- # THINK If it is unique for a subtoken, it is
66
- # unique for all derived longer tokens.
67
- #
68
- result.each_value &:uniq!
69
-
70
- result
71
- end
72
-
73
- # To each shortened token of :test
74
- # :test, :tes, :te, :t
75
- # add all ids of :test
76
- #
77
- # "token" here means just text.
78
- #
79
- # THINK Could be improved by appending the aforegoing ids?
80
- #
81
- def generate_for token, inverted, result
82
- each_partial token do |intoken|
83
- if result[intoken]
84
- result[intoken] += inverted[token] # unique
85
- else
86
- result[intoken] = inverted[token].dup
87
- end
88
- end
89
- end
90
-
91
44
  end
92
45
 
93
46
  end
@@ -14,12 +14,6 @@ module Picky
14
14
  # yields nothing
15
15
  end
16
16
 
17
- # Returns an empty index.
18
- #
19
- def generate_from index
20
- {}
21
- end
22
-
23
17
  # Returns if this strategy's generated file is saved.
24
18
  #
25
19
  def saved?
@@ -78,53 +78,6 @@ module Picky
78
78
  @generator.each_subtoken token, &block
79
79
  end
80
80
 
81
- # Generates a partial index from the given inverted index.
82
- #
83
- def generate_from inverted
84
- result = {}
85
-
86
- # Generate for each key token the subtokens.
87
- #
88
- i = 0
89
- j = 0
90
- inverted.each_key do |token|
91
- i += 1
92
- if i == 5000
93
- j += 1
94
- timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
95
- i = 0
96
- end
97
- generate_for token, inverted, result
98
- end
99
-
100
- # Remove duplicate ids.
101
- #
102
- # THINK If it is unique for a subtoken, it is
103
- # unique for all derived longer tokens.
104
- #
105
- result.each_value &:uniq!
106
-
107
- result
108
- end
109
-
110
- # To each shortened token of :test
111
- # :test, :tes, :te, :t
112
- # add all ids of :test
113
- #
114
- # "token" here means just text.
115
- #
116
- # THINK Could be improved by appending the aforegoing ids?
117
- #
118
- def generate_for token, inverted, result
119
- each_partial token do |subtoken|
120
- if result[subtoken]
121
- result[subtoken] += inverted[token] # unique
122
- else
123
- result[subtoken] = inverted[token].dup
124
- end
125
- end
126
- end
127
-
128
81
  end
129
82
 
130
83
  end
@@ -14,12 +14,12 @@ module Picky
14
14
  #
15
15
  class DoubleMetaphone < Phonetic
16
16
 
17
- # Encodes the given symbol.
17
+ # Encodes the given string/symbol.
18
18
  #
19
19
  # Returns a symbol.
20
20
  #
21
- def encoded sym
22
- codes = Text::Metaphone.double_metaphone sym.to_s
21
+ def encoded str_or_sym
22
+ codes = Text::Metaphone.double_metaphone str_or_sym.to_s
23
23
  codes.first.intern unless codes.empty?
24
24
  end
25
25
 
@@ -14,12 +14,12 @@ module Picky
14
14
  #
15
15
  class Metaphone < Phonetic
16
16
 
17
- # Encodes the given symbol.
17
+ # Encodes the given string/symbol.
18
18
  #
19
19
  # Returns a symbol.
20
20
  #
21
- def encoded sym
22
- code = Text::Metaphone.metaphone sym.to_s
21
+ def encoded str_or_sym
22
+ code = Text::Metaphone.metaphone str_or_sym.to_s
23
23
  code.intern if code
24
24
  end
25
25
 
@@ -23,20 +23,23 @@ module Picky
23
23
  @amount = amount
24
24
  end
25
25
 
26
- # Generates an index for the given index (in exact index style).
27
- #
28
- # In the following form:
29
- # [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
30
- #
31
- def generate_from inverted
32
- hash = hashify inverted.keys
33
- sort hash
34
- end
26
+ # # Generates an index for the given index (in exact index style).
27
+ # #
28
+ # # In the following form:
29
+ # # [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
30
+ # #
31
+ # def generate_from inverted
32
+ # hash = hashify inverted.keys
33
+ # sort hash
34
+ # end
35
35
 
36
36
  protected
37
37
 
38
38
  # Sorts the index values in place.
39
39
  #
40
+ # TODO Include this again. Sort at the end.
41
+ # Or sort when inserting in realtime.
42
+ #
40
43
  def sort hash
41
44
  hash.each_pair.each do |code, ary|
42
45
  ary.sort_by_levenshtein! code
@@ -45,21 +48,6 @@ module Picky
45
48
  hash
46
49
  end
47
50
 
48
- # Hashifies a list of symbols.
49
- #
50
- # Where:
51
- # { encoded_sym => [syms] }
52
- #
53
- def hashify list
54
- list.inject({}) do |total, element|
55
- if code = encoded(element)
56
- total[code] ||= []
57
- total[code] << element
58
- end
59
- total
60
- end
61
- end
62
-
63
51
  end
64
52
 
65
53
  end
@@ -14,12 +14,12 @@ module Picky
14
14
  #
15
15
  class Soundex < Phonetic
16
16
 
17
- # Encodes the given symbol.
17
+ # Encodes the given string/symbol.
18
18
  #
19
19
  # Returns a symbol.
20
20
  #
21
- def encoded sym
22
- code = Text::Soundex.soundex sym.to_s
21
+ def encoded str_or_sym
22
+ code = Text::Soundex.soundex str_or_sym.to_s
23
23
  code.intern if code
24
24
  end
25
25
 
@@ -0,0 +1,46 @@
1
+ module Picky
2
+
3
+ module Generators
4
+
5
+ module Weights
6
+
7
+ # Uses a constant weight.
8
+ # Default is 0.0.
9
+ #
10
+ # Note: This is not saved.
11
+ #
12
+ # Examples:
13
+ # * Picky::Weights::Constant.new # Uses 0.0 as a constant weight.
14
+ # * Picky::Weights::Constant.new(3.14) # Uses 3.14 as a constant weight.
15
+ #
16
+ class Constant < Runtime
17
+
18
+ def initialize weight = 0.0
19
+ @weight = weight
20
+ end
21
+
22
+ # Always returns the constant weight,
23
+ # regardless of the given string/symbol.
24
+ #
25
+ def [] _
26
+
27
+ @weight
28
+ end
29
+
30
+ # Returns the constant weight,
31
+ # regardless of the given size.
32
+ #
33
+ # Not really used, but is more
34
+ # correct this way.
35
+ #
36
+ def weight_for _
37
+ @weight
38
+ end
39
+
40
+ end
41
+
42
+ end
43
+
44
+ end
45
+
46
+ end
@@ -0,0 +1,37 @@
1
+ module Picky
2
+
3
+ module Generators
4
+
5
+ module Weights
6
+
7
+ # Uses a dynamic weight.
8
+ #
9
+ # Note: This is not saved.
10
+ #
11
+ # Examples:
12
+ # * Picky::Weights::Dynamic.new do |str_or_sym|
13
+ # str_or_sym.length
14
+ # end
15
+ #
16
+ class Dynamic < Runtime
17
+
18
+ # Give it a block that takes a string/symbol
19
+ # and returns a weight.
20
+ #
21
+ def initialize &calculation
22
+ @calculation = calculation
23
+ end
24
+
25
+ # Calls the block to calculate the weight.
26
+ #
27
+ def [] str_or_sym
28
+ @calculation.call str_or_sym
29
+ end
30
+
31
+ end
32
+
33
+ end
34
+
35
+ end
36
+
37
+ end
@@ -11,16 +11,6 @@ module Picky
11
11
  #
12
12
  class Logarithmic < Strategy
13
13
 
14
- # Generates a partial index from the given inverted index.
15
- #
16
- def generate_from inverted
17
- inverted.inject({}) do |hash, (text, ids)|
18
- weight = weight_for ids.size
19
- hash[text] ||= weight.round(2) if weight
20
- hash
21
- end
22
- end
23
-
24
14
  # Sets the weight value.
25
15
  #
26
16
  # If the size is 0 or one, we would get -Infinity or 0.0.
@@ -0,0 +1,41 @@
1
+ module Picky
2
+
3
+ module Generators
4
+ module Weights
5
+
6
+ # Is used for runtime-only strategies.
7
+ #
8
+ # Note: Pretends to be a backend but
9
+ # does nothing at all.
10
+ #
11
+ # To override, implement:
12
+ # * weight_for(size) # During indextime. # Probably never used.
13
+ # * [] symbol_or_string # During runtime.
14
+ #
15
+ # TODO Find a better name.
16
+ #
17
+ class Runtime < Strategy
18
+
19
+ # It is not saved, by default.
20
+ #
21
+ def saved?
22
+ false
23
+ end
24
+
25
+ # Returns nil.
26
+ #
27
+ def weight_for _
28
+ # Nothing.
29
+ end
30
+
31
+ # Saves nothing by default.
32
+ #
33
+ def []= _, _
34
+
35
+ end
36
+
37
+ end
38
+ end
39
+ end
40
+
41
+ end