picky 3.5.0 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. data/lib/picky/bundle.rb +37 -4
  2. data/lib/picky/bundle_indexed.rb +12 -8
  3. data/lib/picky/bundle_indexing.rb +6 -26
  4. data/lib/picky/bundle_realtime.rb +26 -16
  5. data/lib/picky/category_indexing.rb +1 -3
  6. data/lib/picky/category_realtime.rb +1 -1
  7. data/lib/picky/character_substituters/west_european.rb +4 -4
  8. data/lib/picky/generators/partial/infix.rb +0 -47
  9. data/lib/picky/generators/partial/none.rb +0 -6
  10. data/lib/picky/generators/partial/substring.rb +0 -47
  11. data/lib/picky/generators/similarity/double_metaphone.rb +3 -3
  12. data/lib/picky/generators/similarity/metaphone.rb +3 -3
  13. data/lib/picky/generators/similarity/phonetic.rb +12 -24
  14. data/lib/picky/generators/similarity/soundex.rb +3 -3
  15. data/lib/picky/generators/weights/constant.rb +46 -0
  16. data/lib/picky/generators/weights/dynamic.rb +37 -0
  17. data/lib/picky/generators/weights/logarithmic.rb +0 -10
  18. data/lib/picky/generators/weights/runtime.rb +41 -0
  19. data/lib/picky/loader.rb +3 -3
  20. data/lib/picky/query/allocations.rb +2 -1
  21. data/lib/picky/query/tokens.rb +0 -10
  22. data/spec/lib/category_indexed_spec.rb +1 -1
  23. data/spec/lib/character_substituters/west_european_spec.rb +11 -13
  24. data/spec/lib/generators/partial/infix_spec.rb +161 -161
  25. data/spec/lib/generators/partial/none_spec.rb +3 -3
  26. data/spec/lib/generators/partial/postfix_spec.rb +109 -109
  27. data/spec/lib/generators/partial/substring_spec.rb +190 -190
  28. data/spec/lib/generators/similarity/double_metaphone_spec.rb +38 -38
  29. data/spec/lib/generators/similarity/metaphone_spec.rb +38 -38
  30. data/spec/lib/generators/similarity/soundex_spec.rb +38 -38
  31. data/spec/lib/generators/weights/constant_spec.rb +37 -0
  32. data/spec/lib/generators/weights/dynamic_spec.rb +27 -0
  33. data/spec/lib/generators/weights/logarithmic_spec.rb +10 -15
  34. data/spec/lib/indexed/bundle_spec.rb +3 -2
  35. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +8 -8
  36. data/spec/lib/indexing/bundle_spec.rb +5 -3
  37. data/spec/specific/dynamic_weights_spec.rb +44 -0
  38. metadata +36 -28
  39. data/lib/picky/generators/base.rb +0 -19
data/lib/picky/bundle.rb CHANGED
@@ -66,8 +66,11 @@ module Picky
66
66
 
67
67
  # Initial indexes.
68
68
  #
69
+ # Note that if the weights strategy doesn't need to be saved,
70
+ # the strategy itself pretends to be an index.
71
+ #
69
72
  @inverted = @backend_inverted.initial
70
- @weights = @backend_weights.initial
73
+ @weights = @weights_strategy.saved?? @backend_weights.initial : @weights_strategy
71
74
  @similarity = @backend_similarity.initial
72
75
  @configuration = @backend_configuration.initial
73
76
 
@@ -77,6 +80,30 @@ module Picky
77
80
  "#{category.identifier}:#{name}"
78
81
  end
79
82
 
83
+ # "Empties" the index(es) by getting a new empty
84
+ # internal backend instance.
85
+ #
86
+ def empty
87
+ empty_inverted
88
+ empty_weights
89
+ empty_similarity
90
+ empty_configuration
91
+ end
92
+ def empty_inverted
93
+ @inverted = @backend_inverted.empty
94
+ end
95
+ def empty_weights
96
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
97
+ #
98
+ @weights = @weights_strategy.saved?? @backend_weights.empty : @weights_strategy
99
+ end
100
+ def empty_similarity
101
+ @similarity = @backend_similarity.empty
102
+ end
103
+ def empty_configuration
104
+ @configuration = @backend_configuration.empty
105
+ end
106
+
80
107
  # Get a list of similar texts.
81
108
  #
82
109
  # Note: Does not return itself.
@@ -113,7 +140,9 @@ module Picky
113
140
  #
114
141
  def backup
115
142
  @backend_inverted.backup if @backend_inverted.respond_to? :backup
116
- @backend_weights.backup if @backend_weights.respond_to? :backup
143
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
144
+ #
145
+ @backend_weights.backup if @backend_weights.respond_to? :backup && @weights_strategy.saved?
117
146
  @backend_similarity.backup if @backend_similarity.respond_to? :backup
118
147
  @backend_configuration.backup if @backend_configuration.respond_to? :backup
119
148
  end
@@ -122,7 +151,9 @@ module Picky
122
151
  #
123
152
  def restore
124
153
  @backend_inverted.restore if @backend_inverted.respond_to? :restore
125
- @backend_weights.restore if @backend_weights.respond_to? :restore
154
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
155
+ #
156
+ @backend_weights.restore if @backend_weights.respond_to? :restore && @weights_strategy.saved?
126
157
  @backend_similarity.restore if @backend_similarity.respond_to? :restore
127
158
  @backend_configuration.restore if @backend_configuration.respond_to? :restore
128
159
  end
@@ -131,7 +162,9 @@ module Picky
131
162
  #
132
163
  def delete
133
164
  @backend_inverted.delete if @backend_inverted.respond_to? :delete
134
- @backend_weights.delete if @backend_weights.respond_to? :delete
165
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
166
+ #
167
+ @backend_weights.delete if @backend_weights.respond_to? :delete && @weights_strategy.saved?
135
168
  @backend_similarity.delete if @backend_similarity.respond_to? :delete
136
169
  @backend_configuration.delete if @backend_configuration.respond_to? :delete
137
170
  end
@@ -22,24 +22,24 @@ module Picky
22
22
  #
23
23
  # Returns a (potentially empty) array of ids.
24
24
  #
25
- def ids sym
26
- @inverted[sym] || []
25
+ def ids sym_or_string
26
+ @inverted[sym_or_string] || []
27
27
  end
28
28
 
29
29
  # Get a weight for the given symbol.
30
30
  #
31
31
  # Returns a number, or nil.
32
32
  #
33
- def weight sym
34
- @weights[sym]
33
+ def weight sym_or_string
34
+ @weights[sym_or_string]
35
35
  end
36
36
 
37
37
  # Get settings for this bundle.
38
38
  #
39
39
  # Returns an object.
40
40
  #
41
- def [] sym
42
- @configuration[sym]
41
+ def [] sym_or_string
42
+ @configuration[sym_or_string]
43
43
  end
44
44
 
45
45
  # Loads all indexes.
@@ -62,7 +62,9 @@ module Picky
62
62
  # Loads the weights index.
63
63
  #
64
64
  def load_weights
65
- self.weights = @backend_weights.load
65
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
66
+ #
67
+ self.weights = @backend_weights.load if @weights_strategy.saved?
66
68
  end
67
69
  # Loads the similarity index.
68
70
  #
@@ -92,7 +94,9 @@ module Picky
92
94
  # Clears the weights index.
93
95
  #
94
96
  def clear_weights
95
- weights.clear
97
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
98
+ #
99
+ weights.clear if @weights_strategy.saved?
96
100
  end
97
101
  # Clears the similarity index.
98
102
  #
@@ -35,28 +35,6 @@ module Picky
35
35
  delegate :clear,
36
36
  :to => :inverted
37
37
 
38
- # "Empties" the index(es) by getting a new empty
39
- # internal backend instance.
40
- #
41
- def empty
42
- empty_inverted
43
- empty_weights
44
- empty_similarity
45
- empty_configuration
46
- end
47
- def empty_inverted
48
- @inverted = @backend_inverted.empty
49
- end
50
- def empty_weights
51
- @weights = @backend_weights.empty
52
- end
53
- def empty_similarity
54
- @similarity = @backend_similarity.empty
55
- end
56
- def empty_configuration
57
- @configuration = @backend_configuration.empty
58
- end
59
-
60
38
  # Saves the indexes in a dump file.
61
39
  #
62
40
  def dump
@@ -68,22 +46,24 @@ module Picky
68
46
  # Dumps the core index.
69
47
  #
70
48
  def dump_inverted
71
- @backend_inverted.dump self.inverted
49
+ @backend_inverted.dump @inverted
72
50
  end
73
51
  # Dumps the weights index.
74
52
  #
75
53
  def dump_weights
76
- @backend_weights.dump self.weights
54
+ # TODO THINK about this. Perhaps the strategies should implement the backend methods?
55
+ #
56
+ @backend_weights.dump @weights if @weights_strategy.saved?
77
57
  end
78
58
  # Dumps the similarity index.
79
59
  #
80
60
  def dump_similarity
81
- @backend_similarity.dump self.similarity
61
+ @backend_similarity.dump @similarity
82
62
  end
83
63
  # Dumps the configuration index.
84
64
  #
85
65
  def dump_configuration
86
- @backend_configuration.dump self.configuration
66
+ @backend_configuration.dump @configuration
87
67
  end
88
68
 
89
69
  end
@@ -32,39 +32,49 @@ module Picky
32
32
 
33
33
  # Returns a reference to the array where the id has been added.
34
34
  #
35
- # TODO Rename sym.
36
- #
37
- def add id, sym, where = :unshift
38
- ary = @inverted[sym]
35
+ def add id, str_or_sym, where = :unshift
36
+ ary = @inverted[str_or_sym]
39
37
 
40
- syms = @realtime_mapping[id]
41
- syms = (@realtime_mapping[id] = []) unless syms # TODO Nicefy.
38
+ str_or_syms = @realtime_mapping[id]
39
+ str_or_syms = (@realtime_mapping[id] = []) unless str_or_syms # TODO Nicefy.
42
40
 
43
41
  # Inverted.
44
42
  #
45
- ids = if syms.include? sym
46
- ids = @inverted[sym]
43
+ ids = if str_or_syms.include? str_or_sym
44
+ ids = @inverted[str_or_sym]
47
45
  ids.delete id
48
46
  ids.send where, id
49
47
  else
50
- syms << sym
51
- ids = @inverted[sym] ||= []
48
+ str_or_syms << str_or_sym
49
+ ids = @inverted[str_or_sym] ||= []
52
50
  ids.send where, id
53
51
  end
54
52
 
55
53
  # Weights.
56
54
  #
57
- @weights[sym] = self.weights_strategy.weight_for ids.size
55
+ @weights[str_or_sym] = self.weights_strategy.weight_for ids.size
58
56
 
59
57
  # Similarity.
60
58
  #
61
- if encoded = self.similarity_strategy.encoded(sym)
59
+ add_similarity str_or_sym, where
60
+
61
+ # Return reference.
62
+ #
63
+ ids
64
+ end
65
+
66
+ # Add string/symbol to similarity index.
67
+ #
68
+ # TODO Probably where makes no sense here. Should have its own order.
69
+ #
70
+ def add_similarity str_or_sym, where = :unshift
71
+ if encoded = self.similarity_strategy.encoded(str_or_sym)
62
72
  similarity = @similarity[encoded] ||= []
63
- if similarity.include? sym
64
- similarity.delete sym # Not completely correct, as others will also be affected, but meh.
65
- similarity.send where, sym #
73
+ if similarity.include? str_or_sym
74
+ similarity.delete str_or_sym # Not completely correct, as others will also be affected, but meh.
75
+ similarity.send where, str_or_sym #
66
76
  else
67
- similarity.send where, sym
77
+ similarity.send where, str_or_sym
68
78
  end
69
79
  end
70
80
  end
@@ -65,13 +65,11 @@ module Picky
65
65
  # If we have no explicit source, we'll check the index for one.
66
66
  #
67
67
  def source
68
- (@source && extract_source) || @index.source
68
+ extract_source || @index.source
69
69
  end
70
70
  # Extract the actual source if it is wrapped in a time
71
71
  # capsule, i.e. a block/lambda.
72
72
  #
73
- # TODO Extract into module.
74
- #
75
73
  def extract_source
76
74
  @source = @source.respond_to?(:call) ? @source.call : @source
77
75
  end
@@ -37,7 +37,7 @@ module Picky
37
37
  #
38
38
  def add_tokenized_token id, text, where = :unshift
39
39
  return unless text
40
- id = id.send key_format # TODO Speed this up!
40
+ id = id.send key_format # TODO Speed this up!
41
41
  # text = text.to_sym if @symbols # TODO Symbols.
42
42
  exact.add id, text, where
43
43
  partial.add_partialized id, text, where
@@ -28,21 +28,21 @@ module Picky
28
28
  # (See the associated spec for all examples)
29
29
  #
30
30
  def substitute text
31
- trans = @chars.new(text).normalize(:kd)
31
+ trans = @chars.new(text).normalize :kd
32
32
 
33
33
  # Substitute special cases.
34
34
  #
35
- trans.gsub!('ß', 'ss')
35
+ trans.gsub! 'ß', 'ss'
36
36
 
37
37
  # Substitute umlauts (of A,O,U,a,o,u).
38
38
  #
39
- trans.gsub!(/([AOUaou])\314\210/u, '\1e')
39
+ trans.gsub! /([AOUaou])\314\210/u, '\1e'
40
40
 
41
41
  # Get rid of acutes, graves etc.
42
42
  #
43
43
  trans.unpack('U*').select { |cp|
44
44
  cp < 0x0300 || cp > 0x035F
45
- }.pack('U*')
45
+ }.pack 'U*'
46
46
  end
47
47
 
48
48
  def to_s # :nodoc:
@@ -41,53 +41,6 @@ module Picky
41
41
  token.each_intoken min, max, &block
42
42
  end
43
43
 
44
- # Generates a partial index from the given inverted index.
45
- #
46
- def generate_from inverted
47
- result = {}
48
-
49
- # Generate for each key token the subtokens.
50
- #
51
- i = 0
52
- j = 0
53
- inverted.each_key do |token|
54
- i += 1
55
- if i == 5000
56
- j += 1
57
- timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
58
- i = 0
59
- end
60
- generate_for token, inverted, result
61
- end
62
-
63
- # Remove duplicate ids.
64
- #
65
- # THINK If it is unique for a subtoken, it is
66
- # unique for all derived longer tokens.
67
- #
68
- result.each_value &:uniq!
69
-
70
- result
71
- end
72
-
73
- # To each shortened token of :test
74
- # :test, :tes, :te, :t
75
- # add all ids of :test
76
- #
77
- # "token" here means just text.
78
- #
79
- # THINK Could be improved by appending the aforegoing ids?
80
- #
81
- def generate_for token, inverted, result
82
- each_partial token do |intoken|
83
- if result[intoken]
84
- result[intoken] += inverted[token] # unique
85
- else
86
- result[intoken] = inverted[token].dup
87
- end
88
- end
89
- end
90
-
91
44
  end
92
45
 
93
46
  end
@@ -14,12 +14,6 @@ module Picky
14
14
  # yields nothing
15
15
  end
16
16
 
17
- # Returns an empty index.
18
- #
19
- def generate_from index
20
- {}
21
- end
22
-
23
17
  # Returns if this strategy's generated file is saved.
24
18
  #
25
19
  def saved?
@@ -78,53 +78,6 @@ module Picky
78
78
  @generator.each_subtoken token, &block
79
79
  end
80
80
 
81
- # Generates a partial index from the given inverted index.
82
- #
83
- def generate_from inverted
84
- result = {}
85
-
86
- # Generate for each key token the subtokens.
87
- #
88
- i = 0
89
- j = 0
90
- inverted.each_key do |token|
91
- i += 1
92
- if i == 5000
93
- j += 1
94
- timed_exclaim %Q{#{"%8i" % (i*j)} generated (current token: "#{token}").}
95
- i = 0
96
- end
97
- generate_for token, inverted, result
98
- end
99
-
100
- # Remove duplicate ids.
101
- #
102
- # THINK If it is unique for a subtoken, it is
103
- # unique for all derived longer tokens.
104
- #
105
- result.each_value &:uniq!
106
-
107
- result
108
- end
109
-
110
- # To each shortened token of :test
111
- # :test, :tes, :te, :t
112
- # add all ids of :test
113
- #
114
- # "token" here means just text.
115
- #
116
- # THINK Could be improved by appending the aforegoing ids?
117
- #
118
- def generate_for token, inverted, result
119
- each_partial token do |subtoken|
120
- if result[subtoken]
121
- result[subtoken] += inverted[token] # unique
122
- else
123
- result[subtoken] = inverted[token].dup
124
- end
125
- end
126
- end
127
-
128
81
  end
129
82
 
130
83
  end
@@ -14,12 +14,12 @@ module Picky
14
14
  #
15
15
  class DoubleMetaphone < Phonetic
16
16
 
17
- # Encodes the given symbol.
17
+ # Encodes the given string/symbol.
18
18
  #
19
19
  # Returns a symbol.
20
20
  #
21
- def encoded sym
22
- codes = Text::Metaphone.double_metaphone sym.to_s
21
+ def encoded str_or_sym
22
+ codes = Text::Metaphone.double_metaphone str_or_sym.to_s
23
23
  codes.first.intern unless codes.empty?
24
24
  end
25
25
 
@@ -14,12 +14,12 @@ module Picky
14
14
  #
15
15
  class Metaphone < Phonetic
16
16
 
17
- # Encodes the given symbol.
17
+ # Encodes the given string/symbol.
18
18
  #
19
19
  # Returns a symbol.
20
20
  #
21
- def encoded sym
22
- code = Text::Metaphone.metaphone sym.to_s
21
+ def encoded str_or_sym
22
+ code = Text::Metaphone.metaphone str_or_sym.to_s
23
23
  code.intern if code
24
24
  end
25
25
 
@@ -23,20 +23,23 @@ module Picky
23
23
  @amount = amount
24
24
  end
25
25
 
26
- # Generates an index for the given index (in exact index style).
27
- #
28
- # In the following form:
29
- # [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
30
- #
31
- def generate_from inverted
32
- hash = hashify inverted.keys
33
- sort hash
34
- end
26
+ # # Generates an index for the given index (in exact index style).
27
+ # #
28
+ # # In the following form:
29
+ # # [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
30
+ # #
31
+ # def generate_from inverted
32
+ # hash = hashify inverted.keys
33
+ # sort hash
34
+ # end
35
35
 
36
36
  protected
37
37
 
38
38
  # Sorts the index values in place.
39
39
  #
40
+ # TODO Include this again. Sort at the end.
41
+ # Or sort when inserting in realtime.
42
+ #
40
43
  def sort hash
41
44
  hash.each_pair.each do |code, ary|
42
45
  ary.sort_by_levenshtein! code
@@ -45,21 +48,6 @@ module Picky
45
48
  hash
46
49
  end
47
50
 
48
- # Hashifies a list of symbols.
49
- #
50
- # Where:
51
- # { encoded_sym => [syms] }
52
- #
53
- def hashify list
54
- list.inject({}) do |total, element|
55
- if code = encoded(element)
56
- total[code] ||= []
57
- total[code] << element
58
- end
59
- total
60
- end
61
- end
62
-
63
51
  end
64
52
 
65
53
  end
@@ -14,12 +14,12 @@ module Picky
14
14
  #
15
15
  class Soundex < Phonetic
16
16
 
17
- # Encodes the given symbol.
17
+ # Encodes the given string/symbol.
18
18
  #
19
19
  # Returns a symbol.
20
20
  #
21
- def encoded sym
22
- code = Text::Soundex.soundex sym.to_s
21
+ def encoded str_or_sym
22
+ code = Text::Soundex.soundex str_or_sym.to_s
23
23
  code.intern if code
24
24
  end
25
25
 
@@ -0,0 +1,46 @@
1
+ module Picky
2
+
3
+ module Generators
4
+
5
+ module Weights
6
+
7
+ # Uses a constant weight.
8
+ # Default is 0.0.
9
+ #
10
+ # Note: This is not saved.
11
+ #
12
+ # Examples:
13
+ # * Picky::Weights::Constant.new # Uses 0.0 as a constant weight.
14
+ # * Picky::Weights::Constant.new(3.14) # Uses 3.14 as a constant weight.
15
+ #
16
+ class Constant < Runtime
17
+
18
+ def initialize weight = 0.0
19
+ @weight = weight
20
+ end
21
+
22
+ # Always returns the constant weight,
23
+ # whatever key is asked for.
24
+ #
25
+ def [] _
26
+
27
+ @weight
28
+ end
29
+
30
+ # Returns the constant weight,
31
+ # regardless of the given size.
32
+ #
33
+ # Not really used, but is more
34
+ # correct this way.
35
+ #
36
+ def weight_for _
37
+ @weight
38
+ end
39
+
40
+ end
41
+
42
+ end
43
+
44
+ end
45
+
46
+ end
@@ -0,0 +1,37 @@
1
+ module Picky
2
+
3
+ module Generators
4
+
5
+ module Weights
6
+
7
+ # Uses a dynamic weight.
8
+ #
9
+ # Note: This is not saved.
10
+ #
11
+ # Examples:
12
+ # * Picky::Weights::Dynamic.new do |str_or_sym|
13
+ # str_or_sym.length
14
+ # end
15
+ #
16
+ class Dynamic < Runtime
17
+
18
+ # Give it a block that takes a string/symbol
19
+ # and returns a weight.
20
+ #
21
+ def initialize &calculation
22
+ @calculation = calculation
23
+ end
24
+
25
+ # Calls the block to calculate the weight.
26
+ #
27
+ def [] str_or_sym
28
+ @calculation.call str_or_sym
29
+ end
30
+
31
+ end
32
+
33
+ end
34
+
35
+ end
36
+
37
+ end
@@ -11,16 +11,6 @@ module Picky
11
11
  #
12
12
  class Logarithmic < Strategy
13
13
 
14
- # Generates a partial index from the given inverted index.
15
- #
16
- def generate_from inverted
17
- inverted.inject({}) do |hash, (text, ids)|
18
- weight = weight_for ids.size
19
- hash[text] ||= weight.round(2) if weight
20
- hash
21
- end
22
- end
23
-
24
14
  # Sets the weight value.
25
15
  #
26
16
  # If the size is 0 or one, we would get -Infinity or 0.0.
@@ -0,0 +1,41 @@
1
+ module Picky
2
+
3
+ module Generators
4
+ module Weights
5
+
6
+ # Is used for runtime-only strategies.
7
+ #
8
+ # Note: Pretends to be a backend but
9
+ # does nothing at all.
10
+ #
11
+ # To override, implement:
12
+ # * weight_for(size) # During indextime. # Probably never used.
13
+ # * [] symbol_or_string # During runtime.
14
+ #
15
+ # TODO Find a better name.
16
+ #
17
+ class Runtime < Strategy
18
+
19
+ # It is not saved, by default.
20
+ #
21
+ def saved?
22
+ false
23
+ end
24
+
25
+ # Returns nil.
26
+ #
27
+ def weight_for _
28
+ # Nothing.
29
+ end
30
+
31
+ # Saves nothing by default.
32
+ #
33
+ def []= _, _
34
+
35
+ end
36
+
37
+ end
38
+ end
39
+ end
40
+
41
+ end