picky 4.0.0pre1 → 4.0.0pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. data/aux/picky/cli.rb +6 -2
  2. data/lib/picky.rb +10 -8
  3. data/lib/picky/backends/backend.rb +37 -0
  4. data/lib/picky/backends/file.rb +0 -20
  5. data/lib/picky/backends/memory.rb +0 -29
  6. data/lib/picky/backends/redis.rb +74 -15
  7. data/lib/picky/backends/redis/list.rb +1 -1
  8. data/lib/picky/backends/sqlite.rb +0 -27
  9. data/lib/picky/bundle.rb +2 -2
  10. data/lib/picky/bundle_indexed.rb +1 -1
  11. data/lib/picky/bundle_indexing.rb +1 -1
  12. data/lib/picky/categories_indexed.rb +1 -11
  13. data/lib/picky/category.rb +4 -4
  14. data/lib/picky/category/location.rb +25 -0
  15. data/lib/picky/category_realtime.rb +4 -3
  16. data/lib/picky/console.rb +1 -1
  17. data/lib/picky/constants.rb +1 -1
  18. data/lib/picky/ext/maybe_compile.rb +2 -2
  19. data/lib/picky/extensions/object.rb +3 -2
  20. data/lib/picky/generators/aliases.rb +7 -2
  21. data/lib/picky/generators/partial/default.rb +1 -0
  22. data/lib/picky/generators/similarity/default.rb +1 -0
  23. data/lib/picky/generators/similarity/phonetic.rb +13 -2
  24. data/lib/picky/generators/strategy.rb +0 -2
  25. data/lib/picky/generators/weights/constant.rb +1 -2
  26. data/lib/picky/generators/weights/default.rb +1 -0
  27. data/lib/picky/generators/weights/dynamic.rb +1 -1
  28. data/lib/picky/generators/weights/logarithmic.rb +1 -1
  29. data/lib/picky/generators/weights/{runtime.rb → stub.rb} +1 -3
  30. data/lib/picky/index.rb +3 -3
  31. data/lib/picky/index_indexing.rb +0 -2
  32. data/lib/picky/index_realtime.rb +1 -1
  33. data/lib/picky/indexers/base.rb +7 -0
  34. data/lib/picky/indexers/parallel.rb +2 -4
  35. data/lib/picky/indexers/serial.rb +2 -0
  36. data/lib/picky/indexes_indexing.rb +1 -1
  37. data/lib/picky/interfaces/live_parameters/master_child.rb +175 -0
  38. data/lib/picky/interfaces/live_parameters/unicorn.rb +37 -0
  39. data/lib/picky/loader.rb +238 -259
  40. data/lib/picky/query/allocation.rb +19 -10
  41. data/lib/picky/query/combination.rb +7 -1
  42. data/lib/picky/query/combinations.rb +1 -6
  43. data/lib/picky/query/token.rb +26 -36
  44. data/lib/picky/results.rb +18 -17
  45. data/lib/picky/scheduler.rb +2 -1
  46. data/lib/picky/search.rb +1 -1
  47. data/lib/picky/sinatra.rb +6 -6
  48. data/lib/picky/statistics.rb +2 -0
  49. data/lib/picky/tokenizer.rb +8 -8
  50. data/lib/picky/wrappers/bundle/calculation.rb +4 -4
  51. data/lib/picky/wrappers/bundle/location.rb +1 -2
  52. data/lib/tasks/framework.rake +1 -1
  53. data/lib/tasks/statistics.rake +1 -1
  54. data/lib/tasks/try.rake +1 -1
  55. data/lib/tasks/try.rb +1 -1
  56. data/spec/aux/picky/cli_spec.rb +12 -12
  57. data/spec/ext/performant_spec.rb +16 -16
  58. data/spec/functional/backends/file_spec.rb +78 -7
  59. data/spec/functional/backends/memory_spec.rb +78 -7
  60. data/spec/functional/backends/redis_spec.rb +73 -13
  61. data/spec/functional/dynamic_weights_spec.rb +3 -4
  62. data/spec/functional/realtime_spec.rb +2 -2
  63. data/spec/functional/speed_spec.rb +2 -2
  64. data/spec/functional/terminate_early_spec.rb +3 -3
  65. data/spec/lib/analytics_spec.rb +1 -1
  66. data/spec/lib/analyzer_spec.rb +5 -3
  67. data/spec/lib/categories_indexed_spec.rb +38 -20
  68. data/spec/lib/category/location_spec.rb +30 -0
  69. data/spec/lib/character_substituters/west_european_spec.rb +1 -0
  70. data/spec/lib/extensions/hash_spec.rb +6 -5
  71. data/spec/lib/extensions/module_spec.rb +6 -6
  72. data/spec/lib/extensions/object_spec.rb +9 -8
  73. data/spec/lib/extensions/string_spec.rb +1 -1
  74. data/spec/lib/generators/similarity/phonetic_spec.rb +11 -0
  75. data/spec/lib/index_realtime_spec.rb +5 -5
  76. data/spec/lib/interfaces/{live_parameters_spec.rb → live_parameters/master_child_spec.rb} +26 -26
  77. data/spec/lib/interfaces/live_parameters/unicorn_spec.rb +160 -0
  78. data/spec/lib/loader_spec.rb +65 -25
  79. data/spec/lib/query/allocation_spec.rb +25 -22
  80. data/spec/lib/query/combinations_spec.rb +13 -36
  81. data/spec/lib/query/token_spec.rb +144 -131
  82. data/spec/lib/query/tokens_spec.rb +14 -0
  83. data/spec/lib/results_spec.rb +14 -8
  84. data/spec/lib/search_spec.rb +1 -1
  85. data/spec/lib/sinatra_spec.rb +8 -8
  86. metadata +28 -91
  87. data/lib/picky/adapters/rack.rb +0 -34
  88. data/lib/picky/adapters/rack/base.rb +0 -27
  89. data/lib/picky/adapters/rack/live_parameters.rb +0 -37
  90. data/lib/picky/adapters/rack/search.rb +0 -67
  91. data/lib/picky/application.rb +0 -268
  92. data/lib/picky/frontend_adapters/rack.rb +0 -161
  93. data/lib/picky/interfaces/live_parameters.rb +0 -187
  94. data/lib/picky/sources/base.rb +0 -92
  95. data/lib/picky/sources/couch.rb +0 -76
  96. data/lib/picky/sources/csv.rb +0 -83
  97. data/lib/picky/sources/db.rb +0 -189
  98. data/lib/picky/sources/delicious.rb +0 -63
  99. data/lib/picky/sources/mongo.rb +0 -80
  100. data/lib/picky/wrappers/category/location.rb +0 -38
  101. data/lib/tasks/routes.rake +0 -8
  102. data/spec/lib/adapters/rack/base_spec.rb +0 -24
  103. data/spec/lib/adapters/rack/live_parameters_spec.rb +0 -26
  104. data/spec/lib/adapters/rack/query_spec.rb +0 -39
  105. data/spec/lib/application_spec.rb +0 -155
  106. data/spec/lib/frontend_adapters/rack_spec.rb +0 -294
  107. data/spec/lib/sources/base_spec.rb +0 -53
  108. data/spec/lib/sources/couch_spec.rb +0 -114
  109. data/spec/lib/sources/csv_spec.rb +0 -89
  110. data/spec/lib/sources/db_spec.rb +0 -125
  111. data/spec/lib/sources/delicious_spec.rb +0 -94
  112. data/spec/lib/sources/mongo_spec.rb +0 -50
@@ -0,0 +1,25 @@
1
+ module Picky
2
+ class Category
3
+
4
+ module Location
5
+
6
+ def self.install_on category, grid, precision, anchor
7
+ category.extend self
8
+
9
+ exact_bundle = category.exact
10
+ category.exact = Wrappers::Bundle::Location.new(exact_bundle, grid, precision: precision, anchor: anchor)
11
+ category.partial = Wrappers::Bundle::Location.new(exact_bundle, grid, precision: precision, anchor: anchor)
12
+
13
+ category
14
+ end
15
+
16
+ # Only uses a basic tokenizer that's already geared towards numbers.
17
+ #
18
+ def tokenizer
19
+ @tokenizer ||= Tokenizer.new
20
+ end
21
+
22
+ end
23
+
24
+ end
25
+ end
@@ -43,16 +43,17 @@ module Picky
43
43
  # strings to the index for the given id.
44
44
  #
45
45
  def add_tokenized id, tokens, where = :unshift
46
- tokens.each { |text| add_tokenized_token id, text, where }
46
+ tokens.each { |text| add_tokenized_token id.send(key_format), text, where, false }
47
47
  end
48
48
 
49
49
  #
50
50
  #
51
- def add_tokenized_token id, text, where = :unshift
51
+ def add_tokenized_token id, text, where = :unshift, format = true
52
52
  return unless text
53
- id = id.send key_format # TODO Speed this up!
54
53
 
54
+ id = id.send key_format if format
55
55
  # text = text.to_sym if @symbols # TODO Symbols.
56
+
56
57
  exact.add id, text, where
57
58
  partial.add_partialized id, text, where
58
59
  end
data/lib/picky/console.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  #
4
4
  module Picky
5
5
 
6
- # TODO Improve.
6
+ # Handles the IRB console for Picky.
7
7
  #
8
8
  class Console
9
9
 
@@ -8,4 +8,4 @@ ENV['PICKY_ENV'] ||= ENV['RACK_ENV']
8
8
  PICKY_ENVIRONMENT = ENV['PICKY_ENV'] || 'development' unless defined? PICKY_ENVIRONMENT
9
9
  PICKY_ROOT = Dir.pwd unless defined? PICKY_ROOT
10
10
 
11
- EMPTY_STRING = ''.freeze
11
+ EMPTY_STRING = ''.freeze unless defined? EMPTY_STRING
@@ -1,7 +1,7 @@
1
1
  begin
2
- require File.expand_path '../ruby19/performant', __FILE__
2
+ require_relative 'ruby19/performant'
3
3
  rescue LoadError
4
- require File.expand_path '../ruby19/extconf.rb', __FILE__
4
+ require_relative 'ruby19/extconf'
5
5
  Dir.chdir File.expand_path('../ruby19', __FILE__) do
6
6
  %x{ ruby extconf.rb && make }
7
7
  end
@@ -10,13 +10,14 @@ class Object # :nodoc:all
10
10
  # Just puts the given text.
11
11
  #
12
12
  def exclaim text
13
- puts text
13
+ STDOUT.puts text
14
+ STDOUT.flush
14
15
  end
15
16
 
16
17
  # Puts a text that informs the user of a missing gem.
17
18
  #
18
19
  def warn_gem_missing gem_name, message
19
- warn "#{gem_name} gem missing!\nTo use #{message}, you need to:\n 1. Add the following line to Gemfile:\n gem '#{gem_name}'\n or\n require '#{gem_name}'\n for example on top of your app.rb/application.rb\n 2. Then, run:\n bundle update\n"
20
+ warn "#{gem_name} gem missing!\nTo use #{message}, you need to:\n 1. Add the following line to Gemfile:\n gem '#{gem_name}'\n or\n require '#{gem_name}'\n for example at the top of your app.rb file.\n 2. Then, run:\n bundle update\n"
20
21
  end
21
22
 
22
23
  # Indents each line by <tt>amount=2</tt> spaces.
@@ -1,5 +1,10 @@
1
1
  module Picky
2
- Partial = Generators::Partial
2
+ remove_const :Partial if defined? Partial
3
+ Partial = Generators::Partial
4
+
5
+ remove_const :Similarity if defined? Similarity
3
6
  Similarity = Generators::Similarity
4
- Weights = Generators::Weights
7
+
8
+ remove_const :Weights if defined? Weights
9
+ Weights = Generators::Weights
5
10
  end
@@ -2,6 +2,7 @@ module Picky
2
2
 
3
3
  module Generators
4
4
  module Partial
5
+ remove_const :Default if defined? Default
5
6
  Default = Postfix.new from: -3
6
7
  end
7
8
  end
@@ -4,6 +4,7 @@ module Picky
4
4
  module Similarity
5
5
  # Default is no similarity.
6
6
  #
7
+ remove_const :Default if defined? Default
7
8
  Default = None.new
8
9
  end
9
10
  end
@@ -19,15 +19,26 @@ module Picky
19
19
  #
20
20
  #
21
21
  def initialize amount = 10
22
+ check_gem
23
+
22
24
  raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
23
25
  @amount = amount
24
26
  end
25
27
 
28
+ # Tries to require the text gem.
29
+ #
30
+ def check_gem # :nodoc:
31
+ require 'text'
32
+ rescue LoadError
33
+ warn_gem_missing 'text', 'a phonetic Similarity'
34
+ exit 1
35
+ end
36
+
26
37
  # Sorts the index values in place.
27
38
  #
28
39
  def sort ary, code
29
- ary.sort_by_levenshtein! code
30
- ary.slice! amount, ary.size # THINK size is not perfectly correct, but anyway
40
+ ary.sort_by_levenshtein! code
41
+ ary.slice! amount, ary.size # THINK size is not perfectly correct, but anyway
31
42
  end
32
43
 
33
44
  end
@@ -7,8 +7,6 @@ module Picky
7
7
  # By default, all caches are saved in a
8
8
  # storage (like a file).
9
9
  #
10
- # TODO Move to the backends? Rename to backend?
11
- #
12
10
  def saved?
13
11
  true
14
12
  end
@@ -13,7 +13,7 @@ module Picky
13
13
  # * Picky::Weights::Constant.new # Uses 0.0 as a constant weight.
14
14
  # * Picky::Weights::Constant.new(3.14) # Uses 3.14 as a constant weight.
15
15
  #
16
- class Constant < Runtime
16
+ class Constant < Stub
17
17
 
18
18
  def initialize weight = 0.0
19
19
  @weight = weight
@@ -23,7 +23,6 @@ module Picky
23
23
  # except if there are no ids.
24
24
  #
25
25
  def [] _
26
-
27
26
  @weight
28
27
  end
29
28
 
@@ -4,6 +4,7 @@ module Picky
4
4
  module Weights
5
5
  # Default is Logarithmic.
6
6
  #
7
+ remove_const :Default if defined? Default
7
8
  Default = Logarithmic.new
8
9
  end
9
10
  end
@@ -13,7 +13,7 @@ module Picky
13
13
  # sym_or_str * length
14
14
  # end
15
15
  #
16
- class Dynamic < Runtime
16
+ class Dynamic < Stub
17
17
 
18
18
  # Give it a block that takes a string/symbol
19
19
  # and returns a weight.
@@ -16,7 +16,7 @@ module Picky
16
16
  # If the size is 0 or one, we would get -Infinity or 0.0.
17
17
  # Thus we do not set a value if there is just one. The default, dynamically, is 0.
18
18
  #
19
- # BUT: We need the value, even if 0. To designate that there is a weight!
19
+ # BUT: We need the value, even if 0. To designate that there IS a weight!
20
20
  #
21
21
  def weight_for amount
22
22
  return 0 if amount < 1
@@ -12,9 +12,7 @@ module Picky
12
12
  # * [symbol_or_string] # During runtime.
13
13
  # * weight_for(size) # During indextime. # Probably never used.
14
14
  #
15
- # TODO Find a better name.
16
- #
17
- class Runtime < Strategy
15
+ class Stub < Strategy
18
16
 
19
17
  # It is not saved, by default.
20
18
  #
data/lib/picky/index.rb CHANGED
@@ -251,7 +251,7 @@ module Picky
251
251
  options = { partial: Partial::None.new }.merge options
252
252
 
253
253
  category category_name, options do |cat|
254
- Wrappers::Category::Location.wrap cat, range, precision, anchor
254
+ Category::Location.install_on cat, range, precision, anchor
255
255
  end
256
256
  end
257
257
 
@@ -285,8 +285,8 @@ module Picky
285
285
  # * lat_from: The data category to take the data for the latitude from.
286
286
  # * lng_from: The data category to take the data for the longitude from.
287
287
  #
288
- # TODO Will have to write a wrapper that combines two categories that are
289
- # indexed simultaneously, since lat/lng are correlated.
288
+ # THINK Will have to write a wrapper that combines two categories that are
289
+ # indexed simultaneously, since lat/lng are correlated.
290
290
  #
291
291
  def geo_categories lat_name, lng_name, radius, options = {} # :nodoc:
292
292
 
@@ -104,8 +104,6 @@ module Picky
104
104
  # Extract the actual source if it is wrapped in a time
105
105
  # capsule, i.e. a block/lambda.
106
106
  #
107
- # TODO Extract into module.
108
- #
109
107
  def extract_source
110
108
  @source = @source.respond_to?(:call) ? @source.call : @source
111
109
  end
@@ -6,7 +6,7 @@ module Picky
6
6
 
7
7
  delegate :remove, # aka "delete".
8
8
  :add, # aka "insert".
9
- :replace, # aka "insert or update".
9
+ :replace, # aka "insert or update". Thus, not called update.
10
10
  :clear_realtime,
11
11
  :build_realtime_mapping,
12
12
  :to => :categories
@@ -27,6 +27,13 @@ module Picky
27
27
  end
28
28
  end
29
29
 
30
+ # Explicitly reset the source to avoid caching trouble.
31
+ #
32
+ def reset_source
33
+ source.reset if source.respond_to?(:reset)
34
+ source.reconnect! if source.respond_to?(:reconnect!)
35
+ end
36
+
30
37
  def check_source # :nodoc:
31
38
  raise "Trying to index without a source for #{@index_or_category.name}." unless source
32
39
  end
@@ -22,14 +22,12 @@ module Picky
22
22
  [category, category.prepared_index_file, [], (category.tokenizer || tokenizer)]
23
23
  end
24
24
 
25
- # Explicitly reset the source to avoid caching trouble.
26
- #
27
- source.reset if source.respond_to?(:reset)
28
-
29
25
  # Go through each object in the source.
30
26
  #
31
27
  objects = []
32
28
 
29
+ reset_source
30
+
33
31
  source.each do |object|
34
32
 
35
33
  # Accumulate objects.
@@ -25,6 +25,8 @@ module Picky
25
25
  result = []
26
26
  tokenizer = category.tokenizer
27
27
 
28
+ reset_source
29
+
28
30
  source.harvest(category) do |*data|
29
31
 
30
32
  # Accumulate data.
@@ -35,7 +35,7 @@ module Picky
35
35
  #
36
36
  #
37
37
  def tokenizer
38
- Tokenizer.index_default
38
+ Tokenizer.indexing
39
39
  end
40
40
 
41
41
  end
@@ -0,0 +1,175 @@
1
+ module Picky
2
+
3
+ # This is very optional.
4
+ # Only load if the user wants it.
5
+ #
6
+ module Interfaces
7
+
8
+ module LiveParameters
9
+
10
+ # This is an interface that provides the user of
11
+ # Picky with the possibility to change parameters
12
+ # while the Application is running.
13
+ #
14
+ class MasterChild
15
+
16
+ def initialize
17
+ @child, @parent = IO.pipe
18
+ start_master_process_thread
19
+ end
20
+
21
+ # This runs a thread that listens to child processes.
22
+ #
23
+ def start_master_process_thread
24
+ # This thread is stopped in the children.
25
+ #
26
+ Thread.new do
27
+ loop do
28
+ IO.select([@child], nil, nil, 2) or next
29
+ result = @child.gets ';;;'
30
+ pid, configuration_hash = eval result
31
+ next unless Hash === configuration_hash
32
+ next if configuration_hash.empty?
33
+ exclaim "Trying to update MASTER configuration."
34
+ try_updating_configuration_with configuration_hash
35
+ kill_each_worker_except pid
36
+
37
+ # Fails hard on an error.
38
+ #
39
+ end
40
+ end
41
+ end
42
+
43
+ # Taken from Unicorn.
44
+ #
45
+ def kill_each_worker_except pid
46
+ worker_pids.each do |wpid|
47
+ next if wpid == pid
48
+ kill_worker :KILL, wpid
49
+ end
50
+ end
51
+ def kill_worker signal, wpid
52
+ Process.kill signal, wpid
53
+ exclaim "Killing worker ##{wpid} with signal #{signal}."
54
+ rescue Errno::ESRCH
55
+ remove_worker wpid
56
+ end
57
+
58
+ # Updates any parameters with the ones given and
59
+ # returns the updated params.
60
+ #
61
+ # The params are a strictly defined hash of:
62
+ # * querying_removes_characters: Regexp
63
+ # * querying_stopwords: Regexp
64
+ # * querying_splits_text_on: Regexp
65
+ #
66
+ # This first tries to update in the child process,
67
+ # and if successful, in the parent process
68
+ #
69
+ def parameters configuration_hash
70
+ close_child
71
+ exclaim "Trying to update worker child configuration." unless configuration_hash.empty?
72
+ try_updating_configuration_with configuration_hash
73
+ write_parent configuration_hash
74
+ extract_configuration
75
+ rescue CouldNotUpdateConfigurationError => e
76
+ # I need to die such that my broken config is never used.
77
+ #
78
+ exclaim "Child process #{Process.pid} performs harakiri because of broken config."
79
+ harakiri
80
+ { e.config_key => :ERROR }
81
+ end
82
+ # Kills itself, but still answering the request honorably.
83
+ #
84
+ def harakiri
85
+ Process.kill :QUIT, Process.pid
86
+ end
87
+ # Write the parent.
88
+ #
89
+ # Note: The ;;; is the end marker for the message.
90
+ #
91
+ def write_parent configuration_hash
92
+ @parent.write "#{[Process.pid, configuration_hash]};;;"
93
+ end
94
+ # Close the child if it isn't yet closed.
95
+ #
96
+ def close_child
97
+ @child.close unless @child.closed?
98
+ end
99
+
100
+ class CouldNotUpdateConfigurationError < StandardError
101
+ attr_reader :config_key
102
+ def initialize config_key, message
103
+ super message
104
+ @config_key = config_key
105
+ end
106
+ end
107
+
108
+ # Tries updating the configuration in the child process or parent process.
109
+ #
110
+ def try_updating_configuration_with configuration_hash
111
+ current_key = nil
112
+ begin
113
+ configuration_hash.each_pair do |key, new_value|
114
+ exclaim " Setting #{key} with #{new_value}."
115
+ current_key = key
116
+ send :"#{key}=", new_value
117
+ end
118
+ rescue StandardError => e
119
+ # Catch any error and reraise as config error.
120
+ #
121
+ raise CouldNotUpdateConfigurationError.new current_key, e.message
122
+ end
123
+ end
124
+
125
+ def extract_configuration
126
+ {
127
+ querying_removes_characters: querying_removes_characters,
128
+ querying_stopwords: querying_stopwords,
129
+ querying_splits_text_on: querying_splits_text_on
130
+ }
131
+ end
132
+
133
+ # TODO Move to Interface object.
134
+ #
135
+ def querying_removes_characters
136
+ regexp = Tokenizer.searching.instance_variable_get :@removes_characters_regexp
137
+ regexp && regexp.source
138
+ end
139
+ def querying_removes_characters= new_value
140
+ Tokenizer.searching.removes_characters %r{#{new_value}}
141
+ # Tokenizer.searching.instance_variable_set(:@removes_characters_regexp, %r{#{new_value}})
142
+ end
143
+ def querying_stopwords
144
+ regexp = Tokenizer.searching.instance_variable_get :@remove_stopwords_regexp
145
+ regexp && regexp.source
146
+ end
147
+ def querying_stopwords= new_value
148
+ Tokenizer.searching.instance_variable_set(:@remove_stopwords_regexp, %r{#{new_value}})
149
+ end
150
+ def querying_splits_text_on
151
+ splits = Tokenizer.searching.instance_variable_get :@splits_text_on
152
+ splits && splits.respond_to?(:source) ? splits.source : splits
153
+ end
154
+ def querying_splits_text_on= new_value
155
+ splits = Tokenizer.searching.instance_variable_get :@splits_text_on
156
+ if splits.respond_to?(:source)
157
+ Tokenizer.searching.instance_variable_set(:@splits_text_on, %r{#{new_value}})
158
+ else
159
+ Tokenizer.searching.instance_variable_set(:@splits_text_on, new_value)
160
+ end
161
+ end
162
+
163
+ #
164
+ #
165
+ def to_s
166
+ "Suckerfish Live Interface (Use the picky-live gem to introspect)"
167
+ end
168
+
169
+ end
170
+
171
+ end
172
+
173
+ end
174
+
175
+ end