picky 1.5.2 → 1.5.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data/lib/picky/analyzer.rb +154 -0
  2. data/lib/picky/application.rb +53 -33
  3. data/lib/picky/character_substituters/west_european.rb +10 -6
  4. data/lib/picky/cli.rb +18 -18
  5. data/lib/picky/index/base.rb +44 -13
  6. data/lib/picky/index_bundle.rb +13 -4
  7. data/lib/picky/indexed/indexes.rb +26 -10
  8. data/lib/picky/indexing/indexes.rb +26 -24
  9. data/lib/picky/interfaces/live_parameters.rb +23 -16
  10. data/lib/picky/internals/extensions/object.rb +13 -6
  11. data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
  12. data/lib/picky/internals/index/backend.rb +1 -2
  13. data/lib/picky/internals/index/file/basic.rb +18 -14
  14. data/lib/picky/internals/index/files.rb +16 -6
  15. data/lib/picky/internals/index/redis/basic.rb +12 -5
  16. data/lib/picky/internals/index/redis.rb +2 -2
  17. data/lib/picky/internals/indexed/bundle/base.rb +58 -14
  18. data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
  19. data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
  20. data/lib/picky/internals/indexed/categories.rb +19 -14
  21. data/lib/picky/internals/indexed/category.rb +44 -20
  22. data/lib/picky/internals/indexed/index.rb +23 -13
  23. data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
  24. data/lib/picky/internals/indexers/serial.rb +1 -1
  25. data/lib/picky/internals/indexing/bundle/base.rb +28 -28
  26. data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
  27. data/lib/picky/internals/indexing/categories.rb +15 -11
  28. data/lib/picky/internals/indexing/category.rb +30 -20
  29. data/lib/picky/internals/indexing/index.rb +22 -14
  30. data/lib/picky/internals/query/allocations.rb +0 -15
  31. data/lib/picky/internals/query/combinations/base.rb +0 -4
  32. data/lib/picky/internals/query/combinations/redis.rb +19 -8
  33. data/lib/picky/internals/query/indexes.rb +3 -6
  34. data/lib/picky/internals/query/token.rb +0 -4
  35. data/lib/picky/internals/query/weights.rb +2 -11
  36. data/lib/picky/internals/results/base.rb +3 -10
  37. data/lib/picky/internals/tokenizers/base.rb +64 -28
  38. data/lib/picky/internals/tokenizers/index.rb +8 -8
  39. data/lib/picky/loader.rb +59 -53
  40. data/lib/picky/query/base.rb +23 -29
  41. data/lib/picky/sources/base.rb +10 -10
  42. data/lib/picky/sources/couch.rb +14 -10
  43. data/lib/picky/sources/csv.rb +21 -14
  44. data/lib/picky/sources/db.rb +37 -31
  45. data/lib/picky/sources/delicious.rb +11 -8
  46. data/lib/picky/sources/wrappers/base.rb +3 -1
  47. data/lib/picky/statistics.rb +66 -0
  48. data/lib/tasks/application.rake +3 -0
  49. data/lib/tasks/checks.rake +11 -0
  50. data/lib/tasks/framework.rake +3 -0
  51. data/lib/tasks/index.rake +9 -11
  52. data/lib/tasks/routes.rake +3 -2
  53. data/lib/tasks/shortcuts.rake +17 -5
  54. data/lib/tasks/statistics.rake +20 -12
  55. data/lib/tasks/try.rake +14 -14
  56. data/spec/lib/application_spec.rb +3 -3
  57. data/spec/lib/index/base_spec.rb +25 -3
  58. data/spec/lib/internals/extensions/object_spec.rb +46 -20
  59. data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
  60. data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
  61. data/spec/lib/internals/indexers/serial_spec.rb +1 -1
  62. data/spec/lib/internals/results/base_spec.rb +0 -12
  63. data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
  64. data/spec/lib/query/allocations_spec.rb +0 -56
  65. data/spec/lib/query/base_spec.rb +25 -21
  66. data/spec/lib/query/combinations/redis_spec.rb +6 -1
  67. data/spec/lib/sources/delicious_spec.rb +2 -2
  68. data/spec/lib/statistics_spec.rb +31 -0
  69. metadata +9 -2
@@ -1,45 +1,61 @@
1
1
  module Indexed
2
-
2
+
3
3
  # Registers the indexes held at runtime, for queries.
4
4
  #
5
5
  class Indexes
6
-
6
+
7
7
  attr_reader :indexes, :index_mapping
8
-
8
+
9
9
  each_delegate :load_from_cache,
10
10
  :to => :indexes
11
-
11
+
12
12
  def initialize
13
13
  clear
14
14
  end
15
-
15
+
16
+ def to_s
17
+ indexes.indented_to_s
18
+ end
19
+
16
20
  # Clears the indexes and the mapping.
17
21
  #
18
22
  def clear
19
23
  @indexes = []
20
24
  @index_mapping = {}
21
25
  end
22
-
26
+
23
27
  # Reloads all indexes, one after another,
24
28
  # in the order they were added.
25
29
  #
26
30
  def reload
27
31
  load_from_cache
28
32
  end
29
-
33
+
30
34
  # Registers an index with the indexes.
31
35
  #
32
36
  def register index
33
37
  self.indexes << index
34
38
  self.index_mapping[index.name] = index
35
39
  end
36
-
40
+
41
+ # Load each index, and analyze it.
42
+ #
43
+ # Returns a hash with the findings.
44
+ #
45
+ def analyze
46
+ result = {}
47
+ self.indexes.each do |index|
48
+ index.analyze result
49
+ end
50
+ result
51
+ end
52
+
37
53
  # Extracts an index, given its identifier.
38
54
  #
39
55
  def [] identifier
40
56
  index_mapping[identifier.to_sym]
41
57
  end
42
-
58
+
43
59
  end
44
-
60
+
45
61
  end
@@ -1,11 +1,11 @@
1
1
  module Indexing
2
-
2
+
3
3
  # Registers the indexes held at index time, for indexing.
4
4
  #
5
5
  class Indexes
6
-
6
+
7
7
  attr_reader :indexes
8
-
8
+
9
9
  each_delegate :take_snapshot,
10
10
  :generate_caches,
11
11
  :backup_caches,
@@ -14,57 +14,59 @@ module Indexing
14
14
  :clear_caches,
15
15
  :create_directory_structure,
16
16
  :to => :indexes
17
-
17
+
18
18
  def initialize
19
19
  clear
20
20
  end
21
-
21
+
22
+ def to_s
23
+ indexes.indented_to_s
24
+ end
25
+
22
26
  # Clears the array of indexes.
23
27
  #
24
28
  def clear
25
29
  @indexes = []
26
30
  end
27
-
31
+
28
32
  # Registers an index with the indexes.
29
33
  #
30
34
  def register index
31
35
  self.indexes << index
32
36
  end
33
-
37
+
34
38
  # Runs the indexers in parallel (index + cache).
35
39
  #
36
40
  # TODO Spec.
37
41
  #
38
42
  def index randomly = true
39
43
  take_snapshot
40
-
44
+
41
45
  # Run in parallel.
42
46
  #
43
- timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
44
-
45
- # TODO Think about having serial work units.
47
+ timed_exclaim "Indexing using #{Cores.max_processors} processors, in #{randomly ? 'random' : 'given'} order."
48
+
49
+ # Run indexing/caching forked.
46
50
  #
47
51
  Cores.forked self.indexes, { randomly: randomly } do |an_index|
48
52
  an_index.index
49
- # TODO
50
- # end
51
- # Cores.forked self.indexes, { randomly: randomly } do |an_index|
52
53
  an_index.cache
53
54
  end
54
- timed_exclaim "INDEXING FINISHED."
55
+
56
+ timed_exclaim "Indexing finished."
55
57
  end
56
-
58
+
57
59
  # For integration testing – indexes for the tests without forking and shouting ;)
58
60
  #
59
61
  def index_for_tests
60
62
  take_snapshot
61
-
63
+
62
64
  self.indexes.each do |an_index|
63
65
  an_index.index
64
66
  an_index.cache
65
67
  end
66
68
  end
67
-
69
+
68
70
  # Generate only the index for the given index:category pair.
69
71
  #
70
72
  def generate_index_only index_name, category_name = nil
@@ -77,23 +79,23 @@ module Indexing
77
79
  found = find index_name, category_name
78
80
  found.generate_caches if found
79
81
  end
80
-
82
+
81
83
  # Find a given index:category pair.
82
84
  #
83
85
  def find index_name, category_name
84
86
  index_name = index_name.to_sym
85
-
87
+
86
88
  indexes.each do |index|
87
89
  next unless index.name == index_name
88
-
90
+
89
91
  return index unless category_name
90
-
92
+
91
93
  found = index.categories.find category_name
92
94
  return found if found
93
95
  end
94
-
96
+
95
97
  raise %Q{Index "#{index_name}" not found. Possible indexes: "#{indexes.map(&:name).join('", "')}".}
96
98
  end
97
-
99
+
98
100
  end
99
101
  end
@@ -9,12 +9,12 @@ module Interfaces
9
9
  # Important Note: This will only work in Master/Child configurations.
10
10
  #
11
11
  class LiveParameters
12
-
12
+
13
13
  def initialize
14
14
  @child, @parent = IO.pipe
15
15
  start_master_process_thread
16
16
  end
17
-
17
+
18
18
  # This runs a thread that listens to child processes.
19
19
  #
20
20
  def start_master_process_thread
@@ -30,18 +30,19 @@ module Interfaces
30
30
  exclaim "Trying to update MASTER configuration."
31
31
  try_updating_configuration_with configuration_hash
32
32
  kill_each_worker_except pid
33
- # TODO rescue on error.
34
-
33
+
34
+ # Fails hard on an error.
35
+ #
35
36
  end
36
37
  end
37
38
  end
38
-
39
+
39
40
  # TODO This needs to be webserver agnostic.
40
41
  #
41
42
  def worker_pids
42
43
  Unicorn::HttpServer::WORKERS.keys
43
44
  end
44
-
45
+
45
46
  # Taken from Unicorn.
46
47
  #
47
48
  def kill_each_worker_except pid
@@ -61,14 +62,14 @@ module Interfaces
61
62
  def remove_worker wpid
62
63
  worker = Unicorn::HttpServer::WORKERS.delete(wpid) and worker.tmp.close rescue nil
63
64
  end
64
-
65
+
65
66
  # Updates any parameters with the ones given and
66
67
  # returns the updated params.
67
68
  #
68
69
  # The params are a strictly defined hash of:
69
70
  # * querying_removes_characters: Regexp
70
71
  # * querying_stopwords: Regexp
71
- # TODO etc.
72
+ # * querying_splits_text_on: Regexp
72
73
  #
73
74
  # This first tries to update in the child process,
74
75
  # and if successful, in the parent process
@@ -103,7 +104,7 @@ module Interfaces
103
104
  def close_child
104
105
  @child.close unless @child.closed?
105
106
  end
106
-
107
+
107
108
  class CouldNotUpdateConfigurationError < StandardError
108
109
  attr_reader :config_key
109
110
  def initialize config_key, message
@@ -111,9 +112,9 @@ module Interfaces
111
112
  @config_key = config_key
112
113
  end
113
114
  end
114
-
115
+
115
116
  # Tries updating the configuration in the child process or parent process.
116
- #
117
+ #
117
118
  def try_updating_configuration_with configuration_hash
118
119
  current_key = nil
119
120
  begin
@@ -128,7 +129,7 @@ module Interfaces
128
129
  raise CouldNotUpdateConfigurationError.new current_key, e.message
129
130
  end
130
131
  end
131
-
132
+
132
133
  def extract_configuration
133
134
  {
134
135
  querying_removes_characters: querying_removes_characters,
@@ -136,7 +137,7 @@ module Interfaces
136
137
  querying_splits_text_on: querying_splits_text_on
137
138
  }
138
139
  end
139
-
140
+
140
141
  # TODO Move to Interface object.
141
142
  #
142
143
  def querying_removes_characters
@@ -157,11 +158,17 @@ module Interfaces
157
158
  def querying_splits_text_on= new_value
158
159
  Tokenizers::Query.default.instance_variable_set(:@splits_text_on_regexp, %r{#{new_value}})
159
160
  end
160
-
161
+
162
+ #
163
+ #
164
+ def to_s
165
+ "Suckerfish Live Interface (Use the picky-live gem to introspect)"
166
+ end
167
+
161
168
  end
162
-
169
+
163
170
  # Aka.
164
171
  #
165
172
  ::LiveParameters = LiveParameters
166
-
173
+
167
174
  end
@@ -1,22 +1,29 @@
1
1
  class Object # :nodoc:all
2
-
2
+
3
3
  # Puts a text in the form:
4
4
  # 12:34:56: text here
5
5
  #
6
6
  def timed_exclaim text
7
7
  exclaim "#{Time.now.strftime("%H:%M:%S")}: #{text}"
8
8
  end
9
-
9
+
10
10
  # Just puts the given text.
11
11
  #
12
12
  def exclaim text
13
13
  puts text
14
14
  end
15
-
15
+
16
16
  # Puts a text that informs the user of a missing gem.
17
17
  #
18
- def puts_gem_missing gem_name, message
19
- puts "#{gem_name} gem missing!\nTo use #{message}, you need to:\n 1. Add the following line to Gemfile:\n gem '#{gem_name}'\n 2. Then, run:\n bundle update\n"
18
+ def warn_gem_missing gem_name, message
19
+ warn "#{gem_name} gem missing!\nTo use #{message}, you need to:\n 1. Add the following line to Gemfile:\n gem '#{gem_name}'\n 2. Then, run:\n bundle update\n"
20
20
  end
21
-
21
+
22
+ # Indents each line by <tt>amount=2</tt> spaces.
23
+ #
24
+ def indented_to_s amount = 2
25
+ ary = self.respond_to?(:join) ? self : self.to_s.split("\n")
26
+ ary.map { |s| "#{" "*amount}#{s}"}.join("\n")
27
+ end
28
+
22
29
  end
@@ -7,18 +7,8 @@ module Internals
7
7
  # TODO Rename to Routing again. Push everything back into appropriate Adapters.
8
8
  #
9
9
  class Rack # :nodoc:all
10
-
11
- @@defaults = {
12
- query_key: 'query'.freeze,
13
- offset_key: 'offset'.freeze,
14
- content_type: 'application/octet-stream'.freeze # TODO Wrong.
15
- }
16
-
17
- def initialize
18
- @defaults = @@defaults.dup
19
- end
20
-
21
- #
10
+
11
+ #
22
12
  #
23
13
  def reset_routes
24
14
  @routes = ::Rack::Mount::RouteSet.new
@@ -29,13 +19,13 @@ module Internals
29
19
  def finalize
30
20
  routes.freeze
31
21
  end
32
-
22
+
33
23
  # Routing simply delegates to the route set to handle a request.
34
24
  #
35
25
  def call env
36
26
  routes.call env
37
27
  end
38
-
28
+
39
29
  # API method.
40
30
  #
41
31
  def route options = {}
@@ -61,7 +51,7 @@ module Internals
61
51
  end
62
52
  def route_one url, query, route_options = {}
63
53
  raise RouteTargetNilError.new(url) unless query
64
- routes.add_route Internals::Adapters::Rack.app_for(query, route_options), default_options(url, route_options)
54
+ routes.add_route Internals::Adapters::Rack.app_for(query, route_options), default_options(url, route_options), {}, query.to_s
65
55
  end
66
56
  class RouteTargetNilError < StandardError
67
57
  def initialize url
@@ -81,9 +71,9 @@ module Internals
81
71
  def default status
82
72
  answer nil, STATUSES[status]
83
73
  end
84
-
85
-
86
-
74
+
75
+
76
+
87
77
  # TODO Can Rack handle this for me?
88
78
  #
89
79
  # Note: Rack-mount already handles the 404.
@@ -92,21 +82,21 @@ module Internals
92
82
  200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] },
93
83
  404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] }
94
84
  }
95
-
85
+
96
86
  #
97
87
  #
98
88
  def default_options url, route_options = {}
99
89
  url = normalized url
100
-
90
+
101
91
  options = { request_method: 'GET' }.merge route_options
102
-
92
+
103
93
  options[:path_info] = url if url
104
-
94
+
105
95
  options.delete :content_type
106
-
96
+
107
97
  query_params = options.delete :query
108
98
  options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params
109
-
99
+
110
100
  options
111
101
  end
112
102
  #
@@ -117,38 +107,44 @@ module Internals
117
107
  k, v = query_params.first
118
108
  "#{k}=#{v}"
119
109
  end
120
-
110
+
121
111
  # Setup a route that answers using the given app.
122
112
  #
123
113
  def answer url = nil, app = nil
124
114
  routes.add_route (app || STATUSES[200]), default_options(url)
125
115
  end
126
-
116
+
127
117
  # Returns a regular expression for the url if it is given a String-like object.
128
118
  #
129
119
  def normalized url
130
120
  url.respond_to?(:to_str) ? %r{#{url}} : url
131
121
  end
132
-
122
+
133
123
  # Returns true if there are no routes defined.
134
124
  #
135
125
  def empty?
136
126
  routes.length.zero?
137
127
  end
138
-
139
- # TODO Beautify.
128
+
129
+ # TODO Beautify. Rewrite!
140
130
  #
141
131
  def to_s
132
+ max_length = routes.instance_variable_get(:@routes).reduce(0) do |current_max, route|
133
+ route_length = route.conditions[:path_info].source.to_s.size
134
+ route_length > current_max ? route_length : current_max
135
+ end
136
+ "Note: Anchored (\u2713) regexps are faster, e.g. /\\A.*\\Z/ or /^.*$/.\n\n" +
142
137
  routes.instance_variable_get(:@routes).map do |route|
143
138
  path_info = route.conditions[:path_info]
144
139
  anchored = ::Rack::Mount::Utils.regexp_anchored?(path_info)
145
140
  anchored_ok = anchored ? "\u2713" : " "
146
- "#{anchored_ok} #{path_info.source}"
147
- end.join "\n"
141
+ source = path_info.source
142
+ "#{anchored_ok} #{source.ljust(max_length)} => #{route.name}"
143
+ end.join("\n")
148
144
  end
149
-
145
+
150
146
  end
151
-
147
+
152
148
  end
153
-
149
+
154
150
  end
@@ -95,8 +95,7 @@ module Internals
95
95
  similarity.restore
96
96
  configuration.restore
97
97
  end
98
-
99
-
98
+
100
99
  # Delete all index files.
101
100
  #
102
101
  def delete
@@ -1,11 +1,11 @@
1
1
  module Internals
2
2
 
3
3
  module Index
4
-
4
+
5
5
  # Handles all aspects of index files, such as dumping/loading.
6
6
  #
7
7
  module File
8
-
8
+
9
9
  # Base class for all index files.
10
10
  #
11
11
  # Provides necessary helper methods for its
@@ -14,22 +14,26 @@ module Internals
14
14
  # dump/load methods.
15
15
  #
16
16
  class Basic
17
-
17
+
18
18
  attr_reader :cache_path
19
-
19
+
20
20
  # An index cache takes a path, without file extension,
21
21
  # which will be provided by the subclasses.
22
22
  #
23
23
  def initialize cache_path
24
24
  @cache_path = "#{cache_path}.#{extension}"
25
25
  end
26
-
26
+
27
+ def to_s
28
+ cache_path
29
+ end
30
+
27
31
  # The default extension for index files is "index".
28
32
  #
29
33
  def extension
30
34
  :index
31
35
  end
32
-
36
+
33
37
  # Will copy the index file to a location that
34
38
  # is in a directory named "backup" right under
35
39
  # the directory the index file is in.
@@ -49,7 +53,7 @@ module Internals
49
53
  def prepare_backup target
50
54
  FileUtils.mkdir target unless Dir.exists?(target)
51
55
  end
52
-
56
+
53
57
  # Copies the file from its backup location back
54
58
  # to the original location.
55
59
  #
@@ -62,16 +66,16 @@ module Internals
62
66
  dir, name = ::File.split path
63
67
  ::File.join dir, 'backup', name
64
68
  end
65
-
69
+
66
70
  # Deletes the file.
67
71
  #
68
72
  def delete
69
73
  `rm -Rf #{cache_path}`
70
74
  end
71
-
75
+
72
76
  # Checks.
73
77
  #
74
-
78
+
75
79
  # Is this cache file suspiciously small?
76
80
  # (less than 8 Bytes of size)
77
81
  #
@@ -91,11 +95,11 @@ module Internals
91
95
  def size_of path
92
96
  `ls -l #{path} | awk '{print $5}'`.to_i
93
97
  end
94
-
98
+
95
99
  end
96
-
100
+
97
101
  end
98
-
102
+
99
103
  end
100
-
104
+
101
105
  end
@@ -1,12 +1,12 @@
1
1
  module Internals
2
2
 
3
3
  module Index
4
-
4
+
5
5
  class Files < Backend
6
-
6
+
7
7
  def initialize bundle_name, config
8
8
  super bundle_name, config
9
-
9
+
10
10
  # Note: We marshal the similarity, as the
11
11
  # Yajl json lib cannot load symbolized
12
12
  # values, just keys.
@@ -16,9 +16,19 @@ module Internals
16
16
  @similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
17
17
  @configuration = File::JSON.new config.index_path(bundle_name, :configuration)
18
18
  end
19
-
19
+
20
+ def to_s
21
+ <<-FILES
22
+ Files:
23
+ #{"Index: #{@index}".indented_to_s}
24
+ #{"Weights: #{@weights}".indented_to_s}
25
+ #{"Similarity: #{@similarity}".indented_to_s}
26
+ #{"Config: #{@configuration}".indented_to_s}
27
+ FILES
28
+ end
29
+
20
30
  end
21
-
31
+
22
32
  end
23
-
33
+
24
34
  end
@@ -49,7 +49,10 @@ module Internals
49
49
  # Deletes the Redis index namespace.
50
50
  #
51
51
  def delete
52
- # TODO @backend.
52
+ # Not implemented here.
53
+ # Note: backend.flushdb might be the way to go,
54
+ # but since we cannot delete by key pattern,
55
+ # we don't do anything.
53
56
  end
54
57
 
55
58
  # Checks.
@@ -58,19 +61,23 @@ module Internals
58
61
  # Is this cache suspiciously small?
59
62
  #
60
63
  def cache_small?
61
- false # TODO
64
+ size < 1
62
65
  end
63
66
  # Is the cache ok?
64
67
  #
65
68
  # A small cache is still ok.
66
69
  #
67
70
  def cache_ok?
68
- false # TODO
71
+ size > 0
69
72
  end
70
73
  # Extracts the size of the file in Bytes.
71
74
  #
72
- def size_of path
73
- # TODO
75
+ # Note: This is a very forgiving implementation.
76
+ # But as long as Redis does not implement
77
+ # DBSIZE KEYPATTERN, we are stuck with this.
78
+ #
79
+ def size
80
+ backend.dbsize
74
81
  end
75
82
 
76
83
  end
@@ -8,8 +8,8 @@ module Internals
8
8
 
9
9
  def initialize bundle_name, config
10
10
  super bundle_name, config
11
-
12
- # TODO
11
+
12
+ # Refine a few Redis "types".
13
13
  #
14
14
  @index = Redis::ListHash.new "#{config.identifier}:#{bundle_name}:index"
15
15
  @weights = Redis::StringHash.new "#{config.identifier}:#{bundle_name}:weights"