picky 3.6.7 → 3.6.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/lib/picky/backends/file/basic.rb +1 -1
  2. data/lib/picky/backends/file/json.rb +5 -1
  3. data/lib/picky/backends/file.rb +7 -0
  4. data/lib/picky/backends/memory.rb +7 -0
  5. data/lib/picky/backends/redis/basic.rb +3 -11
  6. data/lib/picky/backends/redis/directly_manipulable.rb +48 -0
  7. data/lib/picky/backends/redis/list.rb +39 -15
  8. data/lib/picky/backends/redis/string.rb +17 -9
  9. data/lib/picky/backends/redis.rb +102 -66
  10. data/lib/picky/backends/sqlite/array.rb +38 -0
  11. data/lib/picky/backends/sqlite/basic.rb +100 -0
  12. data/lib/picky/backends/sqlite/directly_manipulable.rb +42 -0
  13. data/lib/picky/backends/sqlite/value.rb +34 -0
  14. data/lib/picky/backends/sqlite.rb +14 -4
  15. data/lib/picky/bundle.rb +12 -5
  16. data/lib/picky/bundle_indexed.rb +15 -2
  17. data/lib/picky/bundle_indexing.rb +6 -5
  18. data/lib/picky/bundle_realtime.rb +22 -31
  19. data/lib/picky/categories_realtime.rb +1 -1
  20. data/lib/picky/category_indexed.rb +1 -1
  21. data/lib/picky/category_indexing.rb +7 -5
  22. data/lib/picky/category_realtime.rb +17 -5
  23. data/lib/picky/generators/strategy.rb +4 -0
  24. data/lib/picky/index_indexing.rb +1 -4
  25. data/lib/picky/index_realtime.rb +16 -6
  26. data/lib/picky/indexers/base.rb +7 -1
  27. data/lib/picky/indexes.rb +1 -0
  28. data/lib/picky/loader.rb +11 -7
  29. data/lib/picky/query/allocation.rb +1 -1
  30. data/lib/picky/query/indexes.rb +2 -2
  31. data/lib/picky/query/token.rb +1 -1
  32. data/lib/picky/search.rb +20 -8
  33. data/lib/picky/tokenizer.rb +6 -6
  34. data/lib/picky/wrappers/bundle/delegators.rb +3 -1
  35. data/spec/category_realtime_spec.rb +33 -0
  36. data/spec/functional/backends/file_spec.rb +98 -0
  37. data/spec/functional/backends/memory_spec.rb +96 -0
  38. data/spec/functional/backends/redis_spec.rb +107 -0
  39. data/spec/functional/backends/sqlite_spec.rb +104 -0
  40. data/spec/{specific → functional}/dynamic_weights_spec.rb +0 -0
  41. data/spec/{specific → functional}/exact_first_spec.rb +2 -4
  42. data/spec/functional/max_allocations_spec.rb +33 -0
  43. data/spec/{specific → functional}/realtime_spec.rb +0 -0
  44. data/spec/{specific → functional}/regression_spec.rb +0 -0
  45. data/spec/{specific → functional}/speed_spec.rb +0 -0
  46. data/spec/lib/backends/file/basic_spec.rb +1 -1
  47. data/spec/lib/backends/redis/basic_spec.rb +12 -13
  48. data/spec/lib/backends/redis/directly_manipulable_spec.rb +91 -0
  49. data/spec/lib/backends/redis/float_spec.rb +17 -17
  50. data/spec/lib/backends/redis/list_spec.rb +9 -9
  51. data/spec/lib/backends/sqlite/array_spec.rb +143 -0
  52. data/spec/lib/backends/sqlite/directly_manipulable_spec.rb +65 -0
  53. data/spec/lib/backends/sqlite/{db_spec.rb → value_spec.rb} +2 -7
  54. data/spec/lib/backends/sqlite_spec.rb +22 -20
  55. data/spec/lib/category_indexed_spec.rb +1 -1
  56. data/spec/lib/category_indexing_spec.rb +2 -2
  57. data/spec/lib/index_indexing_spec.rb +0 -7
  58. data/spec/lib/index_realtime_spec.rb +34 -0
  59. data/spec/lib/indexed/bundle_realtime_spec.rb +166 -75
  60. data/spec/lib/indexers/base_spec.rb +13 -1
  61. data/spec/lib/search_spec.rb +31 -20
  62. metadata +58 -34
  63. data/lib/picky/backends/sqlite/db.rb +0 -84
@@ -52,7 +52,7 @@ module Picky
52
52
  # as in #load.
53
53
  #
54
54
  def initial
55
- @initial && @initial.clone || nil
55
+ @initial && @initial.clone || {}
56
56
  end
57
57
 
58
58
  # Deletes the file.
@@ -8,11 +8,15 @@ module Picky
8
8
  #
9
9
  class JSON < Basic
10
10
 
11
+ attr_writer :mapping
12
+
11
13
  # The in-memory mapping hash, mapping
12
14
  # a Symbol key to [length, offset] of
13
15
  # the JSON data in the file.
14
16
  #
15
- attr_accessor :mapping
17
+ def mapping
18
+ @mapping || raise("The File index/category needs to be loaded first.")
19
+ end
16
20
 
17
21
  # See lib/picky/backends/file.rb for what this should return.
18
22
  #
@@ -38,6 +38,13 @@ module Picky
38
38
  extract_lambda_or(configuration, bundle) ||
39
39
  JSON.new(bundle.index_path(:configuration))
40
40
  end
41
+ # Returns an object that on #initial, #load returns an object that responds to:
42
+ # [id] # => [:sym1, :sym2]
43
+ #
44
+ def create_realtime bundle
45
+ extract_lambda_or(similarity, bundle) ||
46
+ JSON.new(bundle.index_path(:realtime))
47
+ end
41
48
 
42
49
  # Currently, the loaded ids are intersected using
43
50
  # the fast C-based intersection.
@@ -32,6 +32,13 @@ module Picky
32
32
  extract_lambda_or(configuration, bundle) ||
33
33
  JSON.new(bundle.index_path(:configuration))
34
34
  end
35
+ # Returns an object that on #initial, #load returns an object that responds to:
36
+ # [id] # => [:sym1, :sym2]
37
+ #
38
+ def create_realtime bundle
39
+ extract_lambda_or(similarity, bundle) ||
40
+ JSON.new(bundle.index_path(:realtime))
41
+ end
35
42
 
36
43
  # Returns the result ids for the allocation.
37
44
  #
@@ -24,13 +24,14 @@ module Picky
24
24
 
25
25
  @empty = options[:empty]
26
26
  @initial = options[:initial]
27
+ @immediate = options[:immediate]
27
28
  end
28
29
 
29
30
  # The empty index that is used for putting the index
30
31
  # together.
31
32
  #
32
33
  def empty
33
- @empty && @empty.clone || {}
34
+ @empty && @empty.clone || (@immediate ? self : {})
34
35
  end
35
36
 
36
37
  # The initial content before loading.
@@ -39,7 +40,7 @@ module Picky
39
40
  # this just returns the same thing as #load.
40
41
  #
41
42
  def initial
42
- @initial && @initial.clone || self
43
+ @initial && @initial.clone || (@immediate ? self : {})
43
44
  end
44
45
 
45
46
  # Returns itself.
@@ -54,15 +55,6 @@ module Picky
54
55
  # Nothing.
55
56
  end
56
57
 
57
- # Deletes the Redis index namespace.
58
- #
59
- def delete
60
- # Not implemented here.
61
- # Note: backend.flushdb might be the way to go,
62
- # but since we cannot delete by key pattern,
63
- # we don't do anything.
64
- end
65
-
66
58
  #
67
59
  #
68
60
  def to_s
@@ -0,0 +1,48 @@
module Picky

  module Backends

    class Redis

      # Mixin that is extended onto a single Array (see .make) so that
      # in-place changes to that Array are mirrored to the Redis sorted
      # set stored under "<backend.namespace>:<key>".
      #
      module DirectlyManipulable

        attr_accessor :backend, :key

        # Score counters used for the sorted set entries written by
        # #<< and #unshift. They are kept as instance variables of this
        # module (instead of @@class variables) and are shared by every
        # list that has been extended with this module.
        #
        # TODO Current implementation is very brittle: the counters start
        #      at 0 in every process and are not synchronized.
        #
        @append_index  = 0
        @unshift_index = 0

        class << self

          # Extends the list with this module and tells it which backend
          # and key it belongs to.
          #
          # Returns the key (the value of the last assignment).
          #
          def make backend, list, key
            list.extend DirectlyManipulable
            list.backend = backend
            list.key     = key
          end

          # Next score for an appended value: 1, 2, 3, ...
          #
          def next_append_index
            @append_index += 1
          end

          # Next score for a prepended value: -1, -2, -3, ...
          #
          def next_unshift_index
            @unshift_index -= 1
          end

        end

        # Appends the value locally, adds it to the sorted set with the
        # next positive score and returns the list as re-read from the
        # backend.
        #
        def << value
          super value
          backend.client.zadd "#{backend.namespace}:#{key}", DirectlyManipulable.next_append_index, value
          backend[key]
        end

        # Prepends the value locally, adds it to the sorted set with the
        # next negative score and returns the list as re-read from the
        # backend.
        #
        def unshift value
          super value
          backend.client.zadd "#{backend.namespace}:#{key}", DirectlyManipulable.next_unshift_index, value
          backend[key]
        end

        # Deletes the value locally and, only if the local delete returned
        # something truthy, removes it from the sorted set as well.
        #
        # Returns whatever the local delete returned.
        #
        def delete value
          result = super value
          if result
            backend.client.zrem "#{backend.namespace}:#{key}", value
          end
          result
        end
      end

    end

  end

end
@@ -6,20 +6,6 @@ module Picky
6
6
 
7
7
  class List < Basic
8
8
 
9
- # Writes the hash into Redis.
10
- #
11
- def dump hash
12
- clear
13
- hash.each_pair do |key, values|
14
- redis_key = "#{namespace}:#{key}"
15
- i = 0
16
- values.each do |value|
17
- i += 1
18
- client.zadd redis_key, i, value
19
- end
20
- end
21
- end
22
-
23
9
  # Clear the index for this list.
24
10
  #
25
11
  # Note: Perhaps we can use a server only command.
@@ -32,12 +18,50 @@ module Picky
32
18
  end
33
19
  end
34
20
 
21
+ # Deletes the list for the key.
22
+ #
23
def delete key
  # The list for a key is stored under "<namespace>:<key>" - the same
  # Redis key that #[], #[]= and #dump address - so that is the key
  # which has to be deleted, not the bare key.
  #
  client.del "#{namespace}:#{key}"
end
26
+
27
+ # Writes the hash into Redis.
28
+ #
29
+ def dump hash
30
+ unless @immediate
31
+ clear
32
+ # client.pipelined do
33
+ hash.each_pair do |key, values|
34
+ redis_key = "#{namespace}:#{key}"
35
+ i = 0
36
+ values.each do |value|
37
+ i += 1
38
+ client.zadd redis_key, i, value
39
+ end
40
+ end
41
+ # end
42
+ end
43
+ end
44
+
35
45
  # Get a collection.
36
46
  #
37
47
  # Internal API method for the index.
38
48
  #
39
49
  def [] key
40
- client.zrange "#{namespace}:#{key}", 0, -1
50
+ list = client.zrange "#{namespace}:#{key}", :'0', :'-1'
51
+ DirectlyManipulable.make self, list, key
52
+ list
53
+ end
54
+
55
+ # Set a single list.
56
+ #
57
+ def []= key, values
58
+ redis_key = "#{namespace}:#{key}"
59
+ i = 0
60
+ values.each do |value|
61
+ i += 1
62
+ client.zadd redis_key, i, value
63
+ end
64
+ self[key] # TODO Performance?
41
65
  end
42
66
 
43
67
  end
@@ -6,23 +6,31 @@ module Picky
6
6
 
7
7
  class String < Basic
8
8
 
9
+ # Clears the hash.
10
+ #
11
+ def clear
12
+ client.del namespace
13
+ end
14
+
15
+ # Deletes the single value.
16
+ #
17
+ def delete key
18
+ client.hdel namespace, key
19
+ end
20
+
9
21
  # Writes the hash into Redis.
10
22
  #
11
23
  # Note: We could use multi, but it did not help.
12
24
  #
13
25
  def dump hash
14
- clear
15
- hash.each_pair do |key, value|
16
- client.hset namespace, key, value
26
+ unless @immediate
27
+ clear
28
+ hash.each_pair do |key, value|
29
+ client.hset namespace, key, value
30
+ end
17
31
  end
18
32
  end
19
33
 
20
- # Clears the hash.
21
- #
22
- def clear
23
- client.del namespace
24
- end
25
-
26
34
  # Get a single value.
27
35
  #
28
36
  # Internal API method for the index.
@@ -6,13 +6,15 @@ module Picky
6
6
  #
7
7
  class Redis < Backend
8
8
 
9
- attr_reader :client
9
+ attr_reader :client,
10
+ :immediate
10
11
 
11
12
  def initialize options = {}
12
13
  super options
13
14
 
14
15
  require 'redis'
15
- @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
16
+ @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
17
+ @immediate = options[:immediate]
16
18
  rescue LoadError => e
17
19
  warn_gem_missing 'redis', 'the Redis client'
18
20
  end
@@ -22,28 +24,35 @@ module Picky
22
24
  #
23
25
  def create_inverted bundle
24
26
  extract_lambda_or(inverted, bundle, client) ||
25
- List.new(client, "#{bundle.identifier}:inverted")
27
+ List.new(client, "#{bundle.identifier}:inverted", immediate: immediate)
26
28
  end
27
29
  # Returns an object that on #initial, #load returns an object that responds to:
28
30
  # [:token] # => 1.23 (a weight)
29
31
  #
30
32
  def create_weights bundle
31
33
  extract_lambda_or(weights, bundle, client) ||
32
- Float.new(client, "#{bundle.identifier}:weights")
34
+ Float.new(client, "#{bundle.identifier}:weights", immediate: immediate)
33
35
  end
34
36
  # Returns an object that on #initial, #load returns an object that responds to:
35
37
  # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
36
38
  #
37
39
  def create_similarity bundle
38
40
  extract_lambda_or(similarity, bundle, client) ||
39
- List.new(client, "#{bundle.identifier}:similarity")
41
+ List.new(client, "#{bundle.identifier}:similarity", immediate: immediate)
40
42
  end
41
43
  # Returns an object that on #initial, #load returns an object that responds to:
42
44
  # [:key] # => value (a value for this config key)
43
45
  #
44
46
  def create_configuration bundle
45
47
  extract_lambda_or(configuration, bundle, client) ||
46
- String.new(client, "#{bundle.identifier}:configuration")
48
+ String.new(client, "#{bundle.identifier}:configuration", immediate: immediate)
49
+ end
50
+ # Returns an object that on #initial, #load returns an object that responds to:
51
+ # [id] # => [:sym1, :sym2]
52
+ #
53
+ def create_realtime bundle
54
+ extract_lambda_or(similarity, bundle) ||
55
+ List.new(client, "#{bundle.identifier}:realtime", immediate: immediate)
47
56
  end
48
57
 
49
58
  # Does the Redis version already include
@@ -88,76 +97,103 @@ module Picky
88
97
  #
89
98
  # Note: We use the amount and offset hints to speed Redis up.
90
99
  #
100
+ # TODO What if it hasn't been dumped?
101
+ # Move this method to the actual backends?
102
+ #
91
103
  def ids combinations, amount, offset
92
- # Just checked once on the first call.
93
- #
94
- if redis_with_scripting?
95
- @@script = "local intersected = redis.call('zinterstore', ARGV[1], #(KEYS), unpack(KEYS)); if intersected == 0 then redis.call('del', ARGV[1]); return {}; end local results = redis.call('zrange', ARGV[1], tonumber(ARGV[2]), tonumber(ARGV[3])); redis.call('del', ARGV[1]); return results;"
104
+ if immediate
105
+ # Just checked once on the first call.
106
+ #
107
+ if redis_with_scripting?
108
+ @@script = "local intersected = redis.call('zinterstore', ARGV[1], #(KEYS), unpack(KEYS)); if intersected == 0 then redis.call('del', ARGV[1]); return {}; end local results = redis.call('zrange', ARGV[1], tonumber(ARGV[2]), tonumber(ARGV[3])); redis.call('del', ARGV[1]); return results;"
96
109
 
97
- require 'digest/sha1'
98
- @@sent_once = nil
110
+ require 'digest/sha1'
111
+ @@sent_once = nil
99
112
 
100
- # Scripting version of #ids.
101
- #
102
- def ids combinations, amount, offset
103
- identifiers = combinations.inject([]) do |identifiers, combination|
104
- identifiers << "#{combination.identifier}"
113
+ # Scripting version of #ids.
114
+ #
115
+ class << self
116
+ def ids combinations, amount, offset
117
+ identifiers = combinations.inject([]) do |identifiers, combination|
118
+ identifiers << "#{combination.identifier}"
119
+ end
120
+
121
+ # Assume it's using EVALSHA.
122
+ #
123
+ begin
124
+ client.evalsha @@sent_once,
125
+ identifiers.size,
126
+ *identifiers,
127
+ generate_intermediate_result_id,
128
+ offset,
129
+ (offset + amount)
130
+ rescue RuntimeError => e
131
+ # Make the server have a SHA-1 for the script.
132
+ #
133
+ @@sent_once = Digest::SHA1.hexdigest @@script
134
+ client.eval @@script,
135
+ identifiers.size,
136
+ *identifiers,
137
+ generate_intermediate_result_id,
138
+ offset,
139
+ (offset + amount)
140
+ end
141
+ end
105
142
  end
106
-
107
- # Assume it's using EVALSHA.
143
+ else
144
+ # Non-Scripting version of #ids.
108
145
  #
109
- begin
110
- client.evalsha @@sent_once,
111
- identifiers.size,
112
- *identifiers,
113
- generate_intermediate_result_id,
114
- offset,
115
- (offset + amount)
116
- rescue RuntimeError => e
117
- # Make the server have a SHA-1 for the script.
118
- #
119
- @@sent_once = Digest::SHA1.hexdigest @@script
120
- client.eval @@script,
121
- identifiers.size,
122
- *identifiers,
123
- generate_intermediate_result_id,
124
- offset,
125
- (offset + amount)
146
+ class << self
147
+ def ids combinations, amount, offset
148
+ identifiers = combinations.inject([]) do |identifiers, combination|
149
+ identifiers << "#{combination.identifier}"
150
+ end
151
+
152
+ result_id = generate_intermediate_result_id
153
+
154
+ # Intersect and store.
155
+ #
156
+ intersected = client.zinterstore result_id, identifiers
157
+
158
+ # Return clean and early if there has been no intersection.
159
+ #
160
+ if intersected.zero?
161
+ client.del result_id
162
+ return []
163
+ end
164
+
165
+ # Get the stored result.
166
+ #
167
+ results = client.zrange result_id, offset, (offset + amount)
168
+
169
+ # Delete the stored result as it was only for temporary purposes.
170
+ #
171
+ # Note: I could also not delete it, but that
172
+ # would not be clean at all.
173
+ #
174
+ client.del result_id
175
+
176
+ results
177
+ end
126
178
  end
127
179
  end
128
180
  else
129
- # Non-Scripting version of #ids.
181
+ # TODO Refactor!
130
182
  #
131
- def ids combinations, amount, offset
132
- identifiers = combinations.inject([]) do |identifiers, combination|
133
- identifiers << "#{combination.identifier}"
134
- end
135
-
136
- result_id = generate_intermediate_result_id
137
-
138
- # Intersect and store.
139
- #
140
- intersected = client.zinterstore result_id, identifiers
183
+ class << self
184
+ def ids combinations, _, _
185
+ # Get the ids for each combination.
186
+ #
187
+ id_arrays = combinations.inject([]) do |total, combination|
188
+ total << combination.ids
189
+ end
141
190
 
142
- # Return clean and early if there has been no intersection.
143
- #
144
- if intersected.zero?
145
- client.del result_id
146
- return []
191
+ # Call the optimized C algorithm.
192
+ #
193
+ # Note: It orders the passed arrays by size.
194
+ #
195
+ Performant::Array.memory_efficient_intersect id_arrays
147
196
  end
148
-
149
- # Get the stored result.
150
- #
151
- results = client.zrange result_id, offset, (offset + amount)
152
-
153
- # Delete the stored result as it was only for temporary purposes.
154
- #
155
- # Note: I could also not delete it, but that
156
- # would not be clean at all.
157
- #
158
- client.del result_id
159
-
160
- results
161
197
  end
162
198
  end
163
199
 
@@ -184,7 +220,7 @@ module Picky
184
220
  # Use the host and pid (generated lazily in child processes) for the result.
185
221
  #
186
222
  def generate_intermediate_result_id
187
- :"#{host}:#{pid}:picky:result"
223
+ @intermediate_result_id ||= "#{host}:#{pid}:picky:result"
188
224
  end
189
225
 
190
226
  end
@@ -0,0 +1,38 @@
module Picky

  module Backends

    class SQLite

      # Stores arrays, JSON-encoded, in the key_value table.
      #
      # Arrays handed out or taken in are extended with DirectlyManipulable.
      #
      class Array < Basic

        # Stores the array under the key and returns the array, extended
        # with DirectlyManipulable.
        #
        # Empty arrays are not written: a key without a row reads back as
        # an empty array anyway (see #[]).
        #
        def []= key, array
          unless array.empty?
            db.execute 'insert or replace into key_value (key, value) values (?,?)', key.to_s, Yajl::Encoder.encode(array)
          end

          DirectlyManipulable.make self, array, key
          array
        end

        # Returns the array stored under the key, extended with
        # DirectlyManipulable. Returns an empty array if no row was found.
        #
        # A falsy query result is passed through unchanged.
        #
        def [] key
          res = db.execute "select value from key_value where key = ? limit 1;", key.to_s

          return res unless res

          array = res.empty? ? [] : Yajl::Parser.parse(res.first.first)
          DirectlyManipulable.make self, array, key
          array
        end

        # Deletes the rows stored under the key.
        #
        def delete key
          db.execute "delete from key_value where key = (?)", key.to_s
        end

      end

    end

  end

end
@@ -0,0 +1,100 @@
module Picky

  module Backends

    class SQLite

      # Base class of the SQLite backed index parts.
      #
      # Keeps key/value pairs in a table called key_value inside the
      # SQLite3 database file "<cache_path>.sqlite3".
      #
      class Basic

        include Helpers::File

        attr_reader :cache_path, :db

        # Options read here:
        #   * :empty        - Object that #empty returns a clone of.
        #   * :initial      - Object that #initial returns a clone of.
        #   * :self_indexed - If truthy, this object itself is used as the
        #                     index (see #empty, #initial, #dump).
        #
        # Note: The database is opened right here.
        #
        def initialize cache_path, options = {}
          @cache_path   = "#{cache_path}.sqlite3"
          @empty        = options[:empty]
          @initial      = options[:initial]
          @self_indexed = options[:self_indexed]

          lazily_initialize_client
        end

        # The initial content before loading: a clone of the :initial
        # option, else this object if self indexed, else an empty hash.
        #
        def initial
          @initial && @initial.clone || (@self_indexed ? self : {})
        end

        # The empty index used for putting the index together: a clone of
        # the :empty option, else this object (with a freshly reset
        # database, switched to asynchronous) if self indexed, else an
        # empty hash.
        #
        def empty
          @empty && @empty.clone || (@self_indexed ? self.reset_db.asynchronous : {})
        end

        # Writes the internal structure into the database, unless this
        # object is self indexed. Returns self.
        #
        def dump internal
          dump_sqlite internal unless @self_indexed
          self
        end

        # Returns itself.
        #
        def load
          self
        end

        # Removes all rows from the key_value table.
        #
        def clear
          db.execute 'delete from key_value'
        end

        # Opens the database unless it has been opened already.
        #
        def lazily_initialize_client
          @db ||= SQLite3::Database.new cache_path
        end

        # Resets the database and writes each key/value pair of the
        # internal structure into it, inside a single transaction.
        # Values are JSON-encoded, keys are converted with #to_s.
        #
        def dump_sqlite internal
          reset_db

          transaction do
            # Note: Internal structures need to
            #       implement each.
            #
            internal.each do |key, value|
              encoded_value = Yajl::Encoder.encode value
              db.execute 'insert into key_value values (?,?)', key.to_s, encoded_value
            end
          end
        end

        # Drops and recreates the key_value table and its index.
        # Returns self.
        #
        def reset_db
          create_directory cache_path
          lazily_initialize_client

          # TODO Could this be replaced by a truncate statement?
          #
          db.execute 'drop table if exists key_value;'
          db.execute 'create table key_value (key varchar(255), value text);'

          # The index is unique so that "insert or replace" statements
          # replace the row of an existing key. Without a uniqueness
          # constraint there is no conflict to resolve and every such
          # statement just adds another row for the same key.
          #
          db.execute 'create unique index key_idx on key_value (key);'

          self
        end

        # Sets PRAGMA synchronous to OFF. Returns self.
        #
        def asynchronous
          db.execute 'PRAGMA synchronous = OFF;'
          self
        end

        # Sets PRAGMA synchronous to ON. Returns self.
        #
        def synchronous
          db.execute 'PRAGMA synchronous = ON;'
          self
        end

        # Runs the block between BEGIN and COMMIT.
        #
        # If the block (or the COMMIT) raises, the transaction is rolled
        # back and the error is re-raised, so that no transaction is left
        # open on the connection.
        #
        def transaction
          db.execute 'BEGIN;'
          begin
            yield
            db.execute 'COMMIT;'
          rescue StandardError
            db.execute 'ROLLBACK;'
            raise
          end
        end

        #
        #
        def to_s
          "#{self.class}(#{cache_path})"
        end

      end

    end

  end

end
@@ -0,0 +1,42 @@
module Picky

  module Backends

    class SQLite

      # Mixin that is extended onto a single Array (see .make) so that
      # in-place changes to that Array are written back to the backend
      # under the key the Array belongs to.
      #
      module DirectlyManipulable

        attr_accessor :backend, :key

        # Extends the array with this module and tells it which backend
        # and key it belongs to.
        #
        # Returns the key (the value of the last assignment).
        #
        def self.make backend, array, key
          array.extend DirectlyManipulable
          array.backend = backend
          array.key     = key
        end

        # Appends the value, writes the array back and returns the array.
        #
        def << value
          super value
          backend[key] = self
          self
        end

        # Prepends the value, writes the array back and returns the array.
        #
        def unshift value
          super value
          backend[key] = self
          self
        end

        # Deletes the value from the array. Nothing is written if the
        # local delete returned a falsy value.
        #
        # If the array is empty afterwards, the key is deleted from the
        # backend instead of writing the empty array back, so that no
        # outdated value stays stored under the key.
        #
        # Returns whatever the local delete returned.
        #
        def delete value
          deleted = super value
          if deleted
            if empty?
              backend.delete key
            else
              backend[key] = self
            end
          end
          deleted
        end
      end

    end

  end

end