picky 3.6.7 → 3.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/lib/picky/backends/file/basic.rb +1 -1
  2. data/lib/picky/backends/file/json.rb +5 -1
  3. data/lib/picky/backends/file.rb +7 -0
  4. data/lib/picky/backends/memory.rb +7 -0
  5. data/lib/picky/backends/redis/basic.rb +3 -11
  6. data/lib/picky/backends/redis/directly_manipulable.rb +48 -0
  7. data/lib/picky/backends/redis/list.rb +39 -15
  8. data/lib/picky/backends/redis/string.rb +17 -9
  9. data/lib/picky/backends/redis.rb +102 -66
  10. data/lib/picky/backends/sqlite/array.rb +38 -0
  11. data/lib/picky/backends/sqlite/basic.rb +100 -0
  12. data/lib/picky/backends/sqlite/directly_manipulable.rb +42 -0
  13. data/lib/picky/backends/sqlite/value.rb +34 -0
  14. data/lib/picky/backends/sqlite.rb +14 -4
  15. data/lib/picky/bundle.rb +12 -5
  16. data/lib/picky/bundle_indexed.rb +15 -2
  17. data/lib/picky/bundle_indexing.rb +6 -5
  18. data/lib/picky/bundle_realtime.rb +22 -31
  19. data/lib/picky/categories_realtime.rb +1 -1
  20. data/lib/picky/category_indexed.rb +1 -1
  21. data/lib/picky/category_indexing.rb +7 -5
  22. data/lib/picky/category_realtime.rb +17 -5
  23. data/lib/picky/generators/strategy.rb +4 -0
  24. data/lib/picky/index_indexing.rb +1 -4
  25. data/lib/picky/index_realtime.rb +16 -6
  26. data/lib/picky/indexers/base.rb +7 -1
  27. data/lib/picky/indexes.rb +1 -0
  28. data/lib/picky/loader.rb +11 -7
  29. data/lib/picky/query/allocation.rb +1 -1
  30. data/lib/picky/query/indexes.rb +2 -2
  31. data/lib/picky/query/token.rb +1 -1
  32. data/lib/picky/search.rb +20 -8
  33. data/lib/picky/tokenizer.rb +6 -6
  34. data/lib/picky/wrappers/bundle/delegators.rb +3 -1
  35. data/spec/category_realtime_spec.rb +33 -0
  36. data/spec/functional/backends/file_spec.rb +98 -0
  37. data/spec/functional/backends/memory_spec.rb +96 -0
  38. data/spec/functional/backends/redis_spec.rb +107 -0
  39. data/spec/functional/backends/sqlite_spec.rb +104 -0
  40. data/spec/{specific → functional}/dynamic_weights_spec.rb +0 -0
  41. data/spec/{specific → functional}/exact_first_spec.rb +2 -4
  42. data/spec/functional/max_allocations_spec.rb +33 -0
  43. data/spec/{specific → functional}/realtime_spec.rb +0 -0
  44. data/spec/{specific → functional}/regression_spec.rb +0 -0
  45. data/spec/{specific → functional}/speed_spec.rb +0 -0
  46. data/spec/lib/backends/file/basic_spec.rb +1 -1
  47. data/spec/lib/backends/redis/basic_spec.rb +12 -13
  48. data/spec/lib/backends/redis/directly_manipulable_spec.rb +91 -0
  49. data/spec/lib/backends/redis/float_spec.rb +17 -17
  50. data/spec/lib/backends/redis/list_spec.rb +9 -9
  51. data/spec/lib/backends/sqlite/array_spec.rb +143 -0
  52. data/spec/lib/backends/sqlite/directly_manipulable_spec.rb +65 -0
  53. data/spec/lib/backends/sqlite/{db_spec.rb → value_spec.rb} +2 -7
  54. data/spec/lib/backends/sqlite_spec.rb +22 -20
  55. data/spec/lib/category_indexed_spec.rb +1 -1
  56. data/spec/lib/category_indexing_spec.rb +2 -2
  57. data/spec/lib/index_indexing_spec.rb +0 -7
  58. data/spec/lib/index_realtime_spec.rb +34 -0
  59. data/spec/lib/indexed/bundle_realtime_spec.rb +166 -75
  60. data/spec/lib/indexers/base_spec.rb +13 -1
  61. data/spec/lib/search_spec.rb +31 -20
  62. metadata +58 -34
  63. data/lib/picky/backends/sqlite/db.rb +0 -84
@@ -52,7 +52,7 @@ module Picky
52
52
  # as in #load.
53
53
  #
54
54
  def initial
55
- @initial && @initial.clone || nil
55
+ @initial && @initial.clone || {}
56
56
  end
57
57
 
58
58
  # Deletes the file.
@@ -8,11 +8,15 @@ module Picky
8
8
  #
9
9
  class JSON < Basic
10
10
 
11
+ attr_writer :mapping
12
+
11
13
  # The in-memory mapping hash, mapping
12
14
  # a Symbol key to [length, offset] of
13
15
  # the JSON data in the file.
14
16
  #
15
- attr_accessor :mapping
17
+ def mapping
18
+ @mapping || raise("The File index/category needs to be loaded first.")
19
+ end
16
20
 
17
21
  # See lib/picky/backends/file.rb for what this should return.
18
22
  #
@@ -38,6 +38,13 @@ module Picky
38
38
  extract_lambda_or(configuration, bundle) ||
39
39
  JSON.new(bundle.index_path(:configuration))
40
40
  end
41
+ # Returns an object that on #initial, #load returns an object that responds to:
42
+ # [id] # => [:sym1, :sym2]
43
+ #
44
+ def create_realtime bundle
45
+ extract_lambda_or(similarity, bundle) ||
46
+ JSON.new(bundle.index_path(:realtime))
47
+ end
41
48
 
42
49
  # Currently, the loaded ids are intersected using
43
50
  # the fast C-based intersection.
@@ -32,6 +32,13 @@ module Picky
32
32
  extract_lambda_or(configuration, bundle) ||
33
33
  JSON.new(bundle.index_path(:configuration))
34
34
  end
35
+ # Returns an object that on #initial, #load returns an object that responds to:
36
+ # [id] # => [:sym1, :sym2]
37
+ #
38
+ def create_realtime bundle
39
+ extract_lambda_or(similarity, bundle) ||
40
+ JSON.new(bundle.index_path(:realtime))
41
+ end
35
42
 
36
43
  # Returns the result ids for the allocation.
37
44
  #
@@ -24,13 +24,14 @@ module Picky
24
24
 
25
25
  @empty = options[:empty]
26
26
  @initial = options[:initial]
27
+ @immediate = options[:immediate]
27
28
  end
28
29
 
29
30
  # The empty index that is used for putting the index
30
31
  # together.
31
32
  #
32
33
  def empty
33
- @empty && @empty.clone || {}
34
+ @empty && @empty.clone || (@immediate ? self : {})
34
35
  end
35
36
 
36
37
  # The initial content before loading.
@@ -39,7 +40,7 @@ module Picky
39
40
  # this just returns the same thing as #load.
40
41
  #
41
42
  def initial
42
- @initial && @initial.clone || self
43
+ @initial && @initial.clone || (@immediate ? self : {})
43
44
  end
44
45
 
45
46
  # Returns itself.
@@ -54,15 +55,6 @@ module Picky
54
55
  # Nothing.
55
56
  end
56
57
 
57
- # Deletes the Redis index namespace.
58
- #
59
- def delete
60
- # Not implemented here.
61
- # Note: backend.flushdb might be the way to go,
62
- # but since we cannot delete by key pattern,
63
- # we don't do anything.
64
- end
65
-
66
58
  #
67
59
  #
68
60
  def to_s
@@ -0,0 +1,48 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class Redis
6
+
7
+ module DirectlyManipulable
8
+
9
+ attr_accessor :backend, :key
10
+
11
+ def self.make backend, list, key
12
+ list.extend DirectlyManipulable
13
+ list.backend = backend
14
+ list.key = key
15
+ end
16
+
17
+ # TODO Current implementation is very brittle: @@append_index is shared by all extended lists and restarts at 0 whenever this module is loaded.
18
+ #
19
+ @@append_index = 0
20
+ def << value
21
+ super value
22
+ backend.client.zadd "#{backend.namespace}:#{key}", (@@append_index+=1), value
23
+ backend[key]
24
+ end
25
+
26
+ # TODO Current implementation is very brittle: @@unshift_index is shared by all extended lists and restarts at 0 whenever this module is loaded.
27
+ #
28
+ @@unshift_index = 0
29
+ def unshift value
30
+ super value
31
+ backend.client.zadd "#{backend.namespace}:#{key}", (@@unshift_index-=1), value
32
+ backend[key]
33
+ end
34
+
35
+ def delete value
36
+ result = super value
37
+ if result
38
+ backend.client.zrem "#{backend.namespace}:#{key}", value # TODO if super(value) ?
39
+ end
40
+ result
41
+ end
42
+ end
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
@@ -6,20 +6,6 @@ module Picky
6
6
 
7
7
  class List < Basic
8
8
 
9
- # Writes the hash into Redis.
10
- #
11
- def dump hash
12
- clear
13
- hash.each_pair do |key, values|
14
- redis_key = "#{namespace}:#{key}"
15
- i = 0
16
- values.each do |value|
17
- i += 1
18
- client.zadd redis_key, i, value
19
- end
20
- end
21
- end
22
-
23
9
  # Clear the index for this list.
24
10
  #
25
11
  # Note: Perhaps we can use a server only command.
@@ -32,12 +18,50 @@ module Picky
32
18
  end
33
19
  end
34
20
 
21
+ # Deletes the list for the key.
22
+ #
23
# Deletes the sorted set that holds the list for +key+.
#
# The Redis key is built as "#{namespace}:#{key}", the same form
# #dump and #[]= write to and #[] reads from. Passing the bare
# +key+ to the client would address a different Redis key and
# leave the stored list in place.
#
# Returns whatever client.del returns.
#
def delete key
  client.del "#{namespace}:#{key}"
end
26
+
27
+ # Writes the hash into Redis.
28
+ #
29
+ def dump hash
30
+ unless @immediate
31
+ clear
32
+ # client.pipelined do
33
+ hash.each_pair do |key, values|
34
+ redis_key = "#{namespace}:#{key}"
35
+ i = 0
36
+ values.each do |value|
37
+ i += 1
38
+ client.zadd redis_key, i, value
39
+ end
40
+ end
41
+ # end
42
+ end
43
+ end
44
+
35
45
  # Get a collection.
36
46
  #
37
47
  # Internal API method for the index.
38
48
  #
39
49
  def [] key
40
- client.zrange "#{namespace}:#{key}", 0, -1
50
+ list = client.zrange "#{namespace}:#{key}", :'0', :'-1'
51
+ DirectlyManipulable.make self, list, key
52
+ list
53
+ end
54
+
55
+ # Set a single list.
56
+ #
57
+ def []= key, values
58
+ redis_key = "#{namespace}:#{key}"
59
+ i = 0
60
+ values.each do |value|
61
+ i += 1
62
+ client.zadd redis_key, i, value
63
+ end
64
+ self[key] # TODO Performance?
41
65
  end
42
66
 
43
67
  end
@@ -6,23 +6,31 @@ module Picky
6
6
 
7
7
  class String < Basic
8
8
 
9
+ # Clears the hash.
10
+ #
11
+ def clear
12
+ client.del namespace
13
+ end
14
+
15
+ # Deletes the single value.
16
+ #
17
+ def delete key
18
+ client.hdel namespace, key
19
+ end
20
+
9
21
  # Writes the hash into Redis.
10
22
  #
11
23
  # Note: We could use multi, but it did not help.
12
24
  #
13
25
  def dump hash
14
- clear
15
- hash.each_pair do |key, value|
16
- client.hset namespace, key, value
26
+ unless @immediate
27
+ clear
28
+ hash.each_pair do |key, value|
29
+ client.hset namespace, key, value
30
+ end
17
31
  end
18
32
  end
19
33
 
20
- # Clears the hash.
21
- #
22
- def clear
23
- client.del namespace
24
- end
25
-
26
34
  # Get a single value.
27
35
  #
28
36
  # Internal API method for the index.
@@ -6,13 +6,15 @@ module Picky
6
6
  #
7
7
  class Redis < Backend
8
8
 
9
- attr_reader :client
9
+ attr_reader :client,
10
+ :immediate
10
11
 
11
12
  def initialize options = {}
12
13
  super options
13
14
 
14
15
  require 'redis'
15
- @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
16
+ @client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
17
+ @immediate = options[:immediate]
16
18
  rescue LoadError => e
17
19
  warn_gem_missing 'redis', 'the Redis client'
18
20
  end
@@ -22,28 +24,35 @@ module Picky
22
24
  #
23
25
  def create_inverted bundle
24
26
  extract_lambda_or(inverted, bundle, client) ||
25
- List.new(client, "#{bundle.identifier}:inverted")
27
+ List.new(client, "#{bundle.identifier}:inverted", immediate: immediate)
26
28
  end
27
29
  # Returns an object that on #initial, #load returns an object that responds to:
28
30
  # [:token] # => 1.23 (a weight)
29
31
  #
30
32
  def create_weights bundle
31
33
  extract_lambda_or(weights, bundle, client) ||
32
- Float.new(client, "#{bundle.identifier}:weights")
34
+ Float.new(client, "#{bundle.identifier}:weights", immediate: immediate)
33
35
  end
34
36
  # Returns an object that on #initial, #load returns an object that responds to:
35
37
  # [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
36
38
  #
37
39
  def create_similarity bundle
38
40
  extract_lambda_or(similarity, bundle, client) ||
39
- List.new(client, "#{bundle.identifier}:similarity")
41
+ List.new(client, "#{bundle.identifier}:similarity", immediate: immediate)
40
42
  end
41
43
  # Returns an object that on #initial, #load returns an object that responds to:
42
44
  # [:key] # => value (a value for this config key)
43
45
  #
44
46
  def create_configuration bundle
45
47
  extract_lambda_or(configuration, bundle, client) ||
46
- String.new(client, "#{bundle.identifier}:configuration")
48
+ String.new(client, "#{bundle.identifier}:configuration", immediate: immediate)
49
+ end
50
+ # Returns an object that on #initial, #load returns an object that responds to:
51
+ # [id] # => [:sym1, :sym2]
52
+ #
53
# Builds the realtime backend object for the given bundle.
#
# Hands the bundle and the Redis client to extract_lambda_or, just
# like create_inverted, create_weights, create_similarity and
# create_configuration do, so a configured lambda receives the same
# arguments from every create_* method of this backend.
# If extract_lambda_or returns nothing, falls back to a List stored
# under "<bundle identifier>:realtime".
#
# NOTE(review): this still consults the similarity option, as the
# original did. Confirm whether a dedicated realtime option is intended.
#
def create_realtime bundle
  extract_lambda_or(similarity, bundle, client) ||
    List.new(client, "#{bundle.identifier}:realtime", immediate: immediate)
end
48
57
 
49
58
  # Does the Redis version already include
@@ -88,76 +97,103 @@ module Picky
88
97
  #
89
98
  # Note: We use the amount and offset hints to speed Redis up.
90
99
  #
100
+ # TODO What if it hasn't been dumped?
101
+ # Move this method to the actual backends?
102
+ #
91
103
  def ids combinations, amount, offset
92
- # Just checked once on the first call.
93
- #
94
- if redis_with_scripting?
95
- @@script = "local intersected = redis.call('zinterstore', ARGV[1], #(KEYS), unpack(KEYS)); if intersected == 0 then redis.call('del', ARGV[1]); return {}; end local results = redis.call('zrange', ARGV[1], tonumber(ARGV[2]), tonumber(ARGV[3])); redis.call('del', ARGV[1]); return results;"
104
+ if immediate
105
+ # Just checked once on the first call.
106
+ #
107
+ if redis_with_scripting?
108
+ @@script = "local intersected = redis.call('zinterstore', ARGV[1], #(KEYS), unpack(KEYS)); if intersected == 0 then redis.call('del', ARGV[1]); return {}; end local results = redis.call('zrange', ARGV[1], tonumber(ARGV[2]), tonumber(ARGV[3])); redis.call('del', ARGV[1]); return results;"
96
109
 
97
- require 'digest/sha1'
98
- @@sent_once = nil
110
+ require 'digest/sha1'
111
+ @@sent_once = nil
99
112
 
100
- # Scripting version of #ids.
101
- #
102
- def ids combinations, amount, offset
103
- identifiers = combinations.inject([]) do |identifiers, combination|
104
- identifiers << "#{combination.identifier}"
113
+ # Scripting version of #ids.
114
+ #
115
+ class << self
116
+ def ids combinations, amount, offset
117
+ identifiers = combinations.inject([]) do |identifiers, combination|
118
+ identifiers << "#{combination.identifier}"
119
+ end
120
+
121
+ # Assume it's using EVALSHA.
122
+ #
123
+ begin
124
+ client.evalsha @@sent_once,
125
+ identifiers.size,
126
+ *identifiers,
127
+ generate_intermediate_result_id,
128
+ offset,
129
+ (offset + amount)
130
+ rescue RuntimeError => e
131
+ # Make the server have a SHA-1 for the script.
132
+ #
133
+ @@sent_once = Digest::SHA1.hexdigest @@script
134
+ client.eval @@script,
135
+ identifiers.size,
136
+ *identifiers,
137
+ generate_intermediate_result_id,
138
+ offset,
139
+ (offset + amount)
140
+ end
141
+ end
105
142
  end
106
-
107
- # Assume it's using EVALSHA.
143
+ else
144
+ # Non-Scripting version of #ids.
108
145
  #
109
- begin
110
- client.evalsha @@sent_once,
111
- identifiers.size,
112
- *identifiers,
113
- generate_intermediate_result_id,
114
- offset,
115
- (offset + amount)
116
- rescue RuntimeError => e
117
- # Make the server have a SHA-1 for the script.
118
- #
119
- @@sent_once = Digest::SHA1.hexdigest @@script
120
- client.eval @@script,
121
- identifiers.size,
122
- *identifiers,
123
- generate_intermediate_result_id,
124
- offset,
125
- (offset + amount)
146
+ class << self
147
+ def ids combinations, amount, offset
148
+ identifiers = combinations.inject([]) do |identifiers, combination|
149
+ identifiers << "#{combination.identifier}"
150
+ end
151
+
152
+ result_id = generate_intermediate_result_id
153
+
154
+ # Intersect and store.
155
+ #
156
+ intersected = client.zinterstore result_id, identifiers
157
+
158
+ # Return clean and early if there has been no intersection.
159
+ #
160
+ if intersected.zero?
161
+ client.del result_id
162
+ return []
163
+ end
164
+
165
+ # Get the stored result.
166
+ #
167
+ results = client.zrange result_id, offset, (offset + amount)
168
+
169
+ # Delete the stored result as it was only for temporary purposes.
170
+ #
171
+ # Note: I could also not delete it, but that
172
+ # would not be clean at all.
173
+ #
174
+ client.del result_id
175
+
176
+ results
177
+ end
126
178
  end
127
179
  end
128
180
  else
129
- # Non-Scripting version of #ids.
181
+ # TODO Refactor!
130
182
  #
131
- def ids combinations, amount, offset
132
- identifiers = combinations.inject([]) do |identifiers, combination|
133
- identifiers << "#{combination.identifier}"
134
- end
135
-
136
- result_id = generate_intermediate_result_id
137
-
138
- # Intersect and store.
139
- #
140
- intersected = client.zinterstore result_id, identifiers
183
+ class << self
184
+ def ids combinations, _, _
185
+ # Get the ids for each combination.
186
+ #
187
+ id_arrays = combinations.inject([]) do |total, combination|
188
+ total << combination.ids
189
+ end
141
190
 
142
- # Return clean and early if there has been no intersection.
143
- #
144
- if intersected.zero?
145
- client.del result_id
146
- return []
191
+ # Call the optimized C algorithm.
192
+ #
193
+ # Note: It orders the passed arrays by size.
194
+ #
195
+ Performant::Array.memory_efficient_intersect id_arrays
147
196
  end
148
-
149
- # Get the stored result.
150
- #
151
- results = client.zrange result_id, offset, (offset + amount)
152
-
153
- # Delete the stored result as it was only for temporary purposes.
154
- #
155
- # Note: I could also not delete it, but that
156
- # would not be clean at all.
157
- #
158
- client.del result_id
159
-
160
- results
161
197
  end
162
198
  end
163
199
 
@@ -184,7 +220,7 @@ module Picky
184
220
  # Use the host and pid (generated lazily in child processes) for the result.
185
221
  #
186
222
  def generate_intermediate_result_id
187
- :"#{host}:#{pid}:picky:result"
223
+ @intermediate_result_id ||= "#{host}:#{pid}:picky:result"
188
224
  end
189
225
 
190
226
  end
@@ -0,0 +1,38 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class SQLite
6
+
7
+ class Array < Basic
8
+
9
# Stores +array+ under +key+ and returns it, made directly manipulable.
#
# Any rows already stored for the key are removed first: the key
# index created in Basic#reset_db is not unique, so
# "insert or replace" alone would add a second row instead of
# replacing, and #[] only ever reads one row (limit 1).
# An empty array is represented by having no row at all, which #[]
# reads back as [].
#
# key   - Anything responding to #to_s; the string form is the row key.
# array - The array to persist (encoded with Yajl).
#
# Returns the given array, extended with DirectlyManipulable.
#
def []= key, array
  row_key = key.to_s

  # Drop stale content so the insert below is the only row for the key.
  #
  db.execute 'delete from key_value where key = ?', row_key

  # Only non-empty arrays are written.
  #
  unless array.empty?
    db.execute 'insert or replace into key_value (key, value) values (?,?)', row_key, Yajl::Encoder.encode(array)
  end

  DirectlyManipulable.make self, array, key
  array
end
17
+
18
+ def [] key
19
+ res = db.execute "select value from key_value where key = ? limit 1;", key.to_s
20
+
21
+ return res unless res
22
+
23
+ array = res.empty? ? [] : Yajl::Parser.parse(res.first.first)
24
+ DirectlyManipulable.make self, array, key
25
+ array
26
+ end
27
+
28
+ def delete key
29
+ db.execute "delete from key_value where key = (?)", key.to_s
30
+ end
31
+
32
+ end
33
+
34
+ end
35
+
36
+ end
37
+
38
+ end
@@ -0,0 +1,100 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class SQLite
6
+
7
+ class Basic
8
+
9
+ include Helpers::File
10
+
11
+ attr_reader :cache_path, :db
12
+
13
+ def initialize cache_path, options = {}
14
+ @cache_path = "#{cache_path}.sqlite3"
15
+ @empty = options[:empty]
16
+ @initial = options[:initial]
17
+ @self_indexed = options[:self_indexed]
18
+
19
+ lazily_initialize_client
20
+ end
21
+
22
+ def initial
23
+ @initial && @initial.clone || (@self_indexed ? self : {})
24
+ end
25
+
26
+ def empty
27
+ @empty && @empty.clone || (@self_indexed ? self.reset_db.asynchronous : {})
28
+ end
29
+
30
+ def dump internal
31
+ dump_sqlite internal unless @self_indexed
32
+ self
33
+ end
34
+
35
+ def load
36
+ self
37
+ end
38
+
39
+ def clear
40
+ db.execute 'delete from key_value'
41
+ end
42
+
43
+ def lazily_initialize_client
44
+ @db ||= SQLite3::Database.new cache_path
45
+ end
46
+
47
+ def dump_sqlite internal
48
+ reset_db
49
+
50
+ transaction do
51
+ # Note: Internal structures need to
52
+ # implement each.
53
+ #
54
+ internal.each do |key, value|
55
+ encoded_value = Yajl::Encoder.encode value
56
+ db.execute 'insert into key_value values (?,?)', key.to_s, encoded_value
57
+ end
58
+ end
59
+ end
60
+
61
+ def reset_db
62
+ create_directory cache_path
63
+ lazily_initialize_client
64
+
65
+ # TODO Could this be replaced by a truncate statement?
66
+ #
67
+ db.execute 'drop table if exists key_value;'
68
+ db.execute 'create table key_value (key varchar(255), value text);'
69
+ db.execute 'create index key_idx on key_value (key);'
70
+
71
+ self
72
+ end
73
+
74
+ def asynchronous
75
+ db.execute 'PRAGMA synchronous = OFF;'
76
+ self
77
+ end
78
+
79
+ def synchronous
80
+ db.execute 'PRAGMA synchronous = ON;'
81
+ self
82
+ end
83
+
84
# Runs the given block between BEGIN and COMMIT.
#
# If the block does not run to completion (it raises, or exits
# early), the transaction is rolled back instead of being left
# open, so a later BEGIN on the same connection does not fail
# because of a still-open transaction. An exception raised by the
# block propagates to the caller after the rollback.
#
# Returns the result of executing COMMIT when the block completes.
#
def transaction
  db.execute 'BEGIN;'

  completed = false
  begin
    yield
    completed = true
  ensure
    # Only reached with completed == false when the block was
    # left abnormally; undo whatever it wrote so far.
    #
    db.execute 'ROLLBACK;' unless completed
  end

  db.execute 'COMMIT;'
end
89
+
90
+ def to_s
91
+ "#{self.class}(#{cache_path})"
92
+ end
93
+
94
+ end
95
+
96
+ end
97
+
98
+ end
99
+
100
+ end
@@ -0,0 +1,42 @@
1
+ module Picky
2
+
3
+ module Backends
4
+
5
+ class SQLite
6
+
7
+ module DirectlyManipulable
8
+
9
+ attr_accessor :backend, :key
10
+
11
+ def self.make backend, array, key
12
+ array.extend DirectlyManipulable
13
+ array.backend = backend
14
+ array.key = key
15
+ end
16
+
17
+ def << value
18
+ super value
19
+ backend[key] = self
20
+ self
21
+ end
22
+
23
+ def unshift value
24
+ super value
25
+ backend[key] = self
26
+ self
27
+ end
28
+
29
+ def delete value
30
+ value = super value
31
+ if value
32
+ backend[key] = self
33
+ end
34
+ value
35
+ end
36
+ end
37
+
38
+ end
39
+
40
+ end
41
+
42
+ end